// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build ignore package main import ( "bytes" "io/ioutil" "log" "text/template" ) const ( copyright = "" + "// Copyright 2016 The Go Authors. All rights reserved.\n" + "// Use of this source code is governed by a BSD-style\n" + "// license that can be found in the LICENSE file.\n" doNotEdit = "// generated by go run gen.go; DO NOT EDIT\n" dashDashDash = "// --------" ) func main() { tmpl, err := ioutil.ReadFile("gen_acc_amd64.s.tmpl") if err != nil { log.Fatalf("ReadFile: %v", err) } if !bytes.HasPrefix(tmpl, []byte(copyright)) { log.Fatal("source template did not start with the copyright header") } tmpl = tmpl[len(copyright):] preamble := []byte(nil) if i := bytes.Index(tmpl, []byte(dashDashDash)); i < 0 { log.Fatalf("source template did not contain %q", dashDashDash) } else { preamble, tmpl = tmpl[:i], tmpl[i:] } t, err := template.New("").Parse(string(tmpl)) if err != nil { log.Fatalf("Parse: %v", err) } out := bytes.NewBuffer(nil) out.WriteString(doNotEdit) out.Write(preamble) for i, v := range instances { if i != 0 { out.WriteString("\n") } if err := t.Execute(out, v); err != nil { log.Fatalf("Execute(%q): %v", v.ShortName, err) } } if err := ioutil.WriteFile("acc_amd64.s", out.Bytes(), 0666); err != nil { log.Fatalf("WriteFile: %v", err) } } var instances = []struct { LongName string ShortName string FrameSize string SrcType string XMM3 string XMM4 string XMM5 string Setup string Cleanup string Add string ClampAndScale string ConvertToInt32 string }{{ LongName: "fixedAccumulateOpSrc", ShortName: "fxAccOpSrc", FrameSize: fxFrameSize, SrcType: fxSrcType, XMM3: fxXMM3, XMM4: fxXMM4, XMM5: fxXMM5, Setup: fxSetup, Cleanup: fxCleanup, Add: fxAdd, ClampAndScale: fxClampAndScale, ConvertToInt32: fxConvertToInt32, }, { LongName: "floatingAccumulateOpSrc", ShortName: "flAccOpSrc", FrameSize: flFrameSize, SrcType: flSrcType, XMM3: flXMM3, XMM4: flXMM4, XMM5: flXMM5, Setup: flSetup, Cleanup: flCleanup, Add: flAdd, ClampAndScale: flClampAndScale, ConvertToInt32: flConvertToInt32, }} const ( fxFrameSize = `0` flFrameSize = `8` fxSrcType = `[]uint32` flSrcType = `[]float32` fxXMM3 = `-` flXMM3 = `flAlmost256` fxXMM4 = `-` flXMM4 = `flOne` fxXMM5 = `fxAlmost256` flXMM5 = `flSignMask` fxSetup = ` // fxAlmost256 := XMM(0x000000ff repeated four times) // Maximum of an uint8. MOVOU fxAlmost256<>(SB), X5 ` flSetup = ` // Set MXCSR bits 13 and 14, so that the CVTPS2PL below is "Round To Zero". STMXCSR mxcsrOrig-8(SP) MOVL mxcsrOrig-8(SP), AX ORL $0x6000, AX MOVL AX, mxcsrNew-4(SP) LDMXCSR mxcsrNew-4(SP) // flAlmost256 := XMM(0x437fffff repeated four times) // 255.99998 as a float32. // flOne := XMM(0x3f800000 repeated four times) // 1 as a float32. // flSignMask := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32. MOVOU flAlmost256<>(SB), X3 MOVOU flOne<>(SB), X4 MOVOU flSignMask<>(SB), X5 ` fxCleanup = `// No-op.` flCleanup = `LDMXCSR mxcsrOrig-8(SP)` fxAdd = `PADDD` flAdd = `ADDPS` fxClampAndScale = ` // y = abs(x) // y >>= 12 // Shift by 2*ϕ - 8. // y = min(y, fxAlmost256) // // pabsd %xmm1,%xmm2 // psrld $0xc,%xmm2 // pminud %xmm5,%xmm2 // // Hopefully we'll get these opcode mnemonics into the assembler for Go // 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but // it's similar. BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1 BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0c BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5 ` flClampAndScale = ` // y = x & flSignMask // y = min(y, flOne) // y = mul(y, flAlmost256) MOVOU X5, X2 ANDPS X1, X2 MINPS X4, X2 MULPS X3, X2 ` fxConvertToInt32 = `// No-op.` flConvertToInt32 = `CVTPS2PL X2, X2` )