golang-image/vector/gen.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main

import (
	"bytes"
	"io/ioutil"
	"log"
	"text/template"
)

// Fixed text that main looks for in the template or writes to its output.
const (
	// copyright is the license header that the template file must start
	// with. main verifies its presence and strips it from the template.
	copyright = `// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
`

	// doNotEdit is the first line main writes to the generated file.
	doNotEdit = "// generated by go run gen.go; DO NOT EDIT\n"

	// dashDashDash is the marker main searches for: template text before
	// its first occurrence is the preamble, and the text from the marker
	// onwards is parsed as the template proper.
	dashDashDash = "// --------"
)

// main generates acc_amd64.s from gen_acc_amd64.s.tmpl.
//
// The template file must begin with the copyright header, which is removed.
// The text before the first dashDashDash marker is a preamble that is copied
// to the output once, right after the doNotEdit line. The text from the
// marker onwards is parsed as a text/template and executed once per entry of
// instances, with a newline written between consecutive expansions. Any
// failure terminates the program via log.Fatal / log.Fatalf.
func main() {
	src, err := ioutil.ReadFile("gen_acc_amd64.s.tmpl")
	if err != nil {
		log.Fatalf("ReadFile: %v", err)
	}
	if !bytes.HasPrefix(src, []byte(copyright)) {
		log.Fatal("source template did not start with the copyright header")
	}
	src = src[len(copyright):]

	// Split the remaining text at the marker; the marker itself stays with
	// the template body.
	cut := bytes.Index(src, []byte(dashDashDash))
	if cut < 0 {
		log.Fatalf("source template did not contain %q", dashDashDash)
	}
	preamble, body := src[:cut], src[cut:]

	t, err := template.New("").Parse(string(body))
	if err != nil {
		log.Fatalf("Parse: %v", err)
	}

	var out bytes.Buffer
	out.WriteString(doNotEdit)
	out.Write(preamble)
	for i := range instances {
		if i > 0 {
			out.WriteString("\n")
		}
		inst := instances[i]
		if err := t.Execute(&out, inst); err != nil {
			log.Fatalf("Execute(%q): %v", inst.ShortName, err)
		}
	}

	if err := ioutil.WriteFile("acc_amd64.s", out.Bytes(), 0666); err != nil {
		log.Fatalf("WriteFile: %v", err)
	}
}

// instances holds one set of template parameters per expansion that main
// writes to the generated file. Every field is a string handed to the
// template as-is.
var instances = []struct {
	// Identifiers for the instance. main quotes ShortName in the error it
	// reports when executing the template fails.
	LongName, ShortName string

	// NOTE(review): presumably the stack frame size and the Go type of the
	// source slice of the generated function; the template that consumes
	// these is not part of this file, so confirm against it.
	FrameSize, SrcType string

	// Names of the constants that Setup loads into X3, X4 and X5. The
	// fixed-point instance uses "-" for X3 and X4, which its Setup does
	// not load.
	XMM3, XMM4, XMM5 string

	// Assembly text for the instance-specific parts of the routine.
	Setup, Cleanup, Add, ClampAndScale, ConvertToInt32 string
}{
	{
		LongName:       "fixedAccumulateOpSrc",
		ShortName:      "fxAccOpSrc",
		FrameSize:      fxFrameSize,
		SrcType:        fxSrcType,
		XMM3:           fxXMM3,
		XMM4:           fxXMM4,
		XMM5:           fxXMM5,
		Setup:          fxSetup,
		Cleanup:        fxCleanup,
		Add:            fxAdd,
		ClampAndScale:  fxClampAndScale,
		ConvertToInt32: fxConvertToInt32,
	},
	{
		LongName:       "floatingAccumulateOpSrc",
		ShortName:      "flAccOpSrc",
		FrameSize:      flFrameSize,
		SrcType:        flSrcType,
		XMM3:           flXMM3,
		XMM4:           flXMM4,
		XMM5:           flXMM5,
		Setup:          flSetup,
		Cleanup:        flCleanup,
		Add:            flAdd,
		ClampAndScale:  flClampAndScale,
		ConvertToInt32: flConvertToInt32,
	},
}

// Template parameters for the fixed-point ("fx") instance.
const (
	fxFrameSize = `0`
	fxSrcType   = `[]uint32`

	// Only X5 is loaded by fxSetup; "-" marks X3 and X4 as not loaded.
	fxXMM3 = `-`
	fxXMM4 = `-`
	fxXMM5 = `fxAlmost256`

	fxSetup = `
		// fxAlmost256 := XMM(0x000000ff repeated four times) // Maximum of an uint8.
		MOVOU fxAlmost256<>(SB), X5
		`

	fxCleanup = `// No-op.`

	fxAdd = `PADDD`

	// The BYTE sequences below hand-encode the three instructions named in
	// the fragment's own comment (pabsd, psrld, pminud).
	fxClampAndScale = `
		// y = abs(x)
		// y >>= 12 // Shift by 2*ϕ - 8.
		// y = min(y, fxAlmost256)
		//
		// pabsd  %xmm1,%xmm2
		// psrld  $0xc,%xmm2
		// pminud %xmm5,%xmm2
		//
		// Hopefully we'll get these opcode mnemonics into the assembler for Go
		// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
		// it's similar.
		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0c
		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
		`

	fxConvertToInt32 = `// No-op.`
)

// Template parameters for the floating-point ("fl") instance.
const (
	flFrameSize = `8`
	flSrcType   = `[]float32`

	// Names of the constants that flSetup loads into X3, X4 and X5.
	flXMM3 = `flAlmost256`
	flXMM4 = `flOne`
	flXMM5 = `flSignMask`

	// flSetup saves the current MXCSR value in mxcsrOrig and installs a
	// modified copy; flCleanup loads the saved value back.
	flSetup = `
		// Set MXCSR bits 13 and 14, so that the CVTPS2PL below is "Round To Zero".
		STMXCSR mxcsrOrig-8(SP)
		MOVL    mxcsrOrig-8(SP), AX
		ORL     $0x6000, AX
		MOVL    AX, mxcsrNew-4(SP)
		LDMXCSR mxcsrNew-4(SP)

		// flAlmost256 := XMM(0x437fffff repeated four times) // 255.99998 as a float32.
		// flOne       := XMM(0x3f800000 repeated four times) // 1 as a float32.
		// flSignMask  := XMM(0x7fffffff repeated four times) // All but the sign bit of a float32.
		MOVOU flAlmost256<>(SB), X3
		MOVOU flOne<>(SB), X4
		MOVOU flSignMask<>(SB), X5
		`

	flCleanup = `LDMXCSR mxcsrOrig-8(SP)`

	flAdd = `ADDPS`

	flClampAndScale = `
		// y = x & flSignMask
		// y = min(y, flOne)
		// y = mul(y, flAlmost256)
		MOVOU X5, X2
		ANDPS X1, X2
		MINPS X4, X2
		MULPS X3, X2
		`

	flConvertToInt32 = `CVTPS2PL X2, X2`
)