4180bcbc4a
Only the YCbCr benchmarks show significant changes. The other benchmark
changes look noisy.

The PixOffset and YOffset calls were previously already inlined by the gc
compiler. COffset was different because it's more complicated than YOffset,
and the switch inside the COffset body is redundant when you already know
the src image sratio.
http://golang.org/src/image/ycbcr.go?s=2377:2414#L77

benchmark                     old ns/op     new ns/op     delta
BenchmarkScaleLargeDownNN     1037504       908236        -12.46%
BenchmarkScaleLargeDownAB     3196568       2735776       -14.42%
BenchmarkScaleLargeDownBL     357165552     311463393     -12.80%
BenchmarkScaleLargeDownCR     649403305     544985134     -16.08%
BenchmarkScaleSrcYCbCr        3204063       2699147       -15.76%
BenchmarkTformABSrcYCbCr      2155142       1968540       -8.66%
BenchmarkTformCRSrcYCbCr      11672625      9865358       -15.48%

Change-Id: Ifa109363a1282ab114b2fdb0b577dcafef927333
Reviewed-on: https://go-review.googlesource.com/7880
Reviewed-by: Rob Pike <r@golang.org>
1072 lines
29 KiB
Go
1072 lines
29 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
// +build ignore
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"go/format"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
// debug is the -debug command line flag. When it is set, main prints the
// unformatted generated code to standard output instead of formatting it
// and writing impl.go.
var debug = flag.Bool("debug", false, "print the unformatted generated code to stdout instead of writing impl.go")
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
w := new(bytes.Buffer)
|
|
w.WriteString("// generated by \"go run gen.go\". DO NOT EDIT.\n\n" +
|
|
"package draw\n\nimport (\n" +
|
|
"\"image\"\n" +
|
|
"\"image/color\"\n" +
|
|
"\"math\"\n" +
|
|
"\n" +
|
|
"\"golang.org/x/image/math/f64\"\n" +
|
|
")\n")
|
|
|
|
gen(w, "nnInterpolator", codeNNScaleLeaf, codeNNTransformLeaf)
|
|
gen(w, "ablInterpolator", codeABLScaleLeaf, codeABLTransformLeaf)
|
|
genKernel(w)
|
|
|
|
if *debug {
|
|
os.Stdout.Write(w.Bytes())
|
|
return
|
|
}
|
|
out, err := format.Source(w.Bytes())
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
if err := ioutil.WriteFile("impl.go", out, 0660); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
var (
	// dsTypes are the (dst image type, src image type) pairs to generate
	// scale_DType_SType implementations for. The last element in the slice
	// should be the fallback pair ("Image", "image.Image").
	//
	// TODO: add *image.CMYK src type after Go 1.5 is released.
	dsTypes = []struct{ dType, sType string }{
		{"*image.RGBA", "*image.Gray"},
		{"*image.RGBA", "*image.NRGBA"},
		{"*image.RGBA", "*image.RGBA"},
		{"*image.RGBA", "*image.Uniform"},
		{"*image.RGBA", "*image.YCbCr"},
		{"*image.RGBA", "image.Image"},
		{"Image", "image.Image"},
	}

	// dTypes and sTypes are the distinct dst and src types found in dsTypes,
	// in order of first appearance. They are filled in by init.
	dTypes, sTypes []string

	// sTypesForDType maps a dst type to the src types it is paired with in
	// dsTypes. init also adds the pseudo dst type "anyDType", which maps to
	// every src type.
	sTypesForDType = map[string][]string{}

	// subsampleRatios are the suffixes of the image.YCbCrSubsampleRatioNNN
	// names that *image.YCbCr sources are specialized for.
	subsampleRatios = []string{
		"444",
		"422",
		"420",
		"440",
	}
)
|
|
|
|
func init() {
|
|
dTypesSeen := map[string]bool{}
|
|
sTypesSeen := map[string]bool{}
|
|
for _, t := range dsTypes {
|
|
if !sTypesSeen[t.sType] {
|
|
sTypesSeen[t.sType] = true
|
|
sTypes = append(sTypes, t.sType)
|
|
}
|
|
if !dTypesSeen[t.dType] {
|
|
dTypesSeen[t.dType] = true
|
|
dTypes = append(dTypes, t.dType)
|
|
}
|
|
sTypesForDType[t.dType] = append(sTypesForDType[t.dType], t.sType)
|
|
}
|
|
sTypesForDType["anyDType"] = sTypes
|
|
}
|
|
|
|
// data holds the values that the "$foo" placeholders of a code template are
// expanded with.
type data struct {
	// dType is the dst image type, substituted for $dType (and, without its
	// package qualifier, for $dTypeRN).
	dType string
	// sType is the src image type, substituted for $sType and $sTypeRN.
	sType string
	// sratio is the subsample ratio suffix, such as "420", substituted for
	// $sratio. expn fills it in for *image.YCbCr sources when it is empty.
	sratio string
	// receiver is the method receiver type, substituted for $receiver.
	receiver string
}
|
|
|
|
func gen(w *bytes.Buffer, receiver string, codes ...string) {
|
|
expn(w, codeRoot, &data{receiver: receiver})
|
|
for _, code := range codes {
|
|
for _, t := range dsTypes {
|
|
expn(w, code, &data{
|
|
dType: t.dType,
|
|
sType: t.sType,
|
|
receiver: receiver,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
func genKernel(w *bytes.Buffer) {
|
|
expn(w, codeKernelRoot, &data{})
|
|
for _, sType := range sTypes {
|
|
expn(w, codeKernelScaleLeafX, &data{
|
|
sType: sType,
|
|
})
|
|
}
|
|
for _, dType := range dTypes {
|
|
expn(w, codeKernelScaleLeafY, &data{
|
|
dType: dType,
|
|
})
|
|
}
|
|
for _, t := range dsTypes {
|
|
expn(w, codeKernelTransformLeaf, &data{
|
|
dType: t.dType,
|
|
sType: t.sType,
|
|
})
|
|
}
|
|
}
|
|
|
|
func expn(w *bytes.Buffer, code string, d *data) {
|
|
if d.sType == "*image.YCbCr" && d.sratio == "" {
|
|
for _, sratio := range subsampleRatios {
|
|
e := *d
|
|
e.sratio = sratio
|
|
expn(w, code, &e)
|
|
}
|
|
return
|
|
}
|
|
|
|
for _, line := range strings.Split(code, "\n") {
|
|
line = expnLine(line, d)
|
|
if line == ";" {
|
|
continue
|
|
}
|
|
fmt.Fprintln(w, line)
|
|
}
|
|
}
|
|
|
|
func expnLine(line string, d *data) string {
|
|
for {
|
|
i := strings.IndexByte(line, '$')
|
|
if i < 0 {
|
|
break
|
|
}
|
|
prefix, s := line[:i], line[i+1:]
|
|
|
|
i = len(s)
|
|
for j, c := range s {
|
|
if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
|
|
i = j
|
|
break
|
|
}
|
|
}
|
|
dollar, suffix := s[:i], s[i:]
|
|
|
|
e := expnDollar(prefix, dollar, suffix, d)
|
|
if e == "" {
|
|
log.Fatalf("couldn't expand %q", line)
|
|
}
|
|
line = e
|
|
}
|
|
return line
|
|
}
|
|
|
|
// expnDollar expands a "$foo" fragment in a line of generated code. It returns
|
|
// the empty string if there was a problem. It returns ";" if the generated
|
|
// code is a no-op.
|
|
func expnDollar(prefix, dollar, suffix string, d *data) string {
|
|
switch dollar {
|
|
case "dType":
|
|
return prefix + d.dType + suffix
|
|
case "dTypeRN":
|
|
return prefix + relName(d.dType) + suffix
|
|
case "sratio":
|
|
return prefix + d.sratio + suffix
|
|
case "sType":
|
|
return prefix + d.sType + suffix
|
|
case "sTypeRN":
|
|
return prefix + relName(d.sType) + suffix
|
|
case "receiver":
|
|
return prefix + d.receiver + suffix
|
|
|
|
case "switch":
|
|
return expnSwitch("", true, suffix)
|
|
case "switchD":
|
|
return expnSwitch("", false, suffix)
|
|
case "switchS":
|
|
return expnSwitch("anyDType", false, suffix)
|
|
|
|
case "preOuter":
|
|
switch d.dType {
|
|
default:
|
|
return ";"
|
|
case "Image":
|
|
return "" +
|
|
"dstColorRGBA64 := &color.RGBA64{}\n" +
|
|
"dstColor := color.Color(dstColorRGBA64)"
|
|
}
|
|
|
|
case "preInner":
|
|
switch d.dType {
|
|
default:
|
|
return ";"
|
|
case "*image.RGBA":
|
|
return "d := " + pixOffset("dst", "dr.Min.X+adr.Min.X", "dr.Min.Y+int(dy)", "*4", "*dst.Stride")
|
|
}
|
|
|
|
case "preKernelInner":
|
|
switch d.dType {
|
|
default:
|
|
return ";"
|
|
case "*image.RGBA":
|
|
return "d := " + pixOffset("dst", "dr.Min.X+int(dx)", "dr.Min.Y+adr.Min.Y", "*4", "*dst.Stride")
|
|
}
|
|
|
|
case "blend":
|
|
args, _ := splitArgs(suffix)
|
|
if len(args) != 4 {
|
|
return ""
|
|
}
|
|
switch d.sType {
|
|
default:
|
|
return fmt.Sprintf(""+
|
|
"%sr = %s*%sr + %s*%sr\n"+
|
|
"%sg = %s*%sg + %s*%sg\n"+
|
|
"%sb = %s*%sb + %s*%sb\n"+
|
|
"%sa = %s*%sa + %s*%sa",
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
)
|
|
case "*image.Gray":
|
|
return fmt.Sprintf(""+
|
|
"%sr = %s*%sr + %s*%sr",
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
)
|
|
case "*image.YCbCr":
|
|
return fmt.Sprintf(""+
|
|
"%sr = %s*%sr + %s*%sr\n"+
|
|
"%sg = %s*%sg + %s*%sg\n"+
|
|
"%sb = %s*%sb + %s*%sb",
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
args[3], args[0], args[1], args[2], args[3],
|
|
)
|
|
}
|
|
|
|
case "outputu":
|
|
args, _ := splitArgs(suffix)
|
|
if len(args) != 3 {
|
|
return ""
|
|
}
|
|
switch d.dType {
|
|
default:
|
|
log.Fatalf("bad dType %q", d.dType)
|
|
case "Image":
|
|
switch d.sType {
|
|
default:
|
|
return fmt.Sprintf(""+
|
|
"dstColorRGBA64.R = uint16(%sr)\n"+
|
|
"dstColorRGBA64.G = uint16(%sg)\n"+
|
|
"dstColorRGBA64.B = uint16(%sb)\n"+
|
|
"dstColorRGBA64.A = uint16(%sa)\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2], args[2], args[2], args[2],
|
|
args[0], args[1],
|
|
)
|
|
case "*image.Gray":
|
|
return fmt.Sprintf(""+
|
|
"out := uint16(%sr)\n"+
|
|
"dstColorRGBA64.R = out\n"+
|
|
"dstColorRGBA64.G = out\n"+
|
|
"dstColorRGBA64.B = out\n"+
|
|
"dstColorRGBA64.A = 0xffff\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2],
|
|
args[0], args[1],
|
|
)
|
|
case "*image.YCbCr":
|
|
return fmt.Sprintf(""+
|
|
"dstColorRGBA64.R = uint16(%sr)\n"+
|
|
"dstColorRGBA64.G = uint16(%sg)\n"+
|
|
"dstColorRGBA64.B = uint16(%sb)\n"+
|
|
"dstColorRGBA64.A = 0xffff\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2], args[2], args[2],
|
|
args[0], args[1],
|
|
)
|
|
}
|
|
case "*image.RGBA":
|
|
switch d.sType {
|
|
default:
|
|
return fmt.Sprintf(""+
|
|
"dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
|
|
"dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
|
|
"dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
|
|
"dst.Pix[d+3] = uint8(uint32(%sa) >> 8)",
|
|
args[2], args[2], args[2], args[2],
|
|
)
|
|
case "*image.Gray":
|
|
return fmt.Sprintf(""+
|
|
"out := uint8(uint32(%sr) >> 8)\n"+
|
|
"dst.Pix[d+0] = out\n"+
|
|
"dst.Pix[d+1] = out\n"+
|
|
"dst.Pix[d+2] = out\n"+
|
|
"dst.Pix[d+3] = 0xff",
|
|
args[2],
|
|
)
|
|
case "*image.YCbCr":
|
|
return fmt.Sprintf(""+
|
|
"dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
|
|
"dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
|
|
"dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
|
|
"dst.Pix[d+3] = 0xff",
|
|
args[2], args[2], args[2],
|
|
)
|
|
}
|
|
}
|
|
|
|
case "outputf":
|
|
args, _ := splitArgs(suffix)
|
|
if len(args) != 5 {
|
|
return ""
|
|
}
|
|
ret := ""
|
|
switch d.dType {
|
|
default:
|
|
log.Fatalf("bad dType %q", d.dType)
|
|
case "Image":
|
|
switch d.sType {
|
|
default:
|
|
ret = fmt.Sprintf(""+
|
|
"dstColorRGBA64.R = %s(%sr * %s)\n"+
|
|
"dstColorRGBA64.G = %s(%sg * %s)\n"+
|
|
"dstColorRGBA64.B = %s(%sb * %s)\n"+
|
|
"dstColorRGBA64.A = %s(%sa * %s)\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[0], args[1],
|
|
)
|
|
case "*image.Gray":
|
|
ret = fmt.Sprintf(""+
|
|
"out := %s(%sr * %s)\n"+
|
|
"dstColorRGBA64.R = out\n"+
|
|
"dstColorRGBA64.G = out\n"+
|
|
"dstColorRGBA64.B = out\n"+
|
|
"dstColorRGBA64.A = 0xffff\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2], args[3], args[4],
|
|
args[0], args[1],
|
|
)
|
|
case "*image.YCbCr":
|
|
ret = fmt.Sprintf(""+
|
|
"dstColorRGBA64.R = %s(%sr * %s)\n"+
|
|
"dstColorRGBA64.G = %s(%sg * %s)\n"+
|
|
"dstColorRGBA64.B = %s(%sb * %s)\n"+
|
|
"dstColorRGBA64.A = 0xffff\n"+
|
|
"dst.Set(%s, %s, dstColor)",
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[0], args[1],
|
|
)
|
|
}
|
|
case "*image.RGBA":
|
|
switch d.sType {
|
|
default:
|
|
ret = fmt.Sprintf(""+
|
|
"dst.Pix[d+0] = uint8(%s(%sr * %s) >> 8)\n"+
|
|
"dst.Pix[d+1] = uint8(%s(%sg * %s) >> 8)\n"+
|
|
"dst.Pix[d+2] = uint8(%s(%sb * %s) >> 8)\n"+
|
|
"dst.Pix[d+3] = uint8(%s(%sa * %s) >> 8)",
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
)
|
|
case "*image.Gray":
|
|
ret = fmt.Sprintf(""+
|
|
"out := uint8(%s(%sr * %s) >> 8)\n"+
|
|
"dst.Pix[d+0] = out\n"+
|
|
"dst.Pix[d+1] = out\n"+
|
|
"dst.Pix[d+2] = out\n"+
|
|
"dst.Pix[d+3] = 0xff",
|
|
args[2], args[3], args[4],
|
|
)
|
|
case "*image.YCbCr":
|
|
ret = fmt.Sprintf(""+
|
|
"dst.Pix[d+0] = uint8(%s(%sr * %s) >> 8)\n"+
|
|
"dst.Pix[d+1] = uint8(%s(%sg * %s) >> 8)\n"+
|
|
"dst.Pix[d+2] = uint8(%s(%sb * %s) >> 8)\n"+
|
|
"dst.Pix[d+3] = 0xff",
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
args[2], args[3], args[4],
|
|
)
|
|
}
|
|
}
|
|
return strings.Replace(ret, " * 1)", ")", -1)
|
|
|
|
case "srcf", "srcu":
|
|
lhs, eqOp := splitEq(prefix)
|
|
if lhs == "" {
|
|
return ""
|
|
}
|
|
args, extra := splitArgs(suffix)
|
|
if len(args) != 2 {
|
|
return ""
|
|
}
|
|
|
|
tmp := ""
|
|
if dollar == "srcf" {
|
|
tmp = "u"
|
|
}
|
|
|
|
// TODO: there's no need to multiply by 0x101 in the switch below if
|
|
// the next thing we're going to do is shift right by 8.
|
|
|
|
buf := new(bytes.Buffer)
|
|
switch d.sType {
|
|
default:
|
|
log.Fatalf("bad sType %q", d.sType)
|
|
case "image.Image", "*image.Uniform": // TODO: separate code for concrete types.
|
|
fmt.Fprintf(buf, ""+
|
|
"%sr%s, %sg%s, %sb%s, %sa%s := src.At(%s, %s).RGBA()\n",
|
|
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1],
|
|
)
|
|
case "*image.Gray":
|
|
fmt.Fprintf(buf, ""+
|
|
"%si := %s\n"+
|
|
"%sr%s := uint32(src.Pix[%si]) * 0x101\n",
|
|
lhs, pixOffset("src", args[0], args[1], "", "*src.Stride"),
|
|
lhs, tmp, lhs,
|
|
)
|
|
case "*image.NRGBA":
|
|
fmt.Fprintf(buf, ""+
|
|
"%si := %s\n"+
|
|
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n"+
|
|
"%sr%s := uint32(src.Pix[%si+0]) * %sa%s / 0xff\n"+
|
|
"%sg%s := uint32(src.Pix[%si+1]) * %sa%s / 0xff\n"+
|
|
"%sb%s := uint32(src.Pix[%si+2]) * %sa%s / 0xff\n",
|
|
lhs, pixOffset("src", args[0], args[1], "*4", "*src.Stride"),
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs, lhs, tmp,
|
|
lhs, tmp, lhs, lhs, tmp,
|
|
lhs, tmp, lhs, lhs, tmp,
|
|
)
|
|
case "*image.RGBA":
|
|
fmt.Fprintf(buf, ""+
|
|
"%si := %s\n"+
|
|
"%sr%s := uint32(src.Pix[%si+0]) * 0x101\n"+
|
|
"%sg%s := uint32(src.Pix[%si+1]) * 0x101\n"+
|
|
"%sb%s := uint32(src.Pix[%si+2]) * 0x101\n"+
|
|
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n",
|
|
lhs, pixOffset("src", args[0], args[1], "*4", "*src.Stride"),
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs,
|
|
)
|
|
case "*image.YCbCr":
|
|
// TODO: inline the color.YCbCrToRGB call.
|
|
// TODO: should we have a color.YCbCrToRGB48 function that returns
|
|
// 16-bit color?
|
|
fmt.Fprintf(buf, ""+
|
|
"%si := %s\n"+
|
|
"%sj := %s\n"+
|
|
"%sr8, %sg8, %sb8 := color.YCbCrToRGB(src.Y[%si], src.Cb[%sj], src.Cr[%sj])\n"+
|
|
"%sr%s := uint32(%sr8) * 0x101\n"+
|
|
"%sg%s := uint32(%sg8) * 0x101\n"+
|
|
"%sb%s := uint32(%sb8) * 0x101\n",
|
|
lhs, pixOffset("src", args[0], args[1], "", "*src.YStride"),
|
|
lhs, cOffset(args[0], args[1], d.sratio),
|
|
lhs, lhs, lhs, lhs, lhs, lhs,
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs,
|
|
lhs, tmp, lhs,
|
|
)
|
|
}
|
|
|
|
if dollar == "srcf" {
|
|
switch d.sType {
|
|
default:
|
|
fmt.Fprintf(buf, ""+
|
|
"%sr %s float64(%sru)%s\n"+
|
|
"%sg %s float64(%sgu)%s\n"+
|
|
"%sb %s float64(%sbu)%s\n"+
|
|
"%sa %s float64(%sau)%s\n",
|
|
lhs, eqOp, lhs, extra,
|
|
lhs, eqOp, lhs, extra,
|
|
lhs, eqOp, lhs, extra,
|
|
lhs, eqOp, lhs, extra,
|
|
)
|
|
case "*image.Gray":
|
|
fmt.Fprintf(buf, ""+
|
|
"%sr %s float64(%sru)%s\n",
|
|
lhs, eqOp, lhs, extra,
|
|
)
|
|
case "*image.YCbCr":
|
|
fmt.Fprintf(buf, ""+
|
|
"%sr %s float64(%sru)%s\n"+
|
|
"%sg %s float64(%sgu)%s\n"+
|
|
"%sb %s float64(%sbu)%s\n",
|
|
lhs, eqOp, lhs, extra,
|
|
lhs, eqOp, lhs, extra,
|
|
lhs, eqOp, lhs, extra,
|
|
)
|
|
}
|
|
}
|
|
|
|
return strings.TrimSpace(buf.String())
|
|
|
|
case "tweakD":
|
|
if d.dType == "*image.RGBA" {
|
|
return "d += dst.Stride"
|
|
}
|
|
return ";"
|
|
|
|
case "tweakDx":
|
|
if d.dType == "*image.RGBA" {
|
|
return strings.Replace(prefix, "dx++", "dx, d = dx+1, d+4", 1)
|
|
}
|
|
return prefix
|
|
|
|
case "tweakDy":
|
|
if d.dType == "*image.RGBA" {
|
|
return strings.Replace(prefix, "for dy, s", "for _, s", 1)
|
|
}
|
|
return prefix
|
|
|
|
case "tweakP":
|
|
switch d.sType {
|
|
case "*image.Gray":
|
|
if strings.HasPrefix(strings.TrimSpace(prefix), "pa * ") {
|
|
return "1,"
|
|
}
|
|
return "pr,"
|
|
case "*image.YCbCr":
|
|
if strings.HasPrefix(strings.TrimSpace(prefix), "pa * ") {
|
|
return "1,"
|
|
}
|
|
}
|
|
return prefix
|
|
|
|
case "tweakPr":
|
|
if d.sType == "*image.Gray" {
|
|
return "pr *= s.invTotalWeightFFFF"
|
|
}
|
|
return ";"
|
|
|
|
case "tweakVarP":
|
|
switch d.sType {
|
|
case "*image.Gray":
|
|
return strings.Replace(prefix, "var pr, pg, pb, pa", "var pr", 1)
|
|
case "*image.YCbCr":
|
|
return strings.Replace(prefix, "var pr, pg, pb, pa", "var pr, pg, pb", 1)
|
|
}
|
|
return prefix
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func expnSwitch(dType string, expandBoth bool, template string) string {
|
|
switchVar := "dst"
|
|
if dType != "" {
|
|
switchVar = "src"
|
|
}
|
|
lines := []string{fmt.Sprintf("switch %s := %s.(type) {", switchVar, switchVar)}
|
|
|
|
fallback, values := "Image", dTypes
|
|
if dType != "" {
|
|
fallback, values = "image.Image", sTypesForDType[dType]
|
|
}
|
|
for _, v := range values {
|
|
if v == fallback {
|
|
lines = append(lines, "default:")
|
|
} else {
|
|
lines = append(lines, fmt.Sprintf("case %s:", v))
|
|
}
|
|
|
|
if dType != "" {
|
|
if v == "*image.YCbCr" {
|
|
lines = append(lines, expnSwitchYCbCr(dType, template))
|
|
} else {
|
|
lines = append(lines, expnLine(template, &data{dType: dType, sType: v}))
|
|
}
|
|
} else if !expandBoth {
|
|
lines = append(lines, expnLine(template, &data{dType: v}))
|
|
} else {
|
|
lines = append(lines, expnSwitch(v, false, template))
|
|
}
|
|
}
|
|
|
|
lines = append(lines, "}")
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
func expnSwitchYCbCr(dType, template string) string {
|
|
lines := []string{
|
|
"switch src.SubsampleRatio {",
|
|
"default:",
|
|
expnLine(template, &data{dType: dType, sType: "image.Image"}),
|
|
}
|
|
for _, sratio := range subsampleRatios {
|
|
lines = append(lines,
|
|
fmt.Sprintf("case image.YCbCrSubsampleRatio%s:", sratio),
|
|
expnLine(template, &data{dType: dType, sType: "*image.YCbCr", sratio: sratio}),
|
|
)
|
|
}
|
|
lines = append(lines, "}")
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
// pixOffset returns the source text of an expression for the offset of pixel
// (x, y) in the pixel buffer of the image variable named m.
//
// x and y are expressions for the pixel coordinates. xstride and ystride are
// appended verbatim to the x and y terms, so they are either empty or a
// multiplication such as "*4" or "*src.Stride".
func pixOffset(m, x, y, xstride, ystride string) string {
	return fmt.Sprintf("(%s-%s.Rect.Min.Y)%s + (%s-%s.Rect.Min.X)%s", y, m, ystride, x, m, xstride)
}
|
|
|
|
// cOffset returns the source text of an expression for the offset of pixel
// (x, y) in the Cb and Cr buffers of the *image.YCbCr variable src, for the
// subsample ratio sratio ("444", "422", "420" or "440"). x and y are
// expressions for the pixel coordinates.
//
// Knowing the ratio when generating the code avoids a switch on
// src.SubsampleRatio for every pixel.
//
// For any other sratio the returned text is an error message rather than
// code.
func cOffset(x, y, sratio string) string {
	switch sratio {
	case "444":
		return fmt.Sprintf("( %s - src.Rect.Min.Y )*src.CStride + ( %s - src.Rect.Min.X )", y, x)
	case "422":
		return fmt.Sprintf("( %s - src.Rect.Min.Y )*src.CStride + ((%s)/2 - src.Rect.Min.X/2)", y, x)
	case "420":
		return fmt.Sprintf("((%s)/2 - src.Rect.Min.Y/2)*src.CStride + ((%s)/2 - src.Rect.Min.X/2)", y, x)
	case "440":
		return fmt.Sprintf("((%s)/2 - src.Rect.Min.Y/2)*src.CStride + ( %s - src.Rect.Min.X )", y, x)
	}
	return fmt.Sprintf("unsupported sratio %q", sratio)
}
|
|
|
|
// split cuts s around the first occurrence of sep and returns the text
// before and after it, each with surrounding white space removed. It returns
// two empty strings if sep does not occur in s.
func split(s, sep string) (string, string) {
	if i := strings.Index(s, sep); i >= 0 {
		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+len(sep):])
	}
	return "", ""
}
|
|
|
|
func splitEq(s string) (lhs, eqOp string) {
|
|
s = strings.TrimSpace(s)
|
|
if lhs, _ = split(s, ":="); lhs != "" {
|
|
return lhs, ":="
|
|
}
|
|
if lhs, _ = split(s, "+="); lhs != "" {
|
|
return lhs, "+="
|
|
}
|
|
return "", ""
|
|
}
|
|
|
|
// splitArgs parses a bracketed, comma separated argument list at the start
// of s, such as "[x, y, p] * w".
//
// It returns the arguments with surrounding white space removed, and extra,
// the unmodified text after the first closing bracket. It returns nil and ""
// if s, ignoring leading white space, does not start with '[' or has no ']'.
// Brackets do not nest: the list ends at the first ']'.
func splitArgs(s string) (args []string, extra string) {
	s = strings.TrimSpace(s)
	if s == "" || s[0] != '[' {
		return nil, ""
	}
	s = s[1:]

	i := strings.IndexByte(s, ']')
	if i < 0 {
		return nil, ""
	}
	args, extra = strings.Split(s[:i], ","), s[i+1:]
	for i := range args {
		args[i] = strings.TrimSpace(args[i])
	}
	return args, extra
}
|
|
|
|
// relName returns the part of s after its last '.', or s itself if it
// contains no '.'. For a type name this drops the package qualifier and any
// leading '*': "*image.RGBA" becomes "RGBA".
func relName(s string) string {
	if i := strings.LastIndex(s, "."); i >= 0 {
		return s[i+1:]
	}
	return s
}
|
|
|
|
// The constants below are the templates for the generated code. They are Go
// source text in which "$foo" placeholders are replaced line by line (see
// expn and expnDollar). Placeholders that take arguments are followed by a
// bracketed list, as in "$srcu[x, y]". The templates do not need to be
// formatted, as main passes the generated code through go/format.
const (
	// codeRoot holds the exported Scale and Transform methods of an
	// interpolator type, $receiver. Both methods use the generic
	// Image / image.Image code if sr is not within the src bounds, and
	// otherwise dispatch on the dst and src types via $switch.
	codeRoot = `
		func (z $receiver) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.scale_Image_Image(dst, dr, adr, src, sr)
			} else {
				$switch z.scale_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, src, sr)
			}
		}

		func (z $receiver) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
			dr := transformRect(s2d, &sr)
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			d2s := invert(s2d)
			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
			} else {
				$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr)
			}
		}
	`

	// codeNNScaleLeaf is the type-specific scale method of nnInterpolator.
	// Every dst pixel is a copy of a single src pixel.
	codeNNScaleLeaf = `
		func (nnInterpolator) scale_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
			dw2 := uint64(dr.Dx()) * 2
			dh2 := uint64(dr.Dy()) * 2
			sw := uint64(sr.Dx())
			sh := uint64(sr.Dy())
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				sy := (2*uint64(dy) + 1) * sh / dh2
				$preInner
				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
					sx := (2*uint64(dx) + 1) * sw / dw2
					p := $srcu[sr.Min.X + int(sx), sr.Min.Y + int(sy)]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), p]
				}
			}
		}
	`

	// codeNNTransformLeaf is the type-specific transform method of
	// nnInterpolator. Every dst pixel whose center maps into sr is a copy of
	// a single src pixel.
	codeNNTransformLeaf = `
		func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
					sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
					if !(image.Point{sx0, sy0}).In(sr) {
						continue
					}
					p := $srcu[sx0, sy0]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), p]
				}
			}
		}
	`

	// codeABLScaleLeaf is the type-specific scale method of ablInterpolator.
	// Every dst pixel is a blend of up to four neighboring src pixels,
	// weighted by the fractional parts of the src coordinates.
	codeABLScaleLeaf = `
		func (ablInterpolator) scale_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
			sw := int32(sr.Dx())
			sh := int32(sr.Dy())
			yscale := float64(sh) / float64(dr.Dy())
			xscale := float64(sw) / float64(dr.Dx())
			swMinus1, shMinus1 := sw - 1, sh - 1
			$preOuter

			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				sy := (float64(dy)+0.5)*yscale - 0.5
				// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
				// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
				// sx, below.
				sy0 := int32(sy)
				yFrac0 := sy - float64(sy0)
				yFrac1 := 1 - yFrac0
				sy1 := sy0 + 1
				if sy < 0 {
					sy0, sy1 = 0, 0
					yFrac0, yFrac1 = 0, 1
				} else if sy1 > shMinus1 {
					sy0, sy1 = shMinus1, shMinus1
					yFrac0, yFrac1 = 1, 0
				}
				$preInner

				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
					sx := (float64(dx)+0.5)*xscale - 0.5
					sx0 := int32(sx)
					xFrac0 := sx - float64(sx0)
					xFrac1 := 1 - xFrac0
					sx1 := sx0 + 1
					if sx < 0 {
						sx0, sx1 = 0, 0
						xFrac0, xFrac1 = 0, 1
					} else if sx1 > swMinus1 {
						sx0, sx1 = swMinus1, swMinus1
						xFrac0, xFrac1 = 1, 0
					}

					s00 := $srcf[sr.Min.X + int(sx0), sr.Min.Y + int(sy0)]
					s10 := $srcf[sr.Min.X + int(sx1), sr.Min.Y + int(sy0)]
					$blend[xFrac1, s00, xFrac0, s10]
					s01 := $srcf[sr.Min.X + int(sx0), sr.Min.Y + int(sy1)]
					s11 := $srcf[sr.Min.X + int(sx1), sr.Min.Y + int(sy1)]
					$blend[xFrac1, s01, xFrac0, s11]
					$blend[yFrac1, s10, yFrac0, s11]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), s11]
				}
			}
		}
	`

	// codeABLTransformLeaf is the type-specific transform method of
	// ablInterpolator. It blends like codeABLScaleLeaf, for every dst pixel
	// whose center maps into sr.
	codeABLTransformLeaf = `
		func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
						continue
					}

					sx -= 0.5
					sxf := math.Floor(sx)
					xFrac0 := sx - sxf
					xFrac1 := 1 - xFrac0
					sx0 := int(sxf)
					sx1 := sx0 + 1
					if sx0 < sr.Min.X {
						sx0, sx1 = sr.Min.X, sr.Min.X
						xFrac0, xFrac1 = 0, 1
					} else if sx1 >= sr.Max.X {
						sx0, sx1 = sr.Max.X-1, sr.Max.X-1
						xFrac0, xFrac1 = 1, 0
					}

					sy -= 0.5
					syf := math.Floor(sy)
					yFrac0 := sy - syf
					yFrac1 := 1 - yFrac0
					sy0 := int(syf)
					sy1 := sy0 + 1
					if sy0 < sr.Min.Y {
						sy0, sy1 = sr.Min.Y, sr.Min.Y
						yFrac0, yFrac1 = 0, 1
					} else if sy1 >= sr.Max.Y {
						sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
						yFrac0, yFrac1 = 1, 0
					}

					s00 := $srcf[sx0, sy0]
					s10 := $srcf[sx1, sy0]
					$blend[xFrac1, s00, xFrac0, s10]
					s01 := $srcf[sx0, sy1]
					s11 := $srcf[sx1, sy1]
					$blend[xFrac1, s01, xFrac0, s11]
					$blend[yFrac1, s10, yFrac0, s11]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), s11]
				}
			}
		}
	`

	// codeKernelRoot holds the exported Scale method of *kernelScaler and the
	// exported Transform method of *Kernel. Scale works in two passes, via a
	// temporary buffer: scaleX (selected by $switchS on the src type) and
	// scaleY (selected by $switchD on the dst type).
	codeKernelRoot = `
		func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
			if z.dw != int32(dr.Dx()) || z.dh != int32(dr.Dy()) || z.sw != int32(sr.Dx()) || z.sh != int32(sr.Dy()) {
				z.kernel.Scale(dst, dr, src, sr, opts)
				return
			}
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			// Create a temporary buffer:
			// scaleX distributes the source image's columns over the temporary image.
			// scaleY distributes the temporary image's rows over the destination image.
			// TODO: is it worth having a sync.Pool for this temporary buffer?
			tmp := make([][4]float64, z.dw*z.sh)

			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.scaleX_Image(tmp, src, sr)
			} else {
				$switchS z.scaleX_$sTypeRN$sratio(tmp, src, sr)
			}

			$switchD z.scaleY_$dTypeRN(dst, dr, adr, tmp)
		}

		func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
			dr := transformRect(s2d, &sr)
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			d2s := invert(s2d)

			xscale := abs(d2s[0])
			if s := abs(d2s[1]); xscale < s {
				xscale = s
			}
			yscale := abs(d2s[3])
			if s := abs(d2s[4]); yscale < s {
				yscale = s
			}

			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
			} else {
				$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, xscale, yscale)
			}
		}
	`

	// codeKernelScaleLeafX is the src type-specific horizontal pass of
	// kernelScaler.Scale. It fills tmp with weighted sums of src pixels.
	// $tweakVarP, $tweakPr and $tweakP adapt the accumulator variables to the
	// color channels that the src type provides.
	codeKernelScaleLeafX = `
		func (z *kernelScaler) scaleX_$sTypeRN$sratio(tmp [][4]float64, src $sType, sr image.Rectangle) {
			t := 0
			for y := int32(0); y < z.sh; y++ {
				for _, s := range z.horizontal.sources {
					var pr, pg, pb, pa float64 $tweakVarP
					for _, c := range z.horizontal.contribs[s.i:s.j] {
						p += $srcf[sr.Min.X + int(c.coord), sr.Min.Y + int(y)] * c.weight
					}
					$tweakPr
					tmp[t] = [4]float64{
						pr * s.invTotalWeightFFFF, $tweakP
						pg * s.invTotalWeightFFFF, $tweakP
						pb * s.invTotalWeightFFFF, $tweakP
						pa * s.invTotalWeightFFFF, $tweakP
					}
					t++
				}
			}
		}
	`

	// codeKernelScaleLeafY is the dst type-specific vertical pass of
	// kernelScaler.Scale. It writes weighted sums of tmp entries to dst.
	codeKernelScaleLeafY = `
		func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dr, adr image.Rectangle, tmp [][4]float64) {
			$preOuter
			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
				$preKernelInner
				for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] { $tweakDy
					var pr, pg, pb, pa float64
					for _, c := range z.vertical.contribs[s.i:s.j] {
						p := &tmp[c.coord*z.dw+dx]
						pr += p[0] * c.weight
						pg += p[1] * c.weight
						pb += p[2] * c.weight
						pa += p[3] * c.weight
					}
					$outputf[dr.Min.X + int(dx), dr.Min.Y + int(adr.Min.Y + dy), ftou, p, s.invTotalWeight]
					$tweakD
				}
			}
		}
	`

	// codeKernelTransformLeaf is the type-specific transform method of
	// *Kernel. For every dst pixel whose center maps into sr, it computes
	// normalized per-axis kernel weights and writes the weighted sum of the
	// src pixels they cover.
	codeKernelTransformLeaf = `
		func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) {
			// When shrinking, broaden the effective kernel support so that we still
			// visit every source pixel.
			xHalfWidth, xKernelArgScale := q.Support, 1.0
			if xscale > 1 {
				xHalfWidth *= xscale
				xKernelArgScale = 1 / xscale
			}
			yHalfWidth, yKernelArgScale := q.Support, 1.0
			if yscale > 1 {
				yHalfWidth *= yscale
				yKernelArgScale = 1 / yscale
			}

			xWeights := make([]float64, 1 + 2*int(math.Ceil(xHalfWidth)))
			yWeights := make([]float64, 1 + 2*int(math.Ceil(yHalfWidth)))

			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
						continue
					}

					sx -= 0.5
					ix := int(math.Floor(sx - xHalfWidth))
					if ix < sr.Min.X {
						ix = sr.Min.X
					}
					jx := int(math.Ceil(sx + xHalfWidth))
					if jx > sr.Max.X {
						jx = sr.Max.X
					}

					totalXWeight := 0.0
					for kx := ix; kx < jx; kx++ {
						xWeight := 0.0
						if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
							xWeight = q.At(t)
						}
						xWeights[kx - ix] = xWeight
						totalXWeight += xWeight
					}
					for x := range xWeights[:jx-ix] {
						xWeights[x] /= totalXWeight
					}

					sy -= 0.5
					iy := int(math.Floor(sy - yHalfWidth))
					if iy < sr.Min.Y {
						iy = sr.Min.Y
					}
					jy := int(math.Ceil(sy + yHalfWidth))
					if jy > sr.Max.Y {
						jy = sr.Max.Y
					}

					totalYWeight := 0.0
					for ky := iy; ky < jy; ky++ {
						yWeight := 0.0
						if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
							yWeight = q.At(t)
						}
						yWeights[ky - iy] = yWeight
						totalYWeight += yWeight
					}
					for y := range yWeights[:jy-iy] {
						yWeights[y] /= totalYWeight
					}

					var pr, pg, pb, pa float64 $tweakVarP
					for ky := iy; ky < jy; ky++ {
						yWeight := yWeights[ky - iy]
						for kx := ix; kx < jx; kx++ {
							w := xWeights[kx - ix] * yWeight
							p += $srcf[kx, ky] * w
						}
					}
					$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
				}
			}
		}
	`
)
|