golang-image/draw/gen.go
Nigel Tao 2c27a34d37 draw: make Scale an Interpolator method instead of a function.
This means that only Kernel values have a NewScaler method, which
re-uses computation when scaling multiple images of the same dst and src
dimensions. The NearestNeighbor and ApproxBiLinear scalers don't get any
pre-computation to re-use, so don't need a NewScaler method just to
satisfy the previous Interpolator interface. As a small bonus, NN.Scale
and ABL.Scale should no longer allocate on the fast paths.

This change is consistent with the upcoming Transformer method, so that the
Interpolator interface will be

type Interpolator interface {
	Scale(etc)
	Transform(etc)
}

instead of

type Interpolator interface {
	NewScaler(etc) Scaler
	Transform(etc)
}

I don't have a good theory for why the "func (ablInterpolator)
scale_RGBA_RGBA" benchmark is such a dramatic improvement, but at least
it's in the right direction. I'm treating the other benchmark changes as
noise.

benchmark                     old ns/op      new ns/op      delta
BenchmarkScaleLargeDownNN     3233406        3169060        -1.99%
BenchmarkScaleLargeDownAB     12018178       12011348       -0.06%
BenchmarkScaleLargeDownBL     1420827834     1409335695     -0.81%
BenchmarkScaleLargeDownCR     2820669690     2795534035     -0.89%
BenchmarkScaleDownNN          866628         869241         +0.30%
BenchmarkScaleDownAB          3175963        3216041        +1.26%
BenchmarkScaleDownBL          26639767       26677003       +0.14%
BenchmarkScaleDownCR          51720996       51621628       -0.19%
BenchmarkScaleUpNN            42758485       43258611       +1.17%
BenchmarkScaleUpAB            156693813      156943367      +0.16%
BenchmarkScaleUpBL            69511444       69621698       +0.16%
BenchmarkScaleUpCR            124530191      124885601      +0.29%
BenchmarkScaleSrcGray         8992205        9129321        +1.52%
BenchmarkScaleSrcNRGBA        9807837        9894466        +0.88%
BenchmarkScaleSrcRGBA         1333188        1104282        -17.17%
BenchmarkScaleSrcUniform      1147788        1162488        +1.28%
BenchmarkScaleSrcYCbCr        12164542       12305373       +1.16%

Change-Id: I2aee6c392eb7437e843260775aed97ce145b4d47
Reviewed-on: https://go-review.googlesource.com/6556
Reviewed-by: Rob Pike <r@golang.org>
2015-03-04 22:50:18 +00:00

564 lines
14 KiB
Go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io/ioutil"
"log"
"os"
"strings"
)
// debug, when set, makes main print the raw (unformatted) generated code to
// standard output instead of formatting it and writing impl.go.
var debug = flag.Bool("debug", false, "")
// main generates the type-specialized scaling code for the draw package.
//
// With -debug the raw, unformatted text is written to standard output.
// Otherwise the text is run through go/format and stored in impl.go in the
// current directory; any formatting or write error is fatal.
func main() {
	flag.Parse()

	const header = "// generated by \"go run gen.go\". DO NOT EDIT.\n\n" +
		"package draw\n\nimport (\n\"image\"\n\"image/color\"\n)\n"
	buf := bytes.NewBufferString(header)

	// Emit the two direct interpolators, then the kernel-based scaler.
	gen(buf, "nnInterpolator", codeNNLeaf)
	gen(buf, "ablInterpolator", codeABLLeaf)
	genKernel(buf)

	raw := buf.Bytes()
	if *debug {
		os.Stdout.Write(raw)
		return
	}

	formatted, err := format.Source(raw)
	if err != nil {
		log.Fatal(err)
	}
	err = ioutil.WriteFile("impl.go", formatted, 0660)
	if err != nil {
		log.Fatal(err)
	}
}
var (
	// dsTypes are the (dst image type, src image type) pairs to generate
	// scale_DType_SType implementations for. The last element in the slice
	// should be the fallback pair ("Image", "image.Image").
	//
	// TODO: add *image.CMYK src type after Go 1.5 is released.
	dsTypes = []struct{ dType, sType string }{
		{dType: "*image.RGBA", sType: "*image.Gray"},
		{dType: "*image.RGBA", sType: "*image.NRGBA"},
		{dType: "*image.RGBA", sType: "*image.RGBA"},
		{dType: "*image.RGBA", sType: "*image.Uniform"},
		{dType: "*image.RGBA", sType: "*image.YCbCr"},
		{dType: "*image.RGBA", sType: "image.Image"},
		{dType: "Image", sType: "image.Image"},
	}

	// dTypes lists each distinct dst type of dsTypes, in order of first
	// appearance. It is filled in by init.
	dTypes []string

	// sTypes lists each distinct src type of dsTypes, in order of first
	// appearance. It is filled in by init.
	sTypes []string

	// sTypesForDType maps a dst type to the src types it is paired with in
	// dsTypes. It is filled in by init, which also adds an "anyDType" entry
	// holding all of sTypes.
	sTypesForDType = make(map[string][]string)
)
// init derives the lookup tables from dsTypes: dTypes and sTypes receive each
// distinct dst and src type in order of first appearance, and sTypesForDType
// records, per dst type, every src type it is paired with. A synthetic
// "anyDType" key is mapped to the full sTypes list.
func init() {
	seenDst := make(map[string]struct{})
	seenSrc := make(map[string]struct{})
	for _, pair := range dsTypes {
		dst, src := pair.dType, pair.sType
		if _, dup := seenSrc[src]; !dup {
			seenSrc[src] = struct{}{}
			sTypes = append(sTypes, src)
		}
		if _, dup := seenDst[dst]; !dup {
			seenDst[dst] = struct{}{}
			dTypes = append(dTypes, dst)
		}
		sTypesForDType[dst] = append(sTypesForDType[dst], src)
	}
	sTypesForDType["anyDType"] = sTypes
}
// data carries the values substituted into a template by the $-macros.
type data struct {
	// dType and sType are the dst and src image types, spelled as they
	// should appear in the generated code (e.g. "*image.RGBA").
	dType, sType string
	// receiver is the receiver type name substituted for $receiver.
	receiver string
}
func gen(w *bytes.Buffer, receiver string, code string) {
expn(w, codeRoot, &data{receiver: receiver})
for _, t := range dsTypes {
expn(w, code, &data{
dType: t.dType,
sType: t.sType,
receiver: receiver,
})
}
}
// genKernel writes the kernel scaler code to w: the codeKernelRoot template,
// then codeKernelLeafX once per source type in sTypes, then codeKernelLeafY
// once per destination type in dTypes.
func genKernel(w *bytes.Buffer) {
	expn(w, codeKernelRoot, new(data))

	for i := range sTypes {
		leafX := data{sType: sTypes[i]}
		expn(w, codeKernelLeafX, &leafX)
	}
	for i := range dTypes {
		leafY := data{dType: dTypes[i]}
		expn(w, codeKernelLeafY, &leafY)
	}
}
// expn expands the template code line by line using d and writes every
// resulting line to w, each followed by a newline. A line whose expansion is
// exactly ";" is dropped.
func expn(w *bytes.Buffer, code string, d *data) {
	for _, raw := range strings.Split(code, "\n") {
		if expanded := expnLine(raw, d); expanded != ";" {
			fmt.Fprintln(w, expanded)
		}
	}
}
// expnLine repeatedly expands the first $-macro in line until no '$' is left
// and returns the result. A macro name is the run of ASCII letters that
// follows the '$'. An empty expansion is fatal.
func expnLine(line string, d *data) string {
	notLetter := func(r rune) bool {
		upper := 'A' <= r && r <= 'Z'
		lower := 'a' <= r && r <= 'z'
		return !upper && !lower
	}

	for at := strings.IndexByte(line, '$'); at >= 0; at = strings.IndexByte(line, '$') {
		before, rest := line[:at], line[at+1:]

		// The name ends at the first non-letter, or at the end of the line.
		end := strings.IndexFunc(rest, notLetter)
		if end < 0 {
			end = len(rest)
		}

		expanded := expnDollar(before, rest[:end], rest[end:], d)
		if expanded == "" {
			log.Fatalf("couldn't expand %q", line)
		}
		line = expanded
	}
	return line
}
// expnDollar expands a single $-macro found in a template line.
//
// prefix is the text before the '$', dollar is the macro name (without the
// '$') and suffix is the text after the name. The return value replaces the
// whole line and may itself span several lines. Two return values are
// special: ";" marks a line that expn drops, and "" reports an unknown or
// malformed macro, which expnLine treats as fatal.
//
// Macros that take arguments read them from a leading "[a, b, ...]" in
// suffix (see splitArgs). Several macros discard prefix and/or the rest of
// suffix, so they are expected to stand alone on their template line.
func expnDollar(prefix, dollar, suffix string, d *data) string {
switch dollar {
// Plain substitutions. The RN variants use relName, which strips everything
// up to the last '.', e.g. "*image.RGBA" becomes "RGBA".
case "dType":
return prefix + d.dType + suffix
case "dTypeRN":
return prefix + relName(d.dType) + suffix
case "sType":
return prefix + d.sType + suffix
case "sTypeRN":
return prefix + relName(d.sType) + suffix
case "receiver":
return prefix + d.receiver + suffix
// Type switches whose case bodies are suffix expanded per type:
// $switch switches on dst and, inside each case, on src;
// $switchD switches on dst only; $switchS switches on src over all sTypes.
case "switch":
return expnSwitch("", true, suffix)
case "switchD":
return expnSwitch("", false, suffix)
case "switchS":
return expnSwitch("anyDType", false, suffix)
// $preOuter: setup emitted before the outer loop. For the generic Image dst
// it declares one color.RGBA64 and a color.Color referring to it, which the
// $outputu/$outputf expansions fill in and pass to dst.Set.
case "preOuter":
switch d.dType {
default:
return ";"
case "Image":
return "" +
"dstColorRGBA64 := &color.RGBA64{}\n" +
"dstColor := color.Color(dstColorRGBA64)"
}
// $preInner: for an *image.RGBA dst, computes d, the Pix offset of the
// first affected pixel of row dy.
case "preInner":
switch d.dType {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))"
}
// $preKernelInner: for an *image.RGBA dst, computes d, the Pix offset of
// the first affected pixel of column dx.
case "preKernelInner":
switch d.dType {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dr.Min.X+int(dx), dr.Min.Y+adr.Min.Y)"
}
// $blend[w0, a, w1, b] emits, for each of the r, g, b, a channels,
// "b<ch> = w0*a<ch> + w1*b<ch>"; the result overwrites b.
case "blend":
args, _ := splitArgs(suffix)
if len(args) != 4 {
return ""
}
return fmt.Sprintf(""+
"%sr = %s*%sr + %s*%sr\n"+
"%sg = %s*%sg + %s*%sg\n"+
"%sb = %s*%sb + %s*%sb\n"+
"%sa = %s*%sa + %s*%sa",
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
)
// $outputu[x, y, v] stores the channels v<r,g,b,a> into dst. For Image it
// goes through dst.Set at (x, y) relative to dr.Min; for *image.RGBA it
// writes the channels shifted right by 8 at offset d and advances d by 4
// bytes (x and y are not used in that case).
case "outputu":
args, _ := splitArgs(suffix)
if len(args) != 3 {
return ""
}
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
case "Image":
return fmt.Sprintf(""+
"dstColorRGBA64.R = uint16(%sr)\n"+
"dstColorRGBA64.G = uint16(%sg)\n"+
"dstColorRGBA64.B = uint16(%sb)\n"+
"dstColorRGBA64.A = uint16(%sa)\n"+
"dst.Set(dr.Min.X+int(%s), dr.Min.Y+int(%s), dstColor)",
args[2], args[2], args[2], args[2],
args[0], args[1],
)
case "*image.RGBA":
return fmt.Sprintf(""+
"dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
"dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
"dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
"dst.Pix[d+3] = uint8(uint32(%sa) >> 8)\n"+
"d += 4",
args[2], args[2], args[2], args[2],
)
}
// $outputf[x, y, v, scale] is like $outputu but each channel is first
// multiplied by scale and converted with ftou (not defined in this file;
// presumably provided by the generated package). For *image.RGBA, d is
// advanced by dst.Stride, i.e. to the same column of the next row.
case "outputf":
args, _ := splitArgs(suffix)
if len(args) != 4 {
return ""
}
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
case "Image":
return fmt.Sprintf(""+
"dstColorRGBA64.R = ftou(%sr * %s)\n"+
"dstColorRGBA64.G = ftou(%sg * %s)\n"+
"dstColorRGBA64.B = ftou(%sb * %s)\n"+
"dstColorRGBA64.A = ftou(%sa * %s)\n"+
"dst.Set(dr.Min.X+int(%s), dr.Min.Y+int(%s), dstColor)",
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
args[0], args[1],
)
case "*image.RGBA":
return fmt.Sprintf(""+
"dst.Pix[d+0] = uint8(ftou(%sr * %s) >> 8)\n"+
"dst.Pix[d+1] = uint8(ftou(%sg * %s) >> 8)\n"+
"dst.Pix[d+2] = uint8(ftou(%sb * %s) >> 8)\n"+
"dst.Pix[d+3] = uint8(ftou(%sa * %s) >> 8)\n"+
"d += dst.Stride",
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
)
}
// "lhs := $srcu[x, y]" declares lhsr, lhsg, lhsb, lhsa holding the source
// pixel at (x, y) relative to sr.Min. "lhs := $srcf[x, y] extra" (or with
// "+=") first reads the pixel into temporaries lhsru, lhsgu, ... and then
// emits "lhs<ch> <op> float64(lhs<ch>u) extra" for each channel.
case "srcf", "srcu":
lhs, eqOp := splitEq(prefix)
if lhs == "" {
return ""
}
args, extra := splitArgs(suffix)
if len(args) != 2 {
return ""
}
// tmp is the name suffix for the integer temporaries used by $srcf.
tmp := ""
if dollar == "srcf" {
tmp = "u"
}
buf := new(bytes.Buffer)
switch d.sType {
default:
log.Fatalf("bad sType %q", d.sType)
case "image.Image", "*image.Gray", "*image.NRGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
"src.At(sr.Min.X + int(%s), sr.Min.Y+int(%s)).RGBA()\n",
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp,
args[0], args[1],
)
case "*image.RGBA":
// TODO: there's no need to multiply by 0x101 if the next thing
// we're going to do is shift right by 8.
fmt.Fprintf(buf, "%si := src.PixOffset(sr.Min.X + int(%s), sr.Min.Y+int(%s))\n"+
"%sr%s := uint32(src.Pix[%si+0]) * 0x101\n"+
"%sg%s := uint32(src.Pix[%si+1]) * 0x101\n"+
"%sb%s := uint32(src.Pix[%si+2]) * 0x101\n"+
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n",
lhs, args[0], args[1],
lhs, tmp, lhs,
lhs, tmp, lhs,
lhs, tmp, lhs,
lhs, tmp, lhs,
)
}
if dollar == "srcf" {
fmt.Fprintf(buf, ""+
"%sr %s float64(%sru)%s\n"+
"%sg %s float64(%sgu)%s\n"+
"%sb %s float64(%sbu)%s\n"+
"%sa %s float64(%sau)%s\n",
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
)
}
return strings.TrimSpace(buf.String())
// $tweakDy prefixes a "for dy, s := range ..." line. For an *image.RGBA
// dst the loop variable dy is replaced by "_": the *image.RGBA expansion of
// $outputf never references its coordinate arguments, so dy would
// otherwise be declared but unused in the generated code.
case "tweakDy":
if d.dType == "*image.RGBA" {
return strings.Replace(suffix, "for dy, s", "for _, s", 1)
}
return suffix
}
return ""
}
// expnSwitch returns the text of a Go type switch whose case bodies are
// template expanded for each candidate type.
//
// With dType == "" the switch is on dst and ranges over dTypes; each body is
// either template expanded with that dst type or, when expandBoth is set, a
// nested switch on src for that dst type. With a non-empty dType the switch
// is on src and ranges over sTypesForDType[dType]. The fallback type ("Image"
// for dst, "image.Image" for src) becomes the default case.
func expnSwitch(dType string, expandBoth bool, template string) string {
	onSrc := dType != ""

	subject, fallback, candidates := "dst", "Image", dTypes
	if onSrc {
		subject, fallback, candidates = "src", "image.Image", sTypesForDType[dType]
	}

	out := make([]string, 0, 2*len(candidates)+2)
	out = append(out, fmt.Sprintf("switch %s := %s.(type) {", subject, subject))
	for _, typ := range candidates {
		label := "default:"
		if typ != fallback {
			label = fmt.Sprintf("case %s:", typ)
		}

		var body string
		switch {
		case onSrc:
			body = expnLine(template, &data{dType: dType, sType: typ})
		case expandBoth:
			body = expnSwitch(typ, false, template)
		default:
			body = expnLine(template, &data{dType: typ})
		}
		out = append(out, label, body)
	}
	out = append(out, "}")
	return strings.Join(out, "\n")
}
// split cuts s around the first occurrence of sep and returns the two
// halves with surrounding white space removed. If sep does not occur in s,
// both results are empty.
func split(s, sep string) (string, string) {
	at := strings.Index(s, sep)
	if at < 0 {
		return "", ""
	}
	head, tail := s[:at], s[at+len(sep):]
	return strings.TrimSpace(head), strings.TrimSpace(tail)
}
// splitEq extracts the left-hand side and the operator from the start of an
// assignment such as "p := " or "p += ". The operators ":=" and "+=" are
// tried in that order; if neither yields a non-empty left-hand side, both
// results are empty.
func splitEq(s string) (lhs, eqOp string) {
	s = strings.TrimSpace(s)
	for _, op := range [...]string{":=", "+="} {
		if name, _ := split(s, op); name != "" {
			return name, op
		}
	}
	return "", ""
}
// splitArgs parses a macro argument list of the form "[a, b, c]rest". After
// trimming white space around s, it returns the comma-separated arguments
// (each trimmed) and extra, the text following the first ']'. If s does not
// start with '[' or has no ']', it returns nil and "".
func splitArgs(s string) (args []string, extra string) {
	s = strings.TrimSpace(s)
	if !strings.HasPrefix(s, "[") {
		return nil, ""
	}
	inner := s[1:]
	end := strings.IndexByte(inner, ']')
	if end < 0 {
		return nil, ""
	}
	for _, field := range strings.Split(inner[:end], ",") {
		args = append(args, strings.TrimSpace(field))
	}
	return args, inner[end+1:]
}
// relName returns the part of s after its last '.', or all of s if it has
// no '.'. For example, "*image.RGBA" yields "RGBA".
func relName(s string) string {
	// LastIndex is -1 when there is no '.', which makes the slice start at 0.
	return s[strings.LastIndex(s, ".")+1:]
}
const (
// codeRoot is the template for the exported Scale method of the $receiver
// type; gen expands it once per interpolator. The generated method returns
// early when nothing is affected, uses the generic scale_Image_Image when sr
// is not within src's bounds, and otherwise dispatches through the nested
// type switch produced by $switch to a scale_<dst>_<src> method.
codeRoot = `
func (z $receiver) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle) {
// adr is the affected destination pixels, relative to dr.Min.
adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
if adr.Empty() || sr.Empty() {
return
}
// sr is the source pixels. If it extends beyond the src bounds,
// we cannot use the type-specific fast paths, as they access
// the Pix fields directly without bounds checking.
if !sr.In(src.Bounds()) {
z.scale_Image_Image(dst, dr, adr, src, sr)
} else {
$switch z.scale_$dTypeRN_$sTypeRN(dst, dr, adr, src, sr)
}
}
`
// codeNNLeaf is the template for nnInterpolator's scale_<dst>_<src> methods;
// gen expands it once per (dst, src) pair in dsTypes. For every affected
// destination pixel the generated code computes one source coordinate
// (sx, sy) with integer arithmetic, reads that pixel via $srcu and stores it
// via $outputu.
codeNNLeaf = `
func (nnInterpolator) scale_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
dw2 := uint64(dr.Dx()) * 2
dh2 := uint64(dr.Dy()) * 2
sw := uint64(sr.Dx())
sh := uint64(sr.Dy())
$preOuter
for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * sh / dh2
$preInner
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * sw / dw2
p := $srcu[sx, sy]
$outputu[dx, dy, p]
}
}
}
`
// codeABLLeaf is the template for ablInterpolator's scale_<dst>_<src>
// methods; gen expands it once per (dst, src) pair in dsTypes. For every
// affected destination pixel the generated code reads the four source
// pixels at (sx0|sx1, sy0|sy1), blends them horizontally and then
// vertically with the fractional weights, and stores the result (left in
// s11) via $outputu. Coordinates falling off either edge of the source are
// clamped, with the weights set to select a single row or column.
codeABLLeaf = `
func (ablInterpolator) scale_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
sw := int32(sr.Dx())
sh := int32(sr.Dy())
yscale := float64(sh) / float64(dr.Dy())
xscale := float64(sw) / float64(dr.Dx())
$preOuter
for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
yFrac0 := sy - float64(sy0)
yFrac1 := 1 - yFrac0
sy1 := sy0 + 1
if sy < 0 {
sy0, sy1 = 0, 0
yFrac0, yFrac1 = 0, 1
} else if sy1 >= sh {
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
$preInner
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
xFrac0 := sx - float64(sx0)
xFrac1 := 1 - xFrac0
sx1 := sx0 + 1
if sx < 0 {
sx0, sx1 = 0, 0
xFrac0, xFrac1 = 0, 1
} else if sx1 >= sw {
sx1 = sx0
xFrac0, xFrac1 = 1, 0
}
s00 := $srcf[sx0, sy0]
s10 := $srcf[sx1, sy0]
$blend[xFrac1, s00, xFrac0, s10]
s01 := $srcf[sx0, sy1]
s11 := $srcf[sx1, sy1]
$blend[xFrac1, s01, xFrac0, s11]
$blend[yFrac1, s10, yFrac0, s11]
$outputu[dx, dy, s11]
}
}
}
`
// codeKernelRoot is the template for (*kernelScaler).Scale; genKernel
// expands it once. If the requested dst/src sizes differ from the z.dw,
// z.dh, z.sw, z.sh stored in the scaler, the generated method delegates to
// z.kernel.Scale. Otherwise it scales in two passes through a temporary
// buffer: a scaleX_<src> method chosen by $switchS (or scaleX_Image when sr
// is not within src's bounds), then a scaleY_<dst> method chosen by
// $switchD.
codeKernelRoot = `
func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle) {
if z.dw != int32(dr.Dx()) || z.dh != int32(dr.Dy()) || z.sw != int32(sr.Dx()) || z.sh != int32(sr.Dy()) {
z.kernel.Scale(dst, dr, src, sr)
return
}
// adr is the affected destination pixels, relative to dr.Min.
adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
if adr.Empty() || sr.Empty() {
return
}
// Create a temporary buffer:
// scaleX distributes the source image's columns over the temporary image.
// scaleY distributes the temporary image's rows over the destination image.
// TODO: is it worth having a sync.Pool for this temporary buffer?
tmp := make([][4]float64, z.dw*z.sh)
// sr is the source pixels. If it extends beyond the src bounds,
// we cannot use the type-specific fast paths, as they access
// the Pix fields directly without bounds checking.
if !sr.In(src.Bounds()) {
z.scaleX_Image(tmp, src, sr)
} else {
$switchS z.scaleX_$sTypeRN(tmp, src, sr)
}
$switchD z.scaleY_$dTypeRN(dst, dr, adr, tmp)
}
`
// codeKernelLeafX is the template for the scaleX_<src> methods; genKernel
// expands it once per source type in sTypes. For each source row y and each
// entry of z.horizontal.sources, the generated code sums the weighted source
// pixels listed in z.horizontal.contribs[s.i:s.j] (read via $srcf), scales
// the sums by s.invTotalWeightFFFF and appends the result to tmp.
codeKernelLeafX = `
func (z *kernelScaler) scaleX_$sTypeRN(tmp [][4]float64, src $sType, sr image.Rectangle) {
t := 0
for y := int32(0); y < z.sh; y++ {
for _, s := range z.horizontal.sources {
var pr, pg, pb, pa float64
for _, c := range z.horizontal.contribs[s.i:s.j] {
p += $srcf[c.coord, y] * c.weight
}
tmp[t] = [4]float64{
pr * s.invTotalWeightFFFF,
pg * s.invTotalWeightFFFF,
pb * s.invTotalWeightFFFF,
pa * s.invTotalWeightFFFF,
}
t++
}
}
}
`
// codeKernelLeafY is the template for the scaleY_<dst> methods; genKernel
// expands it once per destination type in dTypes. For each affected column
// dx and each entry of z.vertical.sources[adr.Min.Y:adr.Max.Y], the
// generated code sums the weighted tmp pixels listed in
// z.vertical.contribs[s.i:s.j] and stores the sum, scaled by
// s.invTotalWeight, via $outputf. $tweakDy rewrites the loop header so that
// dy is not declared for *image.RGBA destinations.
codeKernelLeafY = `
func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dr, adr image.Rectangle, tmp [][4]float64) {
$preOuter
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
$preKernelInner
$tweakDy for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx]
pr += p[0] * c.weight
pg += p[1] * c.weight
pb += p[2] * c.weight
pa += p[3] * c.weight
}
$outputf[dx, adr.Min.Y+dy, p, s.invTotalWeight]
}
}
}
`
)