golang-image/draw/gen.go
Nigel Tao db892dd957 draw: clip scaling to the dst bounds.
This is necessary for the upcoming RGBA dst fast path. The RGBA.Set slow
path will clip automatically. Accessing RGBA.Pix directly will not.

Benchmarks look like noise to me:
benchmark                     old ns/op      new ns/op      delta
BenchmarkScaleLargeDownNN     6212108        6131166        -1.30%
BenchmarkScaleLargeDownAB     15586042       15656681       +0.45%
BenchmarkScaleLargeDownBL     1518783517     1508124217     -0.70%
BenchmarkScaleLargeDownCR     2998969089     2978114154     -0.70%
BenchmarkScaleDownNN          1821187        1809314        -0.65%
BenchmarkScaleDownAB          4286983        4248974        -0.89%
BenchmarkScaleDownBL          29396818       30181926       +2.67%
BenchmarkScaleDownCR          56441945       57952417       +2.68%
BenchmarkScaleUpNN            90325384       89734496       -0.65%
BenchmarkScaleUpAB            211613922      211625435      +0.01%
BenchmarkScaleUpBL            119730880      120817135      +0.91%
BenchmarkScaleUpCR            178592665      182305702      +2.08%
BenchmarkScaleSrcNRGBA        13271034       13210760       -0.45%
BenchmarkScaleSrcRGBA         13082234       12997551       -0.65%
BenchmarkScaleSrcUniform      4003966        3934184        -1.74%
BenchmarkScaleSrcYCbCr        15939182       15900123       -0.25%

Change-Id: Ibf2843bb3c4eb695b58030e7314053c669533016
Reviewed-on: https://go-review.googlesource.com/6073
Reviewed-by: Rob Pike <r@golang.org>
2015-02-26 22:56:31 +00:00

477 lines
12 KiB
Go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"io/ioutil"
"log"
"os"
"strings"
)
var debug = flag.Bool("debug", false, "")
func main() {
flag.Parse()
w := new(bytes.Buffer)
w.WriteString("// generated by \"go run gen.go\". DO NOT EDIT.\n\n" +
"package draw\n\nimport (\n\"image\"\n\"image/color\"\n)\n")
gen(w, "nnScaler", codeNNLeaf)
gen(w, "ablScaler", codeABLLeaf)
genKernel(w)
if *debug {
os.Stdout.Write(w.Bytes())
return
}
out, err := format.Source(w.Bytes())
if err != nil {
log.Fatal(err)
}
if err := ioutil.WriteFile("impl.go", out, 0660); err != nil {
log.Fatal(err)
}
}
var (
// dsTypes are the (dst image type, src image type) pairs to generate
// scale_DType_SType implementations for. The last element in the slice
// should be the fallback pair ("Image", "image.Image").
//
// TODO: add *image.CMYK src type after Go 1.5 is released.
dsTypes = []struct{ dType, sType string }{
{"*image.RGBA", "*image.NRGBA"},
{"*image.RGBA", "*image.RGBA"},
{"*image.RGBA", "*image.Uniform"},
{"*image.RGBA", "*image.YCbCr"},
{"*image.RGBA", "image.Image"},
{"Image", "image.Image"},
}
dTypes, sTypes []string
sTypesForDType = map[string][]string{}
)
func init() {
dTypesSeen := map[string]bool{}
sTypesSeen := map[string]bool{}
for _, t := range dsTypes {
if !sTypesSeen[t.sType] {
sTypesSeen[t.sType] = true
sTypes = append(sTypes, t.sType)
}
if !dTypesSeen[t.dType] {
dTypesSeen[t.dType] = true
dTypes = append(dTypes, t.dType)
}
sTypesForDType[t.dType] = append(sTypesForDType[t.dType], t.sType)
}
sTypesForDType["anyDType"] = sTypes
}
type data struct {
dType string
sType string
receiver string
}
func gen(w *bytes.Buffer, receiver string, code string) {
expn(w, codeRoot, &data{receiver: receiver})
for _, t := range dsTypes {
expn(w, code, &data{
dType: t.dType,
sType: t.sType,
receiver: receiver,
})
}
}
func genKernel(w *bytes.Buffer) {
expn(w, codeKernelRoot, &data{})
for _, sType := range sTypes {
expn(w, codeKernelLeafX, &data{
sType: sType,
})
}
for _, dType := range dTypes {
expn(w, codeKernelLeafY, &data{
dType: dType,
})
}
}
func expn(w *bytes.Buffer, code string, d *data) {
for _, line := range strings.Split(code, "\n") {
fmt.Fprintln(w, expnLine(line, d))
}
}
func expnLine(line string, d *data) string {
for {
i := strings.IndexByte(line, '$')
if i < 0 {
break
}
prefix, s := line[:i], line[i+1:]
i = len(s)
for j, c := range s {
if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
i = j
break
}
}
dollar, suffix := s[:i], s[i:]
e := expnDollar(prefix, dollar, suffix, d)
if e == "" {
log.Fatalf("couldn't expand %q", line)
}
line = e
}
return line
}
func expnDollar(prefix, dollar, suffix string, d *data) string {
switch dollar {
case "dType":
return prefix + d.dType + suffix
case "dTypeRN":
return prefix + relName(d.dType) + suffix
case "sType":
return prefix + d.sType + suffix
case "sTypeRN":
return prefix + relName(d.sType) + suffix
case "receiver":
return prefix + d.receiver + suffix
case "switch":
return expnSwitch("", true, suffix)
case "switchD":
return expnSwitch("", false, suffix)
case "switchS":
return expnSwitch("anyDType", false, suffix)
case "dstColorDecl":
if d.dType == "Image" || d.dType == "*image.RGBA" { // TODO: separate code for concrete types.
return "dstColorRGBA64 := &color.RGBA64{}\n" +
"dstColor := color.Color(dstColorRGBA64)"
}
return ";"
case "blend":
args, _ := splitArgs(suffix)
if len(args) != 4 {
return ""
}
return fmt.Sprintf(""+
"%sr = %s*%sr + %s*%sr\n"+
"%sg = %s*%sg + %s*%sg\n"+
"%sb = %s*%sb + %s*%sb\n"+
"%sa = %s*%sa + %s*%sa",
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
args[3], args[0], args[1], args[2], args[3],
)
case "outputu":
args, _ := splitArgs(suffix)
if len(args) != 3 {
return ""
}
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
case "Image", "*image.RGBA": // TODO: separate code for concrete types.
return fmt.Sprintf(""+
"dstColorRGBA64.R = uint16(%sr)\n"+
"dstColorRGBA64.G = uint16(%sg)\n"+
"dstColorRGBA64.B = uint16(%sb)\n"+
"dstColorRGBA64.A = uint16(%sa)\n"+
"dst.Set(dp.X+int(%s), dp.Y+int(%s), dstColor)",
args[2], args[2], args[2], args[2],
args[0], args[1],
)
}
case "outputf":
args, _ := splitArgs(suffix)
if len(args) != 4 {
return ""
}
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
case "Image", "*image.RGBA": // TODO: separate code for concrete types.
return fmt.Sprintf(""+
"dstColorRGBA64.R = ftou(%sr * %s)\n"+
"dstColorRGBA64.G = ftou(%sg * %s)\n"+
"dstColorRGBA64.B = ftou(%sb * %s)\n"+
"dstColorRGBA64.A = ftou(%sa * %s)\n"+
"dst.Set(dp.X+int(%s), dp.Y+int(%s), dstColor)",
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
args[0], args[1],
)
}
case "srcf", "srcu":
lhs, eqOp := splitEq(prefix)
if lhs == "" {
return ""
}
args, extra := splitArgs(suffix)
if len(args) != 2 {
return ""
}
tmp := ""
if dollar == "srcf" {
tmp = "u"
}
buf := new(bytes.Buffer)
switch d.sType {
default:
log.Fatalf("bad sType %q", d.sType)
case "image.Image", "*image.NRGBA", "*image.RGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1])
}
if dollar == "srcf" {
fmt.Fprintf(buf, ""+
"%sr %s float64(%sru)%s\n"+
"%sg %s float64(%sgu)%s\n"+
"%sb %s float64(%sbu)%s\n"+
"%sa %s float64(%sau)%s\n",
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
lhs, eqOp, lhs, extra,
)
}
return strings.TrimSpace(buf.String())
}
return ""
}
func expnSwitch(dType string, expandBoth bool, template string) string {
switchVar := "dst"
if dType != "" {
switchVar = "src"
}
lines := []string{fmt.Sprintf("switch %s := %s.(type) {", switchVar, switchVar)}
fallback, values := "Image", dTypes
if dType != "" {
fallback, values = "image.Image", sTypesForDType[dType]
}
for _, v := range values {
if v == fallback {
lines = append(lines, "default:")
} else {
lines = append(lines, fmt.Sprintf("case %s:", v))
}
if dType != "" {
lines = append(lines, expnLine(template, &data{dType: dType, sType: v}))
} else if !expandBoth {
lines = append(lines, expnLine(template, &data{dType: v}))
} else {
lines = append(lines, expnSwitch(v, false, template))
}
}
lines = append(lines, "}")
return strings.Join(lines, "\n")
}
func split(s, sep string) (string, string) {
if i := strings.Index(s, sep); i >= 0 {
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+len(sep):])
}
return "", ""
}
func splitEq(s string) (lhs, eqOp string) {
s = strings.TrimSpace(s)
if lhs, _ = split(s, ":="); lhs != "" {
return lhs, ":="
}
if lhs, _ = split(s, "+="); lhs != "" {
return lhs, "+="
}
return "", ""
}
func splitArgs(s string) (args []string, extra string) {
s = strings.TrimSpace(s)
if s == "" || s[0] != '[' {
return nil, ""
}
s = s[1:]
i := strings.IndexByte(s, ']')
if i < 0 {
return nil, ""
}
args, extra = strings.Split(s[:i], ","), s[i+1:]
for i := range args {
args[i] = strings.TrimSpace(args[i])
}
return args, extra
}
func relName(s string) string {
if i := strings.LastIndex(s, "."); i >= 0 {
return s[i+1:]
}
return s
}
const (
codeRoot = `
func (z *$receiver) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
return
}
// dr is the affected destination pixels, relative to dp.
dr := dst.Bounds().Sub(dp).Intersect(image.Rectangle{Max: image.Point{int(z.dw), int(z.dh)}})
if dr.Empty() {
return
}
$switch z.scale_$dTypeRN_$sTypeRN(dst, dp, dr, src, sp)
}
`
codeNNLeaf = `
func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
$dstColorDecl
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
p := $srcu[sx, sy]
$outputu[dx, dy, p]
}
}
}
`
codeABLLeaf = `
func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
$dstColorDecl
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
yFrac0 := sy - float64(sy0)
yFrac1 := 1 - yFrac0
sy1 := sy0 + 1
if sy < 0 {
sy0, sy1 = 0, 0
yFrac0, yFrac1 = 0, 1
} else if sy1 >= z.sh {
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
xFrac0 := sx - float64(sx0)
xFrac1 := 1 - xFrac0
sx1 := sx0 + 1
if sx < 0 {
sx0, sx1 = 0, 0
xFrac0, xFrac1 = 0, 1
} else if sx1 >= z.sw {
sx1 = sx0
xFrac0, xFrac1 = 1, 0
}
s00 := $srcf[sx0, sy0]
s10 := $srcf[sx1, sy0]
$blend[xFrac1, s00, xFrac0, s10]
s01 := $srcf[sx0, sy1]
s11 := $srcf[sx1, sy1]
$blend[xFrac1, s01, xFrac0, s11]
$blend[yFrac1, s10, yFrac0, s11]
$outputu[dx, dy, s11]
}
}
}
`
codeKernelRoot = `
func (z *kernelScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
return
}
// dr is the affected destination pixels, relative to dp.
dr := dst.Bounds().Sub(dp).Intersect(image.Rectangle{Max: image.Point{int(z.dw), int(z.dh)}})
if dr.Empty() {
return
}
// Create a temporary buffer:
// scaleX distributes the source image's columns over the temporary image.
// scaleY distributes the temporary image's rows over the destination image.
// TODO: is it worth having a sync.Pool for this temporary buffer?
tmp := make([][4]float64, z.dw*z.sh)
$switchS z.scaleX_$sTypeRN(tmp, src, sp)
$switchD z.scaleY_$dTypeRN(dst, dp, dr, tmp)
}
`
codeKernelLeafX = `
func (z *kernelScaler) scaleX_$sTypeRN(tmp [][4]float64, src $sType, sp image.Point) {
t := 0
for y := int32(0); y < z.sh; y++ {
for _, s := range z.horizontal.sources {
var pr, pg, pb, pa float64
for _, c := range z.horizontal.contribs[s.i:s.j] {
p += $srcf[c.coord, y] * c.weight
}
tmp[t] = [4]float64{
pr * s.invTotalWeightFFFF,
pg * s.invTotalWeightFFFF,
pb * s.invTotalWeightFFFF,
pa * s.invTotalWeightFFFF,
}
t++
}
}
}
`
codeKernelLeafY = `
func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, dr image.Rectangle, tmp [][4]float64) {
$dstColorDecl
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx]
pr += p[0] * c.weight
pg += p[1] * c.weight
pb += p[2] * c.weight
pa += p[3] * c.weight
}
$outputf[dx, dr.Min.Y+dy, p, s.invTotalWeight]
}
}
}
`
)