2f47ec36fb
benchmark old ns/op new ns/op delta BenchmarkScaleSrcGray 9296680 552705 -94.05% BenchmarkTformABSrcGray 6323894 817986 -87.07% BenchmarkTformCRSrcGray 39229583 4193194 -89.31% Change-Id: Ie7d43dfe323d49b245b47c3206b5aad2b50cb7fb Reviewed-on: https://go-review.googlesource.com/7711 Reviewed-by: Rob Pike <r@golang.org>
918 lines
24 KiB
Go
918 lines
24 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build ignore
|
|
|
|
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"go/format"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
)
|
|
|
|
var debug = flag.Bool("debug", false, "")
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
|
|
w := new(bytes.Buffer)
|
|
w.WriteString("// generated by \"go run gen.go\". DO NOT EDIT.\n\n" +
|
|
"package draw\n\nimport (\n" +
|
|
"\"image\"\n" +
|
|
"\"image/color\"\n" +
|
|
"\"math\"\n" +
|
|
"\n" +
|
|
"\"golang.org/x/image/math/f64\"\n" +
|
|
")\n")
|
|
|
|
gen(w, "nnInterpolator", codeNNScaleLeaf, codeNNTransformLeaf)
|
|
gen(w, "ablInterpolator", codeABLScaleLeaf, codeABLTransformLeaf)
|
|
genKernel(w)
|
|
|
|
if *debug {
|
|
os.Stdout.Write(w.Bytes())
|
|
return
|
|
}
|
|
out, err := format.Source(w.Bytes())
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
if err := ioutil.WriteFile("impl.go", out, 0660); err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
// dsTypes lists every (destination image type, source image type)
// combination that gets its own generated scale_DType_SType and
// transform_DType_SType implementation. The fallback combination
// ("Image", "image.Image") should remain the last element of the slice.
//
// TODO: add *image.CMYK src type after Go 1.5 is released.
var dsTypes = []struct{ dType, sType string }{
	{"*image.RGBA", "*image.Gray"},
	{"*image.RGBA", "*image.NRGBA"},
	{"*image.RGBA", "*image.RGBA"},
	{"*image.RGBA", "*image.Uniform"},
	{"*image.RGBA", "*image.YCbCr"},
	{"*image.RGBA", "image.Image"},
	{"Image", "image.Image"},
}

// dTypes and sTypes hold the distinct destination and source types that
// occur in dsTypes, in order of first appearance. They start out nil and are
// filled in by init.
var (
	dTypes []string
	sTypes []string
)

// sTypesForDType maps a destination type to the source types it is paired
// with in dsTypes. It starts out empty; init populates it and also adds the
// pseudo destination type "anyDType", which maps to all of sTypes.
var sTypesForDType = make(map[string][]string)
|
|
|
|
func init() {
|
|
dTypesSeen := map[string]bool{}
|
|
sTypesSeen := map[string]bool{}
|
|
for _, t := range dsTypes {
|
|
if !sTypesSeen[t.sType] {
|
|
sTypesSeen[t.sType] = true
|
|
sTypes = append(sTypes, t.sType)
|
|
}
|
|
if !dTypesSeen[t.dType] {
|
|
dTypesSeen[t.dType] = true
|
|
dTypes = append(dTypes, t.dType)
|
|
}
|
|
sTypesForDType[t.dType] = append(sTypesForDType[t.dType], t.sType)
|
|
}
|
|
sTypesForDType["anyDType"] = sTypes
|
|
}
|
|
|
|
// data carries the values substituted for the $dType, $sType and $receiver
// template variables, and consulted by the expansions that depend on them.
// Fields that a given template does not use may be left empty.
type data struct {
	dType, sType string // destination and source image types, e.g. "*image.RGBA"
	receiver     string // name of the type the generated methods are declared on
}
|
|
|
|
func gen(w *bytes.Buffer, receiver string, codes ...string) {
|
|
expn(w, codeRoot, &data{receiver: receiver})
|
|
for _, code := range codes {
|
|
for _, t := range dsTypes {
|
|
expn(w, code, &data{
|
|
dType: t.dType,
|
|
sType: t.sType,
|
|
receiver: receiver,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
func genKernel(w *bytes.Buffer) {
|
|
expn(w, codeKernelRoot, &data{})
|
|
for _, sType := range sTypes {
|
|
expn(w, codeKernelScaleLeafX, &data{
|
|
sType: sType,
|
|
})
|
|
}
|
|
for _, dType := range dTypes {
|
|
expn(w, codeKernelScaleLeafY, &data{
|
|
dType: dType,
|
|
})
|
|
}
|
|
for _, t := range dsTypes {
|
|
expn(w, codeKernelTransformLeaf, &data{
|
|
dType: t.dType,
|
|
sType: t.sType,
|
|
})
|
|
}
|
|
}
|
|
|
|
func expn(w *bytes.Buffer, code string, d *data) {
|
|
for _, line := range strings.Split(code, "\n") {
|
|
line = expnLine(line, d)
|
|
if line == ";" {
|
|
continue
|
|
}
|
|
fmt.Fprintln(w, line)
|
|
}
|
|
}
|
|
|
|
func expnLine(line string, d *data) string {
|
|
for {
|
|
i := strings.IndexByte(line, '$')
|
|
if i < 0 {
|
|
break
|
|
}
|
|
prefix, s := line[:i], line[i+1:]
|
|
|
|
i = len(s)
|
|
for j, c := range s {
|
|
if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
|
|
i = j
|
|
break
|
|
}
|
|
}
|
|
dollar, suffix := s[:i], s[i:]
|
|
|
|
e := expnDollar(prefix, dollar, suffix, d)
|
|
if e == "" {
|
|
log.Fatalf("couldn't expand %q", line)
|
|
}
|
|
line = e
|
|
}
|
|
return line
|
|
}
|
|
|
|
// expnDollar expands a "$foo" fragment in a line of generated code. It returns
// the empty string if there was a problem. It returns ";" if the generated
// code is a no-op.
//
// prefix is the text of the line before the '$', dollar is the variable name
// (without the '$') and suffix is the rest of the line. d supplies the types
// and receiver that the expansion is specialized for. Several expansions take
// a bracketed, comma-separated argument list at the start of suffix, written
// "$name[a, b, c]" in the templates. Note that many expansions replace the
// whole line, discarding prefix and/or the remainder of suffix.
func expnDollar(prefix, dollar, suffix string, d *data) string {
	switch dollar {
	// Plain substitutions. The "RN" variants use the type's relative name,
	// i.e. the part after the last '.'.
	case "dType":
		return prefix + d.dType + suffix
	case "dTypeRN":
		return prefix + relName(d.dType) + suffix
	case "sType":
		return prefix + d.sType + suffix
	case "sTypeRN":
		return prefix + relName(d.sType) + suffix
	case "receiver":
		return prefix + d.receiver + suffix

	// Type switches wrapped around the rest of the line: over dst and then
	// src, over dst only, or over src only.
	case "switch":
		return expnSwitch("", true, suffix)
	case "switchD":
		return expnSwitch("", false, suffix)
	case "switchS":
		return expnSwitch("anyDType", false, suffix)

	// Declarations emitted once, ahead of the pixel loops. Only the generic
	// Image destination needs them: a reusable color value for dst.Set.
	case "preOuter":
		switch d.dType {
		default:
			return ";"
		case "Image":
			return "" +
				"dstColorRGBA64 := &color.RGBA64{}\n" +
				"dstColor := color.Color(dstColorRGBA64)"
		}

	// For destinations written through dst.Pix, compute the offset d of the
	// first affected pixel of row dy.
	case "preInner":
		switch d.dType {
		default:
			return ";"
		case "*image.Gray", "*image.RGBA":
			return "d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))"
		}

	// Like preInner, but for the kernel scaleY template: the offset d of the
	// first affected pixel of column dx.
	case "preKernelInner":
		switch d.dType {
		default:
			return ";"
		case "*image.Gray", "*image.RGBA":
			return "d := dst.PixOffset(dr.Min.X+int(dx), dr.Min.Y+adr.Min.Y)"
		}

	// $blend[w0, p0, w1, p1] emits, per channel c, "p1c = w0*p0c + w1*p1c".
	// A Gray source only carries the r channel.
	case "blend":
		args, _ := splitArgs(suffix)
		if len(args) != 4 {
			return ""
		}
		switch d.sType {
		default:
			return fmt.Sprintf(""+
				"%sr = %s*%sr + %s*%sr\n"+
				"%sg = %s*%sg + %s*%sg\n"+
				"%sb = %s*%sb + %s*%sb\n"+
				"%sa = %s*%sa + %s*%sa",
				args[3], args[0], args[1], args[2], args[3],
				args[3], args[0], args[1], args[2], args[3],
				args[3], args[0], args[1], args[2], args[3],
				args[3], args[0], args[1], args[2], args[3],
			)
		case "*image.Gray":
			return fmt.Sprintf(""+
				"%sr = %s*%sr + %s*%sr",
				args[3], args[0], args[1], args[2], args[3],
			)
		}

	// $outputu[x, y, p] stores the color held in the variables pr, pg, pb, pa
	// into dst. The Image destination goes through dst.Set(x, y, ...); the
	// *image.RGBA destination writes dst.Pix at offset d and ignores x and y.
	// For a Gray source, r is replicated to g and b and alpha is opaque.
	case "outputu":
		args, _ := splitArgs(suffix)
		if len(args) != 3 {
			return ""
		}
		switch d.dType {
		default:
			log.Fatalf("bad dType %q", d.dType)
		case "Image":
			switch d.sType {
			default:
				return fmt.Sprintf(""+
					"dstColorRGBA64.R = uint16(%sr)\n"+
					"dstColorRGBA64.G = uint16(%sg)\n"+
					"dstColorRGBA64.B = uint16(%sb)\n"+
					"dstColorRGBA64.A = uint16(%sa)\n"+
					"dst.Set(%s, %s, dstColor)",
					args[2], args[2], args[2], args[2],
					args[0], args[1],
				)
			case "*image.Gray":
				return fmt.Sprintf(""+
					"out := uint16(%sr)\n"+
					"dstColorRGBA64.R = out\n"+
					"dstColorRGBA64.G = out\n"+
					"dstColorRGBA64.B = out\n"+
					"dstColorRGBA64.A = 0xffff\n"+
					"dst.Set(%s, %s, dstColor)",
					args[2],
					args[0], args[1],
				)
			}
		case "*image.RGBA":
			switch d.sType {
			default:
				return fmt.Sprintf(""+
					"dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
					"dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
					"dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
					"dst.Pix[d+3] = uint8(uint32(%sa) >> 8)",
					args[2], args[2], args[2], args[2],
				)
			case "*image.Gray":
				return fmt.Sprintf(""+
					"out := uint8(uint32(%sr) >> 8)\n"+
					"dst.Pix[d+0] = out\n"+
					"dst.Pix[d+1] = out\n"+
					"dst.Pix[d+2] = out\n"+
					"dst.Pix[d+3] = 0xff",
					args[2],
				)
			}
		}

	// $outputf[x, y, conv, p, scale] is the float counterpart of $outputu:
	// each channel is stored as conv(pc * scale). A scale argument of 1 is
	// dropped from the emitted code by the final strings.Replace.
	case "outputf":
		args, _ := splitArgs(suffix)
		if len(args) != 5 {
			return ""
		}
		ret := ""
		switch d.dType {
		default:
			log.Fatalf("bad dType %q", d.dType)
		case "Image":
			switch d.sType {
			default:
				ret = fmt.Sprintf(""+
					"dstColorRGBA64.R = %s(%sr * %s)\n"+
					"dstColorRGBA64.G = %s(%sg * %s)\n"+
					"dstColorRGBA64.B = %s(%sb * %s)\n"+
					"dstColorRGBA64.A = %s(%sa * %s)\n"+
					"dst.Set(%s, %s, dstColor)",
					args[2], args[3], args[4],
					args[2], args[3], args[4],
					args[2], args[3], args[4],
					args[2], args[3], args[4],
					args[0], args[1],
				)
			case "*image.Gray":
				ret = fmt.Sprintf(""+
					"out := %s(%sr * %s)\n"+
					"dstColorRGBA64.R = out\n"+
					"dstColorRGBA64.G = out\n"+
					"dstColorRGBA64.B = out\n"+
					"dstColorRGBA64.A = 0xffff\n"+
					"dst.Set(%s, %s, dstColor)",
					args[2], args[3], args[4],
					args[0], args[1],
				)
			}
		case "*image.RGBA":
			switch d.sType {
			default:
				ret = fmt.Sprintf(""+
					"dst.Pix[d+0] = uint8(%s(%sr * %s) >> 8)\n"+
					"dst.Pix[d+1] = uint8(%s(%sg * %s) >> 8)\n"+
					"dst.Pix[d+2] = uint8(%s(%sb * %s) >> 8)\n"+
					"dst.Pix[d+3] = uint8(%s(%sa * %s) >> 8)",
					args[2], args[3], args[4],
					args[2], args[3], args[4],
					args[2], args[3], args[4],
					args[2], args[3], args[4],
				)
			case "*image.Gray":
				ret = fmt.Sprintf(""+
					"out := uint8(%s(%sr * %s) >> 8)\n"+
					"dst.Pix[d+0] = out\n"+
					"dst.Pix[d+1] = out\n"+
					"dst.Pix[d+2] = out\n"+
					"dst.Pix[d+3] = 0xff",
					args[2], args[3], args[4],
				)
			}
		}
		return strings.Replace(ret, " * 1)", ")", -1)

	// "lhs := $srcu[x, y]" loads the source pixel at (x, y) into the uint32
	// variables lhsr, lhsg, lhsb, lhsa (only lhsr for a Gray source).
	// "lhs := $srcf[x, y]" or "lhs += $srcf[x, y] extra" loads the same
	// values into temporaries with a "u" suffix and then assigns or adds
	// float64(temporary), followed by the extra text after the ']', to the
	// lhs variables.
	case "srcf", "srcu":
		lhs, eqOp := splitEq(prefix)
		if lhs == "" {
			return ""
		}
		args, extra := splitArgs(suffix)
		if len(args) != 2 {
			return ""
		}

		// tmp is the suffix of the intermediate uint32 variables; it is only
		// needed when a float conversion follows.
		tmp := ""
		if dollar == "srcf" {
			tmp = "u"
		}

		buf := new(bytes.Buffer)
		switch d.sType {
		default:
			log.Fatalf("bad sType %q", d.sType)
		case "image.Image", "*image.NRGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
			fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
				"src.At(%s, %s).RGBA()\n",
				lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp,
				args[0], args[1],
			)
		case "*image.Gray":
			// TODO: there's no need to multiply by 0x101 if the next thing
			// we're going to do is shift right by 8.
			fmt.Fprintf(buf, "%si := src.PixOffset(%s, %s)\n"+
				"%sr%s := uint32(src.Pix[%si]) * 0x101\n",
				lhs, args[0], args[1],
				lhs, tmp, lhs,
			)
		case "*image.RGBA":
			// TODO: there's no need to multiply by 0x101 if the next thing
			// we're going to do is shift right by 8.
			fmt.Fprintf(buf, "%si := src.PixOffset(%s, %s)\n"+
				"%sr%s := uint32(src.Pix[%si+0]) * 0x101\n"+
				"%sg%s := uint32(src.Pix[%si+1]) * 0x101\n"+
				"%sb%s := uint32(src.Pix[%si+2]) * 0x101\n"+
				"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n",
				lhs, args[0], args[1],
				lhs, tmp, lhs,
				lhs, tmp, lhs,
				lhs, tmp, lhs,
				lhs, tmp, lhs,
			)
		}

		if dollar == "srcf" {
			switch d.sType {
			default:
				fmt.Fprintf(buf, ""+
					"%sr %s float64(%sru)%s\n"+
					"%sg %s float64(%sgu)%s\n"+
					"%sb %s float64(%sbu)%s\n"+
					"%sa %s float64(%sau)%s\n",
					lhs, eqOp, lhs, extra,
					lhs, eqOp, lhs, extra,
					lhs, eqOp, lhs, extra,
					lhs, eqOp, lhs, extra,
				)
			case "*image.Gray":
				fmt.Fprintf(buf, ""+
					"%sr %s float64(%sru)%s\n",
					lhs, eqOp, lhs, extra,
				)
			}
		}

		return strings.TrimSpace(buf.String())

	// The tweak expansions adjust the template line they prefix (or stand
	// for) when a type-specific fast path needs slightly different code.

	// Advance the Pix offset d by one row for an *image.RGBA destination.
	case "tweakD":
		if d.dType == "*image.RGBA" {
			return "d += dst.Stride"
		}
		return ";"

	// Make the dx loop also advance the Pix offset d by one 4-byte pixel.
	case "tweakDx":
		if d.dType == "*image.RGBA" {
			return strings.Replace(suffix, "dx++", "dx, d = dx+1, d+4", 1)
		}
		return suffix

	// Drop the dy loop variable for an *image.RGBA destination.
	case "tweakDy":
		if d.dType == "*image.RGBA" {
			return strings.Replace(suffix, "for dy, s", "for _, s", 1)
		}
		return suffix

	// For a Gray source, replace a "pc * ..." array element by "pr," for the
	// color channels and by "1," for the alpha channel.
	case "tweakP":
		if d.sType == "*image.Gray" {
			if strings.HasPrefix(strings.TrimSpace(suffix), "pa * ") {
				return "1,"
			}
			return "pr,"
		}
		return suffix

	// For a Gray source, scale pr once up front.
	case "tweakPr":
		if d.sType == "*image.Gray" {
			return "pr *= s.invTotalWeightFFFF"
		}
		return ";"

	// For a Gray source, declare only the pr accumulator.
	case "tweakVarP":
		if d.sType == "*image.Gray" {
			return strings.Replace(suffix, "var pr, pg, pb, pa", "var pr", 1)
		}
		return suffix
	}
	return ""
}
|
|
|
|
func expnSwitch(dType string, expandBoth bool, template string) string {
|
|
switchVar := "dst"
|
|
if dType != "" {
|
|
switchVar = "src"
|
|
}
|
|
lines := []string{fmt.Sprintf("switch %s := %s.(type) {", switchVar, switchVar)}
|
|
|
|
fallback, values := "Image", dTypes
|
|
if dType != "" {
|
|
fallback, values = "image.Image", sTypesForDType[dType]
|
|
}
|
|
for _, v := range values {
|
|
if v == fallback {
|
|
lines = append(lines, "default:")
|
|
} else {
|
|
lines = append(lines, fmt.Sprintf("case %s:", v))
|
|
}
|
|
|
|
if dType != "" {
|
|
lines = append(lines, expnLine(template, &data{dType: dType, sType: v}))
|
|
} else if !expandBoth {
|
|
lines = append(lines, expnLine(template, &data{dType: v}))
|
|
} else {
|
|
lines = append(lines, expnSwitch(v, false, template))
|
|
}
|
|
}
|
|
|
|
lines = append(lines, "}")
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
// split cuts s around the first occurrence of sep and returns the text
// before and after it, each with surrounding white space removed. It returns
// two empty strings if sep does not occur in s.
func split(s, sep string) (string, string) {
	at := strings.Index(s, sep)
	if at < 0 {
		return "", ""
	}
	before := strings.TrimSpace(s[:at])
	after := strings.TrimSpace(s[at+len(sep):])
	return before, after
}
|
|
|
|
func splitEq(s string) (lhs, eqOp string) {
|
|
s = strings.TrimSpace(s)
|
|
if lhs, _ = split(s, ":="); lhs != "" {
|
|
return lhs, ":="
|
|
}
|
|
if lhs, _ = split(s, "+="); lhs != "" {
|
|
return lhs, "+="
|
|
}
|
|
return "", ""
|
|
}
|
|
|
|
// splitArgs parses a bracketed argument list such as "[a, b, c] rest" found
// at the start of s (leading white space is ignored). It returns the
// comma-separated arguments, each trimmed of white space, and the text that
// follows the first ']' unchanged. It returns nil and "" if s does not begin
// with '[' or has no ']'.
func splitArgs(s string) (args []string, extra string) {
	s = strings.TrimSpace(s)
	if !strings.HasPrefix(s, "[") {
		return nil, ""
	}
	closing := strings.IndexByte(s, ']')
	if closing < 0 {
		return nil, ""
	}
	for _, arg := range strings.Split(s[1:closing], ",") {
		args = append(args, strings.TrimSpace(arg))
	}
	return args, s[closing+1:]
}
|
|
|
|
// relName returns the part of s that follows its last '.', or all of s if
// it contains no '.'. For example, "*image.RGBA" yields "RGBA" and "Image"
// yields "Image".
func relName(s string) string {
	// Splitting always yields at least one element, and the final element is
	// exactly the text after the last separator.
	parts := strings.Split(s, ".")
	return parts[len(parts)-1]
}
|
|
|
|
// The constants below are the code templates that expn expands into the
// generated file. Template lines may contain "$name" fragments, optionally
// followed by a bracketed argument list, which expnDollar replaces with
// type-specific code.
const (
	// codeRoot is the template for the exported Scale and Transform methods
	// of $receiver. Both compute the affected destination rectangle, return
	// early if there is nothing to do, and then dispatch to a type-specific
	// leaf method. The generic Image/Image leaf is used when sr is not
	// contained in the source bounds.
	codeRoot = `
		func (z $receiver) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.scale_Image_Image(dst, dr, adr, src, sr)
			} else {
				$switch z.scale_$dTypeRN_$sTypeRN(dst, dr, adr, src, sr)
			}
		}

		func (z $receiver) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
			dr := transformRect(s2d, &sr)
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			d2s := invert(s2d)
			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
			} else {
				$switch z.transform_$dTypeRN_$sTypeRN(dst, dr, adr, &d2s, src, sr)
			}
		}
	`

	// codeNNScaleLeaf is the template for nnInterpolator's type-specific
	// scale methods. Every destination pixel is copied from a single source
	// pixel whose coordinates are computed with integer arithmetic.
	codeNNScaleLeaf = `
		func (nnInterpolator) scale_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
			dw2 := uint64(dr.Dx()) * 2
			dh2 := uint64(dr.Dy()) * 2
			sw := uint64(sr.Dx())
			sh := uint64(sr.Dy())
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				sy := (2*uint64(dy) + 1) * sh / dh2
				$preInner
				$tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
					sx := (2*uint64(dx) + 1) * sw / dw2
					p := $srcu[sr.Min.X + int(sx), sr.Min.Y + int(sy)]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), p]
				}
			}
		}
	`

	// codeNNTransformLeaf is the template for nnInterpolator's type-specific
	// transform methods. Every destination pixel center is mapped through
	// d2s to a single source pixel; destination pixels that map outside sr
	// are skipped.
	codeNNTransformLeaf = `
		func (nnInterpolator) transform_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				$tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
					sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
					if !(image.Point{sx0, sy0}).In(sr) {
						continue
					}
					p := $srcu[sx0, sy0]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), p]
				}
			}
		}
	`

	// codeABLScaleLeaf is the template for ablInterpolator's type-specific
	// scale methods. Every destination pixel is a blend of the four source
	// pixels around the mapped source coordinate, weighted by the fractional
	// parts of that coordinate and clamped at the edges of sr.
	codeABLScaleLeaf = `
		func (ablInterpolator) scale_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, src $sType, sr image.Rectangle) {
			sw := int32(sr.Dx())
			sh := int32(sr.Dy())
			yscale := float64(sh) / float64(dr.Dy())
			xscale := float64(sw) / float64(dr.Dx())
			swMinus1, shMinus1 := sw - 1, sh - 1
			$preOuter

			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				sy := (float64(dy)+0.5)*yscale - 0.5
				// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
				// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
				// sx, below.
				sy0 := int32(sy)
				yFrac0 := sy - float64(sy0)
				yFrac1 := 1 - yFrac0
				sy1 := sy0 + 1
				if sy < 0 {
					sy0, sy1 = 0, 0
					yFrac0, yFrac1 = 0, 1
				} else if sy1 > shMinus1 {
					sy0, sy1 = shMinus1, shMinus1
					yFrac0, yFrac1 = 1, 0
				}
				$preInner

				$tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
					sx := (float64(dx)+0.5)*xscale - 0.5
					sx0 := int32(sx)
					xFrac0 := sx - float64(sx0)
					xFrac1 := 1 - xFrac0
					sx1 := sx0 + 1
					if sx < 0 {
						sx0, sx1 = 0, 0
						xFrac0, xFrac1 = 0, 1
					} else if sx1 > swMinus1 {
						sx0, sx1 = swMinus1, swMinus1
						xFrac0, xFrac1 = 1, 0
					}

					s00 := $srcf[sr.Min.X + int(sx0), sr.Min.Y + int(sy0)]
					s10 := $srcf[sr.Min.X + int(sx1), sr.Min.Y + int(sy0)]
					$blend[xFrac1, s00, xFrac0, s10]
					s01 := $srcf[sr.Min.X + int(sx0), sr.Min.Y + int(sy1)]
					s11 := $srcf[sr.Min.X + int(sx1), sr.Min.Y + int(sy1)]
					$blend[xFrac1, s01, xFrac0, s11]
					$blend[yFrac1, s10, yFrac0, s11]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), s11]
				}
			}
		}
	`

	// codeABLTransformLeaf is the template for ablInterpolator's
	// type-specific transform methods. It maps every destination pixel
	// center through d2s, skips pixels that land outside sr, and blends the
	// four surrounding source pixels as in codeABLScaleLeaf.
	codeABLTransformLeaf = `
		func (ablInterpolator) transform_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				$tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
						continue
					}

					sx -= 0.5
					sxf := math.Floor(sx)
					xFrac0 := sx - sxf
					xFrac1 := 1 - xFrac0
					sx0 := int(sxf)
					sx1 := sx0 + 1
					if sx0 < sr.Min.X {
						sx0, sx1 = sr.Min.X, sr.Min.X
						xFrac0, xFrac1 = 0, 1
					} else if sx1 >= sr.Max.X {
						sx0, sx1 = sr.Max.X-1, sr.Max.X-1
						xFrac0, xFrac1 = 1, 0
					}

					sy -= 0.5
					syf := math.Floor(sy)
					yFrac0 := sy - syf
					yFrac1 := 1 - yFrac0
					sy0 := int(syf)
					sy1 := sy0 + 1
					if sy0 < sr.Min.Y {
						sy0, sy1 = sr.Min.Y, sr.Min.Y
						yFrac0, yFrac1 = 0, 1
					} else if sy1 >= sr.Max.Y {
						sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
						yFrac0, yFrac1 = 1, 0
					}

					s00 := $srcf[sx0, sy0]
					s10 := $srcf[sx1, sy0]
					$blend[xFrac1, s00, xFrac0, s10]
					s01 := $srcf[sx0, sy1]
					s11 := $srcf[sx1, sy1]
					$blend[xFrac1, s01, xFrac0, s11]
					$blend[yFrac1, s10, yFrac0, s11]
					$outputu[dr.Min.X + int(dx), dr.Min.Y + int(dy), s11]
				}
			}
		}
	`

	// codeKernelRoot is the template for the exported entry points of the
	// kernel-based code: (*kernelScaler).Scale, which runs an X pass into a
	// temporary buffer followed by a Y pass into dst, and (*Kernel).Transform,
	// which dispatches to a type-specific transform leaf.
	codeKernelRoot = `
		func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
			if z.dw != int32(dr.Dx()) || z.dh != int32(dr.Dy()) || z.sw != int32(sr.Dx()) || z.sh != int32(sr.Dy()) {
				z.kernel.Scale(dst, dr, src, sr, opts)
				return
			}
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			// Create a temporary buffer:
			// scaleX distributes the source image's columns over the temporary image.
			// scaleY distributes the temporary image's rows over the destination image.
			// TODO: is it worth having a sync.Pool for this temporary buffer?
			tmp := make([][4]float64, z.dw*z.sh)

			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				z.scaleX_Image(tmp, src, sr)
			} else {
				$switchS z.scaleX_$sTypeRN(tmp, src, sr)
			}

			$switchD z.scaleY_$dTypeRN(dst, dr, adr, tmp)
		}

		func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
			dr := transformRect(s2d, &sr)
			// adr is the affected destination pixels, relative to dr.Min.
			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
			if adr.Empty() || sr.Empty() {
				return
			}
			d2s := invert(s2d)

			xscale := abs(d2s[0])
			if s := abs(d2s[1]); xscale < s {
				xscale = s
			}
			yscale := abs(d2s[3])
			if s := abs(d2s[4]); yscale < s {
				yscale = s
			}

			// sr is the source pixels. If it extends beyond the src bounds,
			// we cannot use the type-specific fast paths, as they access
			// the Pix fields directly without bounds checking.
			if !sr.In(src.Bounds()) {
				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
			} else {
				$switch q.transform_$dTypeRN_$sTypeRN(dst, dr, adr, &d2s, src, sr, xscale, yscale)
			}
		}
	`

	// codeKernelScaleLeafX is the template for the kernel scaler's X pass,
	// specialized per source type. For every source row it accumulates the
	// weighted source pixels of each horizontal contribution range and
	// stores the scaled sums in tmp.
	codeKernelScaleLeafX = `
		func (z *kernelScaler) scaleX_$sTypeRN(tmp [][4]float64, src $sType, sr image.Rectangle) {
			t := 0
			for y := int32(0); y < z.sh; y++ {
				for _, s := range z.horizontal.sources {
					$tweakVarP var pr, pg, pb, pa float64
					for _, c := range z.horizontal.contribs[s.i:s.j] {
						p += $srcf[sr.Min.X + int(c.coord), sr.Min.Y + int(y)] * c.weight
					}
					$tweakPr
					tmp[t] = [4]float64{
						$tweakP pr * s.invTotalWeightFFFF,
						$tweakP pg * s.invTotalWeightFFFF,
						$tweakP pb * s.invTotalWeightFFFF,
						$tweakP pa * s.invTotalWeightFFFF,
					}
					t++
				}
			}
		}
	`

	// codeKernelScaleLeafY is the template for the kernel scaler's Y pass,
	// specialized per destination type. For every affected destination
	// column it accumulates the weighted tmp entries of each vertical
	// contribution range and writes the result to dst.
	codeKernelScaleLeafY = `
		func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dr, adr image.Rectangle, tmp [][4]float64) {
			$preOuter
			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
				$preKernelInner
				$tweakDy for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
					var pr, pg, pb, pa float64
					for _, c := range z.vertical.contribs[s.i:s.j] {
						p := &tmp[c.coord*z.dw+dx]
						pr += p[0] * c.weight
						pg += p[1] * c.weight
						pb += p[2] * c.weight
						pa += p[3] * c.weight
					}
					$outputf[dr.Min.X + int(dx), dr.Min.Y + int(adr.Min.Y + dy), ftou, p, s.invTotalWeight]
					$tweakD
				}
			}
		}
	`

	// codeKernelTransformLeaf is the template for Kernel's type-specific
	// transform methods. For every destination pixel that maps inside sr it
	// computes normalized kernel weights over the covered source columns and
	// rows, clipped to sr, and writes the weighted sum of those source
	// pixels to dst.
	codeKernelTransformLeaf = `
		func (q *Kernel) transform_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) {
			// When shrinking, broaden the effective kernel support so that we still
			// visit every source pixel.
			xHalfWidth, xKernelArgScale := q.Support, 1.0
			if xscale > 1 {
				xHalfWidth *= xscale
				xKernelArgScale = 1 / xscale
			}
			yHalfWidth, yKernelArgScale := q.Support, 1.0
			if yscale > 1 {
				yHalfWidth *= yscale
				yKernelArgScale = 1 / yscale
			}

			xWeights := make([]float64, 1 + 2*int(math.Ceil(xHalfWidth)))
			yWeights := make([]float64, 1 + 2*int(math.Ceil(yHalfWidth)))

			$preOuter
			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
				dyf := float64(dr.Min.Y + int(dy)) + 0.5
				$preInner
				$tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
					dxf := float64(dr.Min.X + int(dx)) + 0.5
					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
						continue
					}

					sx -= 0.5
					ix := int(math.Floor(sx - xHalfWidth))
					if ix < sr.Min.X {
						ix = sr.Min.X
					}
					jx := int(math.Ceil(sx + xHalfWidth))
					if jx > sr.Max.X {
						jx = sr.Max.X
					}

					totalXWeight := 0.0
					for kx := ix; kx < jx; kx++ {
						xWeight := 0.0
						if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
							xWeight = q.At(t)
						}
						xWeights[kx - ix] = xWeight
						totalXWeight += xWeight
					}
					for x := range xWeights[:jx-ix] {
						xWeights[x] /= totalXWeight
					}

					sy -= 0.5
					iy := int(math.Floor(sy - yHalfWidth))
					if iy < sr.Min.Y {
						iy = sr.Min.Y
					}
					jy := int(math.Ceil(sy + yHalfWidth))
					if jy > sr.Max.Y {
						jy = sr.Max.Y
					}

					totalYWeight := 0.0
					for ky := iy; ky < jy; ky++ {
						yWeight := 0.0
						if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
							yWeight = q.At(t)
						}
						yWeights[ky - iy] = yWeight
						totalYWeight += yWeight
					}
					for y := range yWeights[:jy-iy] {
						yWeights[y] /= totalYWeight
					}

					$tweakVarP var pr, pg, pb, pa float64
					for ky := iy; ky < jy; ky++ {
						yWeight := yWeights[ky - iy]
						for kx := ix; kx < jx; kx++ {
							p += $srcf[kx, ky] * xWeights[kx - ix] * yWeight
						}
					}
					$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
				}
			}
		}
	`
)
|