draw: add an RGBA dst fast path for scaling.

benchmark                     old ns/op      new ns/op      delta
BenchmarkScaleLargeDownNN     6124873        3348203        -45.33%
BenchmarkScaleLargeDownAB     15608417       12626534       -19.10%
BenchmarkScaleLargeDownBL     1503354937     1482605150     -1.38%
BenchmarkScaleLargeDownCR     2987623786     2937846270     -1.67%
BenchmarkScaleDownNN          1793478        935896         -47.82%
BenchmarkScaleDownAB          4277596        3405613        -20.38%
BenchmarkScaleDownBL          29932226       29268085       -2.22%
BenchmarkScaleDownCR          57563042       57322266       -0.42%
BenchmarkScaleUpNN            89694138       46216098       -48.47%
BenchmarkScaleUpAB            212318283      169267373      -20.28%
BenchmarkScaleUpBL            120899444      80215032       -33.65%
BenchmarkScaleUpCR            181116518      140140247      -22.62%
BenchmarkScaleSrcNRGBA        13229017       10620746       -19.72%
BenchmarkScaleSrcRGBA         12993292       10155919       -21.84%
BenchmarkScaleSrcUniform      3964808        1146947        -71.07%
BenchmarkScaleSrcYCbCr        15871184       12779895       -19.48%

Change-Id: I7d92bd9f4c20692c5a52ea31019fe3852e657535
Reviewed-on: https://go-review.googlesource.com/6230
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
Nigel Tao 2015-02-27 16:18:34 +11:00
parent db892dd957
commit 2a40e8dacf
2 changed files with 128 additions and 89 deletions

View File

@ -111,7 +111,11 @@ func genKernel(w *bytes.Buffer) {
func expn(w *bytes.Buffer, code string, d *data) { func expn(w *bytes.Buffer, code string, d *data) {
for _, line := range strings.Split(code, "\n") { for _, line := range strings.Split(code, "\n") {
fmt.Fprintln(w, expnLine(line, d)) line = expnLine(line, d)
if line == ";" {
continue
}
fmt.Fprintln(w, line)
} }
} }
@ -161,12 +165,31 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
case "switchS": case "switchS":
return expnSwitch("anyDType", false, suffix) return expnSwitch("anyDType", false, suffix)
case "dstColorDecl": case "preOuter":
if d.dType == "Image" || d.dType == "*image.RGBA" { // TODO: separate code for concrete types. switch d.dType {
return "dstColorRGBA64 := &color.RGBA64{}\n" + default:
return ";"
case "Image":
return "" +
"dstColorRGBA64 := &color.RGBA64{}\n" +
"dstColor := color.Color(dstColorRGBA64)" "dstColor := color.Color(dstColorRGBA64)"
} }
return ";"
case "preInner":
switch d.dType {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))"
}
case "preKernelInner":
switch d.dType {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y)"
}
case "blend": case "blend":
args, _ := splitArgs(suffix) args, _ := splitArgs(suffix)
@ -192,7 +215,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
switch d.dType { switch d.dType {
default: default:
log.Fatalf("bad dType %q", d.dType) log.Fatalf("bad dType %q", d.dType)
case "Image", "*image.RGBA": // TODO: separate code for concrete types. case "Image":
return fmt.Sprintf(""+ return fmt.Sprintf(""+
"dstColorRGBA64.R = uint16(%sr)\n"+ "dstColorRGBA64.R = uint16(%sr)\n"+
"dstColorRGBA64.G = uint16(%sg)\n"+ "dstColorRGBA64.G = uint16(%sg)\n"+
@ -202,6 +225,15 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
args[2], args[2], args[2], args[2], args[2], args[2], args[2], args[2],
args[0], args[1], args[0], args[1],
) )
case "*image.RGBA":
return fmt.Sprintf(""+
"dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
"dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
"dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
"dst.Pix[d+3] = uint8(uint32(%sa) >> 8)\n"+
"d += 4",
args[2], args[2], args[2], args[2],
)
} }
case "outputf": case "outputf":
@ -212,7 +244,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
switch d.dType { switch d.dType {
default: default:
log.Fatalf("bad dType %q", d.dType) log.Fatalf("bad dType %q", d.dType)
case "Image", "*image.RGBA": // TODO: separate code for concrete types. case "Image":
return fmt.Sprintf(""+ return fmt.Sprintf(""+
"dstColorRGBA64.R = ftou(%sr * %s)\n"+ "dstColorRGBA64.R = ftou(%sr * %s)\n"+
"dstColorRGBA64.G = ftou(%sg * %s)\n"+ "dstColorRGBA64.G = ftou(%sg * %s)\n"+
@ -222,6 +254,15 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
args[0], args[1], args[0], args[1],
) )
case "*image.RGBA":
return fmt.Sprintf(""+
"dst.Pix[d+0] = uint8(ftou(%sr * %s) >> 8)\n"+
"dst.Pix[d+1] = uint8(ftou(%sg * %s) >> 8)\n"+
"dst.Pix[d+2] = uint8(ftou(%sb * %s) >> 8)\n"+
"dst.Pix[d+3] = uint8(ftou(%sa * %s) >> 8)\n"+
"d += dst.Stride",
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
)
} }
case "srcf", "srcu": case "srcf", "srcu":
@ -263,6 +304,12 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
} }
return strings.TrimSpace(buf.String()) return strings.TrimSpace(buf.String())
case "tweakDy":
if d.dType == "*image.RGBA" {
return strings.Replace(suffix, "for dy, s", "for _, s", 1)
}
return suffix
} }
return "" return ""
} }
@ -358,9 +405,10 @@ const (
codeNNLeaf = ` codeNNLeaf = `
func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) { func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
$dstColorDecl $preOuter
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
$preInner
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
p := $srcu[sx, sy] p := $srcu[sx, sy]
@ -374,7 +422,7 @@ const (
func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) { func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
$dstColorDecl $preOuter
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -388,6 +436,7 @@ const (
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
$preInner
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -457,9 +506,10 @@ const (
codeKernelLeafY = ` codeKernelLeafY = `
func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, dr image.Rectangle, tmp [][4]float64) { func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, dr image.Rectangle, tmp [][4]float64) {
$dstColorDecl $preOuter
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { $preKernelInner
$tweakDy for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
var pr, pg, pb, pa float64 var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] { for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx] p := &tmp[c.coord*z.dw+dx]

View File

@ -39,86 +39,81 @@ func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Po
} }
func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) { func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
dstColorRGBA64.R = uint16(pr) dst.Pix[d+0] = uint8(uint32(pr) >> 8)
dstColorRGBA64.G = uint16(pg) dst.Pix[d+1] = uint8(uint32(pg) >> 8)
dstColorRGBA64.B = uint16(pb) dst.Pix[d+2] = uint8(uint32(pb) >> 8)
dstColorRGBA64.A = uint16(pa) dst.Pix[d+3] = uint8(uint32(pa) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) { func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
dstColorRGBA64.R = uint16(pr) dst.Pix[d+0] = uint8(uint32(pr) >> 8)
dstColorRGBA64.G = uint16(pg) dst.Pix[d+1] = uint8(uint32(pg) >> 8)
dstColorRGBA64.B = uint16(pb) dst.Pix[d+2] = uint8(uint32(pb) >> 8)
dstColorRGBA64.A = uint16(pa) dst.Pix[d+3] = uint8(uint32(pa) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
func (z *nnScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) { func (z *nnScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
dstColorRGBA64.R = uint16(pr) dst.Pix[d+0] = uint8(uint32(pr) >> 8)
dstColorRGBA64.G = uint16(pg) dst.Pix[d+1] = uint8(uint32(pg) >> 8)
dstColorRGBA64.B = uint16(pb) dst.Pix[d+2] = uint8(uint32(pb) >> 8)
dstColorRGBA64.A = uint16(pa) dst.Pix[d+3] = uint8(uint32(pa) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
func (z *nnScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) { func (z *nnScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
dstColorRGBA64.R = uint16(pr) dst.Pix[d+0] = uint8(uint32(pr) >> 8)
dstColorRGBA64.G = uint16(pg) dst.Pix[d+1] = uint8(uint32(pg) >> 8)
dstColorRGBA64.B = uint16(pb) dst.Pix[d+2] = uint8(uint32(pb) >> 8)
dstColorRGBA64.A = uint16(pa) dst.Pix[d+3] = uint8(uint32(pa) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
func (z *nnScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) { func (z *nnScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
dstColorRGBA64.R = uint16(pr) dst.Pix[d+0] = uint8(uint32(pr) >> 8)
dstColorRGBA64.G = uint16(pg) dst.Pix[d+1] = uint8(uint32(pg) >> 8)
dstColorRGBA64.B = uint16(pb) dst.Pix[d+2] = uint8(uint32(pb) >> 8)
dstColorRGBA64.A = uint16(pa) dst.Pix[d+3] = uint8(uint32(pa) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -174,8 +169,6 @@ func (z *ablScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.P
func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) { func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -189,6 +182,7 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -234,11 +228,11 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R
s11g = yFrac1*s10g + yFrac0*s11g s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a s11a = yFrac1*s10a + yFrac0*s11a
dstColorRGBA64.R = uint16(s11r) dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
dstColorRGBA64.G = uint16(s11g) dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
dstColorRGBA64.B = uint16(s11b) dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
dstColorRGBA64.A = uint16(s11a) dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -246,8 +240,6 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R
func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) { func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -261,6 +253,7 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -306,11 +299,11 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re
s11g = yFrac1*s10g + yFrac0*s11g s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a s11a = yFrac1*s10a + yFrac0*s11a
dstColorRGBA64.R = uint16(s11r) dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
dstColorRGBA64.G = uint16(s11g) dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
dstColorRGBA64.B = uint16(s11b) dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
dstColorRGBA64.A = uint16(s11a) dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -318,8 +311,6 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re
func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) { func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -333,6 +324,7 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -378,11 +370,11 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image
s11g = yFrac1*s10g + yFrac0*s11g s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a s11a = yFrac1*s10a + yFrac0*s11a
dstColorRGBA64.R = uint16(s11r) dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
dstColorRGBA64.G = uint16(s11g) dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
dstColorRGBA64.B = uint16(s11b) dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
dstColorRGBA64.A = uint16(s11a) dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -390,8 +382,6 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image
func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) { func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -405,6 +395,7 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -450,11 +441,11 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R
s11g = yFrac1*s10g + yFrac0*s11g s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a s11a = yFrac1*s10a + yFrac0*s11a
dstColorRGBA64.R = uint16(s11r) dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
dstColorRGBA64.G = uint16(s11g) dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
dstColorRGBA64.B = uint16(s11b) dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
dstColorRGBA64.A = uint16(s11a) dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -462,8 +453,6 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R
func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) { func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh) yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw) xscale := float64(z.sw) / float64(z.dw)
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5 sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy) sy0 := int32(sy)
@ -477,6 +466,7 @@ func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.R
sy1 = sy0 sy1 = sy0
yFrac0, yFrac1 = 1, 0 yFrac0, yFrac1 = 1, 0
} }
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5 sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx) sx0 := int32(sx)
@ -522,11 +512,11 @@ func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.R
s11g = yFrac1*s10g + yFrac0*s11g s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a s11a = yFrac1*s10a + yFrac0*s11a
dstColorRGBA64.R = uint16(s11r) dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
dstColorRGBA64.G = uint16(s11g) dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
dstColorRGBA64.B = uint16(s11b) dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
dstColorRGBA64.A = uint16(s11a) dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) d += 4
} }
} }
} }
@ -753,10 +743,9 @@ func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sp image.
} }
func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, tmp [][4]float64) { func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, tmp [][4]float64) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y)
for _, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
var pr, pg, pb, pa float64 var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] { for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx] p := &tmp[c.coord*z.dw+dx]
@ -765,11 +754,11 @@ func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rec
pb += p[2] * c.weight pb += p[2] * c.weight
pa += p[3] * c.weight pa += p[3] * c.weight
} }
dstColorRGBA64.R = ftou(pr * s.invTotalWeight) dst.Pix[d+0] = uint8(ftou(pr*s.invTotalWeight) >> 8)
dstColorRGBA64.G = ftou(pg * s.invTotalWeight) dst.Pix[d+1] = uint8(ftou(pg*s.invTotalWeight) >> 8)
dstColorRGBA64.B = ftou(pb * s.invTotalWeight) dst.Pix[d+2] = uint8(ftou(pb*s.invTotalWeight) >> 8)
dstColorRGBA64.A = ftou(pa * s.invTotalWeight) dst.Pix[d+3] = uint8(ftou(pa*s.invTotalWeight) >> 8)
dst.Set(dp.X+int(dx), dp.Y+int(dr.Min.Y+dy), dstColor) d += dst.Stride
} }
} }
} }