From 2a40e8dacf5ce048a67b89ffe4ed4a0181e8befb Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Fri, 27 Feb 2015 16:18:34 +1100 Subject: [PATCH] draw: RGBA dst fast path for scaling. benchmark old ns/op new ns/op delta BenchmarkScaleLargeDownNN 6124873 3348203 -45.33% BenchmarkScaleLargeDownAB 15608417 12626534 -19.10% BenchmarkScaleLargeDownBL 1503354937 1482605150 -1.38% BenchmarkScaleLargeDownCR 2987623786 2937846270 -1.67% BenchmarkScaleDownNN 1793478 935896 -47.82% BenchmarkScaleDownAB 4277596 3405613 -20.38% BenchmarkScaleDownBL 29932226 29268085 -2.22% BenchmarkScaleDownCR 57563042 57322266 -0.42% BenchmarkScaleUpNN 89694138 46216098 -48.47% BenchmarkScaleUpAB 212318283 169267373 -20.28% BenchmarkScaleUpBL 120899444 80215032 -33.65% BenchmarkScaleUpCR 181116518 140140247 -22.62% BenchmarkScaleSrcNRGBA 13229017 10620746 -19.72% BenchmarkScaleSrcRGBA 12993292 10155919 -21.84% BenchmarkScaleSrcUniform 3964808 1146947 -71.07% BenchmarkScaleSrcYCbCr 15871184 12779895 -19.48% Change-Id: I7d92bd9f4c20692c5a52ea31019fe3852e657535 Reviewed-on: https://go-review.googlesource.com/6230 Reviewed-by: Rob Pike --- draw/gen.go | 72 +++++++++++++++++++++---- draw/impl.go | 145 ++++++++++++++++++++++++--------------------------- 2 files changed, 128 insertions(+), 89 deletions(-) diff --git a/draw/gen.go b/draw/gen.go index 387b879..7b7bfc3 100644 --- a/draw/gen.go +++ b/draw/gen.go @@ -111,7 +111,11 @@ func genKernel(w *bytes.Buffer) { func expn(w *bytes.Buffer, code string, d *data) { for _, line := range strings.Split(code, "\n") { - fmt.Fprintln(w, expnLine(line, d)) + line = expnLine(line, d) + if line == ";" { + continue + } + fmt.Fprintln(w, line) } } @@ -161,12 +165,31 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { case "switchS": return expnSwitch("anyDType", false, suffix) - case "dstColorDecl": - if d.dType == "Image" || d.dType == "*image.RGBA" { // TODO: separate code for concrete types. - return "dstColorRGBA64 := &color.RGBA64{}\n" + + case "preOuter": + switch d.dType { + default: + return ";" + case "Image": + return "" + + "dstColorRGBA64 := &color.RGBA64{}\n" + "dstColor := color.Color(dstColorRGBA64)" } - return ";" + + case "preInner": + switch d.dType { + default: + return ";" + case "*image.RGBA": + return "d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))" + } + + case "preKernelInner": + switch d.dType { + default: + return ";" + case "*image.RGBA": + return "d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y)" + } case "blend": args, _ := splitArgs(suffix) @@ -192,7 +215,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { switch d.dType { default: log.Fatalf("bad dType %q", d.dType) - case "Image", "*image.RGBA": // TODO: separate code for concrete types. + case "Image": return fmt.Sprintf(""+ "dstColorRGBA64.R = uint16(%sr)\n"+ "dstColorRGBA64.G = uint16(%sg)\n"+ @@ -202,6 +225,15 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { args[2], args[2], args[2], args[2], args[0], args[1], ) + case "*image.RGBA": + return fmt.Sprintf(""+ + "dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+ + "dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+ + "dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+ + "dst.Pix[d+3] = uint8(uint32(%sa) >> 8)\n"+ + "d += 4", + args[2], args[2], args[2], args[2], + ) } case "outputf": @@ -212,7 +244,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { switch d.dType { default: log.Fatalf("bad dType %q", d.dType) - case "Image", "*image.RGBA": // TODO: separate code for concrete types. + case "Image": return fmt.Sprintf(""+ "dstColorRGBA64.R = ftou(%sr * %s)\n"+ "dstColorRGBA64.G = ftou(%sg * %s)\n"+ @@ -222,6 +254,15 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], args[0], args[1], ) + case "*image.RGBA": + return fmt.Sprintf(""+ + "dst.Pix[d+0] = uint8(ftou(%sr * %s) >> 8)\n"+ + "dst.Pix[d+1] = uint8(ftou(%sg * %s) >> 8)\n"+ + "dst.Pix[d+2] = uint8(ftou(%sb * %s) >> 8)\n"+ + "dst.Pix[d+3] = uint8(ftou(%sa * %s) >> 8)\n"+ + "d += dst.Stride", + args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], + ) } case "srcf", "srcu": @@ -263,6 +304,12 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { } return strings.TrimSpace(buf.String()) + + case "tweakDy": + if d.dType == "*image.RGBA" { + return strings.Replace(suffix, "for dy, s", "for _, s", 1) + } + return suffix } return "" } @@ -358,9 +405,10 @@ const ( codeNNLeaf = ` func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) { - $dstColorDecl + $preOuter for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + $preInner for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) p := $srcu[sx, sy] @@ -374,7 +422,7 @@ const ( func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - $dstColorDecl + $preOuter for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -388,6 +436,7 @@ const ( sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + $preInner for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -457,9 +506,10 @@ const ( codeKernelLeafY = ` func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, dr image.Rectangle, tmp [][4]float64) { - $dstColorDecl + $preOuter for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { - for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { + $preKernelInner + $tweakDy for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] diff --git a/draw/impl.go b/draw/impl.go index 0cfee91..c284c4c 100644 --- a/draw/impl.go +++ b/draw/impl.go @@ -39,86 +39,81 @@ func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Po } func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() - dstColorRGBA64.R = uint16(pr) - dstColorRGBA64.G = uint16(pg) - dstColorRGBA64.B = uint16(pb) - dstColorRGBA64.A = uint16(pa) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(pr) >> 8) + dst.Pix[d+1] = uint8(uint32(pg) >> 8) + dst.Pix[d+2] = uint8(uint32(pb) >> 8) + dst.Pix[d+3] = uint8(uint32(pa) >> 8) + d += 4 } } } func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() - dstColorRGBA64.R = uint16(pr) - dstColorRGBA64.G = uint16(pg) - dstColorRGBA64.B = uint16(pb) - dstColorRGBA64.A = uint16(pa) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(pr) >> 8) + dst.Pix[d+1] = uint8(uint32(pg) >> 8) + dst.Pix[d+2] = uint8(uint32(pb) >> 8) + dst.Pix[d+3] = uint8(uint32(pa) >> 8) + d += 4 } } } func (z *nnScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() - dstColorRGBA64.R = uint16(pr) - dstColorRGBA64.G = uint16(pg) - dstColorRGBA64.B = uint16(pb) - dstColorRGBA64.A = uint16(pa) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(pr) >> 8) + dst.Pix[d+1] = uint8(uint32(pg) >> 8) + dst.Pix[d+2] = uint8(uint32(pb) >> 8) + dst.Pix[d+3] = uint8(uint32(pa) >> 8) + d += 4 } } } func (z *nnScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() - dstColorRGBA64.R = uint16(pr) - dstColorRGBA64.G = uint16(pg) - dstColorRGBA64.B = uint16(pb) - dstColorRGBA64.A = uint16(pa) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(pr) >> 8) + dst.Pix[d+1] = uint8(uint32(pg) >> 8) + dst.Pix[d+2] = uint8(uint32(pb) >> 8) + dst.Pix[d+3] = uint8(uint32(pa) >> 8) + d += 4 } } } func (z *nnScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() - dstColorRGBA64.R = uint16(pr) - dstColorRGBA64.G = uint16(pg) - dstColorRGBA64.B = uint16(pb) - dstColorRGBA64.A = uint16(pa) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(pr) >> 8) + dst.Pix[d+1] = uint8(uint32(pg) >> 8) + dst.Pix[d+2] = uint8(uint32(pb) >> 8) + dst.Pix[d+3] = uint8(uint32(pa) >> 8) + d += 4 } } } @@ -174,8 +169,6 @@ func (z *ablScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.P func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -189,6 +182,7 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -234,11 +228,11 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R s11g = yFrac1*s10g + yFrac0*s11g s11b = yFrac1*s10b + yFrac0*s11b s11a = yFrac1*s10a + yFrac0*s11a - dstColorRGBA64.R = uint16(s11r) - dstColorRGBA64.G = uint16(s11g) - dstColorRGBA64.B = uint16(s11b) - dstColorRGBA64.A = uint16(s11a) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(s11r) >> 8) + dst.Pix[d+1] = uint8(uint32(s11g) >> 8) + dst.Pix[d+2] = uint8(uint32(s11b) >> 8) + dst.Pix[d+3] = uint8(uint32(s11a) >> 8) + d += 4 } } } @@ -246,8 +240,6 @@ func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.R func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -261,6 +253,7 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -306,11 +299,11 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re s11g = yFrac1*s10g + yFrac0*s11g s11b = yFrac1*s10b + yFrac0*s11b s11a = yFrac1*s10a + yFrac0*s11a - dstColorRGBA64.R = uint16(s11r) - dstColorRGBA64.G = uint16(s11g) - dstColorRGBA64.B = uint16(s11b) - dstColorRGBA64.A = uint16(s11a) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(s11r) >> 8) + dst.Pix[d+1] = uint8(uint32(s11g) >> 8) + dst.Pix[d+2] = uint8(uint32(s11b) >> 8) + dst.Pix[d+3] = uint8(uint32(s11a) >> 8) + d += 4 } } } @@ -318,8 +311,6 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -333,6 +324,7 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -378,11 +370,11 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image s11g = yFrac1*s10g + yFrac0*s11g s11b = yFrac1*s10b + yFrac0*s11b s11a = yFrac1*s10a + yFrac0*s11a - dstColorRGBA64.R = uint16(s11r) - dstColorRGBA64.G = uint16(s11g) - dstColorRGBA64.B = uint16(s11b) - dstColorRGBA64.A = uint16(s11a) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(s11r) >> 8) + dst.Pix[d+1] = uint8(uint32(s11g) >> 8) + dst.Pix[d+2] = uint8(uint32(s11b) >> 8) + dst.Pix[d+3] = uint8(uint32(s11a) >> 8) + d += 4 } } } @@ -390,8 +382,6 @@ func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -405,6 +395,7 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -450,11 +441,11 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R s11g = yFrac1*s10g + yFrac0*s11g s11b = yFrac1*s10b + yFrac0*s11b s11a = yFrac1*s10a + yFrac0*s11a - dstColorRGBA64.R = uint16(s11r) - dstColorRGBA64.G = uint16(s11g) - dstColorRGBA64.B = uint16(s11b) - dstColorRGBA64.A = uint16(s11a) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(s11r) >> 8) + dst.Pix[d+1] = uint8(uint32(s11g) >> 8) + dst.Pix[d+2] = uint8(uint32(s11b) >> 8) + dst.Pix[d+3] = uint8(uint32(s11a) >> 8) + d += 4 } } } @@ -462,8 +453,6 @@ func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.R func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) { yscale := float64(z.sh) / float64(z.dh) xscale := float64(z.sw) / float64(z.dw) - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ { sy := (float64(dy)+0.5)*yscale - 0.5 sy0 := int32(sy) @@ -477,6 +466,7 @@ func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.R sy1 = sy0 yFrac0, yFrac1 = 1, 0 } + d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { sx := (float64(dx)+0.5)*xscale - 0.5 sx0 := int32(sx) @@ -522,11 +512,11 @@ func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.R s11g = yFrac1*s10g + yFrac0*s11g s11b = yFrac1*s10b + yFrac0*s11b s11a = yFrac1*s10a + yFrac0*s11a - dstColorRGBA64.R = uint16(s11r) - dstColorRGBA64.G = uint16(s11g) - dstColorRGBA64.B = uint16(s11b) - dstColorRGBA64.A = uint16(s11a) - dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) + dst.Pix[d+0] = uint8(uint32(s11r) >> 8) + dst.Pix[d+1] = uint8(uint32(s11g) >> 8) + dst.Pix[d+2] = uint8(uint32(s11b) >> 8) + dst.Pix[d+3] = uint8(uint32(s11a) >> 8) + d += 4 } } } @@ -753,10 +743,9 @@ func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sp image. } func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, tmp [][4]float64) { - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ { - for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { + d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y) + for _, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] { var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] @@ -765,11 +754,11 @@ func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rec pb += p[2] * c.weight pa += p[3] * c.weight } - dstColorRGBA64.R = ftou(pr * s.invTotalWeight) - dstColorRGBA64.G = ftou(pg * s.invTotalWeight) - dstColorRGBA64.B = ftou(pb * s.invTotalWeight) - dstColorRGBA64.A = ftou(pa * s.invTotalWeight) - dst.Set(dp.X+int(dx), dp.Y+int(dr.Min.Y+dy), dstColor) + dst.Pix[d+0] = uint8(ftou(pr*s.invTotalWeight) >> 8) + dst.Pix[d+1] = uint8(ftou(pg*s.invTotalWeight) >> 8) + dst.Pix[d+2] = uint8(ftou(pb*s.invTotalWeight) >> 8) + dst.Pix[d+3] = uint8(ftou(pa*s.invTotalWeight) >> 8) + d += dst.Stride } } }