draw: inline the generated PixOffset calls.

Only the YCbCr benchmarks show significant changes. The other benchmark
changes look noisy. The PixOffset and YOffset calls were previously
already inlined by the gc compiler. COffset was different because it's
more complicated than YOffset, and the switch inside the COffset body is
redundant when you already know the src image sratio.
http://golang.org/src/image/ycbcr.go?s=2377:2414#L77

benchmark                      old ns/op     new ns/op     delta
BenchmarkScaleLargeDownNN      1037504       908236        -12.46%
BenchmarkScaleLargeDownAB      3196568       2735776       -14.42%
BenchmarkScaleLargeDownBL      357165552     311463393     -12.80%
BenchmarkScaleLargeDownCR      649403305     544985134     -16.08%
BenchmarkScaleSrcYCbCr         3204063       2699147       -15.76%
BenchmarkTformABSrcYCbCr       2155142       1968540       -8.66%
BenchmarkTformCRSrcYCbCr       11672625      9865358       -15.48%

Change-Id: Ifa109363a1282ab114b2fdb0b577dcafef927333
Reviewed-on: https://go-review.googlesource.com/7880
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
Nigel Tao 2015-03-20 16:36:51 +11:00
parent 415cb3b420
commit 4180bcbc4a
2 changed files with 208 additions and 191 deletions

View File

@ -216,7 +216,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))"
return "d := " + pixOffset("dst", "dr.Min.X+adr.Min.X", "dr.Min.Y+int(dy)", "*4", "*dst.Stride")
}
case "preKernelInner":
@ -224,7 +224,7 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
default:
return ";"
case "*image.RGBA":
return "d := dst.PixOffset(dr.Min.X+int(dx), dr.Min.Y+adr.Min.Y)"
return "d := " + pixOffset("dst", "dr.Min.X+int(dx)", "dr.Min.Y+adr.Min.Y", "*4", "*dst.Stride")
}
case "blend":
@ -445,19 +445,19 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
)
case "*image.Gray":
fmt.Fprintf(buf, ""+
"%si := src.PixOffset(%s, %s)\n"+
"%si := %s\n"+
"%sr%s := uint32(src.Pix[%si]) * 0x101\n",
lhs, args[0], args[1],
lhs, pixOffset("src", args[0], args[1], "", "*src.Stride"),
lhs, tmp, lhs,
)
case "*image.NRGBA":
fmt.Fprintf(buf, ""+
"%si := src.PixOffset(%s, %s)\n"+
"%si := %s\n"+
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n"+
"%sr%s := uint32(src.Pix[%si+0]) * %sa%s / 0xff\n"+
"%sg%s := uint32(src.Pix[%si+1]) * %sa%s / 0xff\n"+
"%sb%s := uint32(src.Pix[%si+2]) * %sa%s / 0xff\n",
lhs, args[0], args[1],
lhs, pixOffset("src", args[0], args[1], "*4", "*src.Stride"),
lhs, tmp, lhs,
lhs, tmp, lhs, lhs, tmp,
lhs, tmp, lhs, lhs, tmp,
@ -465,31 +465,30 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
)
case "*image.RGBA":
fmt.Fprintf(buf, ""+
"%si := src.PixOffset(%s, %s)\n"+
"%si := %s\n"+
"%sr%s := uint32(src.Pix[%si+0]) * 0x101\n"+
"%sg%s := uint32(src.Pix[%si+1]) * 0x101\n"+
"%sb%s := uint32(src.Pix[%si+2]) * 0x101\n"+
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n",
lhs, args[0], args[1],
lhs, pixOffset("src", args[0], args[1], "*4", "*src.Stride"),
lhs, tmp, lhs,
lhs, tmp, lhs,
lhs, tmp, lhs,
lhs, tmp, lhs,
)
case "*image.YCbCr":
// TODO: inline the COffset call, with the known d.sratio value.
// TODO: inline the color.YCbCrToRGB call.
// TODO: should we have a color.YCbCrToRGB48 function that returns
// 16-bit color?
fmt.Fprintf(buf, ""+
"%si := src.YOffset(%s, %s)\n"+
"%sj := src.COffset(%s, %s)\n"+
"%si := %s\n"+
"%sj := %s\n"+
"%sr8, %sg8, %sb8 := color.YCbCrToRGB(src.Y[%si], src.Cb[%sj], src.Cr[%sj])\n"+
"%sr%s := uint32(%sr8) * 0x101\n"+
"%sg%s := uint32(%sg8) * 0x101\n"+
"%sb%s := uint32(%sb8) * 0x101\n",
lhs, args[0], args[1],
lhs, args[0], args[1],
lhs, pixOffset("src", args[0], args[1], "", "*src.YStride"),
lhs, cOffset(args[0], args[1], d.sratio),
lhs, lhs, lhs, lhs, lhs, lhs,
lhs, tmp, lhs,
lhs, tmp, lhs,
@ -630,6 +629,24 @@ func expnSwitchYCbCr(dType, template string) string {
return strings.Join(lines, "\n")
}
// pixOffset returns the text of an expression that indexes into image m's
// pixel buffer at coordinates (x, y), written out as explicit arithmetic
// relative to m.Rect.Min instead of as a method call on m.
//
// m, x and y are spliced into the result verbatim. xstride and ystride are
// appended directly after the closing parenthesis of their term, so they are
// expected to carry their own operator (for example "*4" or "*dst.Stride");
// an empty string leaves the term unscaled.
func pixOffset(m, x, y, xstride, ystride string) string {
	// The row term comes first, then the column term.
	rowTerm := fmt.Sprintf("(%s-%s.Rect.Min.Y)%s", y, m, ystride)
	colTerm := fmt.Sprintf("(%s-%s.Rect.Min.X)%s", x, m, xstride)
	return rowTerm + " + " + colTerm
}
// cOffset returns the text of an expression that computes a chroma-plane
// index for the image named src at coordinates (x, y), specialised for the
// subsample ratio named by sratio ("444", "422", "420" or "440").
//
// Each axis is emitted either at full resolution or halved, depending on the
// ratio. The row term is always multiplied by src.CStride. x and y are
// spliced in verbatim; in the halved form they are wrapped in parentheses
// before the division.
//
// For any other sratio the result is the message `unsupported sratio "..."`
// rather than an arithmetic expression.
func cOffset(x, y, sratio string) string {
	// full renders a coordinate at full resolution along the given axis.
	full := func(coord, axis string) string {
		return "( " + coord + " - src.Rect.Min." + axis + " )"
	}
	// half renders a coordinate subsampled by two along the given axis.
	half := func(coord, axis string) string {
		return "((" + coord + ")/2 - src.Rect.Min." + axis + "/2)"
	}

	var row, col string
	switch sratio {
	case "444":
		row, col = full(y, "Y"), full(x, "X")
	case "422":
		row, col = full(y, "Y"), half(x, "X")
	case "420":
		row, col = half(y, "Y"), half(x, "X")
	case "440":
		row, col = half(y, "Y"), full(x, "X")
	default:
		return fmt.Sprintf("unsupported sratio %q", sratio)
	}
	return row + "*src.CStride + " + col
}
func split(s, sep string) (string, string) {
if i := strings.Index(s, sep); i >= 0 {
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+len(sep):])

File diff suppressed because it is too large Load Diff