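draw: optimize some multiply-by-zeroes in Kernel.Transform.

In the kernel accumulation loops, skip a source row entirely when its
y-weight is zero, and skip an individual source pixel when its combined
x*y weight is zero, instead of multiplying the pixel value by zero.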

benchmark                      old ns/op     new ns/op     delta
BenchmarkTformCRSrcGray        5096041       4820642       -5.40%
BenchmarkTformCRSrcNRGBA       10476578      8414331       -19.68%
BenchmarkTformCRSrcRGBA        10361135      7954413       -23.23%
BenchmarkTformCRSrcYCbCr       11952218      9824899       -17.80%

Change-Id: I8b4cfe68ecae85e447ae65ceecf185261445a8a2
Reviewed-on: https://go-review.googlesource.com/7991
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
Nigel Tao 2015-03-24 20:16:40 +11:00
parent 575b100276
commit 500a27f912
2 changed files with 202 additions and 182 deletions

View File

@ -1131,12 +1131,14 @@ const (
var pr, pg, pb, pa float64 $tweakVarP
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky - iy]
if yWeight := yWeights[ky - iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx - ix] * yWeight
if w := xWeights[kx - ix] * yWeight; w != 0 {
p += $srcf[kx, ky] * w
}
}
}
}
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
}
}

View File

@ -3706,14 +3706,16 @@ func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d
var pr float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
pru := uint32(src.Pix[pi]) * 0x101
pr += float64(pru) * w
}
}
}
}
out := uint8(fffftou(pr) >> 8)
dst.Pix[d+0] = out
dst.Pix[d+1] = out
@ -3803,9 +3805,9 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle,
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
pau := uint32(src.Pix[pi+3]) * 0x101
pru := uint32(src.Pix[pi+0]) * pau / 0xff
@ -3817,6 +3819,8 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle,
pa += float64(pau) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -3905,9 +3909,9 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
pru := uint32(src.Pix[pi+0]) * 0x101
pgu := uint32(src.Pix[pi+1]) * 0x101
@ -3919,6 +3923,8 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d
pa += float64(pau) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4007,9 +4013,9 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
@ -4044,6 +4050,8 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl
pb += float64(pbu) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4132,9 +4140,9 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
@ -4169,6 +4177,8 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl
pb += float64(pbu) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4257,9 +4267,9 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
@ -4294,6 +4304,8 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl
pb += float64(pbu) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4382,9 +4394,9 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
@ -4419,6 +4431,8 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl
pb += float64(pbu) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4507,9 +4521,9 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle,
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * w
pg += float64(pgu) * w
@ -4517,6 +4531,8 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle,
pa += float64(pau) * w
}
}
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
@ -4606,9 +4622,9 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
if yWeight := yWeights[ky-iy]; yWeight != 0 {
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
if w := xWeights[kx-ix] * yWeight; w != 0 {
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * w
pg += float64(pgu) * w
@ -4616,6 +4632,8 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *
pa += float64(pau) * w
}
}
}
}
dstColorRGBA64.R = fffftou(pr)
dstColorRGBA64.G = fffftou(pg)
dstColorRGBA64.B = fffftou(pb)