draw: optimize Kernel.Transform.

benchmark                      old ns/op      new ns/op      delta
BenchmarkTformCRSrcGray        6111610        5344117        -12.56%
BenchmarkTformCRSrcNRGBA       62070281       59295178       -4.47%
BenchmarkTformCRSrcRGBA        13840290       10612547       -23.32%
BenchmarkTformCRSrcUniform     591637         587621         -0.68%
BenchmarkTformCRSrcYCbCr       72219184       69404747       -3.90%

As of current origin/master, Gray and RGBA have fast paths but the other src
image types do not. Those other types have more fat, so their relative
improvement is smaller.

Change-Id: Ibbae91cd3cb3c139efb1dcc8fda1cb6432505189
Reviewed-on: https://go-review.googlesource.com/7794
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
Nigel Tao 2015-03-19 20:41:38 +11:00
parent ffd1dee1b5
commit c62001d39b
2 changed files with 49 additions and 38 deletions

View File

@ -958,7 +958,8 @@ const (
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky - iy]
for kx := ix; kx < jx; kx++ {
p += $srcf[kx, ky] * xWeights[kx - ix] * yWeight
w := xWeights[kx - ix] * yWeight
p += $srcf[kx, ky] * w
}
}
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]

View File

@ -2719,9 +2719,10 @@ func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pru := uint32(src.Pix[pi]) * 0x101
pr += float64(pru) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
}
}
out := uint8(fffftou(pr) >> 8)
@ -2812,15 +2813,16 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle,
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pau := uint32(src.Pix[pi+3]) * 0x101
pru := uint32(src.Pix[pi+0]) * pau / 0xff
pgu := uint32(src.Pix[pi+1]) * pau / 0xff
pbu := uint32(src.Pix[pi+2]) * pau / 0xff
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -2910,15 +2912,16 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pru := uint32(src.Pix[pi+0]) * 0x101
pgu := uint32(src.Pix[pi+1]) * 0x101
pbu := uint32(src.Pix[pi+2]) * 0x101
pau := uint32(src.Pix[pi+3]) * 0x101
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3008,11 +3011,12 @@ func (q *Kernel) transform_RGBA_Uniform(dst *image.RGBA, dr, adr image.Rectangle
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3102,11 +3106,12 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3196,11 +3201,12 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3290,11 +3296,12 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3384,11 +3391,12 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3478,11 +3486,12 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle,
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@ -3573,11 +3582,12 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
pr += float64(pru) * xWeights[kx-ix] * yWeight
pg += float64(pgu) * xWeights[kx-ix] * yWeight
pb += float64(pbu) * xWeights[kx-ix] * yWeight
pa += float64(pau) * xWeights[kx-ix] * yWeight
pr += float64(pru) * w
pg += float64(pgu) * w
pb += float64(pbu) * w
pa += float64(pau) * w
}
}
dstColorRGBA64.R = fffftou(pr)