draw: optimize some multiply-by-zeroes in Kernel.Transform.
benchmark                    old ns/op    new ns/op    delta
BenchmarkTformCRSrcGray      5096041      4820642      -5.40%
BenchmarkTformCRSrcNRGBA     10476578     8414331      -19.68%
BenchmarkTformCRSrcRGBA      10361135     7954413      -23.23%
BenchmarkTformCRSrcYCbCr     11952218     9824899      -17.80%

Change-Id: I8b4cfe68ecae85e447ae65ceecf185261445a8a2
Reviewed-on: https://go-review.googlesource.com/7991
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
parent
575b100276
commit
500a27f912
|
@ -1131,12 +1131,14 @@ const (
|
||||||
|
|
||||||
var pr, pg, pb, pa float64 $tweakVarP
|
var pr, pg, pb, pa float64 $tweakVarP
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky - iy]
|
if yWeight := yWeights[ky - iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx - ix] * yWeight
|
if w := xWeights[kx - ix] * yWeight; w != 0 {
|
||||||
p += $srcf[kx, ky] * w
|
p += $srcf[kx, ky] * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
|
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
54
draw/impl.go
54
draw/impl.go
|
@ -3706,14 +3706,16 @@ func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d
|
||||||
|
|
||||||
var pr float64
|
var pr float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
|
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
|
||||||
pru := uint32(src.Pix[pi]) * 0x101
|
pru := uint32(src.Pix[pi]) * 0x101
|
||||||
pr += float64(pru) * w
|
pr += float64(pru) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
out := uint8(fffftou(pr) >> 8)
|
out := uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+0] = out
|
dst.Pix[d+0] = out
|
||||||
dst.Pix[d+1] = out
|
dst.Pix[d+1] = out
|
||||||
|
@ -3803,9 +3805,9 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle,
|
||||||
|
|
||||||
var pr, pg, pb, pa float64
|
var pr, pg, pb, pa float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
|
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
|
||||||
pau := uint32(src.Pix[pi+3]) * 0x101
|
pau := uint32(src.Pix[pi+3]) * 0x101
|
||||||
pru := uint32(src.Pix[pi+0]) * pau / 0xff
|
pru := uint32(src.Pix[pi+0]) * pau / 0xff
|
||||||
|
@ -3817,6 +3819,8 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle,
|
||||||
pa += float64(pau) * w
|
pa += float64(pau) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -3905,9 +3909,9 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d
|
||||||
|
|
||||||
var pr, pg, pb, pa float64
|
var pr, pg, pb, pa float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
|
pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
|
||||||
pru := uint32(src.Pix[pi+0]) * 0x101
|
pru := uint32(src.Pix[pi+0]) * 0x101
|
||||||
pgu := uint32(src.Pix[pi+1]) * 0x101
|
pgu := uint32(src.Pix[pi+1]) * 0x101
|
||||||
|
@ -3919,6 +3923,8 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d
|
||||||
pa += float64(pau) * w
|
pa += float64(pau) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4007,9 +4013,9 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
|
|
||||||
var pr, pg, pb float64
|
var pr, pg, pb float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
||||||
pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
|
pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
|
||||||
|
|
||||||
|
@ -4044,6 +4050,8 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
pb += float64(pbu) * w
|
pb += float64(pbu) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4132,9 +4140,9 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
|
|
||||||
var pr, pg, pb float64
|
var pr, pg, pb float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
||||||
pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
|
pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
|
||||||
|
|
||||||
|
@ -4169,6 +4177,8 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
pb += float64(pbu) * w
|
pb += float64(pbu) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4257,9 +4267,9 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
|
|
||||||
var pr, pg, pb float64
|
var pr, pg, pb float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
||||||
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
|
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
|
||||||
|
|
||||||
|
@ -4294,6 +4304,8 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
pb += float64(pbu) * w
|
pb += float64(pbu) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4382,9 +4394,9 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
|
|
||||||
var pr, pg, pb float64
|
var pr, pg, pb float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
|
||||||
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
|
pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
|
||||||
|
|
||||||
|
@ -4419,6 +4431,8 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl
|
||||||
pb += float64(pbu) * w
|
pb += float64(pbu) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4507,9 +4521,9 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle,
|
||||||
|
|
||||||
var pr, pg, pb, pa float64
|
var pr, pg, pb, pa float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
|
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
|
||||||
pr += float64(pru) * w
|
pr += float64(pru) * w
|
||||||
pg += float64(pgu) * w
|
pg += float64(pgu) * w
|
||||||
|
@ -4517,6 +4531,8 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle,
|
||||||
pa += float64(pau) * w
|
pa += float64(pau) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
|
||||||
|
@ -4606,9 +4622,9 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *
|
||||||
|
|
||||||
var pr, pg, pb, pa float64
|
var pr, pg, pb, pa float64
|
||||||
for ky := iy; ky < jy; ky++ {
|
for ky := iy; ky < jy; ky++ {
|
||||||
yWeight := yWeights[ky-iy]
|
if yWeight := yWeights[ky-iy]; yWeight != 0 {
|
||||||
for kx := ix; kx < jx; kx++ {
|
for kx := ix; kx < jx; kx++ {
|
||||||
w := xWeights[kx-ix] * yWeight
|
if w := xWeights[kx-ix] * yWeight; w != 0 {
|
||||||
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
|
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
|
||||||
pr += float64(pru) * w
|
pr += float64(pru) * w
|
||||||
pg += float64(pgu) * w
|
pg += float64(pgu) * w
|
||||||
|
@ -4616,6 +4632,8 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *
|
||||||
pa += float64(pau) * w
|
pa += float64(pau) * w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
dstColorRGBA64.R = fffftou(pr)
|
dstColorRGBA64.R = fffftou(pr)
|
||||||
dstColorRGBA64.G = fffftou(pg)
|
dstColorRGBA64.G = fffftou(pg)
|
||||||
dstColorRGBA64.B = fffftou(pb)
|
dstColorRGBA64.B = fffftou(pb)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user