From 500a27f9124b984b776e4066e28c220b48ce1413 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Tue, 24 Mar 2015 20:16:40 +1100 Subject: [PATCH] draw: optimize some multiply-by-zeroes in Kernel.Transform. benchmark old ns/op new ns/op delta BenchmarkTformCRSrcGray 5096041 4820642 -5.40% BenchmarkTformCRSrcNRGBA 10476578 8414331 -19.68% BenchmarkTformCRSrcRGBA 10361135 7954413 -23.23% BenchmarkTformCRSrcYCbCr 11952218 9824899 -17.80% Change-Id: I8b4cfe68ecae85e447ae65ceecf185261445a8a2 Reviewed-on: https://go-review.googlesource.com/7991 Reviewed-by: Rob Pike --- draw/gen.go | 10 +- draw/impl.go | 374 +++++++++++++++++++++++++++------------------------ 2 files changed, 202 insertions(+), 182 deletions(-) diff --git a/draw/gen.go b/draw/gen.go index b670521..3ff3d84 100644 --- a/draw/gen.go +++ b/draw/gen.go @@ -1131,10 +1131,12 @@ const ( var pr, pg, pb, pa float64 $tweakVarP for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky - iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx - ix] * yWeight - p += $srcf[kx, ky] * w + if yWeight := yWeights[ky - iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx - ix] * yWeight; w != 0 { + p += $srcf[kx, ky] * w + } + } } } $outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1] diff --git a/draw/impl.go b/draw/impl.go index 9b4ca07..f144531 100644 --- a/draw/impl.go +++ b/draw/impl.go @@ -3706,12 +3706,14 @@ func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d var pr float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X) - pru := uint32(src.Pix[pi]) * 0x101 - pr += float64(pru) * w + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X) + pru := uint32(src.Pix[pi]) * 0x101 + pr += float64(pru) * w + } + } } } out := uint8(fffftou(pr) >> 8) @@ -3803,18 +3805,20 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, var pr, pg, pb, pa float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4 - pau := uint32(src.Pix[pi+3]) * 0x101 - pru := uint32(src.Pix[pi+0]) * pau / 0xff - pgu := uint32(src.Pix[pi+1]) * pau / 0xff - pbu := uint32(src.Pix[pi+2]) * pau / 0xff - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4 + pau := uint32(src.Pix[pi+3]) * 0x101 + pru := uint32(src.Pix[pi+0]) * pau / 0xff + pgu := uint32(src.Pix[pi+1]) * pau / 0xff + pbu := uint32(src.Pix[pi+2]) * pau / 0xff + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3905,18 +3909,20 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d var pr, pg, pb, pa float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4 - pru := uint32(src.Pix[pi+0]) * 0x101 - pgu := uint32(src.Pix[pi+1]) * 0x101 - pbu := uint32(src.Pix[pi+2]) * 0x101 - pau := uint32(src.Pix[pi+3]) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4 + pru := uint32(src.Pix[pi+0]) * 0x101 + pgu := uint32(src.Pix[pi+1]) * 0x101 + pbu := uint32(src.Pix[pi+2]) * 0x101 + pau := uint32(src.Pix[pi+3]) * 0x101 + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4007,41 +4013,43 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl var pr, pg, pb float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) - pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X) + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) + pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X) - // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - pyy1 := int(src.Y[pi])<<16 + 1<<15 - pcb1 := int(src.Cb[pj]) - 128 - pcr1 := int(src.Cr[pj]) - 128 - pr8 := (pyy1 + 91881*pcr1) >> 16 - pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 - pb8 := (pyy1 + 116130*pcb1) >> 16 - if pr8 < 0 { - pr8 = 0 - } else if pr8 > 0xff { - pr8 = 0xff - } - if pg8 < 0 { - pg8 = 0 - } else if pg8 > 0xff { - pg8 = 0xff - } - if pb8 < 0 { - pb8 = 0 - } else if pb8 > 0xff { - pb8 = 0xff - } + // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. + pyy1 := int(src.Y[pi])<<16 + 1<<15 + pcb1 := int(src.Cb[pj]) - 128 + pcr1 := int(src.Cr[pj]) - 128 + pr8 := (pyy1 + 91881*pcr1) >> 16 + pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 + pb8 := (pyy1 + 116130*pcb1) >> 16 + if pr8 < 0 { + pr8 = 0 + } else if pr8 > 0xff { + pr8 = 0xff + } + if pg8 < 0 { + pg8 = 0 + } else if pg8 > 0xff { + pg8 = 0xff + } + if pb8 < 0 { + pb8 = 0 + } else if pb8 > 0xff { + pb8 = 0xff + } - pru := uint32(pr8) * 0x101 - pgu := uint32(pg8) * 0x101 - pbu := uint32(pb8) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pru := uint32(pr8) * 0x101 + pgu := uint32(pg8) * 0x101 + pbu := uint32(pb8) * 0x101 + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4132,41 +4140,43 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl var pr, pg, pb float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) - pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2) + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) + pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2) - // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - pyy1 := int(src.Y[pi])<<16 + 1<<15 - pcb1 := int(src.Cb[pj]) - 128 - pcr1 := int(src.Cr[pj]) - 128 - pr8 := (pyy1 + 91881*pcr1) >> 16 - pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 - pb8 := (pyy1 + 116130*pcb1) >> 16 - if pr8 < 0 { - pr8 = 0 - } else if pr8 > 0xff { - pr8 = 0xff - } - if pg8 < 0 { - pg8 = 0 - } else if pg8 > 0xff { - pg8 = 0xff - } - if pb8 < 0 { - pb8 = 0 - } else if pb8 > 0xff { - pb8 = 0xff - } + // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. + pyy1 := int(src.Y[pi])<<16 + 1<<15 + pcb1 := int(src.Cb[pj]) - 128 + pcr1 := int(src.Cr[pj]) - 128 + pr8 := (pyy1 + 91881*pcr1) >> 16 + pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 + pb8 := (pyy1 + 116130*pcb1) >> 16 + if pr8 < 0 { + pr8 = 0 + } else if pr8 > 0xff { + pr8 = 0xff + } + if pg8 < 0 { + pg8 = 0 + } else if pg8 > 0xff { + pg8 = 0xff + } + if pb8 < 0 { + pb8 = 0 + } else if pb8 > 0xff { + pb8 = 0xff + } - pru := uint32(pr8) * 0x101 - pgu := uint32(pg8) * 0x101 - pbu := uint32(pb8) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pru := uint32(pr8) * 0x101 + pgu := uint32(pg8) * 0x101 + pbu := uint32(pb8) * 0x101 + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4257,41 +4267,43 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl var pr, pg, pb float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) - pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2) + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) + pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2) - // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - pyy1 := int(src.Y[pi])<<16 + 1<<15 - pcb1 := int(src.Cb[pj]) - 128 - pcr1 := int(src.Cr[pj]) - 128 - pr8 := (pyy1 + 91881*pcr1) >> 16 - pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 - pb8 := (pyy1 + 116130*pcb1) >> 16 - if pr8 < 0 { - pr8 = 0 - } else if pr8 > 0xff { - pr8 = 0xff - } - if pg8 < 0 { - pg8 = 0 - } else if pg8 > 0xff { - pg8 = 0xff - } - if pb8 < 0 { - pb8 = 0 - } else if pb8 > 0xff { - pb8 = 0xff - } + // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. + pyy1 := int(src.Y[pi])<<16 + 1<<15 + pcb1 := int(src.Cb[pj]) - 128 + pcr1 := int(src.Cr[pj]) - 128 + pr8 := (pyy1 + 91881*pcr1) >> 16 + pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 + pb8 := (pyy1 + 116130*pcb1) >> 16 + if pr8 < 0 { + pr8 = 0 + } else if pr8 > 0xff { + pr8 = 0xff + } + if pg8 < 0 { + pg8 = 0 + } else if pg8 > 0xff { + pg8 = 0xff + } + if pb8 < 0 { + pb8 = 0 + } else if pb8 > 0xff { + pb8 = 0xff + } - pru := uint32(pr8) * 0x101 - pgu := uint32(pg8) * 0x101 - pbu := uint32(pb8) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pru := uint32(pr8) * 0x101 + pgu := uint32(pg8) * 0x101 + pbu := uint32(pb8) * 0x101 + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4382,41 +4394,43 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl var pr, pg, pb float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) - pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X) + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X) + pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X) - // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. - pyy1 := int(src.Y[pi])<<16 + 1<<15 - pcb1 := int(src.Cb[pj]) - 128 - pcr1 := int(src.Cr[pj]) - 128 - pr8 := (pyy1 + 91881*pcr1) >> 16 - pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 - pb8 := (pyy1 + 116130*pcb1) >> 16 - if pr8 < 0 { - pr8 = 0 - } else if pr8 > 0xff { - pr8 = 0xff - } - if pg8 < 0 { - pg8 = 0 - } else if pg8 > 0xff { - pg8 = 0xff - } - if pb8 < 0 { - pb8 = 0 - } else if pb8 > 0xff { - pb8 = 0xff - } + // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB. + pyy1 := int(src.Y[pi])<<16 + 1<<15 + pcb1 := int(src.Cb[pj]) - 128 + pcr1 := int(src.Cr[pj]) - 128 + pr8 := (pyy1 + 91881*pcr1) >> 16 + pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16 + pb8 := (pyy1 + 116130*pcb1) >> 16 + if pr8 < 0 { + pr8 = 0 + } else if pr8 > 0xff { + pr8 = 0xff + } + if pg8 < 0 { + pg8 = 0 + } else if pg8 > 0xff { + pg8 = 0xff + } + if pb8 < 0 { + pb8 = 0 + } else if pb8 > 0xff { + pb8 = 0xff + } - pru := uint32(pr8) * 0x101 - pgu := uint32(pg8) * 0x101 - pbu := uint32(pb8) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pru := uint32(pr8) * 0x101 + pgu := uint32(pg8) * 0x101 + pbu := uint32(pb8) * 0x101 + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4507,14 +4521,16 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, var pr, pg, pb, pa float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w + } + } } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -4606,14 +4622,16 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s * var pr, pg, pb, pa float64 for ky := iy; ky < jy; ky++ { - yWeight := yWeights[ky-iy] - for kx := ix; kx < jx; kx++ { - w := xWeights[kx-ix] * yWeight - pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w + } + } } } dstColorRGBA64.R = fffftou(pr)