From c62001d39ba355ad24588ecae27cfce07399a74a Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Thu, 19 Mar 2015 20:41:38 +1100 Subject: [PATCH] draw: optimize Kernel.Transform. benchmark old ns/op new ns/op delta BenchmarkTformCRSrcGray 6111610 5344117 -12.56% BenchmarkTformCRSrcNRGBA 62070281 59295178 -4.47% BenchmarkTformCRSrcRGBA 13840290 10612547 -23.32% BenchmarkTformCRSrcUniform 591637 587621 -0.68% BenchmarkTformCRSrcYCbCr 72219184 69404747 -3.90% As of current origin/master, Gray and RGBA have fast paths but the other src image types do not. They have more fat, so the relative improvement is smaller. Change-Id: Ibbae91cd3cb3c139efb1dcc8fda1cb6432505189 Reviewed-on: https://go-review.googlesource.com/7794 Reviewed-by: Rob Pike --- draw/gen.go | 3 +- draw/impl.go | 84 +++++++++++++++++++++++++++++----------------------- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/draw/gen.go b/draw/gen.go index 4048e19..79ef527 100644 --- a/draw/gen.go +++ b/draw/gen.go @@ -958,7 +958,8 @@ const ( for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky - iy] for kx := ix; kx < jx; kx++ { - p += $srcf[kx, ky] * xWeights[kx - ix] * yWeight + w := xWeights[kx - ix] * yWeight + p += $srcf[kx, ky] * w } } $outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1] diff --git a/draw/impl.go b/draw/impl.go index 06b8432..89e7cd0 100644 --- a/draw/impl.go +++ b/draw/impl.go @@ -2719,9 +2719,10 @@ func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pi := src.PixOffset(kx, ky) pru := uint32(src.Pix[pi]) * 0x101 - pr += float64(pru) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w } } out := uint8(fffftou(pr) >> 8) @@ -2812,15 +2813,16 @@ func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pi := src.PixOffset(kx, ky) pau := uint32(src.Pix[pi+3]) * 0x101 pru := uint32(src.Pix[pi+0]) * pau / 0xff pgu := uint32(src.Pix[pi+1]) * pau / 0xff pbu := uint32(src.Pix[pi+2]) * pau / 0xff - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -2910,15 +2912,16 @@ func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pi := src.PixOffset(kx, ky) pru := uint32(src.Pix[pi+0]) * 0x101 pgu := uint32(src.Pix[pi+1]) * 0x101 pbu := uint32(src.Pix[pi+2]) * 0x101 pau := uint32(src.Pix[pi+3]) * 0x101 - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3008,11 +3011,12 @@ func (q *Kernel) transform_RGBA_Uniform(dst *image.RGBA, dr, adr image.Rectangle for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3102,11 +3106,12 @@ func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangl for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3196,11 +3201,12 @@ func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangl for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3290,11 +3296,12 @@ func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangl for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3384,11 +3391,12 @@ func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangl for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3478,11 +3486,12 @@ func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dst.Pix[d+0] = uint8(fffftou(pr) >> 8) @@ -3573,11 +3582,12 @@ func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s * for ky := iy; ky < jy; ky++ { yWeight := yWeights[ky-iy] for kx := ix; kx < jx; kx++ { + w := xWeights[kx-ix] * yWeight pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * xWeights[kx-ix] * yWeight - pg += float64(pgu) * xWeights[kx-ix] * yWeight - pb += float64(pbu) * xWeights[kx-ix] * yWeight - pa += float64(pau) * xWeights[kx-ix] * yWeight + pr += float64(pru) * w + pg += float64(pgu) * w + pb += float64(pbu) * w + pa += float64(pau) * w } } dstColorRGBA64.R = fffftou(pr)