From c53fa16781cb10dd75d2e62c6a4db691cbde3e55 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Fri, 27 Mar 2015 11:21:16 +1100 Subject: [PATCH] draw: use a sync.Pool for kernel scaling's temporary buffers. benchmark old ns/op new ns/op delta BenchmarkScaleBLLargeDown 257715146 260286012 +1.00% BenchmarkScaleCRLargeDown 426797448 430078734 +0.77% BenchmarkScaleBLDown 4449939 4222542 -5.11% BenchmarkScaleCRDown 8160446 8010056 -1.84% BenchmarkScaleBLUp 22290312 21044122 -5.59% BenchmarkScaleCRUp 33010722 32021468 -3.00% BenchmarkScaleCRSrcGray 13307961 13020192 -2.16% BenchmarkScaleCRSrcNRGBA 40567431 40801939 +0.58% BenchmarkScaleCRSrcRGBA 39892971 40240558 +0.87% BenchmarkScaleCRSrcYCbCr 59020222 59686699 +1.13% benchmark old allocs new allocs delta BenchmarkScaleBLLargeDown 1 1 +0.00% BenchmarkScaleCRLargeDown 1 2 +100.00% BenchmarkScaleBLDown 1 0 -100.00% BenchmarkScaleCRDown 1 0 -100.00% BenchmarkScaleBLUp 1 0 -100.00% BenchmarkScaleCRUp 1 0 -100.00% BenchmarkScaleCRSrcGray 1 0 -100.00% BenchmarkScaleCRSrcNRGBA 1 0 -100.00% BenchmarkScaleCRSrcRGBA 1 0 -100.00% BenchmarkScaleCRSrcYCbCr 1 0 -100.00% benchmark old bytes new bytes delta BenchmarkScaleBLLargeDown 14745600 2949200 -80.00% BenchmarkScaleCRLargeDown 14745600 4915333 -66.67% BenchmarkScaleBLDown 1523712 5079 -99.67% BenchmarkScaleCRDown 1523712 7619 -99.50% BenchmarkScaleBLUp 10117120 101175 -99.00% BenchmarkScaleCRUp 10117120 202350 -98.00% BenchmarkScaleCRSrcGray 4915200 49156 -99.00% BenchmarkScaleCRSrcNRGBA 4915200 163853 -96.67% BenchmarkScaleCRSrcRGBA 4915200 163853 -96.67% BenchmarkScaleCRSrcYCbCr 4915200 245780 -95.00% The increase in BenchmarkScale??LargeDown number of allocs I think is an accounting error due to the low number of iterations: a low denominator. I suspect that there are one or two extra allocs up front for using the sync.Pool, but one fewer alloc per iteration. The number of iterations is only 5 for BL and 3 for CR, for the default timeout. If I increase the -test.benchtime value to 5s, then the reported average (allocs/op) drop from 2 to 0, so the delta should actually be -100% instead of +0 or +100%. Change-Id: I21d9bb0086bdb25517b6a430e8a21bdf3db026f6 Reviewed-on: https://go-review.googlesource.com/8150 Reviewed-by: Rob Pike --- draw/gen.go | 10 ++++++++-- draw/impl.go | 10 ++++++++-- draw/scale.go | 21 +++++++++++++++++++-- draw/scale_test.go | 2 ++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/draw/gen.go b/draw/gen.go index 1acae2e..6efdafa 100644 --- a/draw/gen.go +++ b/draw/gen.go @@ -936,8 +936,14 @@ const ( // Create a temporary buffer: // scaleX distributes the source image's columns over the temporary image. // scaleY distributes the temporary image's rows over the destination image. - // TODO: is it worth having a sync.Pool for this temporary buffer? - tmp := make([][4]float64, z.dw*z.sh) + var tmp [][4]float64 + if z.pool.New != nil { + tmpp := z.pool.Get().(*[][4]float64) + defer z.pool.Put(tmpp) + tmp = *tmpp + } else { + tmp = z.makeTmpBuf() + } // sr is the source pixels. If it extends beyond the src bounds, // we cannot use the type-specific fast paths, as they access diff --git a/draw/impl.go b/draw/impl.go index 6278c92..a1de1cd 100644 --- a/draw/impl.go +++ b/draw/impl.go @@ -3036,8 +3036,14 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr // Create a temporary buffer: // scaleX distributes the source image's columns over the temporary image. // scaleY distributes the temporary image's rows over the destination image. - // TODO: is it worth having a sync.Pool for this temporary buffer? - tmp := make([][4]float64, z.dw*z.sh) + var tmp [][4]float64 + if z.pool.New != nil { + tmpp := z.pool.Get().(*[][4]float64) + defer z.pool.Put(tmpp) + tmp = *tmpp + } else { + tmp = z.makeTmpBuf() + } // sr is the source pixels. If it extends beyond the src bounds, // we cannot use the type-specific fast paths, as they access diff --git a/draw/scale.go b/draw/scale.go index 1e95971..2e481b1 100644 --- a/draw/scale.go +++ b/draw/scale.go @@ -10,6 +10,7 @@ import ( "image" "image/color" "math" + "sync" "golang.org/x/image/math/f64" ) @@ -88,13 +89,17 @@ type Kernel struct { // Scale implements the Scaler interface. func (q *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { - q.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()).Scale(dst, dr, src, sr, opts) + q.newScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy(), false).Scale(dst, dr, src, sr, opts) } // NewScaler returns a Scaler that is optimized for scaling multiple times with // the same fixed destination and source width and height. func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler { - return &kernelScaler{ + return q.newScaler(dw, dh, sw, sh, true) +} + +func (q *Kernel) newScaler(dw, dh, sw, sh int, usePool bool) Scaler { + z := &kernelScaler{ kernel: q, dw: int32(dw), dh: int32(dh), @@ -103,6 +108,13 @@ func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler { horizontal: newDistrib(q, int32(dw), int32(sw)), vertical: newDistrib(q, int32(dh), int32(sh)), } + if usePool { + z.pool.New = func() interface{} { + tmp := z.makeTmpBuf() + return &tmp + } + } + return z } var ( @@ -152,6 +164,11 @@ type kernelScaler struct { kernel *Kernel dw, dh, sw, sh int32 horizontal, vertical distrib + pool sync.Pool +} + +func (z *kernelScaler) makeTmpBuf() [][4]float64 { + return make([][4]float64, z.dw*z.sh) } // source is a range of contribs, their inverse total weight, and that ITW diff --git a/draw/scale_test.go b/draw/scale_test.go index bad9c5f..9cce5b1 100644 --- a/draw/scale_test.go +++ b/draw/scale_test.go @@ -393,6 +393,7 @@ func benchScale(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w scaler = n.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()) } + b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { scaler.Scale(dst, dr, src, sr, nil) @@ -408,6 +409,7 @@ func benchTform(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w sr := src.Bounds() m := transformMatrix(40, 10) + b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { q.Transform(dst, m, src, sr, nil)