draw: use a sync.Pool for kernel scaling's temporary buffers.
benchmark                     old ns/op     new ns/op     delta
BenchmarkScaleBLLargeDown     257715146     260286012     +1.00%
BenchmarkScaleCRLargeDown     426797448     430078734     +0.77%
BenchmarkScaleBLDown          4449939       4222542       -5.11%
BenchmarkScaleCRDown          8160446       8010056       -1.84%
BenchmarkScaleBLUp            22290312      21044122      -5.59%
BenchmarkScaleCRUp            33010722      32021468      -3.00%
BenchmarkScaleCRSrcGray       13307961      13020192      -2.16%
BenchmarkScaleCRSrcNRGBA      40567431      40801939      +0.58%
BenchmarkScaleCRSrcRGBA       39892971      40240558      +0.87%
BenchmarkScaleCRSrcYCbCr      59020222      59686699      +1.13%

benchmark                     old allocs     new allocs     delta
BenchmarkScaleBLLargeDown     1              1              +0.00%
BenchmarkScaleCRLargeDown     1              2              +100.00%
BenchmarkScaleBLDown          1              0              -100.00%
BenchmarkScaleCRDown          1              0              -100.00%
BenchmarkScaleBLUp            1              0              -100.00%
BenchmarkScaleCRUp            1              0              -100.00%
BenchmarkScaleCRSrcGray       1              0              -100.00%
BenchmarkScaleCRSrcNRGBA      1              0              -100.00%
BenchmarkScaleCRSrcRGBA       1              0              -100.00%
BenchmarkScaleCRSrcYCbCr      1              0              -100.00%

benchmark                     old bytes     new bytes     delta
BenchmarkScaleBLLargeDown     14745600      2949200       -80.00%
BenchmarkScaleCRLargeDown     14745600      4915333       -66.67%
BenchmarkScaleBLDown          1523712       5079          -99.67%
BenchmarkScaleCRDown          1523712       7619          -99.50%
BenchmarkScaleBLUp            10117120      101175        -99.00%
BenchmarkScaleCRUp            10117120      202350        -98.00%
BenchmarkScaleCRSrcGray       4915200       49156         -99.00%
BenchmarkScaleCRSrcNRGBA      4915200       163853        -96.67%
BenchmarkScaleCRSrcRGBA       4915200       163853        -96.67%
BenchmarkScaleCRSrcYCbCr      4915200       245780        -95.00%

I think the increase in the number of allocs for BenchmarkScale??LargeDown is an accounting error due to the low number of iterations: a low denominator. I suspect that there are one or two extra allocs up front for using the sync.Pool, but one fewer alloc per iteration. The number of iterations is only 5 for BL and 3 for CR, for the default timeout. If I increase the -test.benchtime value to 5s, then the reported average (allocs/op) drops from 2 to 0, so the delta should actually be -100% instead of +0 or +100%.
Change-Id: I21d9bb0086bdb25517b6a430e8a21bdf3db026f6 Reviewed-on: https://go-review.googlesource.com/8150 Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
parent
d5c5125b57
commit
c53fa16781
10
draw/gen.go
10
draw/gen.go
|
@ -936,8 +936,14 @@ const (
|
||||||
// Create a temporary buffer:
|
// Create a temporary buffer:
|
||||||
// scaleX distributes the source image's columns over the temporary image.
|
// scaleX distributes the source image's columns over the temporary image.
|
||||||
// scaleY distributes the temporary image's rows over the destination image.
|
// scaleY distributes the temporary image's rows over the destination image.
|
||||||
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
var tmp [][4]float64
|
||||||
tmp := make([][4]float64, z.dw*z.sh)
|
if z.pool.New != nil {
|
||||||
|
tmpp := z.pool.Get().(*[][4]float64)
|
||||||
|
defer z.pool.Put(tmpp)
|
||||||
|
tmp = *tmpp
|
||||||
|
} else {
|
||||||
|
tmp = z.makeTmpBuf()
|
||||||
|
}
|
||||||
|
|
||||||
// sr is the source pixels. If it extends beyond the src bounds,
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
// we cannot use the type-specific fast paths, as they access
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
|
10
draw/impl.go
10
draw/impl.go
|
@ -3036,8 +3036,14 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr
|
||||||
// Create a temporary buffer:
|
// Create a temporary buffer:
|
||||||
// scaleX distributes the source image's columns over the temporary image.
|
// scaleX distributes the source image's columns over the temporary image.
|
||||||
// scaleY distributes the temporary image's rows over the destination image.
|
// scaleY distributes the temporary image's rows over the destination image.
|
||||||
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
var tmp [][4]float64
|
||||||
tmp := make([][4]float64, z.dw*z.sh)
|
if z.pool.New != nil {
|
||||||
|
tmpp := z.pool.Get().(*[][4]float64)
|
||||||
|
defer z.pool.Put(tmpp)
|
||||||
|
tmp = *tmpp
|
||||||
|
} else {
|
||||||
|
tmp = z.makeTmpBuf()
|
||||||
|
}
|
||||||
|
|
||||||
// sr is the source pixels. If it extends beyond the src bounds,
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
// we cannot use the type-specific fast paths, as they access
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"image"
|
"image"
|
||||||
"image/color"
|
"image/color"
|
||||||
"math"
|
"math"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"golang.org/x/image/math/f64"
|
"golang.org/x/image/math/f64"
|
||||||
)
|
)
|
||||||
|
@ -88,13 +89,17 @@ type Kernel struct {
|
||||||
|
|
||||||
// Scale implements the Scaler interface.
|
// Scale implements the Scaler interface.
|
||||||
func (q *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
|
func (q *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
|
||||||
q.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()).Scale(dst, dr, src, sr, opts)
|
q.newScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy(), false).Scale(dst, dr, src, sr, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewScaler returns a Scaler that is optimized for scaling multiple times with
|
// NewScaler returns a Scaler that is optimized for scaling multiple times with
|
||||||
// the same fixed destination and source width and height.
|
// the same fixed destination and source width and height.
|
||||||
func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler {
|
func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler {
|
||||||
return &kernelScaler{
|
return q.newScaler(dw, dh, sw, sh, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *Kernel) newScaler(dw, dh, sw, sh int, usePool bool) Scaler {
|
||||||
|
z := &kernelScaler{
|
||||||
kernel: q,
|
kernel: q,
|
||||||
dw: int32(dw),
|
dw: int32(dw),
|
||||||
dh: int32(dh),
|
dh: int32(dh),
|
||||||
|
@ -103,6 +108,13 @@ func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler {
|
||||||
horizontal: newDistrib(q, int32(dw), int32(sw)),
|
horizontal: newDistrib(q, int32(dw), int32(sw)),
|
||||||
vertical: newDistrib(q, int32(dh), int32(sh)),
|
vertical: newDistrib(q, int32(dh), int32(sh)),
|
||||||
}
|
}
|
||||||
|
if usePool {
|
||||||
|
z.pool.New = func() interface{} {
|
||||||
|
tmp := z.makeTmpBuf()
|
||||||
|
return &tmp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return z
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -152,6 +164,11 @@ type kernelScaler struct {
|
||||||
kernel *Kernel
|
kernel *Kernel
|
||||||
dw, dh, sw, sh int32
|
dw, dh, sw, sh int32
|
||||||
horizontal, vertical distrib
|
horizontal, vertical distrib
|
||||||
|
pool sync.Pool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (z *kernelScaler) makeTmpBuf() [][4]float64 {
|
||||||
|
return make([][4]float64, z.dw*z.sh)
|
||||||
}
|
}
|
||||||
|
|
||||||
// source is a range of contribs, their inverse total weight, and that ITW
|
// source is a range of contribs, their inverse total weight, and that ITW
|
||||||
|
|
|
@ -393,6 +393,7 @@ func benchScale(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w
|
||||||
scaler = n.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy())
|
scaler = n.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
scaler.Scale(dst, dr, src, sr, nil)
|
scaler.Scale(dst, dr, src, sr, nil)
|
||||||
|
@ -408,6 +409,7 @@ func benchTform(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w
|
||||||
sr := src.Bounds()
|
sr := src.Bounds()
|
||||||
m := transformMatrix(40, 10)
|
m := transformMatrix(40, 10)
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
q.Transform(dst, m, src, sr, nil)
|
q.Transform(dst, m, src, sr, nil)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user