draw: eliminate some math.Floor calls in Transform methods.

benchmark                      old ns/op     new ns/op     delta
BenchmarkTformNNSrcRGBA        524533        293230        -44.10%
BenchmarkTformNNSrcUniform     362974        149389        -58.84%
BenchmarkTformABSrcGray        827461        427720        -48.31%
BenchmarkTformABSrcNRGBA       1286930       919391        -28.56%
BenchmarkTformABSrcRGBA        1115444       794334        -28.79%
BenchmarkTformABSrcYCbCr       1732420       1379846       -20.35%
BenchmarkTformCRSrcGray        3629048       3467404       -4.45%
BenchmarkTformCRSrcNRGBA       7569407       7418874       -1.99%
BenchmarkTformCRSrcRGBA        7552459       7432745       -1.59%
BenchmarkTformCRSrcYCbCr       8072351       7854715       -2.70%

Change-Id: I6c01e631d9f88c36ae99d0cd181104ad5ac48db0
Reviewed-on: https://go-review.googlesource.com/7926
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
Nigel Tao 2015-03-23 13:30:31 +11:00
parent 41f747b91e
commit 575b100276
4 changed files with 332 additions and 246 deletions

View File

@ -739,22 +739,36 @@ const (
func (z $receiver) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
dr := transformRect(s2d, &sr)
// adr is the affected destination pixels, relative to dr.Min.
adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
// adr is the affected destination pixels.
adr := dst.Bounds().Intersect(dr)
if adr.Empty() || sr.Empty() {
return
}
d2s := invert(s2d)
// bias is a translation of the mapping from dst co-ordinates to
// src co-ordinates such that the latter temporarily have
// non-negative X and Y co-ordinates. This allows us to write
// int(f) instead of int(math.Floor(f)), since "round to zero" and
// "round down" are equivalent when f >= 0, but the former is much
// cheaper. The X-- and Y-- are because the TransformLeaf methods
// have a "sx -= 0.5" adjustment.
bias := transformRect(&d2s, &adr).Min
bias.X--
bias.Y--
d2s[2] -= float64(bias.X)
d2s[5] -= float64(bias.Y)
// Make adr relative to dr.Min.
adr = adr.Sub(dr.Min)
// sr is the source pixels. If it extends beyond the src bounds,
// we cannot use the type-specific fast paths, as they access
// the Pix fields directly without bounds checking.
if !sr.In(src.Bounds()) {
z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
} else if u, ok := src.(*image.Uniform); ok {
// TODO: get the Op from opts.
transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
} else {
$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr)
$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias)
}
}
`
@ -779,16 +793,15 @@ const (
`
codeNNTransformLeaf = `
func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
$preOuter
for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
dyf := float64(dr.Min.Y + int(dy)) + 0.5
$preInner
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
dxf := float64(dr.Min.X + int(dx)) + 0.5
// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
sx0 := int(d2s[0]*dxf + d2s[1]*dyf + d2s[2]) + bias.X
sy0 := int(d2s[3]*dxf + d2s[4]*dyf + d2s[5]) + bias.Y
if !(image.Point{sx0, sy0}).In(sr) {
continue
}
@ -854,25 +867,24 @@ const (
`
codeABLTransformLeaf = `
func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
$preOuter
for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
dyf := float64(dr.Min.Y + int(dy)) + 0.5
$preInner
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
dxf := float64(dr.Min.X + int(dx)) + 0.5
// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
continue
}
sx -= 0.5
sxf := math.Floor(sx)
xFrac0 := sx - sxf
sx0 := int(sx)
xFrac0 := sx - float64(sx0)
xFrac1 := 1 - xFrac0
sx0 := int(sxf)
sx0 += bias.X
sx1 := sx0 + 1
if sx0 < sr.Min.X {
sx0, sx1 = sr.Min.X, sr.Min.X
@ -883,10 +895,10 @@ const (
}
sy -= 0.5
syf := math.Floor(sy)
yFrac0 := sy - syf
sy0 := int(sy)
yFrac0 := sy - float64(sy0)
yFrac1 := 1 - yFrac0
sy0 := int(syf)
sy0 += bias.Y
sy1 := sy0 + 1
if sy0 < sr.Min.Y {
sy0, sy1 = sr.Min.Y, sr.Min.Y
@ -947,16 +959,30 @@ const (
func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
dr := transformRect(s2d, &sr)
// adr is the affected destination pixels, relative to dr.Min.
adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
// adr is the affected destination pixels.
adr := dst.Bounds().Intersect(dr)
if adr.Empty() || sr.Empty() {
return
}
d2s := invert(s2d)
// bias is a translation of the mapping from dst co-ordinates to
// src co-ordinates such that the latter temporarily have
// non-negative X and Y co-ordinates. This allows us to write
// int(f) instead of int(math.Floor(f)), since "round to zero" and
// "round down" are equivalent when f >= 0, but the former is much
// cheaper. The X-- and Y-- are because the TransformLeaf methods
// have a "sx -= 0.5" adjustment.
bias := transformRect(&d2s, &adr).Min
bias.X--
bias.Y--
d2s[2] -= float64(bias.X)
d2s[5] -= float64(bias.Y)
// Make adr relative to dr.Min.
adr = adr.Sub(dr.Min)
if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
// TODO: get the Op from opts.
transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
return
}
@ -973,9 +999,9 @@ const (
// we cannot use the type-specific fast paths, as they access
// the Pix fields directly without bounds checking.
if !sr.In(src.Bounds()) {
q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
} else {
$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, xscale, yscale)
$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
}
}
`
@ -1024,7 +1050,7 @@ const (
`
codeKernelTransformLeaf = `
func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) {
func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
// When shrinking, broaden the effective kernel support so that we still
// visit every source pixel.
xHalfWidth, xKernelArgScale := q.Support, 1.0
@ -1047,13 +1073,15 @@ const (
$preInner
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
dxf := float64(dr.Min.X + int(dx)) + 0.5
// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
continue
}
// TODO: adjust the bias so that we can use int(f) instead
// of math.Floor(f) and math.Ceil(f).
sx += float64(bias.X)
sx -= 0.5
ix := int(math.Floor(sx - xHalfWidth))
if ix < sr.Min.X {
@ -1077,6 +1105,7 @@ const (
xWeights[x] /= totalXWeight
}
sy += float64(bias.Y)
sy -= 0.5
iy := int(math.Floor(sy - yHalfWidth))
if iy < sr.Min.Y {

File diff suppressed because it is too large Load Diff

View File

@ -353,7 +353,7 @@ func transformRect(s2d *f64.Aff3, sr *image.Rectangle) (dr image.Rectangle) {
return dr
}
func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, op Op) {
func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, bias image.Point, op Op) {
switch dst := dst.(type) {
case *image.RGBA:
pr, pg, pb, pa := src.C.RGBA()
@ -367,9 +367,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i
d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
dxf := float64(dr.Min.X+int(dx)) + 0.5
// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
if !(image.Point{sx0, sy0}).In(sr) {
continue
}
@ -394,9 +393,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i
dyf := float64(dr.Min.Y+int(dy)) + 0.5
for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
dxf := float64(dr.Min.X+int(dx)) + 0.5
// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
if !(image.Point{sx0, sy0}).In(sr) {
continue
}

View File

@ -414,35 +414,43 @@ func benchTform(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w
}
}
func BenchmarkScaleLargeDownNN(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
func BenchmarkScaleLargeDownAB(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
func BenchmarkScaleLargeDownBL(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
func BenchmarkScaleLargeDownCR(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
func BenchmarkScaleNNLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
func BenchmarkScaleABLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
func BenchmarkScaleBLLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
func BenchmarkScaleCRLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
func BenchmarkScaleDownNN(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
func BenchmarkScaleDownAB(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
func BenchmarkScaleDownBL(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
func BenchmarkScaleDownCR(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
func BenchmarkScaleNNDown(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
func BenchmarkScaleABDown(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
func BenchmarkScaleBLDown(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
func BenchmarkScaleCRDown(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
func BenchmarkScaleUpNN(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
func BenchmarkScaleUpAB(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
func BenchmarkScaleNNUp(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
func BenchmarkScaleABUp(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
func BenchmarkScaleBLUp(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
func BenchmarkScaleCRUp(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
func BenchmarkScaleSrcGray(b *testing.B) { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
func BenchmarkScaleSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkScaleSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) }
func BenchmarkScaleSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
func BenchmarkScaleNNSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, NearestNeighbor) }
func BenchmarkScaleNNSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, NearestNeighbor) }
func BenchmarkTformABSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
func BenchmarkTformNNSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, NearestNeighbor) }
func BenchmarkTformNNSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, NearestNeighbor) }
func BenchmarkTformCRSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }
func BenchmarkScaleABSrcGray(b *testing.B) { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
func BenchmarkScaleABSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkScaleABSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkScaleABSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
func BenchmarkScaleCRSrcGray(b *testing.B) { benchScale(b, srcGray, 200, 150, CatmullRom) }
func BenchmarkScaleCRSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, CatmullRom) }
func BenchmarkScaleCRSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, CatmullRom) }
func BenchmarkScaleCRSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }