draw: eliminate some math.Floor calls in Transform methods.

benchmark old ns/op new ns/op delta BenchmarkTformNNSrcRGBA 524533 293230 -44.10% BenchmarkTformNNSrcUniform 362974 149389 -58.84% BenchmarkTformABSrcGray 827461 427720 -48.31% BenchmarkTformABSrcNRGBA 1286930 919391 -28.56% BenchmarkTformABSrcRGBA 1115444 794334 -28.79% BenchmarkTformABSrcYCbCr 1732420 1379846 -20.35% BenchmarkTformCRSrcGray 3629048 3467404 -4.45% BenchmarkTformCRSrcNRGBA 7569407 7418874 -1.99% BenchmarkTformCRSrcRGBA 7552459 7432745 -1.59% BenchmarkTformCRSrcYCbCr 8072351 7854715 -2.70% Change-Id: I6c01e631d9f88c36ae99d0cd181104ad5ac48db0 Reviewed-on: https://go-review.googlesource.com/7926 Reviewed-by: Rob Pike <r@golang.org>
2015-03-23 13:30:31 +11:00 · 2015-03-23 13:30:31 +11:00 · 575b100276
commit 575b100276
parent 41f747b91e
4 changed files with 332 additions and 246 deletions
--- a/draw/gen.go
+++ b/draw/gen.go
@ -739,22 +739,36 @@ const (

 		func (z $receiver) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 			dr := transformRect(s2d, &sr)
-			// adr is the affected destination pixels, relative to dr.Min.
-			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+			// adr is the affected destination pixels.
+			adr := dst.Bounds().Intersect(dr)
 			if adr.Empty() || sr.Empty() {
 				return
 			}
 			d2s := invert(s2d)
+			// bias is a translation of the mapping from dst co-ordinates to
+			// src co-ordinates such that the latter temporarily have
+			// non-negative X and Y co-ordinates. This allows us to write
+			// int(f) instead of int(math.Floor(f)), since "round to zero" and
+			// "round down" are equivalent when f >= 0, but the former is much
+			// cheaper. The X-- and Y-- are because the TransformLeaf methods
+			// have a "sx -= 0.5" adjustment.
+			bias := transformRect(&d2s, &adr).Min
+			bias.X--
+			bias.Y--
+			d2s[2] -= float64(bias.X)
+			d2s[5] -= float64(bias.Y)
+			// Make adr relative to dr.Min.
+			adr = adr.Sub(dr.Min)
 			// sr is the source pixels. If it extends beyond the src bounds,
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 			} else if u, ok := src.(*image.Uniform); ok {
 				// TODO: get the Op from opts.
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 			} else {
-				$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr)
+				$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		}
 	`
@ -779,16 +793,15 @@ const (
 	`

 	codeNNTransformLeaf = `
-		func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
+		func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
 			$preOuter
 			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 				dyf := float64(dr.Min.Y + int(dy)) + 0.5
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-					sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-					sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+					sx0 := int(d2s[0]*dxf + d2s[1]*dyf + d2s[2]) + bias.X
+					sy0 := int(d2s[3]*dxf + d2s[4]*dyf + d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@ -854,25 +867,24 @@ const (
 	`

 	codeABLTransformLeaf = `
-		func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
+		func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
 			$preOuter
 			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 				dyf := float64(dr.Min.Y + int(dy)) + 0.5
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}

 					sx -= 0.5
-					sxf := math.Floor(sx)
-					xFrac0 := sx - sxf
+					sx0 := int(sx)
+					xFrac0 := sx - float64(sx0)
 					xFrac1 := 1 - xFrac0
-					sx0 := int(sxf)
+					sx0 += bias.X
 					sx1 := sx0 + 1
 					if sx0 < sr.Min.X {
 						sx0, sx1 = sr.Min.X, sr.Min.X
@ -883,10 +895,10 @@ const (
 					}

 					sy -= 0.5
-					syf := math.Floor(sy)
-					yFrac0 := sy - syf
+					sy0 := int(sy)
+					yFrac0 := sy - float64(sy0)
 					yFrac1 := 1 - yFrac0
-					sy0 := int(syf)
+					sy0 += bias.Y
 					sy1 := sy0 + 1
 					if sy0 < sr.Min.Y {
 						sy0, sy1 = sr.Min.Y, sr.Min.Y
@ -947,16 +959,30 @@ const (

 		func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 			dr := transformRect(s2d, &sr)
-			// adr is the affected destination pixels, relative to dr.Min.
-			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+			// adr is the affected destination pixels.
+			adr := dst.Bounds().Intersect(dr)
 			if adr.Empty() || sr.Empty() {
 				return
 			}
 			d2s := invert(s2d)
+			// bias is a translation of the mapping from dst co-ordinates to
+			// src co-ordinates such that the latter temporarily have
+			// non-negative X and Y co-ordinates. This allows us to write
+			// int(f) instead of int(math.Floor(f)), since "round to zero" and
+			// "round down" are equivalent when f >= 0, but the former is much
+			// cheaper. The X-- and Y-- are because the TransformLeaf methods
+			// have a "sx -= 0.5" adjustment.
+			bias := transformRect(&d2s, &adr).Min
+			bias.X--
+			bias.Y--
+			d2s[2] -= float64(bias.X)
+			d2s[5] -= float64(bias.Y)
+			// Make adr relative to dr.Min.
+			adr = adr.Sub(dr.Min)

 			if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
 				// TODO: get the Op from opts.
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 				return
 			}

@ -973,9 +999,9 @@ const (
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			} else {
-				$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			}
 		}
 	`
@ -1024,7 +1050,7 @@ const (
 	`

 	codeKernelTransformLeaf = `
-		func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) {
+		func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 			// When shrinking, broaden the effective kernel support so that we still
 			// visit every source pixel.
 			xHalfWidth, xKernelArgScale := q.Support, 1.0
@ -1047,13 +1073,15 @@ const (
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}

+					// TODO: adjust the bias so that we can use int(f) instead
+					// of math.Floor(f) and math.Ceil(f).
+					sx += float64(bias.X)
 					sx -= 0.5
 					ix := int(math.Floor(sx - xHalfWidth))
 					if ix < sr.Min.X {
@ -1077,6 +1105,7 @@ const (
 						xWeights[x] /= totalXWeight
 					}

+					sy += float64(bias.Y)
 					sy -= 0.5
 					iy := int(math.Floor(sy - yHalfWidth))
 					if iy < sr.Min.Y {
--- a/draw/impl.go
+++ b/draw/impl.go
--- a/draw/scale.go
+++ b/draw/scale.go
@ -353,7 +353,7 @@ func transformRect(s2d *f64.Aff3, sr *image.Rectangle) (dr image.Rectangle) {
 	return dr
 }

-func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, op Op) {
+func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, bias image.Point, op Op) {
 	switch dst := dst.(type) {
 	case *image.RGBA:
 		pr, pg, pb, pa := src.C.RGBA()
@ -367,9 +367,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i
 			d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))
 			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 				dxf := float64(dr.Min.X+int(dx)) + 0.5
-				// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-				sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-				sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+				sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+				sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 				if !(image.Point{sx0, sy0}).In(sr) {
 					continue
 				}
@ -394,9 +393,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i
 			dyf := float64(dr.Min.Y+int(dy)) + 0.5
 			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 				dxf := float64(dr.Min.X+int(dx)) + 0.5
-				// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-				sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-				sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+				sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+				sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 				if !(image.Point{sx0, sy0}).In(sr) {
 					continue
 				}
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@ -414,35 +414,43 @@ func benchTform(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w
 	}
 }

-func BenchmarkScaleLargeDownNN(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
-func BenchmarkScaleLargeDownAB(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleLargeDownBL(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
-func BenchmarkScaleLargeDownCR(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
+func BenchmarkScaleNNLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
+func BenchmarkScaleABLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleBLLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
+func BenchmarkScaleCRLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }

-func BenchmarkScaleDownNN(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
-func BenchmarkScaleDownAB(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
-func BenchmarkScaleDownBL(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
-func BenchmarkScaleDownCR(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
+func BenchmarkScaleNNDown(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
+func BenchmarkScaleABDown(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
+func BenchmarkScaleBLDown(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
+func BenchmarkScaleCRDown(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }

-func BenchmarkScaleUpNN(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
-func BenchmarkScaleUpAB(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
-func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
-func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
+func BenchmarkScaleNNUp(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
+func BenchmarkScaleABUp(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
+func BenchmarkScaleBLUp(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
+func BenchmarkScaleCRUp(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }

-func BenchmarkScaleSrcGray(b *testing.B)    { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcNRGBA(b *testing.B)   { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcRGBA(b *testing.B)    { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcYCbCr(b *testing.B)   { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleNNSrcRGBA(b *testing.B)    { benchScale(b, srcRGBA, 200, 150, NearestNeighbor) }
+func BenchmarkScaleNNSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, NearestNeighbor) }

-func BenchmarkTformABSrcGray(b *testing.B)    { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcNRGBA(b *testing.B)   { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcYCbCr(b *testing.B)   { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+func BenchmarkTformNNSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, NearestNeighbor) }
+func BenchmarkTformNNSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, NearestNeighbor) }

-func BenchmarkTformCRSrcGray(b *testing.B)    { benchTform(b, srcGray, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcNRGBA(b *testing.B)   { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcYCbCr(b *testing.B)   { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }
+func BenchmarkScaleABSrcGray(b *testing.B)  { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcRGBA(b *testing.B)  { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+
+func BenchmarkTformABSrcGray(b *testing.B)  { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcRGBA(b *testing.B)  { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+
+func BenchmarkScaleCRSrcGray(b *testing.B)  { benchScale(b, srcGray, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcRGBA(b *testing.B)  { benchScale(b, srcRGBA, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, CatmullRom) }
+
+func BenchmarkTformCRSrcGray(b *testing.B)  { benchTform(b, srcGray, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcRGBA(b *testing.B)  { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }