From fa54d6fa1c2aaf941da0bc4885015df17693f8ec Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Sat, 15 Oct 2016 13:23:35 +1100 Subject: [PATCH] vector: simplify fixedLineTo computation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name old time/op new time/op delta GlyphAlpha16Over-8 3.38µs ± 0% 3.36µs ± 0% -0.54% (p=0.000 n=10+10) GlyphAlpha16Src-8 3.28µs ± 0% 3.26µs ± 0% -0.69% (p=0.000 n=10+9) GlyphAlpha32Over-8 5.23µs ± 1% 5.20µs ± 0% -0.58% (p=0.000 n=10+8) GlyphAlpha32Src-8 4.83µs ± 0% 4.81µs ± 1% -0.46% (p=0.001 n=10+9) GlyphAlpha64Over-8 10.2µs ± 0% 10.2µs ± 0% -0.21% (p=0.003 n=10+9) GlyphAlpha64Src-8 8.68µs ± 1% 8.62µs ± 0% -0.70% (p=0.000 n=10+9) GlyphAlpha128Over-8 24.2µs ± 1% 24.1µs ± 0% -0.58% (p=0.001 n=10+9) GlyphAlpha128Src-8 18.0µs ± 1% 17.9µs ± 0% -0.61% (p=0.001 n=10+9) GlyphAlpha256Over-8 70.3µs ± 0% 70.1µs ± 0% -0.37% (p=0.019 n=10+10) GlyphAlpha256Src-8 45.4µs ± 0% 45.2µs ± 1% -0.38% (p=0.041 n=8+10) GlyphRGBA16Over-8 5.14µs ± 0% 5.12µs ± 0% -0.43% (p=0.000 n=9+10) GlyphRGBA16Src-8 4.59µs ± 0% 4.57µs ± 1% -0.43% (p=0.005 n=9+9) GlyphRGBA32Over-8 12.2µs ± 1% 12.1µs ± 0% -0.70% (p=0.000 n=10+10) GlyphRGBA32Src-8 10.0µs ± 0% 10.0µs ± 0% ~ (p=0.092 n=7+10) GlyphRGBA64Over-8 37.5µs ± 1% 37.2µs ± 0% -0.75% (p=0.000 n=10+9) GlyphRGBA64Src-8 29.1µs ± 0% 29.0µs ± 1% ~ (p=0.243 n=10+9) GlyphRGBA128Over-8 135µs ± 0% 134µs ± 1% -0.72% (p=0.000 n=9+9) GlyphRGBA128Src-8 101µs ± 1% 100µs ± 1% ~ (p=0.197 n=10+10) GlyphRGBA256Over-8 511µs ± 0% 506µs ± 0% -0.97% (p=0.000 n=10+10) GlyphRGBA256Src-8 374µs ± 0% 373µs ± 0% -0.29% (p=0.002 n=10+10) Change-Id: Ic05a900935cb59e55711374db1e62b055d75c8e3 Reviewed-on: https://go-review.googlesource.com/31116 Reviewed-by: David Crawshaw --- vector/raster_fixed.go | 49 +++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/vector/raster_fixed.go b/vector/raster_fixed.go index 8a433c5..fe5f75a 100644 --- a/vector/raster_fixed.go 
+++ b/vector/raster_fixed.go @@ -20,7 +20,7 @@ const ( // When changing this number, also change the assembly code (search for ϕ // in the .s files). // - // TODO: drop ϕ from 10 to 9, so that ±1<<(3*ϕ+2) doesn't overflow an int32 + // TODO: drop ϕ from 10 to 9, so that ±1<<(3*ϕ+3) doesn't overflow an int32 // and we can therefore use int32 math instead of the slower int64 math in // Rasterizer.fixedLineTo below. ϕ = 10 @@ -228,26 +228,41 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { // = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f // = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f // Substituting for a1, given above, yields: - // A = twoOverS<<ϕ - ((fxOneAndAHalf - x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f - // = twoOverS<<ϕ - (fxOneAndAHalf - x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f - // - // TODO: re-factor that equation some more: twoOverS equals - // 2*(x1-x0), so a substantial part of twoOverS<<ϕ and - // int1ϕ(x1i-x0i-3)<<(2*ϕ+1) should cancel each other out. 
- // Doing subtract-then-shift instead of shift-then-subtract - // could mean that we can use the faster int32 math, - // instead of int64, but still avoid overflow: - // A = B<<ϕ - x1f*x1f + // A = twoOverS<<ϕ - ((fxOneAndAHalf-x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f + // = twoOverS<<ϕ - (fxOneAndAHalf-x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f + // = B<<ϕ - x1f*x1f // where - // B = twoOverS - (fxOneAndAHalf - x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) + // B = twoOverS - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) + // = (x1-x0)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) + // + // Re-arranging the definitions given above: + // x0Floor := int1ϕ(x0i) << ϕ + // x0f := x0 - x0Floor + // x1Ceil := int1ϕ(x1i) << ϕ + // x1f := x1 - x1Ceil + fxOne + // combined with fxOne = 1<<ϕ yields: + // x0 = x0f + int1ϕ(x0i)<<ϕ + // x1 = x1f + int1ϕ(x1i-1)<<ϕ + // so that expanding (x1-x0) yields: + // B = (x1f-x0f + int1ϕ(x1i-x0i-1)<<ϕ)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) + // = (x1f-x0f)<<1 + int1ϕ(x1i-x0i-1)<<(ϕ+1) - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) + // A large part of the second and fourth terms cancel: + // B = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(-2)<<(ϕ+1) + // = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 + 1<<(ϕ+2) + // = (x1f - fxOneAndAHalf)<<1 + 1<<(ϕ+2) + // The first term, (x1f - fxOneAndAHalf)<<1, is a negative + // number, bounded below by -fxOneAndAHalf<<1, which is + // greater than -fxOne<<2, or -1<<(ϕ+2). Thus, B ranges up + // to ±1<<(ϕ+2). One final simplification: + // B = x1f<<1 + (1<<(ϕ+2) - fxOneAndAHalf<<1) // // Convert to int64 to avoid overflow. Without that, // TestRasterizePolygon fails. - D := int64(twoOverS) << ϕ - D -= int64((fxOneAndAHalf - x0f)) << (ϕ + 1) - D -= int64((x1i - x0i - 3)) << (2*ϕ + 1) - D -= int64(x1fSquared) - D *= int64(d) + const C = 1<<(ϕ+2) - fxOneAndAHalf<<1 + D := int64(x1f<<1 + C) // D ranges up to ±1<<(1*ϕ+2). + D <<= ϕ // D ranges up to ±1<<(2*ϕ+2). 
+ D -= int64(x1fSquared) // D ranges up to ±1<<(2*ϕ+3). + D *= int64(d) // D ranges up to ±1<<(3*ϕ+3). D /= int64(twoOverS) buf[i] += uint32(D) }