From f72412cfe3a8cdcc3cdaa89638a8f6f7def78584 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Fri, 14 Oct 2016 17:12:05 +1100 Subject: [PATCH] vector: fix overflow when rasterizing a 30 degree line. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some obvious TODOs, but they will be follow-up commits. This is about correctness, not performance, but for the record: name old time/op new time/op delta GlyphAlpha16Over-8 3.16µs ± 0% 3.38µs ± 0% +6.96% (p=0.000 n=9+10) GlyphAlpha16Src-8 3.06µs ± 0% 3.28µs ± 0% +7.21% (p=0.000 n=10+10) GlyphAlpha32Over-8 4.92µs ± 1% 5.23µs ± 1% +6.24% (p=0.000 n=10+10) GlyphAlpha32Src-8 4.53µs ± 1% 4.83µs ± 0% +6.69% (p=0.000 n=10+10) GlyphAlpha64Over-8 9.60µs ± 1% 10.21µs ± 0% +6.36% (p=0.000 n=10+10) GlyphAlpha64Src-8 8.04µs ± 0% 8.68µs ± 1% +7.99% (p=0.000 n=9+10) GlyphAlpha128Over-8 23.1µs ± 0% 24.2µs ± 1% +5.08% (p=0.000 n=9+10) GlyphAlpha128Src-8 16.8µs ± 0% 18.0µs ± 1% +6.76% (p=0.000 n=10+10) GlyphAlpha256Over-8 68.6µs ± 1% 70.3µs ± 0% +2.50% (p=0.000 n=10+10) GlyphAlpha256Src-8 43.6µs ± 0% 45.4µs ± 0% +4.08% (p=0.000 n=10+8) GlyphRGBA16Over-8 4.92µs ± 0% 5.14µs ± 0% +4.48% (p=0.000 n=9+9) GlyphRGBA16Src-8 4.39µs ± 0% 4.59µs ± 0% +4.60% (p=0.000 n=8+9) GlyphRGBA32Over-8 11.8µs ± 0% 12.2µs ± 1% +2.89% (p=0.000 n=9+10) GlyphRGBA32Src-8 9.79µs ± 1% 10.03µs ± 0% +2.49% (p=0.000 n=10+7) GlyphRGBA64Over-8 36.7µs ± 1% 37.5µs ± 1% +2.23% (p=0.000 n=10+10) GlyphRGBA64Src-8 28.5µs ± 0% 29.1µs ± 0% +2.09% (p=0.000 n=10+10) GlyphRGBA128Over-8 133µs ± 0% 135µs ± 0% +1.51% (p=0.000 n=10+9) GlyphRGBA128Src-8 99.1µs ± 0% 100.5µs ± 1% +1.47% (p=0.000 n=9+10) GlyphRGBA256Over-8 505µs ± 0% 511µs ± 0% +1.18% (p=0.000 n=9+10) GlyphRGBA256Src-8 372µs ± 0% 374µs ± 0% +0.69% (p=0.000 n=9+10) Change-Id: Ice1d77de5bc2649f8cd88366bcae3c00e78d65c2 Reviewed-on: https://go-review.googlesource.com/31113 Reviewed-by: David Crawshaw --- vector/raster_fixed.go | 91 ++++++++++++++++++++++++++++++++++++------ vector/vector.go | 2 +- vector/vector_test.go | 56 ++++++++++++++++++++++++-- 3 files changed, 132 insertions(+), 17 deletions(-) diff --git a/vector/raster_fixed.go b/vector/raster_fixed.go index 7a8ea36..8a433c5 100644 --- a/vector/raster_fixed.go +++ b/vector/raster_fixed.go @@ -19,6 +19,10 @@ const ( // // When changing this number, also change the assembly code (search for ϕ // in the .s files). + // + // TODO: drop ϕ from 10 to 9, so that ±1<<(3*ϕ+2) doesn't overflow an int32 + // and we can therefore use int32 math instead of the slower int64 math in + // Rasterizer.fixedLineTo below. ϕ = 10 fxOne int1ϕ = 1 << ϕ @@ -93,7 +97,7 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { continue } buf := z.bufU32[y*width:] - d := dy * dir + d := dy * dir // d ranges up to ±1<<(1*ϕ). x0, x1 := x, xNext if x > xNext { x0, x1 = x1, x0 @@ -131,8 +135,8 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { if i := clamp(x0i, width); i < uint(len(buf)) { // In ideal math: buf[i] += uint32(d * a0) - D := oneMinusX0fSquared - D *= d + D := oneMinusX0fSquared // D ranges up to ±1<<(1*ϕ). + D *= d // D ranges up to ±1<<(2*ϕ). D /= twoOverS buf[i] += uint32(D) } @@ -140,9 +144,15 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { if x1i == x0i+2 { if i := clamp(x0i+1, width); i < uint(len(buf)) { // In ideal math: buf[i] += uint32(d * (fxOne - a0 - am)) - D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared - D *= d - D /= twoOverS + // + // (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies + // that int64(twoOverS) ranges up to +1<<(1*ϕ+2). + // + // Convert to int64 to avoid overflow. Without that, + // TestRasterize30Degrees fails. + D := int64(twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared) // D ranges up to ±1<<(2*ϕ+2). + D *= int64(d) // D ranges up to ±1<<(3*ϕ+2). + D /= int64(twoOverS) buf[i] += uint32(D) } } else { @@ -151,12 +161,44 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { // a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS if i := clamp(x0i+1, width); i < uint(len(buf)) { - // In ideal math: buf[i] += uint32(d * (a1 - a0)) + // In ideal math: + // buf[i] += uint32(d * (a1 - a0)) + // or equivalently (but better in non-ideal, integer math, + // with respect to rounding errors), + // buf[i] += uint32(A * d / twoOverS) + // where + // A = (a1 - a0) * twoOverS + // = a1*twoOverS - a0*twoOverS + // Noting that twoOverS/oneOverS equals 2, substituting for + // a0 and then a1, given above, yields: + // A = a1*twoOverS - oneMinusX0fSquared + // = (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared + // = fxOneAndAHalf<<(ϕ+1) - x0f<<(ϕ+1) - oneMinusX0fSquared + // + // This is a positive number minus two non-negative + // numbers. For an upper bound on A, the positive number is + // P = fxOneAndAHalf<<(ϕ+1) + // < (2*fxOne)<<(ϕ+1) + // = fxOne<<(ϕ+2) + // = 1<<(2*ϕ+2) + // + // For a lower bound on A, the two non-negative numbers are + // N = x0f<<(ϕ+1) + oneMinusX0fSquared + // ≤ x0f<<(ϕ+1) + fxOne*fxOne + // = x0f<<(ϕ+1) + 1<<(2*ϕ) + // < x0f<<(ϕ+1) + 1<<(2*ϕ+1) + // ≤ fxOne<<(ϕ+1) + 1<<(2*ϕ+1) + // = 1<<(2*ϕ+1) + 1<<(2*ϕ+1) + // = 1<<(2*ϕ+2) + // + // Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to + // derive a tighter bound, but this bound is sufficient to + // reason about overflow. // // Convert to int64 to avoid overflow. Without that, // TestRasterizePolygon fails. - D := int64((fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared) - D *= int64(d) + D := int64((fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared) // D ranges up to ±1<<(2*ϕ+2). + D *= int64(d) // D ranges up to ±1<<(3*ϕ+2). D /= int64(twoOverS) buf[i] += uint32(D) } @@ -172,7 +214,32 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { // a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS if i := clamp(x1i-1, width); i < uint(len(buf)) { - // In ideal math: buf[i] += uint32(d * (fxOne - a2 - am)) + // In ideal math: + // buf[i] += uint32(d * (fxOne - a2 - am)) + // or equivalently (but better in non-ideal, integer math, + // with respect to rounding errors), + // buf[i] += uint32(A * d / twoOverS) + // where + // A = (fxOne - a2 - am) * twoOverS + // = twoOverS<<ϕ - a2*twoOverS - am*twoOverS + // Noting that twoOverS/oneOverS equals 2, substituting for + // am and then a2, given above, yields: + // A = twoOverS<<ϕ - a2*twoOverS - x1f*x1f + // = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f + // = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f + // Substituting for a1, given above, yields: + // A = twoOverS<<ϕ - ((fxOneAndAHalf - x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f + // = twoOverS<<ϕ - (fxOneAndAHalf - x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f + // + // TODO: re-factor that equation some more: twoOverS equals + // 2*(x1-x0), so a substantial part of twoOverS<<ϕ and + // int1ϕ(x1i-x0i-3)<<(2*ϕ+1) should cancel each other out. + // Doing subtract-then-shift instead of shift-then-subtract + // could mean that we can use the faster int32 math, + // instead of int64, but still avoid overflow: + // A = B<<ϕ - x1f*x1f + // where + // B = twoOverS - (fxOneAndAHalf - x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1) // // Convert to int64 to avoid overflow. Without that, // TestRasterizePolygon fails. @@ -188,8 +255,8 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) { if i := clamp(x1i, width); i < uint(len(buf)) { // In ideal math: buf[i] += uint32(d * am) - D := x1fSquared - D *= d + D := x1fSquared // D ranges up to ±1<<(2*ϕ). + D *= d // D ranges up to ±1<<(3*ϕ). D /= twoOverS buf[i] += uint32(D) } diff --git a/vector/vector.go b/vector/vector.go index 9caccf8..8538bb9 100644 --- a/vector/vector.go +++ b/vector/vector.go @@ -30,7 +30,7 @@ import ( "golang.org/x/image/math/f32" ) -// floatingPointMathThreshold is the width or hight above which the rasterizer +// floatingPointMathThreshold is the width or height above which the rasterizer // chooses to used floating point math instead of fixed point math. // // Both implementations of line segmentation rasterization (see raster_fixed.go diff --git a/vector/vector_test.go b/vector/vector_test.go index 6e8a1d1..e2cbbdb 100644 --- a/vector/vector_test.go +++ b/vector/vector_test.go @@ -13,6 +13,7 @@ import ( "image/draw" "image/png" "math" + "math/rand" "os" "path/filepath" "testing" @@ -146,9 +147,60 @@ func TestRasterizeWideAlmostHorizontalLines(t *testing.T) { } } +func TestRasterize30Degrees(t *testing.T) { + z := NewRasterizer(8, 8) + z.MoveTo(f32.Vec2{4, 4}) + z.LineTo(f32.Vec2{8, 4}) + z.LineTo(f32.Vec2{4, 6}) + z.ClosePath() + + dst := image.NewAlpha(z.Bounds()) + z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) + + if err := checkCornersCenter(dst); err != nil { + t.Error(err) + } +} + +func TestRasterizeRandomLineTos(t *testing.T) { + var z Rasterizer + for i := 5; i < 50; i++ { + n, rng := 0, rand.New(rand.NewSource(int64(i))) + + z.Reset(i+2, i+2) + z.MoveTo(f32.Vec2{float32(i / 2), float32(i / 2)}) + for ; rng.Intn(16) != 0; n++ { + x := 1 + rng.Intn(i) + y := 1 + rng.Intn(i) + z.LineTo(f32.Vec2{float32(x), float32(y)}) + } + z.ClosePath() + + dst := image.NewAlpha(z.Bounds()) + z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{}) + + if err := checkCorners(dst); err != nil { + t.Errorf("i=%d (%d nodes): %v", i, n, err) + } + } +} + // checkCornersCenter checks that the corners of the image are all 0x00 and the // center is 0xff. func checkCornersCenter(m *image.Alpha) error { + if err := checkCorners(m); err != nil { + return err + } + size := m.Bounds().Size() + center := m.Pix[(size.Y/2)*m.Stride+(size.X/2)] + if center != 0xff { + return fmt.Errorf("center: got %#02x, want 0xff", center) + } + return nil +} + +// checkCorners checks that the corners of the image are all 0x00. +func checkCorners(m *image.Alpha) error { size := m.Bounds().Size() corners := [4]uint8{ m.Pix[(0*size.Y+0)*m.Stride+(0*size.X+0)], @@ -159,10 +211,6 @@ func checkCornersCenter(m *image.Alpha) error { if corners != [4]uint8{} { return fmt.Errorf("corners were not all zero: %v", corners) } - center := m.Pix[(size.Y/2)*m.Stride+(size.X/2)] - if center != 0xff { - return fmt.Errorf("center: got %#02x, want 0xff", center) - } return nil }