vector: fix overflow when rasterizing a 30 degree line.

There are some obvious TODOs, but they will be follow-up commits.

This is about correctness, not performance, but for the record:

name                              old time/op  new time/op   delta
GlyphAlpha16Over-8                3.16µs ± 0%   3.38µs ± 0%   +6.96%         (p=0.000 n=9+10)
GlyphAlpha16Src-8                 3.06µs ± 0%   3.28µs ± 0%   +7.21%        (p=0.000 n=10+10)
GlyphAlpha32Over-8                4.92µs ± 1%   5.23µs ± 1%   +6.24%        (p=0.000 n=10+10)
GlyphAlpha32Src-8                 4.53µs ± 1%   4.83µs ± 0%   +6.69%        (p=0.000 n=10+10)
GlyphAlpha64Over-8                9.60µs ± 1%  10.21µs ± 0%   +6.36%        (p=0.000 n=10+10)
GlyphAlpha64Src-8                 8.04µs ± 0%   8.68µs ± 1%   +7.99%         (p=0.000 n=9+10)
GlyphAlpha128Over-8               23.1µs ± 0%   24.2µs ± 1%   +5.08%         (p=0.000 n=9+10)
GlyphAlpha128Src-8                16.8µs ± 0%   18.0µs ± 1%   +6.76%        (p=0.000 n=10+10)
GlyphAlpha256Over-8               68.6µs ± 1%   70.3µs ± 0%   +2.50%        (p=0.000 n=10+10)
GlyphAlpha256Src-8                43.6µs ± 0%   45.4µs ± 0%   +4.08%         (p=0.000 n=10+8)
GlyphRGBA16Over-8                 4.92µs ± 0%   5.14µs ± 0%   +4.48%          (p=0.000 n=9+9)
GlyphRGBA16Src-8                  4.39µs ± 0%   4.59µs ± 0%   +4.60%          (p=0.000 n=8+9)
GlyphRGBA32Over-8                 11.8µs ± 0%   12.2µs ± 1%   +2.89%         (p=0.000 n=9+10)
GlyphRGBA32Src-8                  9.79µs ± 1%  10.03µs ± 0%   +2.49%         (p=0.000 n=10+7)
GlyphRGBA64Over-8                 36.7µs ± 1%   37.5µs ± 1%   +2.23%        (p=0.000 n=10+10)
GlyphRGBA64Src-8                  28.5µs ± 0%   29.1µs ± 0%   +2.09%        (p=0.000 n=10+10)
GlyphRGBA128Over-8                 133µs ± 0%    135µs ± 0%   +1.51%         (p=0.000 n=10+9)
GlyphRGBA128Src-8                 99.1µs ± 0%  100.5µs ± 1%   +1.47%         (p=0.000 n=9+10)
GlyphRGBA256Over-8                 505µs ± 0%    511µs ± 0%   +1.18%         (p=0.000 n=9+10)
GlyphRGBA256Src-8                  372µs ± 0%    374µs ± 0%   +0.69%         (p=0.000 n=9+10)

Change-Id: Ice1d77de5bc2649f8cd88366bcae3c00e78d65c2
Reviewed-on: https://go-review.googlesource.com/31113
Reviewed-by: David Crawshaw <crawshaw@golang.org>
This commit is contained in:
Nigel Tao 2016-10-14 17:12:05 +11:00
parent e78c45720c
commit f72412cfe3
3 changed files with 132 additions and 17 deletions

View File

@ -19,6 +19,10 @@ const (
// //
// When changing this number, also change the assembly code (search for ϕ // When changing this number, also change the assembly code (search for ϕ
// in the .s files). // in the .s files).
//
// TODO: drop ϕ from 10 to 9, so that ±1<<(3*ϕ+2) doesn't overflow an int32
// and we can therefore use int32 math instead of the slower int64 math in
// Rasterizer.fixedLineTo below.
ϕ = 10 ϕ = 10
fxOne int1ϕ = 1 << ϕ fxOne int1ϕ = 1 << ϕ
@ -93,7 +97,7 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
continue continue
} }
buf := z.bufU32[y*width:] buf := z.bufU32[y*width:]
d := dy * dir d := dy * dir // d ranges up to ±1<<(1*ϕ).
x0, x1 := x, xNext x0, x1 := x, xNext
if x > xNext { if x > xNext {
x0, x1 = x1, x0 x0, x1 = x1, x0
@ -131,8 +135,8 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
if i := clamp(x0i, width); i < uint(len(buf)) { if i := clamp(x0i, width); i < uint(len(buf)) {
// In ideal math: buf[i] += uint32(d * a0) // In ideal math: buf[i] += uint32(d * a0)
D := oneMinusX0fSquared D := oneMinusX0fSquared // D ranges up to ±1<<(1*ϕ).
D *= d D *= d // D ranges up to ±1<<(2*ϕ).
D /= twoOverS D /= twoOverS
buf[i] += uint32(D) buf[i] += uint32(D)
} }
@ -140,9 +144,15 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
if x1i == x0i+2 { if x1i == x0i+2 {
if i := clamp(x0i+1, width); i < uint(len(buf)) { if i := clamp(x0i+1, width); i < uint(len(buf)) {
// In ideal math: buf[i] += uint32(d * (fxOne - a0 - am)) // In ideal math: buf[i] += uint32(d * (fxOne - a0 - am))
D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared //
D *= d // (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies
D /= twoOverS // that int64(twoOverS) ranges up to +1<<(1*ϕ+2).
//
// Convert to int64 to avoid overflow. Without that,
// TestRasterize30Degrees fails.
D := int64(twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared) // D ranges up to ±1<<(2*ϕ+2).
D *= int64(d) // D ranges up to ±1<<(3*ϕ+2).
D /= int64(twoOverS)
buf[i] += uint32(D) buf[i] += uint32(D)
} }
} else { } else {
@ -151,12 +161,44 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
// a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS // a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS
if i := clamp(x0i+1, width); i < uint(len(buf)) { if i := clamp(x0i+1, width); i < uint(len(buf)) {
// In ideal math: buf[i] += uint32(d * (a1 - a0)) // In ideal math:
// buf[i] += uint32(d * (a1 - a0))
// or equivalently (but better in non-ideal, integer math,
// with respect to rounding errors),
// buf[i] += uint32(A * d / twoOverS)
// where
// A = (a1 - a0) * twoOverS
// = a1*twoOverS - a0*twoOverS
// Noting that twoOverS/oneOverS equals 2, substituting for
// a0 and then a1, given above, yields:
// A = a1*twoOverS - oneMinusX0fSquared
// = (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared
// = fxOneAndAHalf<<(ϕ+1) - x0f<<(ϕ+1) - oneMinusX0fSquared
//
// This is a positive number minus two non-negative
// numbers. For an upper bound on A, the positive number is
// P = fxOneAndAHalf<<(ϕ+1)
// < (2*fxOne)<<(ϕ+1)
// = fxOne<<(ϕ+2)
// = 1<<(2*ϕ+2)
//
// For a lower bound on A, the two non-negative numbers are
// N = x0f<<(ϕ+1) + oneMinusX0fSquared
// ≤ x0f<<(ϕ+1) + fxOne*fxOne
// = x0f<<(ϕ+1) + 1<<(2*ϕ)
// < x0f<<(ϕ+1) + 1<<(2*ϕ+1)
// ≤ fxOne<<(ϕ+1) + 1<<(2*ϕ+1)
// = 1<<(2*ϕ+1) + 1<<(2*ϕ+1)
// = 1<<(2*ϕ+2)
//
// Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to
// derive a tighter bound, but this bound is sufficient to
// reason about overflow.
// //
// Convert to int64 to avoid overflow. Without that, // Convert to int64 to avoid overflow. Without that,
// TestRasterizePolygon fails. // TestRasterizePolygon fails.
D := int64((fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared) D := int64((fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared) // D ranges up to ±1<<(2*ϕ+2).
D *= int64(d) D *= int64(d) // D ranges up to ±1<<(3*ϕ+2).
D /= int64(twoOverS) D /= int64(twoOverS)
buf[i] += uint32(D) buf[i] += uint32(D)
} }
@ -172,7 +214,32 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
// a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS // a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS
if i := clamp(x1i-1, width); i < uint(len(buf)) { if i := clamp(x1i-1, width); i < uint(len(buf)) {
// In ideal math: buf[i] += uint32(d * (fxOne - a2 - am)) // In ideal math:
// buf[i] += uint32(d * (fxOne - a2 - am))
// or equivalently (but better in non-ideal, integer math,
// with respect to rounding errors),
// buf[i] += uint32(A * d / twoOverS)
// where
// A = (fxOne - a2 - am) * twoOverS
// = twoOverS<<ϕ - a2*twoOverS - am*twoOverS
// Noting that twoOverS/oneOverS equals 2, substituting for
// am and then a2, given above, yields:
// A = twoOverS<<ϕ - a2*twoOverS - x1f*x1f
// = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f
// = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
// Substituting for a1, given above, yields:
// A = twoOverS<<ϕ - ((fxOneAndAHalf - x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
// = twoOverS<<ϕ - (fxOneAndAHalf - x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
//
// TODO: re-factor that equation some more: twoOverS equals
// 2*(x1-x0), so a substantial part of twoOverS<<ϕ and
// int1ϕ(x1i-x0i-3)<<(2*ϕ+1) should cancel each other out.
// Doing subtract-then-shift instead of shift-then-subtract
// could mean that we can use the faster int32 math,
// instead of int64, but still avoid overflow:
// A = B<<ϕ - x1f*x1f
// where
// B = twoOverS - (fxOneAndAHalf - x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
// //
// Convert to int64 to avoid overflow. Without that, // Convert to int64 to avoid overflow. Without that,
// TestRasterizePolygon fails. // TestRasterizePolygon fails.
@ -188,8 +255,8 @@ func (z *Rasterizer) fixedLineTo(b f32.Vec2) {
if i := clamp(x1i, width); i < uint(len(buf)) { if i := clamp(x1i, width); i < uint(len(buf)) {
// In ideal math: buf[i] += uint32(d * am) // In ideal math: buf[i] += uint32(d * am)
D := x1fSquared D := x1fSquared // D ranges up to ±1<<(2*ϕ).
D *= d D *= d // D ranges up to ±1<<(3*ϕ).
D /= twoOverS D /= twoOverS
buf[i] += uint32(D) buf[i] += uint32(D)
} }

View File

@ -30,7 +30,7 @@ import (
"golang.org/x/image/math/f32" "golang.org/x/image/math/f32"
) )
// floatingPointMathThreshold is the width or hight above which the rasterizer // floatingPointMathThreshold is the width or height above which the rasterizer
// chooses to used floating point math instead of fixed point math. // chooses to used floating point math instead of fixed point math.
// //
// Both implementations of line segmentation rasterization (see raster_fixed.go // Both implementations of line segmentation rasterization (see raster_fixed.go

View File

@ -13,6 +13,7 @@ import (
"image/draw" "image/draw"
"image/png" "image/png"
"math" "math"
"math/rand"
"os" "os"
"path/filepath" "path/filepath"
"testing" "testing"
@ -146,9 +147,60 @@ func TestRasterizeWideAlmostHorizontalLines(t *testing.T) {
} }
} }
func TestRasterize30Degrees(t *testing.T) {
z := NewRasterizer(8, 8)
z.MoveTo(f32.Vec2{4, 4})
z.LineTo(f32.Vec2{8, 4})
z.LineTo(f32.Vec2{4, 6})
z.ClosePath()
dst := image.NewAlpha(z.Bounds())
z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{})
if err := checkCornersCenter(dst); err != nil {
t.Error(err)
}
}
func TestRasterizeRandomLineTos(t *testing.T) {
var z Rasterizer
for i := 5; i < 50; i++ {
n, rng := 0, rand.New(rand.NewSource(int64(i)))
z.Reset(i+2, i+2)
z.MoveTo(f32.Vec2{float32(i / 2), float32(i / 2)})
for ; rng.Intn(16) != 0; n++ {
x := 1 + rng.Intn(i)
y := 1 + rng.Intn(i)
z.LineTo(f32.Vec2{float32(x), float32(y)})
}
z.ClosePath()
dst := image.NewAlpha(z.Bounds())
z.Draw(dst, dst.Bounds(), image.Opaque, image.Point{})
if err := checkCorners(dst); err != nil {
t.Errorf("i=%d (%d nodes): %v", i, n, err)
}
}
}
// checkCornersCenter checks that the corners of the image are all 0x00 and the // checkCornersCenter checks that the corners of the image are all 0x00 and the
// center is 0xff. // center is 0xff.
func checkCornersCenter(m *image.Alpha) error { func checkCornersCenter(m *image.Alpha) error {
if err := checkCorners(m); err != nil {
return err
}
size := m.Bounds().Size()
center := m.Pix[(size.Y/2)*m.Stride+(size.X/2)]
if center != 0xff {
return fmt.Errorf("center: got %#02x, want 0xff", center)
}
return nil
}
// checkCorners checks that the corners of the image are all 0x00.
func checkCorners(m *image.Alpha) error {
size := m.Bounds().Size() size := m.Bounds().Size()
corners := [4]uint8{ corners := [4]uint8{
m.Pix[(0*size.Y+0)*m.Stride+(0*size.X+0)], m.Pix[(0*size.Y+0)*m.Stride+(0*size.X+0)],
@ -159,10 +211,6 @@ func checkCornersCenter(m *image.Alpha) error {
if corners != [4]uint8{} { if corners != [4]uint8{} {
return fmt.Errorf("corners were not all zero: %v", corners) return fmt.Errorf("corners were not all zero: %v", corners)
} }
center := m.Pix[(size.Y/2)*m.Stride+(size.X/2)]
if center != 0xff {
return fmt.Errorf("center: got %#02x, want 0xff", center)
}
return nil return nil
} }