From e20db36d77bd0cb36cea8fe49d5c37d82d21591f Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Mon, 4 Sep 2017 01:25:37 +0100 Subject: [PATCH] vector: prevent fused multiply-add being used in floatingLineTo The test code now produces bit-identical results across platforms. Fixes golang/go#21460. Change-Id: I58b72dacf3c7930e2804b4e37bd3ccd9ef3dc2f0 Reviewed-on: https://go-review.googlesource.com/61024 Run-TryBot: Michael Munday TryBot-Result: Gobot Gobot Reviewed-by: Nigel Tao --- vector/raster_floating.go | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/vector/raster_floating.go b/vector/raster_floating.go index 143b376..fd11db1 100644 --- a/vector/raster_floating.go +++ b/vector/raster_floating.go @@ -54,13 +54,25 @@ func (z *Rasterizer) floatingLineTo(bx, by float32) { for ; y < yMax; y++ { dy := floatingMin(float32(y+1), by) - floatingMax(float32(y), ay) - xNext := x + dy*dxdy + + // The "float32" in expressions like "float32(foo*bar)" here and below + // look redundant, since foo and bar already have type float32, but are + // explicit in order to disable the compiler's Fused Multiply Add (FMA) + // instruction selection, which can improve performance but can result + // in different rounding errors in floating point computations. + // + // This package aims to have bit-exact identical results across all + // GOARCHes, and across pure Go code and assembly, so it disables FMA. + // + // See the discussion at + // https://groups.google.com/d/topic/golang-dev/Sti0bl2xUXQ/discussion + xNext := x + float32(dy*dxdy) if y < 0 { x = xNext continue } buf := z.bufF32[y*width:] - d := dy * dir + d := float32(dy * dir) x0, x1 := x, xNext if x > xNext { x0, x1 = x1, x0 @@ -71,48 +83,48 @@ func (z *Rasterizer) floatingLineTo(bx, by float32) { x1Ceil := float32(x1i) if x1i <= x0i+1 { - xmf := 0.5*(x+xNext) - x0Floor + xmf := float32(0.5*(x+xNext)) - x0Floor if i := clamp(x0i+0, width); i < uint(len(buf)) { - buf[i] += d - d*xmf + buf[i] += d - float32(d*xmf) } if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += d * xmf + buf[i] += float32(d * xmf) } } else { s := 1 / (x1 - x0) x0f := x0 - x0Floor oneMinusX0f := 1 - x0f - a0 := 0.5 * s * oneMinusX0f * oneMinusX0f + a0 := float32(0.5 * s * oneMinusX0f * oneMinusX0f) x1f := x1 - x1Ceil + 1 - am := 0.5 * s * x1f * x1f + am := float32(0.5 * s * x1f * x1f) if i := clamp(x0i, width); i < uint(len(buf)) { - buf[i] += d * a0 + buf[i] += float32(d * a0) } if x1i == x0i+2 { if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += d * (1 - a0 - am) + buf[i] += float32(d * (1 - a0 - am)) } } else { - a1 := s * (1.5 - x0f) + a1 := float32(s * (1.5 - x0f)) if i := clamp(x0i+1, width); i < uint(len(buf)) { - buf[i] += d * (a1 - a0) + buf[i] += float32(d * (a1 - a0)) } - dTimesS := d * s + dTimesS := float32(d * s) for xi := x0i + 2; xi < x1i-1; xi++ { if i := clamp(xi, width); i < uint(len(buf)) { buf[i] += dTimesS } } - a2 := a1 + s*float32(x1i-x0i-3) + a2 := a1 + float32(s*float32(x1i-x0i-3)) if i := clamp(x1i-1, width); i < uint(len(buf)) { - buf[i] += d * (1 - a2 - am) + buf[i] += float32(d * (1 - a2 - am)) } } if i := clamp(x1i, width); i < uint(len(buf)) { - buf[i] += d * am + buf[i] += float32(d * am) } }