vector: prevent fused multiply-add being used in floatingLineTo

The test code now produces bit-identical results across platforms.

Fixes golang/go#21460.

Change-Id: I58b72dacf3c7930e2804b4e37bd3ccd9ef3dc2f0
Reviewed-on: https://go-review.googlesource.com/61024
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
Michael Munday 2017-09-04 01:25:37 +01:00 committed by Nigel Tao
parent d1087bc575
commit e20db36d77

View File

@ -54,13 +54,25 @@ func (z *Rasterizer) floatingLineTo(bx, by float32) {
for ; y < yMax; y++ { for ; y < yMax; y++ {
dy := floatingMin(float32(y+1), by) - floatingMax(float32(y), ay) dy := floatingMin(float32(y+1), by) - floatingMax(float32(y), ay)
xNext := x + dy*dxdy
// The "float32" in expressions like "float32(foo*bar)" here and below
// look redundant, since foo and bar already have type float32, but are
// explicit in order to disable the compiler's Fused Multiply Add (FMA)
// instruction selection, which can improve performance but can result
// in different rounding errors in floating point computations.
//
// This package aims to have bit-exact identical results across all
// GOARCHes, and across pure Go code and assembly, so it disables FMA.
//
// See the discussion at
// https://groups.google.com/d/topic/golang-dev/Sti0bl2xUXQ/discussion
xNext := x + float32(dy*dxdy)
if y < 0 { if y < 0 {
x = xNext x = xNext
continue continue
} }
buf := z.bufF32[y*width:] buf := z.bufF32[y*width:]
d := dy * dir d := float32(dy * dir)
x0, x1 := x, xNext x0, x1 := x, xNext
if x > xNext { if x > xNext {
x0, x1 = x1, x0 x0, x1 = x1, x0
@ -71,48 +83,48 @@ func (z *Rasterizer) floatingLineTo(bx, by float32) {
x1Ceil := float32(x1i) x1Ceil := float32(x1i)
if x1i <= x0i+1 { if x1i <= x0i+1 {
xmf := 0.5*(x+xNext) - x0Floor xmf := float32(0.5*(x+xNext)) - x0Floor
if i := clamp(x0i+0, width); i < uint(len(buf)) { if i := clamp(x0i+0, width); i < uint(len(buf)) {
buf[i] += d - d*xmf buf[i] += d - float32(d*xmf)
} }
if i := clamp(x0i+1, width); i < uint(len(buf)) { if i := clamp(x0i+1, width); i < uint(len(buf)) {
buf[i] += d * xmf buf[i] += float32(d * xmf)
} }
} else { } else {
s := 1 / (x1 - x0) s := 1 / (x1 - x0)
x0f := x0 - x0Floor x0f := x0 - x0Floor
oneMinusX0f := 1 - x0f oneMinusX0f := 1 - x0f
a0 := 0.5 * s * oneMinusX0f * oneMinusX0f a0 := float32(0.5 * s * oneMinusX0f * oneMinusX0f)
x1f := x1 - x1Ceil + 1 x1f := x1 - x1Ceil + 1
am := 0.5 * s * x1f * x1f am := float32(0.5 * s * x1f * x1f)
if i := clamp(x0i, width); i < uint(len(buf)) { if i := clamp(x0i, width); i < uint(len(buf)) {
buf[i] += d * a0 buf[i] += float32(d * a0)
} }
if x1i == x0i+2 { if x1i == x0i+2 {
if i := clamp(x0i+1, width); i < uint(len(buf)) { if i := clamp(x0i+1, width); i < uint(len(buf)) {
buf[i] += d * (1 - a0 - am) buf[i] += float32(d * (1 - a0 - am))
} }
} else { } else {
a1 := s * (1.5 - x0f) a1 := float32(s * (1.5 - x0f))
if i := clamp(x0i+1, width); i < uint(len(buf)) { if i := clamp(x0i+1, width); i < uint(len(buf)) {
buf[i] += d * (a1 - a0) buf[i] += float32(d * (a1 - a0))
} }
dTimesS := d * s dTimesS := float32(d * s)
for xi := x0i + 2; xi < x1i-1; xi++ { for xi := x0i + 2; xi < x1i-1; xi++ {
if i := clamp(xi, width); i < uint(len(buf)) { if i := clamp(xi, width); i < uint(len(buf)) {
buf[i] += dTimesS buf[i] += dTimesS
} }
} }
a2 := a1 + s*float32(x1i-x0i-3) a2 := a1 + float32(s*float32(x1i-x0i-3))
if i := clamp(x1i-1, width); i < uint(len(buf)) { if i := clamp(x1i-1, width); i < uint(len(buf)) {
buf[i] += d * (1 - a2 - am) buf[i] += float32(d * (1 - a2 - am))
} }
} }
if i := clamp(x1i, width); i < uint(len(buf)) { if i := clamp(x1i, width); i < uint(len(buf)) {
buf[i] += d * am buf[i] += float32(d * am)
} }
} }