Pre-calculate start offset, optimize bound check

Increased performance by 30% for RGBA and 45% for Gray images, minor
performance increase for 16-bit images.

The start offset calculated by createWeights are stored in a slice and
passed to the resize functions to prevent duplication of effort.
This commit is contained in:
Charlie Vieth 2014-07-29 16:53:35 -04:00
parent be07665345
commit 80b3fc2b3f
3 changed files with 146 additions and 125 deletions

View File

@ -43,33 +43,36 @@ func clampUint16(in int64) uint16 {
return uint16(in)
}
func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) {
func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
start := int(interpX) - filterLength/2 + 1
var rgba [4]int64
var sum int64
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
xx := start + i
if xx < oldBounds.Min.X {
xx = oldBounds.Min.X
} else if xx >= oldBounds.Max.X {
xx = oldBounds.Max.X - 1
coeff := coeffs[ci+i]
if coeff != 0 {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
break
case xi >= oldBounds.Max.X:
xi = oldBounds.Min.X
default:
xi = oldBounds.Max.X - 1
}
coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
r, g, b, a := in.At(xx, x).RGBA()
r, g, b, a := in.At(xi, x).RGBA()
rgba[0] += int64(coeff) * int64(r)
rgba[1] += int64(coeff) * int64(g)
rgba[2] += int64(coeff) * int64(b)
rgba[3] += int64(coeff) * int64(a)
sum += int64(coeff)
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
value := clampUint16(rgba[0] / sum)
@ -88,115 +91,127 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in
}
}
func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, filterLength int) {
func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 4
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
start := int(interpX) - filterLength/2 + 1
var rgba [4]int32
var sum int32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
xx := start + i
if xx < oldBounds.Min.X {
xx = oldBounds.Min.X
} else if xx >= oldBounds.Max.X {
xx = oldBounds.Max.X - 1
coeff := coeffs[ci+i]
if coeff != 0 {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 4
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
offset := (xx - oldBounds.Min.X) * 4
rgba[0] += int32(coeff) * int32(row[offset+0])
rgba[1] += int32(coeff) * int32(row[offset+1])
rgba[2] += int32(coeff) * int32(row[offset+2])
rgba[3] += int32(coeff) * int32(row[offset+3])
rgba[0] += int32(coeff) * int32(row[xi+0])
rgba[1] += int32(coeff) * int32(row[xi+1])
rgba[2] += int32(coeff) * int32(row[xi+2])
rgba[3] += int32(coeff) * int32(row[xi+3])
sum += int32(coeff)
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4
out.Pix[offset+0] = clampUint8(rgba[0] / sum)
out.Pix[offset+1] = clampUint8(rgba[1] / sum)
out.Pix[offset+2] = clampUint8(rgba[2] / sum)
out.Pix[offset+3] = clampUint8(rgba[3] / sum)
xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4
out.Pix[xo+0] = clampUint8(rgba[0] / sum)
out.Pix[xo+1] = clampUint8(rgba[1] / sum)
out.Pix[xo+2] = clampUint8(rgba[2] / sum)
out.Pix[xo+3] = clampUint8(rgba[3] / sum)
}
}
}
func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) {
func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 8
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
start := int(interpX) - filterLength/2 + 1
var rgba [4]int64
var sum int64
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
xx := start + i
if xx < oldBounds.Min.X {
xx = oldBounds.Min.X
} else if xx >= oldBounds.Max.X {
xx = oldBounds.Max.X - 1
coeff := coeffs[ci+i]
if coeff != 0 {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 8
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
offset := (xx - oldBounds.Min.X) * 8
rgba[0] += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1]))
rgba[1] += int64(coeff) * int64(uint16(row[offset+2])<<8|uint16(row[offset+3]))
rgba[2] += int64(coeff) * int64(uint16(row[offset+4])<<8|uint16(row[offset+5]))
rgba[3] += int64(coeff) * int64(uint16(row[offset+6])<<8|uint16(row[offset+7]))
rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1]))
rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3]))
rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5]))
rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7]))
sum += int64(coeff)
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
value := clampUint16(rgba[0] / sum)
out.Pix[offset+0] = uint8(value >> 8)
out.Pix[offset+1] = uint8(value)
out.Pix[xo+0] = uint8(value >> 8)
out.Pix[xo+1] = uint8(value)
value = clampUint16(rgba[1] / sum)
out.Pix[offset+2] = uint8(value >> 8)
out.Pix[offset+3] = uint8(value)
out.Pix[xo+2] = uint8(value >> 8)
out.Pix[xo+3] = uint8(value)
value = clampUint16(rgba[2] / sum)
out.Pix[offset+4] = uint8(value >> 8)
out.Pix[offset+5] = uint8(value)
out.Pix[xo+4] = uint8(value >> 8)
out.Pix[xo+5] = uint8(value)
value = clampUint16(rgba[3] / sum)
out.Pix[offset+6] = uint8(value >> 8)
out.Pix[offset+7] = uint8(value)
out.Pix[xo+6] = uint8(value >> 8)
out.Pix[xo+7] = uint8(value)
}
}
}
func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, filterLength int) {
func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1)
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
start := int(interpX) - filterLength/2 + 1
var gray int32
var sum int32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
xx := start + i
if xx < oldBounds.Min.X {
xx = oldBounds.Min.X
} else if xx >= oldBounds.Max.X {
xx = oldBounds.Max.X - 1
coeff := coeffs[ci+i]
if coeff != 0 {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
break
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
offset := (xx - oldBounds.Min.X)
gray += int32(coeff) * int32(row[offset])
gray += int32(coeff) * int32(row[xi])
sum += int32(coeff)
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X)
out.Pix[offset] = clampUint8(gray / sum)
@ -204,31 +219,35 @@ func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16,
}
}
func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, filterLength int) {
func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 2
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
start := int(interpX) - filterLength/2 + 1
var gray int64
var sum int64
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
xx := start + i
if xx < oldBounds.Min.X {
xx = oldBounds.Min.X
} else if xx >= oldBounds.Max.X {
xx = oldBounds.Max.X - 1
coeff := coeffs[ci+i]
if coeff != 0 {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 2
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
offset := (xx - oldBounds.Min.X) * 2
gray += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1]))
gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1]))
sum += int64(coeff)
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2
value := clampUint16(gray / sum)

View File

@ -80,37 +80,39 @@ func lanczos3(in float64) float64 {
}
// range [-256,256]
func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, int) {
func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) {
filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1))
filterFactor := math.Min(1./(blur*scale), 1)
coeffs := make([]int16, dy*filterLength)
start := make([]int, dy)
for y := 0; y < dy; y++ {
interpX := scale*(float64(y)+0.5) + float64(minx)
start := int(interpX) - filterLength/2 + 1
start[y] = int(interpX) - filterLength/2 + 1
for i := 0; i < filterLength; i++ {
in := (interpX - float64(start) - float64(i)) * filterFactor
in := (interpX - float64(start[y]) - float64(i)) * filterFactor
coeffs[y*filterLength+i] = int16(kernel(in) * 256)
}
}
return coeffs, filterLength
return coeffs, start, filterLength
}
// range [-65536,65536]
func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, int) {
func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) {
filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1))
filterFactor := math.Min(1./(blur*scale), 1)
coeffs := make([]int32, dy*filterLength)
start := make([]int, dy)
for y := 0; y < dy; y++ {
interpX := scale*(float64(y)+0.5) + float64(minx)
start := int(interpX) - filterLength/2 + 1
start[y] = int(interpX) - filterLength/2 + 1
for i := 0; i < filterLength; i++ {
in := (interpX - float64(start) - float64(i)) * filterFactor
in := (interpX - float64(start[y]) - float64(i)) * filterFactor
coeffs[y*filterLength+i] = int32(kernel(in) * 65536)
}
}
return coeffs, filterLength
return coeffs, start, filterLength
}

View File

@ -95,25 +95,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
resizeRGBA(input, slice, scaleX, coeffs, filterLength)
resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
resizeRGBA(temp, slice, scaleY, coeffs, filterLength)
resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -127,25 +127,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, filterLength)
resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
resizeRGBA(temp, slice, scaleY, coeffs, filterLength)
resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -156,25 +156,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
resizeRGBA64(input, slice, scaleX, coeffs, filterLength)
resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
resizeGeneric(temp, slice, scaleY, coeffs, filterLength)
resizeGeneric(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -185,25 +185,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewGray(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.Gray)
go func() {
defer wg.Done()
resizeGray(input, slice, scaleX, coeffs, filterLength)
resizeGray(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.Gray)
go func() {
defer wg.Done()
resizeGray(temp, slice, scaleY, coeffs, filterLength)
resizeGray(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -214,25 +214,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewGray16(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.Gray16)
go func() {
defer wg.Done()
resizeGray16(input, slice, scaleX, coeffs, filterLength)
resizeGray16(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.Gray16)
go func() {
defer wg.Done()
resizeGray16(temp, slice, scaleY, coeffs, filterLength)
resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -243,25 +243,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel)
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
resizeGeneric(img, slice, scaleX, coeffs, filterLength)
resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
resizeRGBA64(temp, slice, scaleY, coeffs, filterLength)
resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()