Optimized Nearest-Neighbor function - 2x faster

This commit is contained in:
Charlie Vieth 2014-07-29 18:32:58 -04:00
parent 80b3fc2b3f
commit 427b8d133e
4 changed files with 520 additions and 31 deletions

View File

@ -89,8 +89,9 @@ func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func
for y := 0; y < dy; y++ { for y := 0; y < dy; y++ {
interpX := scale*(float64(y)+0.5) + float64(minx) interpX := scale*(float64(y)+0.5) + float64(minx)
start[y] = int(interpX) - filterLength/2 + 1 start[y] = int(interpX) - filterLength/2 + 1
interpX -= float64(start[y])
for i := 0; i < filterLength; i++ { for i := 0; i < filterLength; i++ {
in := (interpX - float64(start[y]) - float64(i)) * filterFactor in := (interpX - float64(i)) * filterFactor
coeffs[y*filterLength+i] = int16(kernel(in) * 256) coeffs[y*filterLength+i] = int16(kernel(in) * 256)
} }
} }
@ -108,11 +109,35 @@ func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel fun
for y := 0; y < dy; y++ { for y := 0; y < dy; y++ {
interpX := scale*(float64(y)+0.5) + float64(minx) interpX := scale*(float64(y)+0.5) + float64(minx)
start[y] = int(interpX) - filterLength/2 + 1 start[y] = int(interpX) - filterLength/2 + 1
interpX -= float64(start[y])
for i := 0; i < filterLength; i++ { for i := 0; i < filterLength; i++ {
in := (interpX - float64(start[y]) - float64(i)) * filterFactor in := (interpX - float64(i)) * filterFactor
coeffs[y*filterLength+i] = int32(kernel(in) * 65536) coeffs[y*filterLength+i] = int32(kernel(in) * 65536)
} }
} }
return coeffs, start, filterLength return coeffs, start, filterLength
} }
func createWeightsNearest(dy, minx, filterLength int, blur, scale float64) ([]bool, []int, int) {
filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1))
filterFactor := math.Min(1./(blur*scale), 1)
coeffs := make([]bool, dy*filterLength)
start := make([]int, dy)
for y := 0; y < dy; y++ {
interpX := scale*(float64(y)+0.5) + float64(minx)
start[y] = int(interpX) - filterLength/2 + 1
interpX -= float64(start[y])
for i := 0; i < filterLength; i++ {
in := (interpX - float64(i)) * filterFactor
if in >= -0.5 && in < 0.5 {
coeffs[y*filterLength+i] = true
} else {
coeffs[y*filterLength+i] = false
}
}
}
return coeffs, start, filterLength
}

228
nearest.go Normal file
View File

@ -0,0 +1,228 @@
package resize
import "image"
func floatToUint8(x float32) uint8 {
// Nearest-neighbor values are always
// positive no need to check lower-bound.
if x > 0xfe {
return 0xff
}
return uint8(x)
}
func floatToUint16(x float32) uint16 {
if x > 0xfffe {
return 0xffff
}
return uint16(x)
}
func nearestGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
var rgba [4]float32
var sum float32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
if coeffs[ci+i] {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
break
case xi >= oldBounds.Max.X:
xi = oldBounds.Min.X
default:
xi = oldBounds.Max.X - 1
}
r, g, b, a := in.At(xi, x).RGBA()
rgba[0] += float32(r)
rgba[1] += float32(g)
rgba[2] += float32(b)
rgba[3] += float32(a)
sum++
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
value := floatToUint16(rgba[0] / sum)
out.Pix[offset+0] = uint8(value >> 8)
out.Pix[offset+1] = uint8(value)
value = floatToUint16(rgba[1] / sum)
out.Pix[offset+2] = uint8(value >> 8)
out.Pix[offset+3] = uint8(value)
value = floatToUint16(rgba[2] / sum)
out.Pix[offset+4] = uint8(value >> 8)
out.Pix[offset+5] = uint8(value)
value = floatToUint16(rgba[3] / sum)
out.Pix[offset+6] = uint8(value >> 8)
out.Pix[offset+7] = uint8(value)
}
}
}
func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 4
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
var rgba [4]float32
var sum float32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
if coeffs[ci+i] {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 4
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
rgba[0] += float32(row[xi+0])
rgba[1] += float32(row[xi+1])
rgba[2] += float32(row[xi+2])
rgba[3] += float32(row[xi+3])
sum++
}
}
xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4
out.Pix[xo+0] = floatToUint8(rgba[0] / sum)
out.Pix[xo+1] = floatToUint8(rgba[1] / sum)
out.Pix[xo+2] = floatToUint8(rgba[2] / sum)
out.Pix[xo+3] = floatToUint8(rgba[3] / sum)
}
}
}
func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 8
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
var rgba [4]float32
var sum float32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
if coeffs[ci+i] {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 8
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1]))
rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3]))
rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5]))
rgba[3] += float32(uint16(row[xi+6])<<8 | uint16(row[xi+7]))
sum++
}
}
xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
value := floatToUint16(rgba[0] / sum)
out.Pix[xo+0] = uint8(value >> 8)
out.Pix[xo+1] = uint8(value)
value = floatToUint16(rgba[1] / sum)
out.Pix[xo+2] = uint8(value >> 8)
out.Pix[xo+3] = uint8(value)
value = floatToUint16(rgba[2] / sum)
out.Pix[xo+4] = uint8(value >> 8)
out.Pix[xo+5] = uint8(value)
value = floatToUint16(rgba[3] / sum)
out.Pix[xo+6] = uint8(value >> 8)
out.Pix[xo+7] = uint8(value)
}
}
}
func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1)
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
var gray float32
var sum float32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
if coeffs[ci+i] {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
break
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
gray += float32(row[xi])
sum++
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X)
out.Pix[offset] = floatToUint8(gray / sum)
}
}
}
func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []bool, offset []int, filterLength int) {
oldBounds := in.Bounds()
newBounds := out.Bounds()
minX := oldBounds.Min.X * 2
maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2
for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
var gray float32
var sum float32
start := offset[y]
ci := (y - newBounds.Min.Y) * filterLength
for i := 0; i < filterLength; i++ {
if coeffs[ci+i] {
xi := start + i
switch {
case uint(xi) < uint(oldBounds.Max.X):
xi *= 2
case xi >= oldBounds.Max.X:
xi = maxX
default:
xi = minX
}
gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1]))
sum++
}
}
offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2
value := floatToUint16(gray / sum)
out.Pix[offset+0] = uint8(value >> 8)
out.Pix[offset+1] = uint8(value)
}
}
}

41
nearest_test.go Normal file
View File

@ -0,0 +1,41 @@
package resize
import "testing"
func Test_FloatToUint8(t *testing.T) {
var testData = []struct {
in float32
expected uint8
}{
{0, 0},
{255, 255},
{128, 128},
{1, 1},
{256, 255},
}
for _, test := range testData {
actual := floatToUint8(test.in)
if actual != test.expected {
t.Fail()
}
}
}
func Test_FloatToUint16(t *testing.T) {
var testData = []struct {
in float32
expected uint16
}{
{0, 0},
{65535, 65535},
{128, 128},
{1, 1},
{65536, 65535},
}
for _, test := range testData {
actual := floatToUint16(test.in)
if actual != test.expected {
t.Fail()
}
}
}

249
resize.go
View File

@ -33,36 +33,41 @@ import (
// An InterpolationFunction provides the parameters that describe an // An InterpolationFunction provides the parameters that describe an
// interpolation kernel. It returns the number of samples to take // interpolation kernel. It returns the number of samples to take
// and the kernel function to use for sampling. // and the kernel function to use for sampling.
type InterpolationFunction func() (int, func(float64) float64) type InterpolationFunction int
// InterpolationFunction constants
const (
// Nearest-neighbor interpolation // Nearest-neighbor interpolation
func NearestNeighbor() (int, func(float64) float64) { NearestNeighbor InterpolationFunction = iota
// Bilinear interpolation
Bilinear
// Bicubic interpolation (with cubic hermite spline)
Bicubic
// Mitchell-Netravali interpolation
MitchellNetravali
// Lanczos interpolation (a=2)
Lanczos2
// Lanczos interpolation (a=3)
Lanczos3
)
// kernal, returns an InterpolationFunctions taps and kernel.
func (i InterpolationFunction) kernel() (int, func(float64) float64) {
switch i {
case Bilinear:
return 2, linear
case Bicubic:
return 4, cubic
case MitchellNetravali:
return 4, mitchellnetravali
case Lanczos2:
return 4, lanczos2
case Lanczos3:
return 6, lanczos3
default:
// Default to NearestNeighbor.
return 2, nearest return 2, nearest
} }
// Bilinear interpolation
func Bilinear() (int, func(float64) float64) {
return 2, linear
}
// Bicubic interpolation (with cubic hermite spline)
func Bicubic() (int, func(float64) float64) {
return 4, cubic
}
// Mitchell-Netravali interpolation
func MitchellNetravali() (int, func(float64) float64) {
return 4, mitchellnetravali
}
// Lanczos interpolation (a=2)
func Lanczos2() (int, func(float64) float64) {
return 4, lanczos2
}
// Lanczos interpolation (a=3)
func Lanczos3() (int, func(float64) float64) {
return 6, lanczos3
} }
// values <1 will sharpen the image // values <1 will sharpen the image
@ -81,8 +86,11 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
if height == 0 { if height == 0 {
height = uint(0.7 + float64(img.Bounds().Dy())/scaleY) height = uint(0.7 + float64(img.Bounds().Dy())/scaleY)
} }
if interp == NearestNeighbor {
return resizeNearest(width, height, scaleX, scaleY, img, interp)
}
taps, kernel := interp() taps, kernel := interp.kernel()
cpus := runtime.NumCPU() cpus := runtime.NumCPU()
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
@ -269,6 +277,193 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
} }
} }
func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image {
taps, _ := interp.kernel()
cpus := runtime.NumCPU()
wg := sync.WaitGroup{}
switch input := img.(type) {
case *image.RGBA:
// 8-bit precision
temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
nearestRGBA(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.YCbCr:
// 8-bit precision
// accessing the YCbCr arrays in a tight loop is slow.
// converting the image before filtering will improve performance.
inputAsRGBA := convertYCbCrToRGBA(input)
temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
nearestRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA)
go func() {
defer wg.Done()
nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.RGBA64:
// 16-bit precision
temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
nearestRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
nearestGeneric(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.Gray:
// 8-bit precision
temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewGray(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.Gray)
go func() {
defer wg.Done()
nearestGray(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.Gray)
go func() {
defer wg.Done()
nearestGray(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.Gray16:
// 16-bit precision
temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewGray16(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.Gray16)
go func() {
defer wg.Done()
nearestGray16(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.Gray16)
go func() {
defer wg.Done()
nearestGray16(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
default:
// 16-bit precision
temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width)))
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
nearestGeneric(img, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
}
}
// Calculates scaling factors using old and new image dimensions. // Calculates scaling factors using old and new image dimensions.
func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) { func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) {
if width == 0 { if width == 0 {