From 427b8d133e710b7781794fbf781ade2c1a71dd2e Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 Jul 2014 18:32:58 -0400 Subject: [PATCH] Optimized Nearest-Neighbor function - 2x faster --- filters.go | 29 +++++- nearest.go | 228 +++++++++++++++++++++++++++++++++++++++++++ nearest_test.go | 41 ++++++++ resize.go | 253 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 520 insertions(+), 31 deletions(-) create mode 100644 nearest.go create mode 100644 nearest_test.go diff --git a/filters.go b/filters.go index a5f6e79..0cc738c 100644 --- a/filters.go +++ b/filters.go @@ -89,8 +89,9 @@ func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } @@ -108,11 +109,35 @@ func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel fun for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } return coeffs, start, filterLength } + +func createWeightsNearest(dy, minx, filterLength int, blur, scale float64) ([]bool, []int, int) { + filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) + filterFactor := math.Min(1./(blur*scale), 1) + + coeffs := make([]bool, dy*filterLength) + start := make([]int, dy) + for y := 0; y < dy; y++ { + interpX := scale*(float64(y)+0.5) + float64(minx) + start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) + 
for i := 0; i < filterLength; i++ { + in := (interpX - float64(i)) * filterFactor + if in >= -0.5 && in < 0.5 { + coeffs[y*filterLength+i] = true + } else { + coeffs[y*filterLength+i] = false + } + } + } + + return coeffs, start, filterLength +} diff --git a/nearest.go b/nearest.go new file mode 100644 index 0000000..5708fd0 --- /dev/null +++ b/nearest.go @@ -0,0 +1,231 @@ +package resize + +import "image" + +func floatToUint8(x float32) uint8 { + // Nearest-neighbor values are always + // positive no need to check lower-bound. + if x > 0xfe { + return 0xff + } + return uint8(x) +} + +func floatToUint16(x float32) uint16 { + if x > 0xfffe { + return 0xffff + } + return uint16(x) +} + +// nearestGeneric resamples in into out through the generic At accessor. +// Each output pixel is the mean of the input samples whose coeffs entry is +// true; in is read as in.At(xi, x), so out is transposed relative to in. +func nearestGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: // past the last column: repeat the last pixel + xi = oldBounds.Max.X - 1 + default: // xi is negative: repeat the first pixel + xi = oldBounds.Min.X + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += float32(r) + rgba[1] += float32(g) + rgba[2] += float32(b) + rgba[3] += float32(a) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[offset+2] = uint8(value >> 8) + out.Pix[offset+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[offset+4] = uint8(value >> 8) + out.Pix[offset+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[offset+6] = uint8(value >> 8) + out.Pix[offset+7] = uint8(value) + } + }
+} + +func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(row[xi+0]) + rgba[1] += float32(row[xi+1]) + rgba[2] += float32(row[xi+2]) + rgba[3] += float32(row[xi+3]) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = floatToUint8(rgba[0] / sum) + out.Pix[xo+1] = floatToUint8(rgba[1] / sum) + out.Pix[xo+2] = floatToUint8(rgba[2] / sum) + out.Pix[xo+3] = floatToUint8(rgba[3] / sum) + } + } +} + +func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) + 
rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) + rgba[3] += float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) + } + } +} + +func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(row[xi]) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) + out.Pix[offset] = floatToUint8(gray / sum) + } + } +} + +func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := 
offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 + value := floatToUint16(gray / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + } + } +} diff --git a/nearest_test.go b/nearest_test.go new file mode 100644 index 0000000..08adbda --- /dev/null +++ b/nearest_test.go @@ -0,0 +1,41 @@ +package resize + +import "testing" + +func Test_FloatToUint8(t *testing.T) { + var testData = []struct { + in float32 + expected uint8 + }{ + {0, 0}, + {255, 255}, + {128, 128}, + {1, 1}, + {256, 255}, + } + for _, test := range testData { + actual := floatToUint8(test.in) + if actual != test.expected { + t.Fail() + } + } +} + +func Test_FloatToUint16(t *testing.T) { + var testData = []struct { + in float32 + expected uint16 + }{ + {0, 0}, + {65535, 65535}, + {128, 128}, + {1, 1}, + {65536, 65535}, + } + for _, test := range testData { + actual := floatToUint16(test.in) + if actual != test.expected { + t.Fail() + } + } +} diff --git a/resize.go b/resize.go index b9eb775..fad39b4 100644 --- a/resize.go +++ b/resize.go @@ -33,36 +33,41 @@ import ( // An InterpolationFunction provides the parameters that describe an // interpolation kernel. It returns the number of samples to take // and the kernel function to use for sampling. 
-type InterpolationFunction func() (int, func(float64) float64) +type InterpolationFunction int -// Nearest-neighbor interpolation -func NearestNeighbor() (int, func(float64) float64) { - return 2, nearest -} +// InterpolationFunction constants +const ( + // Nearest-neighbor interpolation + NearestNeighbor InterpolationFunction = iota + // Bilinear interpolation + Bilinear + // Bicubic interpolation (with cubic hermite spline) + Bicubic + // Mitchell-Netravali interpolation + MitchellNetravali + // Lanczos interpolation (a=2) + Lanczos2 + // Lanczos interpolation (a=3) + Lanczos3 +) -// Bilinear interpolation -func Bilinear() (int, func(float64) float64) { - return 2, linear -} - -// Bicubic interpolation (with cubic hermite spline) -func Bicubic() (int, func(float64) float64) { - return 4, cubic -} - -// Mitchell-Netravali interpolation -func MitchellNetravali() (int, func(float64) float64) { - return 4, mitchellnetravali -} - -// Lanczos interpolation (a=2) -func Lanczos2() (int, func(float64) float64) { - return 4, lanczos2 -} - -// Lanczos interpolation (a=3) -func Lanczos3() (int, func(float64) float64) { - return 6, lanczos3 +// kernel returns the number of taps and the kernel function of an InterpolationFunction. +func (i InterpolationFunction) kernel() (int, func(float64) float64) { + switch i { + case Bilinear: + return 2, linear + case Bicubic: + return 4, cubic + case MitchellNetravali: + return 4, mitchellnetravali + case Lanczos2: + return 4, lanczos2 + case Lanczos3: + return 6, lanczos3 + default: + // Default to NearestNeighbor.
+ return 2, nearest + } } // values <1 will sharpen the image @@ -81,8 +86,11 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i if height == 0 { height = uint(0.7 + float64(img.Bounds().Dy())/scaleY) } + if interp == NearestNeighbor { + return resizeNearest(width, height, scaleX, scaleY, img, interp) + } - taps, kernel := interp() + taps, kernel := interp.kernel() cpus := runtime.NumCPU() wg := sync.WaitGroup{} @@ -269,6 +277,193 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i } } +func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image { + taps, _ := interp.kernel() + cpus := runtime.NumCPU() + wg := sync.WaitGroup{} + + switch input := img.(type) { + case *image.RGBA: + // 8-bit precision + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.YCbCr: + // 8-bit precision + // accessing the YCbCr arrays in a tight loop is slow. + // converting the image before filtering will improve performance. 
+ inputAsRGBA := convertYCbCrToRGBA(input) + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.RGBA64: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image (temp is *image.RGBA64, so the typed function applies), result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } +
wg.Wait() + return result + case *image.Gray: + // 8-bit precision + temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.Gray16: + // 16-bit precision + temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(temp, slice, scaleY, coeffs, offset, 
filterLength) + }() + } + wg.Wait() + return result + default: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestGeneric(img, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + } + +} + // Calculates scaling factors using old and new image dimensions. func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) { if width == 0 {