From 494d8de4e58cee6b5b9072a4a1c97e8dc9863a16 Mon Sep 17 00:00:00 2001 From: jst Date: Mon, 18 Nov 2013 19:54:31 +0100 Subject: [PATCH] Speedup calculation by exploiting the separability of the resizing filter. Should be ~5x faster! More optimization will follow. before: > go test -bench . PASS Benchmark_BigResizeLanczos3-4 1 2438137093 ns/op Benchmark_BigResizeLanczos3Lut-4 1 1157612362 ns/op Benchmark_Reduction-4 2 743950618 ns/op after: > go test -bench . PASS Benchmark_BigResizeLanczos3-4 5 403685685 ns/op Benchmark_BigResizeLanczos3Lut-4 10 225539497 ns/op Benchmark_Reduction-4 10 207004759 ns/op --- filters.go | 56 ++++++++++++++++++++++-------------------------- resize.go | 63 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/filters.go b/filters.go index b3853c7..e383479 100644 --- a/filters.go +++ b/filters.go @@ -42,13 +42,13 @@ type filterModel struct { // instead of blurring an image before downscaling to avoid aliasing, // the filter is scaled by a factor which leads to a similar effect - factor [2]float32 + factor float32 // for optimized access to image points converter - // temporaries used by Interpolate - tempRow, tempCol []colorArray + // temporary used by Interpolate + tempRow []colorArray } func (f *filterModel) convolution1d(x float32, p []colorArray, factor float32) colorArray { @@ -72,20 +72,15 @@ func (f *filterModel) convolution1d(x float32, p []colorArray, factor float32) c return c } -func (f *filterModel) Interpolate(x, y float32) color.RGBA64 { - xf, yf := int(x)-len(f.tempRow)/2+1, int(y)-len(f.tempCol)/2+1 - x -= float32(xf) - y -= float32(yf) +func (f *filterModel) Interpolate(u float32, y int) color.RGBA64 { + uf := int(u) - len(f.tempRow)/2 + 1 + u -= float32(uf) - for i := range f.tempCol { - for j := range f.tempRow { - f.tempRow[j] = f.at(xf+j, yf+i) - } - - f.tempCol[i] = f.convolution1d(x, f.tempRow, f.factor[0]) + for i := range f.tempRow { + f.tempRow[i] = 
f.at(uf+i, y) } - c := f.convolution1d(y, f.tempCol, f.factor[1]) + c := f.convolution1d(u, f.tempRow, f.factor) return color.RGBA64{ clampToUint16(c[0]), clampToUint16(c[1]), @@ -96,46 +91,45 @@ func (f *filterModel) Interpolate(x, y float32) color.RGBA64 { // createFilter tries to find an optimized converter for the given input image // and initializes all filterModel members to their defaults -func createFilter(img image.Image, factor [2]float32, size int, kernel func(float32) float32) (f Filter) { - sizeX := size * (int(math.Ceil(float64(factor[0])))) - sizeY := size * (int(math.Ceil(float64(factor[1])))) +func createFilter(img image.Image, factor float32, size int, kernel func(float32) float32) (f Filter) { + sizeX := size * (int(math.Ceil(float64(factor)))) switch img.(type) { default: f = &filterModel{ kernel, factor, &genericConverter{img}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } case *image.RGBA: f = &filterModel{ kernel, factor, &rgbaConverter{img.(*image.RGBA)}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } case *image.RGBA64: f = &filterModel{ kernel, factor, &rgba64Converter{img.(*image.RGBA64)}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } case *image.Gray: f = &filterModel{ kernel, factor, &grayConverter{img.(*image.Gray)}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } case *image.Gray16: f = &filterModel{ kernel, factor, &gray16Converter{img.(*image.Gray16)}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } case *image.YCbCr: f = &filterModel{ kernel, factor, &ycbcrConverter{img.(*image.YCbCr)}, - make([]colorArray, sizeX), make([]colorArray, sizeY), + make([]colorArray, sizeX), } } @@ -172,7 +166,7 @@ func tableKernel(kernel func(float32) float32, tableSize int, } // Nearest-neighbor interpolation -func NearestNeighbor(img image.Image, factor 
[2]float32) Filter { +func NearestNeighbor(img image.Image, factor float32) Filter { return createFilter(img, factor, 2, func(x float32) (y float32) { if x >= -0.5 && x < 0.5 { y = 1 @@ -185,7 +179,7 @@ func NearestNeighbor(img image.Image, factor [2]float32) Filter { } // Bilinear interpolation -func Bilinear(img image.Image, factor [2]float32) Filter { +func Bilinear(img image.Image, factor float32) Filter { return createFilter(img, factor, 2, func(x float32) (y float32) { absX := float32(math.Abs(float64(x))) if absX <= 1 { @@ -199,12 +193,12 @@ func Bilinear(img image.Image, factor [2]float32) Filter { } // Bicubic interpolation (with cubic hermite spline) -func Bicubic(img image.Image, factor [2]float32) Filter { +func Bicubic(img image.Image, factor float32) Filter { return createFilter(img, factor, 4, splineKernel(0, 0.5)) } // Mitchell-Netravali interpolation -func MitchellNetravali(img image.Image, factor [2]float32) Filter { +func MitchellNetravali(img image.Image, factor float32) Filter { return createFilter(img, factor, 4, splineKernel(1.0/3.0, 1.0/3.0)) } @@ -245,25 +239,25 @@ func lanczosKernel(a uint) func(float32) float32 { const lanczosTableSize = 300 // Lanczos interpolation (a=2) -func Lanczos2(img image.Image, factor [2]float32) Filter { +func Lanczos2(img image.Image, factor float32) Filter { return createFilter(img, factor, 4, lanczosKernel(2)) } // Lanczos interpolation (a=2) using a look-up table // to speed up computation -func Lanczos2Lut(img image.Image, factor [2]float32) Filter { +func Lanczos2Lut(img image.Image, factor float32) Filter { return createFilter(img, factor, 4, tableKernel(lanczosKernel(2), lanczosTableSize, 2.0)) } // Lanczos interpolation (a=3) -func Lanczos3(img image.Image, factor [2]float32) Filter { +func Lanczos3(img image.Image, factor float32) Filter { return createFilter(img, factor, 6, lanczosKernel(3)) } // Lanczos interpolation (a=3) using a look-up table // to speed up computation -func Lanczos3Lut(img 
image.Image, factor [2]float32) Filter { +func Lanczos3Lut(img image.Image, factor float32) Filter { return createFilter(img, factor, 6, tableKernel(lanczosKernel(3), lanczosTableSize, 3.0)) } diff --git a/resize.go b/resize.go index dbd8d68..ed5bb01 100644 --- a/resize.go +++ b/resize.go @@ -42,14 +42,14 @@ func (t *Trans2) Eval(x, y float32) (u, v float32) { // Filter can interpolate at points (x,y) type Filter interface { - Interpolate(x, y float32) color.RGBA64 + Interpolate(u float32, y int) color.RGBA64 } // InterpolationFunction return a Filter implementation // that operates on an image. Two factors // allow to scale the filter kernels in x- and y-direction // to prevent moire patterns. -type InterpolationFunction func(image.Image, [2]float32) Filter +type InterpolationFunction func(image.Image, float32) Filter // Resize an image to new width and height using the interpolation function interp. // A new image with the given dimensions will be returned. @@ -64,24 +64,26 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i scaleX, scaleY := calcFactors(width, height, oldWidth, oldHeight) t := Trans2{scaleX, 0, float32(oldBounds.Min.X), 0, scaleY, float32(oldBounds.Min.Y)} - resizedImg := image.NewRGBA64(image.Rect(0, 0, int(0.7+oldWidth/scaleX), int(0.7+oldHeight/scaleY))) + //resizedImg := image.NewRGBA64(image.Rect(0, 0, int(0.7+oldWidth/scaleX), int(0.7+oldHeight/scaleY))) + resizedImg := image.NewRGBA64(image.Rect(0, 0, oldBounds.Dy(), int(0.7+oldWidth/scaleX))) b := resizedImg.Bounds() - adjustX := 0.5 * ((oldWidth-1.0)/scaleX - float32(b.Dx()-1)) - adjustY := 0.5 * ((oldHeight-1.0)/scaleY - float32(b.Dy()-1)) + adjustX := 0.5 * ((oldWidth-1.0)/scaleX - float32(b.Dy()-1)) + //adjustY := 0.5 * ((oldHeight-1.0)/scaleY - float32(b.Dy()-1)) n := numJobs(b.Dy()) c := make(chan int, n) for i := 0; i < n; i++ { go func(b image.Rectangle, c chan int) { - filter := interp(img, [2]float32{clampFactor(scaleX), clampFactor(scaleY)}) - 
var u, v float32 + filter := interp(img, float32(clampFactor(scaleX))) + var u float32 var color color.RGBA64 - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - u, v = t.Eval(float32(x)+adjustX, float32(y)+adjustY) - color = filter.Interpolate(u, v) + for y := b.Min.X; y < b.Max.X; y++ { + for x := b.Min.Y; x < b.Max.Y; x++ { + u = t[0]*(float32(x)+adjustX) + t[2] - i := resizedImg.PixOffset(x, y) + color = filter.Interpolate(u, y+oldBounds.Min.Y) /* shift the row by the source origin, matching the t[2] offset applied to u */ + + i := resizedImg.PixOffset(y, x) resizedImg.Pix[i+0] = uint8(color.R >> 8) resizedImg.Pix[i+1] = uint8(color.R) resizedImg.Pix[i+2] = uint8(color.G >> 8) @@ -100,7 +102,42 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i <-c } - return resizedImg + resultImg := image.NewRGBA64(image.Rect(0, 0, int(0.7+oldWidth/scaleX), int(0.7+oldHeight/scaleY))) + b = resultImg.Bounds() + adjustX = 0.5 * ((oldHeight-1.0)/scaleY - float32(b.Dy()-1)) /* second pass resamples along the height: centre with the vertical factor and the result's row count */ + + for i := 0; i < n; i++ { + go func(b image.Rectangle, c chan int) { + filter := interp(resizedImg, float32(clampFactor(scaleY))) + var u float32 + var color color.RGBA64 + for y := b.Min.X; y < b.Max.X; y++ { + for x := b.Min.Y; x < b.Max.Y; x++ { + u = t[4]*(float32(x)+adjustX) /* no origin offset here: resizedImg is allocated at (0, 0) */ + + color = filter.Interpolate(u, y) + + i := resultImg.PixOffset(y, x) + resultImg.Pix[i+0] = uint8(color.R >> 8) + resultImg.Pix[i+1] = uint8(color.R) + resultImg.Pix[i+2] = uint8(color.G >> 8) + resultImg.Pix[i+3] = uint8(color.G) + resultImg.Pix[i+4] = uint8(color.B >> 8) + resultImg.Pix[i+5] = uint8(color.B) + resultImg.Pix[i+6] = uint8(color.A >> 8) + resultImg.Pix[i+7] = uint8(color.A) + } + } + c <- 1 + }(image.Rect(b.Min.X, b.Min.Y+i*(b.Dy())/n, b.Max.X, b.Min.Y+(i+1)*(b.Dy())/n), c) + + } + + for i := 0; i < n; i++ { + <-c + } + + return resultImg } // Calculate scaling factors using old and new image dimensions.