From 80b3fc2b3f2d53ca32f7520fff4263dc14f55882 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 Jul 2014 16:53:35 -0400 Subject: [PATCH] Pre-calculate start offset, optimize bound check Increased performance by 30% for RGBA and 45% for Gray images, minor performance increase for 16-bit images. The start offset calculated by createWeights are stored in a slice and passed to the resize functions to prevent duplication of effort. --- converter.go | 205 ++++++++++++++++++++++++++++----------------------- filters.go | 18 +++-- resize.go | 48 ++++++------ 3 files changed, 146 insertions(+), 125 deletions(-) diff --git a/converter.go b/converter.go index 8c6ac84..f023d9c 100644 --- a/converter.go +++ b/converter.go @@ -43,32 +43,35 @@ func clampUint16(in int64) uint16 { return uint16(in) } -func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() for x := newBounds.Min.X; x < newBounds.Max.X; x++ { for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = oldBounds.Min.X + default: + xi = oldBounds.Max.X - 1 + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += int64(coeff) * int64(r) + rgba[1] += int64(coeff) * int64(g) + rgba[2] += int64(coeff) * int64(b) + rgba[3] += int64(coeff) * int64(a) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - r, g, b, a := in.At(xx, x).RGBA() - rgba[0] += int64(coeff) * int64(r) - rgba[1] += int64(coeff) * int64(g) - rgba[2] += int64(coeff) * int64(b) - rgba[3] += int64(coeff) * int64(a) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 @@ -88,114 +91,126 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in } } -func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, filterLength int) { +func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int32(coeff) * int32(row[xi+0]) + rgba[1] += int32(coeff) * int32(row[xi+1]) + rgba[2] += int32(coeff) * int32(row[xi+2]) + rgba[3] += int32(coeff) * int32(row[xi+3]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 4 - rgba[0] += int32(coeff) * int32(row[offset+0]) - rgba[1] += int32(coeff) * int32(row[offset+1]) - rgba[2] += int32(coeff) * int32(row[offset+2]) - rgba[3] += int32(coeff) * int32(row[offset+3]) - sum += int32(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - out.Pix[offset+0] = clampUint8(rgba[0] / sum) - out.Pix[offset+1] = clampUint8(rgba[1] / sum) - out.Pix[offset+2] = clampUint8(rgba[2] / sum) - out.Pix[offset+3] = clampUint8(rgba[3] / sum) + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = clampUint8(rgba[0] / sum) + out.Pix[xo+1] = clampUint8(rgba[1] / sum) + out.Pix[xo+2] = clampUint8(rgba[2] / sum) + out.Pix[xo+3] = clampUint8(rgba[3] / sum) } } } -func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) + rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5])) + rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 8 - rgba[0] += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - rgba[1] += int64(coeff) * int64(uint16(row[offset+2])<<8|uint16(row[offset+3])) - rgba[2] += int64(coeff) * int64(uint16(row[offset+4])<<8|uint16(row[offset+5])) - rgba[3] += int64(coeff) * int64(uint16(row[offset+6])<<8|uint16(row[offset+7])) - sum += int64(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 value := clampUint16(rgba[0] / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) value = clampUint16(rgba[1] / sum) - out.Pix[offset+2] = uint8(value >> 8) - out.Pix[offset+3] = uint8(value) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) value = clampUint16(rgba[2] / sum) - out.Pix[offset+4] = uint8(value >> 8) - out.Pix[offset+5] = uint8(value) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) value = clampUint16(rgba[3] / sum) - out.Pix[offset+6] = uint8(value >> 8) - out.Pix[offset+7] = uint8(value) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) } } } -func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, filterLength int) { +func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int32(coeff) * int32(row[xi]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) - gray += int32(coeff) * int32(row[offset]) - sum += int32(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) @@ -204,30 +219,34 @@ func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, } } -func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, filterLength int) { +func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 2 - gray += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 diff --git a/filters.go b/filters.go index a6bd4e8..a5f6e79 100644 --- a/filters.go +++ b/filters.go @@ -80,37 +80,39 @@ func lanczos3(in float64) float64 { } // range [-256,256] -func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, int) { +func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int16, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } - return coeffs, filterLength + return coeffs, start, filterLength } // range [-65536,65536] -func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, int) { +func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int32, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } - return coeffs, filterLength + return coeffs, start, filterLength } diff --git a/resize.go b/resize.go index 0c3ec9a..b9eb775 100644 --- a/resize.go +++ b/resize.go @@ -95,25 +95,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(input, slice, scaleX, coeffs, filterLength) + resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -127,25 +127,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, filterLength) + resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -156,25 +156,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(input, slice, scaleX, coeffs, filterLength) + resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(temp, slice, scaleY, coeffs, filterLength) + resizeGeneric(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -185,25 +185,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(input, slice, scaleX, coeffs, filterLength) + resizeGray(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(temp, slice, scaleY, coeffs, filterLength) + resizeGray(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -214,25 +214,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(input, slice, scaleX, coeffs, filterLength) + resizeGray16(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(temp, slice, scaleY, coeffs, filterLength) + resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -243,25 +243,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(img, slice, scaleX, coeffs, filterLength) + resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait()