From 80b3fc2b3f2d53ca32f7520fff4263dc14f55882 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 Jul 2014 16:53:35 -0400 Subject: [PATCH 1/3] Pre-calculate start offset, optimize bound check Increased performance by 30% for RGBA and 45% for Gray images, minor performance increase for 16-bit images. The start offset calculated by createWeights are stored in a slice and passed to the resize functions to prevent duplication of effort. --- converter.go | 205 ++++++++++++++++++++++++++++----------------------- filters.go | 18 +++-- resize.go | 48 ++++++------ 3 files changed, 146 insertions(+), 125 deletions(-) diff --git a/converter.go b/converter.go index 8c6ac84..f023d9c 100644 --- a/converter.go +++ b/converter.go @@ -43,32 +43,35 @@ func clampUint16(in int64) uint16 { return uint16(in) } -func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() for x := newBounds.Min.X; x < newBounds.Max.X; x++ { for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = oldBounds.Min.X + default: + xi = oldBounds.Max.X - 1 + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += int64(coeff) * int64(r) + rgba[1] += int64(coeff) * int64(g) + rgba[2] += int64(coeff) * int64(b) + rgba[3] += int64(coeff) * int64(a) + sum += int64(coeff) } - - coeff 
:= coeffs[(y-newBounds.Min.Y)*filterLength+i] - r, g, b, a := in.At(xx, x).RGBA() - rgba[0] += int64(coeff) * int64(r) - rgba[1] += int64(coeff) * int64(g) - rgba[2] += int64(coeff) * int64(b) - rgba[3] += int64(coeff) * int64(a) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 @@ -88,114 +91,126 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in } } -func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, filterLength int) { +func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int32(coeff) * int32(row[xi+0]) + rgba[1] += int32(coeff) * int32(row[xi+1]) + rgba[2] += int32(coeff) * int32(row[xi+2]) + rgba[3] += int32(coeff) * int32(row[xi+3]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 4 - rgba[0] += int32(coeff) * int32(row[offset+0]) - rgba[1] += int32(coeff) * int32(row[offset+1]) - rgba[2] += int32(coeff) * int32(row[offset+2]) - rgba[3] += int32(coeff) * int32(row[offset+3]) - sum += 
int32(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - out.Pix[offset+0] = clampUint8(rgba[0] / sum) - out.Pix[offset+1] = clampUint8(rgba[1] / sum) - out.Pix[offset+2] = clampUint8(rgba[2] / sum) - out.Pix[offset+3] = clampUint8(rgba[3] / sum) + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = clampUint8(rgba[0] / sum) + out.Pix[xo+1] = clampUint8(rgba[1] / sum) + out.Pix[xo+2] = clampUint8(rgba[2] / sum) + out.Pix[xo+3] = clampUint8(rgba[3] / sum) } } } -func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) + rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5])) + rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - 
oldBounds.Min.X) * 8 - rgba[0] += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - rgba[1] += int64(coeff) * int64(uint16(row[offset+2])<<8|uint16(row[offset+3])) - rgba[2] += int64(coeff) * int64(uint16(row[offset+4])<<8|uint16(row[offset+5])) - rgba[3] += int64(coeff) * int64(uint16(row[offset+6])<<8|uint16(row[offset+7])) - sum += int64(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 value := clampUint16(rgba[0] / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) value = clampUint16(rgba[1] / sum) - out.Pix[offset+2] = uint8(value >> 8) - out.Pix[offset+3] = uint8(value) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) value = clampUint16(rgba[2] / sum) - out.Pix[offset+4] = uint8(value >> 8) - out.Pix[offset+5] = uint8(value) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) value = clampUint16(rgba[3] / sum) - out.Pix[offset+6] = uint8(value >> 8) - out.Pix[offset+7] = uint8(value) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) } } } -func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, filterLength int) { +func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { 
- xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int32(coeff) * int32(row[xi]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) - gray += int32(coeff) * int32(row[offset]) - sum += int32(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) @@ -204,30 +219,34 @@ func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, } } -func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, filterLength int) { +func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 2 - gray += int64(coeff) * 
int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 diff --git a/filters.go b/filters.go index a6bd4e8..a5f6e79 100644 --- a/filters.go +++ b/filters.go @@ -80,37 +80,39 @@ func lanczos3(in float64) float64 { } // range [-256,256] -func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, int) { +func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int16, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } - return coeffs, filterLength + return coeffs, start, filterLength } // range [-65536,65536] -func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, int) { +func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int32, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor 
coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } - return coeffs, filterLength + return coeffs, start, filterLength } diff --git a/resize.go b/resize.go index 0c3ec9a..b9eb775 100644 --- a/resize.go +++ b/resize.go @@ -95,25 +95,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(input, slice, scaleX, coeffs, filterLength) + resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -127,25 +127,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := 
makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, filterLength) + resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -156,25 +156,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(input, slice, scaleX, coeffs, filterLength) + resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(temp, slice, scaleY, coeffs, filterLength) + resizeGeneric(temp, 
slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -185,25 +185,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(input, slice, scaleX, coeffs, filterLength) + resizeGray(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(temp, slice, scaleY, coeffs, filterLength) + resizeGray(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -214,25 +214,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(input, slice, scaleX, coeffs, filterLength) + resizeGray16(input, slice, 
scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(temp, slice, scaleY, coeffs, filterLength) + resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -243,25 +243,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(img, slice, scaleX, coeffs, filterLength) + resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() From 427b8d133e710b7781794fbf781ade2c1a71dd2e Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 
Jul 2014 18:32:58 -0400 Subject: [PATCH 2/3] Optimized Nearest-Neighbor function - 2x faster --- filters.go | 29 +++++- nearest.go | 228 +++++++++++++++++++++++++++++++++++++++++++ nearest_test.go | 41 ++++++++ resize.go | 253 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 520 insertions(+), 31 deletions(-) create mode 100644 nearest.go create mode 100644 nearest_test.go diff --git a/filters.go b/filters.go index a5f6e79..0cc738c 100644 --- a/filters.go +++ b/filters.go @@ -89,8 +89,9 @@ func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } @@ -108,11 +109,35 @@ func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel fun for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } return coeffs, start, filterLength } + +func createWeightsNearest(dy, minx, filterLength int, blur, scale float64) ([]bool, []int, int) { + filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) + filterFactor := math.Min(1./(blur*scale), 1) + + coeffs := make([]bool, dy*filterLength) + start := make([]int, dy) + for y := 0; y < dy; y++ { + interpX := scale*(float64(y)+0.5) + float64(minx) + start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) + for i := 0; i < filterLength; i++ { + in := (interpX - float64(i)) * filterFactor + if in >= -0.5 && in 
< 0.5 { + coeffs[y*filterLength+i] = true + } else { + coeffs[y*filterLength+i] = false + } + } + } + + return coeffs, start, filterLength +} diff --git a/nearest.go b/nearest.go new file mode 100644 index 0000000..5708fd0 --- /dev/null +++ b/nearest.go @@ -0,0 +1,228 @@ +package resize + +import "image" + +func floatToUint8(x float32) uint8 { + // Nearest-neighbor values are always + // positive no need to check lower-bound. + if x > 0xfe { + return 0xff + } + return uint8(x) +} + +func floatToUint16(x float32) uint16 { + if x > 0xfffe { + return 0xffff + } + return uint16(x) +} + +func nearestGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = oldBounds.Min.X + default: + xi = oldBounds.Max.X - 1 + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += float32(r) + rgba[1] += float32(g) + rgba[2] += float32(b) + rgba[3] += float32(a) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[offset+2] = uint8(value >> 8) + out.Pix[offset+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[offset+4] = uint8(value >> 8) + out.Pix[offset+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[offset+6] = uint8(value >> 8) + out.Pix[offset+7] = uint8(value) + } + } +} + +func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, offset []int, 
filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(row[xi+0]) + rgba[1] += float32(row[xi+1]) + rgba[2] += float32(row[xi+2]) + rgba[3] += float32(row[xi+3]) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = floatToUint8(rgba[0] / sum) + out.Pix[xo+1] = floatToUint8(rgba[1] / sum) + out.Pix[xo+2] = floatToUint8(rgba[2] / sum) + out.Pix[xo+3] = floatToUint8(rgba[3] / sum) + } + } +} + +func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) + rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) + rgba[3] += 
float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) + } + } +} + +func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(row[xi]) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) + out.Pix[offset] = floatToUint8(gray / sum) + } + } +} + +func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < 
filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 + value := floatToUint16(gray / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + } + } +} diff --git a/nearest_test.go b/nearest_test.go new file mode 100644 index 0000000..08adbda --- /dev/null +++ b/nearest_test.go @@ -0,0 +1,41 @@ +package resize + +import "testing" + +func Test_FloatToUint8(t *testing.T) { + var testData = []struct { + in float32 + expected uint8 + }{ + {0, 0}, + {255, 255}, + {128, 128}, + {1, 1}, + {256, 255}, + } + for _, test := range testData { + actual := floatToUint8(test.in) + if actual != test.expected { + t.Fail() + } + } +} + +func Test_FloatToUint16(t *testing.T) { + var testData = []struct { + in float32 + expected uint16 + }{ + {0, 0}, + {65535, 65535}, + {128, 128}, + {1, 1}, + {65536, 65535}, + } + for _, test := range testData { + actual := floatToUint16(test.in) + if actual != test.expected { + t.Fail() + } + } +} diff --git a/resize.go b/resize.go index b9eb775..fad39b4 100644 --- a/resize.go +++ b/resize.go @@ -33,36 +33,41 @@ import ( // An InterpolationFunction provides the parameters that describe an // interpolation kernel. It returns the number of samples to take // and the kernel function to use for sampling. 
-type InterpolationFunction func() (int, func(float64) float64) +type InterpolationFunction int -// Nearest-neighbor interpolation -func NearestNeighbor() (int, func(float64) float64) { - return 2, nearest -} +// InterpolationFunction constants +const ( + // Nearest-neighbor interpolation + NearestNeighbor InterpolationFunction = iota + // Bilinear interpolation + Bilinear + // Bicubic interpolation (with cubic hermite spline) + Bicubic + // Mitchell-Netravali interpolation + MitchellNetravali + // Lanczos interpolation (a=2) + Lanczos2 + // Lanczos interpolation (a=3) + Lanczos3 +) -// Bilinear interpolation -func Bilinear() (int, func(float64) float64) { - return 2, linear -} - -// Bicubic interpolation (with cubic hermite spline) -func Bicubic() (int, func(float64) float64) { - return 4, cubic -} - -// Mitchell-Netravali interpolation -func MitchellNetravali() (int, func(float64) float64) { - return 4, mitchellnetravali -} - -// Lanczos interpolation (a=2) -func Lanczos2() (int, func(float64) float64) { - return 4, lanczos2 -} - -// Lanczos interpolation (a=3) -func Lanczos3() (int, func(float64) float64) { - return 6, lanczos3 +// kernel returns the InterpolationFunction's taps and kernel. +func (i InterpolationFunction) kernel() (int, func(float64) float64) { + switch i { + case Bilinear: + return 2, linear + case Bicubic: + return 4, cubic + case MitchellNetravali: + return 4, mitchellnetravali + case Lanczos2: + return 4, lanczos2 + case Lanczos3: + return 6, lanczos3 + default: + // Default to NearestNeighbor. 
+ return 2, nearest + } } // values <1 will sharpen the image @@ -81,8 +86,11 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i if height == 0 { height = uint(0.7 + float64(img.Bounds().Dy())/scaleY) } + if interp == NearestNeighbor { + return resizeNearest(width, height, scaleX, scaleY, img, interp) + } - taps, kernel := interp() + taps, kernel := interp.kernel() cpus := runtime.NumCPU() wg := sync.WaitGroup{} @@ -269,6 +277,193 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i } } +func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image { + taps, _ := interp.kernel() + cpus := runtime.NumCPU() + wg := sync.WaitGroup{} + + switch input := img.(type) { + case *image.RGBA: + // 8-bit precision + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.YCbCr: + // 8-bit precision + // accessing the YCbCr arrays in a tight loop is slow. + // converting the image before filtering will improve performance. 
+ inputAsRGBA := convertYCbCrToRGBA(input) + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.RGBA64: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed (temp is RGBA64, so stay on the typed fast path) + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + 
wg.Wait() + return result + case *image.Gray: + // 8-bit precision + temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.Gray16: + // 16-bit precision + temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(temp, slice, scaleY, coeffs, offset, 
filterLength) + }() + } + wg.Wait() + return result + default: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestGeneric(img, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + } + +} + // Calculates scaling factors using old and new image dimensions. func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) { if width == 0 { From eefd4737aa938d38cedc7fc0a6e93e310dbc1001 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Wed, 30 Jul 2014 02:08:58 -0400 Subject: [PATCH 3/3] Optimize YCbCr image resize --- converter.go | 93 ++++++++++++++++---- nearest.go | 16 ++++ nearest_test.go | 16 ++++ resize.go | 40 ++++----- ycc.go | 226 ++++++++++++++++++++++++++++++++++++++++++++++++ ycc_test.go | 121 ++++++++++++++++++++++++++ 6 files changed, 473 insertions(+), 39 deletions(-) create mode 100644 ycc.go create mode 100644 ycc_test.go diff --git a/converter.go b/converter.go index f023d9c..bab59ef 100644 --- a/converter.go +++ b/converter.go @@ -16,10 +16,7 @@ THIS SOFTWARE. package resize -import ( - "image" - "image/color" -) +import "image" // Keep value in [0,255] range. 
func clampUint8(in int32) uint8 { @@ -257,19 +254,81 @@ func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []i } } -func convertYCbCrToRGBA(in *image.YCbCr) *image.RGBA { - out := image.NewRGBA(in.Bounds()) - for y := 0; y < out.Bounds().Dy(); y++ { - for x := 0; x < out.Bounds().Dx(); x++ { - p := out.Pix[y*out.Stride+4*x:] - yi := in.YOffset(x, y) - ci := in.COffset(x, y) - r, g, b := color.YCbCrToRGB(in.Y[yi], in.Cb[ci], in.Cr[ci]) - p[0] = r - p[1] = g - p[2] = b - p[3] = 0xff +func resizeYCbCr(in *ycc, out *ycc, scale float64, coeffs []int16, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 3 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 3 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var p [3]int32 + var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 3 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + p[0] += int32(coeff) * int32(row[xi+0]) + p[1] += int32(coeff) * int32(row[xi+1]) + p[2] += int32(coeff) * int32(row[xi+2]) + sum += int32(coeff) + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*3 + out.Pix[xo+0] = clampUint8(p[0] / sum) + out.Pix[xo+1] = clampUint8(p[1] / sum) + out.Pix[xo+2] = clampUint8(p[2] / sum) + } + } +} + +func nearestYCbCr(in *ycc, out *ycc, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 3 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 3 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { 
+ var p [3]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 3 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + p[0] += float32(row[xi+0]) + p[1] += float32(row[xi+1]) + p[2] += float32(row[xi+2]) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*3 + out.Pix[xo+0] = floatToUint8(p[0] / sum) + out.Pix[xo+1] = floatToUint8(p[1] / sum) + out.Pix[xo+2] = floatToUint8(p[2] / sum) } } - return out } diff --git a/nearest.go b/nearest.go index 5708fd0..6ff22f6 100644 --- a/nearest.go +++ b/nearest.go @@ -1,3 +1,19 @@ +/* +Copyright (c) 2014, Charlie Vieth + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +*/ + package resize import "image" diff --git a/nearest_test.go b/nearest_test.go index 08adbda..d4a76dd 100644 --- a/nearest_test.go +++ b/nearest_test.go @@ -1,3 +1,19 @@ +/* +Copyright (c) 2014, Charlie Vieth + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +*/ + package resize import "testing" diff --git a/resize.go b/resize.go index fad39b4..7ee533a 100644 --- a/resize.go +++ b/resize.go @@ -129,35 +129,33 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i case *image.YCbCr: // 8-bit precision // accessing the YCbCr arrays in a tight loop is slow. - // converting the image before filtering will improve performance. - inputAsRGBA := convertYCbCrToRGBA(input) - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + // converting the image to ycc increases performance by 2x. 
+ temp := newYCC(image.Rect(0, 0, input.Bounds().Dy(), int(width)), input.SubsampleRatio) + result := newYCC(image.Rect(0, 0, int(width), int(height)), input.SubsampleRatio) - // horizontal filter, results in transposed temporary image coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + in := imageYCbCrToYCC(input) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) + slice := makeSlice(temp, i, cpus).(*ycc) go func() { defer wg.Done() - resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) + resizeYCbCr(in, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() - // horizontal filter on transposed image, result is not transposed coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) + slice := makeSlice(result, i, cpus).(*ycc) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + resizeYCbCr(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() - return result + return result.YCbCr() case *image.RGBA64: // 16-bit precision temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) @@ -315,35 +313,33 @@ func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, case *image.YCbCr: // 8-bit precision // accessing the YCbCr arrays in a tight loop is slow. - // converting the image before filtering will improve performance. - inputAsRGBA := convertYCbCrToRGBA(input) - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + // converting the image to ycc increases performance by 2x. 
+ temp := newYCC(image.Rect(0, 0, input.Bounds().Dy(), int(width)), input.SubsampleRatio) + result := newYCC(image.Rect(0, 0, int(width), int(height)), input.SubsampleRatio) - // horizontal filter, results in transposed temporary image coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + in := imageYCbCrToYCC(input) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) + slice := makeSlice(temp, i, cpus).(*ycc) go func() { defer wg.Done() - nearestRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) + nearestYCbCr(in, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() - // horizontal filter on transposed image, result is not transposed coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) + slice := makeSlice(result, i, cpus).(*ycc) go func() { defer wg.Done() - nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + nearestYCbCr(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() - return result + return result.YCbCr() case *image.RGBA64: // 16-bit precision temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) diff --git a/ycc.go b/ycc.go new file mode 100644 index 0000000..db6ffe2 --- /dev/null +++ b/ycc.go @@ -0,0 +1,226 @@ +/* +Copyright (c) 2014, Charlie Vieth + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +*/ + +package resize + +import ( + "image" + "image/color" +) + +// ycc is an in memory YCbCr image. The Y, Cb and Cr samples are held in a +// single slice to increase resizing performance. +type ycc struct { + // Pix holds the image's pixels, in Y, Cb, Cr order. The pixel at + // (x, y) starts at Pix[(y-Rect.Min.Y)*Stride + (x-Rect.Min.X)*3]. + Pix []uint8 + // Stride is the Pix stride (in bytes) between vertically adjacent pixels. + Stride int + // Rect is the image's bounds. + Rect image.Rectangle + // SubsampleRatio is the subsample ratio of the original YCbCr image. + SubsampleRatio image.YCbCrSubsampleRatio +} + +// PixOffset returns the index of the first element of Pix that corresponds to +// the pixel at (x, y), relative to Rect.Min (Pix starts at the top-left pixel). +func (p *ycc) PixOffset(x, y int) int { + return (y-p.Rect.Min.Y)*p.Stride + (x-p.Rect.Min.X)*3 +} + +func (p *ycc) Bounds() image.Rectangle { + return p.Rect +} + +func (p *ycc) ColorModel() color.Model { + return color.YCbCrModel +} + +func (p *ycc) At(x, y int) color.Color { + if !(image.Point{x, y}.In(p.Rect)) { + return color.YCbCr{} + } + i := p.PixOffset(x, y) + return color.YCbCr{ + p.Pix[i+0], + p.Pix[i+1], + p.Pix[i+2], + } +} + +func (p *ycc) Opaque() bool { + return true +} + +// SubImage returns an image representing the portion of the image p visible +// through r. The returned value shares pixels with the original image. 
+func (p *ycc) SubImage(r image.Rectangle) image.Image { + r = r.Intersect(p.Rect) + if r.Empty() { + return &ycc{SubsampleRatio: p.SubsampleRatio} + } + i := p.PixOffset(r.Min.X, r.Min.Y) + return &ycc{ + Pix: p.Pix[i:], + Stride: p.Stride, + Rect: r, + SubsampleRatio: p.SubsampleRatio, + } +} + +// newYCC returns a new ycc with the given bounds and subsample ratio. +func newYCC(r image.Rectangle, s image.YCbCrSubsampleRatio) *ycc { + w, h := r.Dx(), r.Dy() + buf := make([]uint8, 3*w*h) + return &ycc{Pix: buf, Stride: 3 * w, Rect: r, SubsampleRatio: s} +} + +// YCbCr converts ycc to a YCbCr image with the same subsample ratio +// as the YCbCr image that ycc was generated from. +func (p *ycc) YCbCr() *image.YCbCr { + ycbcr := image.NewYCbCr(p.Rect, p.SubsampleRatio) + var off int + + switch ycbcr.SubsampleRatio { + case image.YCbCrSubsampleRatio422: + for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { + yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride + cy := (y - ycbcr.Rect.Min.Y) * ycbcr.CStride + for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { + xx := (x - ycbcr.Rect.Min.X) + yi := yy + xx + ci := cy + xx/2 + ycbcr.Y[yi] = p.Pix[off+0] + ycbcr.Cb[ci] = p.Pix[off+1] + ycbcr.Cr[ci] = p.Pix[off+2] + off += 3 + } + } + case image.YCbCrSubsampleRatio420: + for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { + yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride + cy := (y/2 - ycbcr.Rect.Min.Y/2) * ycbcr.CStride + for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { + xx := (x - ycbcr.Rect.Min.X) + yi := yy + xx + ci := cy + xx/2 + ycbcr.Y[yi] = p.Pix[off+0] + ycbcr.Cb[ci] = p.Pix[off+1] + ycbcr.Cr[ci] = p.Pix[off+2] + off += 3 + } + } + case image.YCbCrSubsampleRatio440: + for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { + yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride + cy := (y/2 - ycbcr.Rect.Min.Y/2) * ycbcr.CStride + for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { + xx := (x - ycbcr.Rect.Min.X) + yi := yy + xx + ci := cy + xx + ycbcr.Y[yi] = 
p.Pix[off+0] + ycbcr.Cb[ci] = p.Pix[off+1] + ycbcr.Cr[ci] = p.Pix[off+2] + off += 3 + } + } + default: + // Default to 4:4:4 subsampling. + for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { + yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride + cy := (y - ycbcr.Rect.Min.Y) * ycbcr.CStride + for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { + xx := (x - ycbcr.Rect.Min.X) + yi := yy + xx + ci := cy + xx + ycbcr.Y[yi] = p.Pix[off+0] + ycbcr.Cb[ci] = p.Pix[off+1] + ycbcr.Cr[ci] = p.Pix[off+2] + off += 3 + } + } + } + return ycbcr +} + +// imageYCbCrToYCC converts a YCbCr image to a ycc image for resizing. +func imageYCbCrToYCC(in *image.YCbCr) *ycc { + w, h := in.Rect.Dx(), in.Rect.Dy() + buf := make([]uint8, 3*w*h) + p := ycc{Pix: buf, Stride: 3 * w, Rect: in.Rect, SubsampleRatio: in.SubsampleRatio} + var off int + + switch in.SubsampleRatio { + case image.YCbCrSubsampleRatio422: + for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { + yy := (y - in.Rect.Min.Y) * in.YStride + cy := (y - in.Rect.Min.Y) * in.CStride + for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { + xx := (x - in.Rect.Min.X) + yi := yy + xx + ci := cy + xx/2 + p.Pix[off+0] = in.Y[yi] + p.Pix[off+1] = in.Cb[ci] + p.Pix[off+2] = in.Cr[ci] + off += 3 + } + } + case image.YCbCrSubsampleRatio420: + for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { + yy := (y - in.Rect.Min.Y) * in.YStride + cy := (y/2 - in.Rect.Min.Y/2) * in.CStride + for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { + xx := (x - in.Rect.Min.X) + yi := yy + xx + ci := cy + xx/2 + p.Pix[off+0] = in.Y[yi] + p.Pix[off+1] = in.Cb[ci] + p.Pix[off+2] = in.Cr[ci] + off += 3 + } + } + case image.YCbCrSubsampleRatio440: + for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { + yy := (y - in.Rect.Min.Y) * in.YStride + cy := (y/2 - in.Rect.Min.Y/2) * in.CStride + for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { + xx := (x - in.Rect.Min.X) + yi := yy + xx + ci := cy + xx + p.Pix[off+0] = in.Y[yi] + p.Pix[off+1] = in.Cb[ci] + p.Pix[off+2] = in.Cr[ci] + off += 
3 + } + } + default: + // Default to 4:4:4 subsampling. + for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { + yy := (y - in.Rect.Min.Y) * in.YStride + cy := (y - in.Rect.Min.Y) * in.CStride + for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { + xx := (x - in.Rect.Min.X) + yi := yy + xx + ci := cy + xx + p.Pix[off+0] = in.Y[yi] + p.Pix[off+1] = in.Cb[ci] + p.Pix[off+2] = in.Cr[ci] + off += 3 + } + } + } + return &p +} diff --git a/ycc_test.go b/ycc_test.go new file mode 100644 index 0000000..5128173 --- /dev/null +++ b/ycc_test.go @@ -0,0 +1,121 @@ +/* +Copyright (c) 2014, Charlie Vieth + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. 
+*/ + +package resize + +import ( + "image" + "testing" +) + +type Image interface { + image.Image + SubImage(image.Rectangle) image.Image +} + +func TestImage(t *testing.T) { + testImage := []Image{ + newYCC(image.Rect(0, 0, 10, 10), image.YCbCrSubsampleRatio420), + newYCC(image.Rect(0, 0, 10, 10), image.YCbCrSubsampleRatio422), + newYCC(image.Rect(0, 0, 10, 10), image.YCbCrSubsampleRatio440), + newYCC(image.Rect(0, 0, 10, 10), image.YCbCrSubsampleRatio444), + } + for _, m := range testImage { + if !image.Rect(0, 0, 10, 10).Eq(m.Bounds()) { + t.Errorf("%T: want bounds %v, got %v", + m, image.Rect(0, 0, 10, 10), m.Bounds()) + continue + } + m = m.SubImage(image.Rect(3, 2, 9, 8)).(Image) + if !image.Rect(3, 2, 9, 8).Eq(m.Bounds()) { + t.Errorf("%T: sub-image want bounds %v, got %v", + m, image.Rect(3, 2, 9, 8), m.Bounds()) + continue + } + // Test that taking an empty sub-image starting at a corner does not panic. + m.SubImage(image.Rect(0, 0, 0, 0)) + m.SubImage(image.Rect(10, 0, 10, 0)) + m.SubImage(image.Rect(0, 10, 0, 10)) + m.SubImage(image.Rect(10, 10, 10, 10)) + } +} + +func TestConvertYCbCr(t *testing.T) { + testImage := []Image{ + image.NewYCbCr(image.Rect(0, 0, 50, 50), image.YCbCrSubsampleRatio420), + image.NewYCbCr(image.Rect(0, 0, 50, 50), image.YCbCrSubsampleRatio422), + image.NewYCbCr(image.Rect(0, 0, 50, 50), image.YCbCrSubsampleRatio440), + image.NewYCbCr(image.Rect(0, 0, 50, 50), image.YCbCrSubsampleRatio444), + } + + for _, img := range testImage { + m := img.(*image.YCbCr) + for y := m.Rect.Min.Y; y < m.Rect.Max.Y; y++ { + for x := m.Rect.Min.X; x < m.Rect.Max.X; x++ { + yi := m.YOffset(x, y) + ci := m.COffset(x, y) + m.Y[yi] = uint8(16*y + x) + m.Cb[ci] = uint8(y + 16*x) + m.Cr[ci] = uint8(y + 16*x) + } + } + + // test conversion from YCbCr to ycc + yc := imageYCbCrToYCC(m) + for y := m.Rect.Min.Y; y < m.Rect.Max.Y; y++ { + for x := m.Rect.Min.X; x < m.Rect.Max.X; x++ { + ystride := 3 * (m.Rect.Max.X - m.Rect.Min.X) + xstride := 3 + yi := 
m.YOffset(x, y) + ci := m.COffset(x, y) + si := (y * ystride) + (x * xstride) + if m.Y[yi] != yc.Pix[si] { + t.Errorf("Err Y - found: %d expected: %d x: %d y: %d yi: %d si: %d", + m.Y[yi], yc.Pix[si], x, y, yi, si) + } + if m.Cb[ci] != yc.Pix[si+1] { + t.Errorf("Err Cb - found: %d expected: %d x: %d y: %d ci: %d si: %d", + m.Cb[ci], yc.Pix[si+1], x, y, ci, si+1) + } + if m.Cr[ci] != yc.Pix[si+2] { + t.Errorf("Err Cr - found: %d expected: %d x: %d y: %d ci: %d si: %d", + m.Cr[ci], yc.Pix[si+2], x, y, ci, si+2) + } + } + } + + // test conversion from ycc back to YCbCr + ym := yc.YCbCr() + for y := m.Rect.Min.Y; y < m.Rect.Max.Y; y++ { + for x := m.Rect.Min.X; x < m.Rect.Max.X; x++ { + yi := m.YOffset(x, y) + ci := m.COffset(x, y) + if m.Y[yi] != ym.Y[yi] { + t.Errorf("Err Y - found: %d expected: %d x: %d y: %d yi: %d", + m.Y[yi], ym.Y[yi], x, y, yi) + } + if m.Cb[ci] != ym.Cb[ci] { + t.Errorf("Err Cb - found: %d expected: %d x: %d y: %d ci: %d", + m.Cb[ci], ym.Cb[ci], x, y, ci) + } + if m.Cr[ci] != ym.Cr[ci] { + t.Errorf("Err Cr - found: %d expected: %d x: %d y: %d ci: %d", + m.Cr[ci], ym.Cr[ci], x, y, ci) + } + } + } + } +}