From 314cea661237c1cdc0e072f3b38d11c5ecd38e98 Mon Sep 17 00:00:00 2001
From: jst <janschlicht@gmail.com>
Date: Fri, 17 Jan 2014 22:54:15 +0100
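Subject: [PATCH] Avoid temporaries to save some cycles.

The converter at() methods now write the pixel into a caller-supplied
*colorArray instead of returning a new colorArray by value, and
Interpolate passes &f.tempRow[i] so each entry is filled in place.
convolution1d accumulates into a named result rather than a separately
initialised local.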

---
 converter.go | 80 ++++++++++++++++++++++++----------------------------
 filters.go   |  7 ++---
 2 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/converter.go b/converter.go
index c301fee..c4a1bdc 100644
--- a/converter.go
+++ b/converter.go
@@ -43,94 +43,88 @@ func replicateBorder(x, y int, rect image.Rectangle) (xx, yy int) {
 // the idea is to speed up computation by providing optimized implementations
 // for different image types instead of relying on image.Image.At().
 type converter interface {
-	at(x, y int) colorArray
+	at(x, y int, color *colorArray)
 }
 
 type genericConverter struct {
 	src image.Image
 }
 
-func (c *genericConverter) at(x, y int) colorArray {
+func (c *genericConverter) at(x, y int, result *colorArray) {
 	r, g, b, a := c.src.At(replicateBorder(x, y, c.src.Bounds())).RGBA()
-	return colorArray{
-		float32(r),
-		float32(g),
-		float32(b),
-		float32(a),
-	}
+	result[0] = float32(r)
+	result[1] = float32(g)
+	result[2] = float32(b)
+	result[3] = float32(a)
+	return
 }
 
 type rgbaConverter struct {
 	src *image.RGBA
 }
 
-func (c *rgbaConverter) at(x, y int) colorArray {
+func (c *rgbaConverter) at(x, y int, result *colorArray) {
 	i := c.src.PixOffset(replicateBorder(x, y, c.src.Rect))
-	return colorArray{
-		float32(uint16(c.src.Pix[i+0])<<8 | uint16(c.src.Pix[i+0])),
-		float32(uint16(c.src.Pix[i+1])<<8 | uint16(c.src.Pix[i+1])),
-		float32(uint16(c.src.Pix[i+2])<<8 | uint16(c.src.Pix[i+2])),
-		float32(uint16(c.src.Pix[i+3])<<8 | uint16(c.src.Pix[i+3])),
-	}
+	result[0] = float32(uint16(c.src.Pix[i+0])<<8 | uint16(c.src.Pix[i+0]))
+	result[1] = float32(uint16(c.src.Pix[i+1])<<8 | uint16(c.src.Pix[i+1]))
+	result[2] = float32(uint16(c.src.Pix[i+2])<<8 | uint16(c.src.Pix[i+2]))
+	result[3] = float32(uint16(c.src.Pix[i+3])<<8 | uint16(c.src.Pix[i+3]))
+	return
 }
 
 type rgba64Converter struct {
 	src *image.RGBA64
 }
 
-func (c *rgba64Converter) at(x, y int) colorArray {
+func (c *rgba64Converter) at(x, y int, result *colorArray) {
 	i := c.src.PixOffset(replicateBorder(x, y, c.src.Rect))
-	return colorArray{
-		float32(uint16(c.src.Pix[i+0])<<8 | uint16(c.src.Pix[i+1])),
-		float32(uint16(c.src.Pix[i+2])<<8 | uint16(c.src.Pix[i+3])),
-		float32(uint16(c.src.Pix[i+4])<<8 | uint16(c.src.Pix[i+5])),
-		float32(uint16(c.src.Pix[i+6])<<8 | uint16(c.src.Pix[i+7])),
-	}
+	result[0] = float32(uint16(c.src.Pix[i+0])<<8 | uint16(c.src.Pix[i+1]))
+	result[1] = float32(uint16(c.src.Pix[i+2])<<8 | uint16(c.src.Pix[i+3]))
+	result[2] = float32(uint16(c.src.Pix[i+4])<<8 | uint16(c.src.Pix[i+5]))
+	result[3] = float32(uint16(c.src.Pix[i+6])<<8 | uint16(c.src.Pix[i+7]))
+	return
 }
 
 type grayConverter struct {
 	src *image.Gray
 }
 
-func (c *grayConverter) at(x, y int) colorArray {
+func (c *grayConverter) at(x, y int, result *colorArray) {
 	i := c.src.PixOffset(replicateBorder(x, y, c.src.Rect))
 	g := float32(uint16(c.src.Pix[i])<<8 | uint16(c.src.Pix[i]))
-	return colorArray{
-		g,
-		g,
-		g,
-		float32(0xffff),
-	}
+	result[0] = g
+	result[1] = g
+	result[2] = g
+	result[3] = float32(0xffff)
+	return
 }
 
 type gray16Converter struct {
 	src *image.Gray16
 }
 
-func (c *gray16Converter) at(x, y int) colorArray {
+func (c *gray16Converter) at(x, y int, result *colorArray) {
 	i := c.src.PixOffset(replicateBorder(x, y, c.src.Rect))
 	g := float32(uint16(c.src.Pix[i+0])<<8 | uint16(c.src.Pix[i+1]))
-	return colorArray{
-		g,
-		g,
-		g,
-		float32(0xffff),
-	}
+	result[0] = g
+	result[1] = g
+	result[2] = g
+	result[3] = float32(0xffff)
+	return
 }
 
 type ycbcrConverter struct {
 	src *image.YCbCr
 }
 
-func (c *ycbcrConverter) at(x, y int) colorArray {
+func (c *ycbcrConverter) at(x, y int, result *colorArray) {
 	xx, yy := replicateBorder(x, y, c.src.Rect)
 	yi := c.src.YOffset(xx, yy)
 	ci := c.src.COffset(xx, yy)
 	r, g, b := color.YCbCrToRGB(c.src.Y[yi], c.src.Cb[ci], c.src.Cr[ci])
-	return colorArray{
-		float32(uint16(r) * 0x101),
-		float32(uint16(g) * 0x101),
-		float32(uint16(b) * 0x101),
-		float32(0xffff),
-	}
+	result[0] = float32(uint16(r) * 0x101)
+	result[1] = float32(uint16(g) * 0x101)
+	result[2] = float32(uint16(b) * 0x101)
+	result[3] = float32(0xffff)
+	return
 }
diff --git a/filters.go b/filters.go
index e383479..caed5a6 100644
--- a/filters.go
+++ b/filters.go
@@ -51,10 +51,9 @@ type filterModel struct {
 	tempRow []colorArray
 }
 
-func (f *filterModel) convolution1d(x float32, p []colorArray, factor float32) colorArray {
+func (f *filterModel) convolution1d(x float32, p []colorArray, factor float32) (c colorArray) {
 	var k float32
 	var sum float32 = 0
-	c := colorArray{0.0, 0.0, 0.0, 0.0}
 
 	for j := range p {
 		k = f.kernel((x - float32(j)) / factor)
@@ -69,7 +68,7 @@ func (f *filterModel) convolution1d(x float32, p []colorArray, factor float32) c
 		c[i] = c[i] / sum
 	}
 
-	return c
+	return
 }
 
 func (f *filterModel) Interpolate(u float32, y int) color.RGBA64 {
@@ -77,7 +76,7 @@ func (f *filterModel) Interpolate(u float32, y int) color.RGBA64 {
 	u -= float32(uf)
 
 	for i := range f.tempRow {
-		f.tempRow[i] = f.at(uf+i, y)
+		f.at(uf+i, y, &f.tempRow[i])
 	}
 
 	c := f.convolution1d(u, f.tempRow, f.factor)