diff --git a/draw/gen.go b/draw/gen.go index ced1c9a..b429465 100644 --- a/draw/gen.go +++ b/draw/gen.go @@ -116,6 +116,12 @@ func genKernel(w *bytes.Buffer) { dType: dType, }) } + for _, t := range dsTypes { + expn(w, codeKernelTransformLeaf, &data{ + dType: t.dType, + sType: t.sType, + }) + } } func expn(w *bytes.Buffer, code string, d *data) { @@ -154,6 +160,9 @@ func expnLine(line string, d *data) string { return line } +// expnDollar expands a "$foo" fragment in a line of generated code. It returns +// the empty string if there was a problem. It returns ";" if the generated +// code is a no-op. func expnDollar(prefix, dollar, suffix string, d *data) string { switch dollar { case "dType": @@ -246,32 +255,39 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { case "outputf": args, _ := splitArgs(suffix) - if len(args) != 4 { + if len(args) != 5 { return "" } + ret := "" switch d.dType { default: log.Fatalf("bad dType %q", d.dType) case "Image": - return fmt.Sprintf(""+ - "dstColorRGBA64.R = ftou(%sr * %s)\n"+ - "dstColorRGBA64.G = ftou(%sg * %s)\n"+ - "dstColorRGBA64.B = ftou(%sb * %s)\n"+ - "dstColorRGBA64.A = ftou(%sa * %s)\n"+ + ret = fmt.Sprintf(""+ + "dstColorRGBA64.R = %s(%sr * %s)\n"+ + "dstColorRGBA64.G = %s(%sg * %s)\n"+ + "dstColorRGBA64.B = %s(%sb * %s)\n"+ + "dstColorRGBA64.A = %s(%sa * %s)\n"+ "dst.Set(%s, %s, dstColor)", - args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], + args[2], args[3], args[4], + args[2], args[3], args[4], + args[2], args[3], args[4], + args[2], args[3], args[4], args[0], args[1], ) case "*image.RGBA": - return fmt.Sprintf(""+ - "dst.Pix[d+0] = uint8(ftou(%sr * %s) >> 8)\n"+ - "dst.Pix[d+1] = uint8(ftou(%sg * %s) >> 8)\n"+ - "dst.Pix[d+2] = uint8(ftou(%sb * %s) >> 8)\n"+ - "dst.Pix[d+3] = uint8(ftou(%sa * %s) >> 8)\n"+ - "d += dst.Stride", - args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3], + ret = fmt.Sprintf(""+ + "dst.Pix[d+0] = uint8(%s(%sr * %s) >> 8)\n"+ + "dst.Pix[d+1] = uint8(%s(%sg * %s) >> 8)\n"+ + "dst.Pix[d+2] = uint8(%s(%sb * %s) >> 8)\n"+ + "dst.Pix[d+3] = uint8(%s(%sa * %s) >> 8)", + args[2], args[3], args[4], + args[2], args[3], args[4], + args[2], args[3], args[4], + args[2], args[3], args[4], ) } + return strings.Replace(ret, " * 1)", ")", -1) case "srcf", "srcu": lhs, eqOp := splitEq(prefix) @@ -329,6 +345,12 @@ func expnDollar(prefix, dollar, suffix string, d *data) string { return strings.TrimSpace(buf.String()) + case "tweakD": + if d.dType == "*image.RGBA" { + return "d += dst.Stride" + } + return ";" + case "tweakDx": if d.dType == "*image.RGBA" { return strings.Replace(suffix, "dx++", "dx, d = dx+1, d+4", 1) @@ -444,7 +466,14 @@ const ( return } d2s := invert(s2d) - $switch z.transform_$dTypeRN_$sTypeRN(dst, dr, adr, &d2s, src, sr) + // sr is the source pixels. If it extends beyond the src bounds, + // we cannot use the type-specific fast paths, as they access + // the Pix fields directly without bounds checking. + if !sr.In(src.Bounds()) { + z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + } else { + $switch z.transform_$dTypeRN_$sTypeRN(dst, dr, adr, &d2s, src, sr) + } } ` @@ -475,6 +504,7 @@ const ( $preInner $tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X + int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -549,6 +579,7 @@ const ( $preInner $tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X + int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -625,8 +656,32 @@ const ( $switchD z.scaleY_$dTypeRN(dst, dr, adr, tmp) } - func (z *Kernel) Transform(dst Image, m *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) { - panic("unimplemented") + func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) { + dr := transformRect(s2d, &sr) + // adr is the affected destination pixels, relative to dr.Min. + adr := dst.Bounds().Intersect(dr).Sub(dr.Min) + if adr.Empty() || sr.Empty() { + return + } + d2s := invert(s2d) + + xscale := abs(d2s[0]) + if s := abs(d2s[1]); xscale < s { + xscale = s + } + yscale := abs(d2s[3]) + if s := abs(d2s[4]); yscale < s { + yscale = s + } + + // sr is the source pixels. If it extends beyond the src bounds, + // we cannot use the type-specific fast paths, as they access + // the Pix fields directly without bounds checking. + if !sr.In(src.Bounds()) { + q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale) + } else { + $switch q.transform_$dTypeRN_$sTypeRN(dst, dr, adr, &d2s, src, sr, xscale, yscale) + } } ` @@ -665,7 +720,98 @@ const ( pb += p[2] * c.weight pa += p[3] * c.weight } - $outputf[dr.Min.X + int(dx), dr.Min.Y + int(adr.Min.Y + dy), p, s.invTotalWeight] + $outputf[dr.Min.X + int(dx), dr.Min.Y + int(adr.Min.Y + dy), ftou, p, s.invTotalWeight] + $tweakD + } + } + } + ` + + codeKernelTransformLeaf = ` + func (q *Kernel) transform_$dTypeRN_$sTypeRN(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1 + 2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1 + 2*int(math.Ceil(yHalfWidth))) + + $preOuter + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y + int(dy)) + 0.5 + $preInner + $tweakDx for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X + int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx - ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky - iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky - iy] + for kx := ix; kx < jx; kx++ { + p += $srcf[kx, ky] * xWeights[kx - ix] * yWeight + } + } + $outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1] } } } diff --git a/draw/impl.go b/draw/impl.go index e6f215a..8ebdd6f 100644 --- a/draw/impl.go +++ b/draw/impl.go @@ -55,26 +55,33 @@ func (z nnInterpolator) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr return } d2s := invert(s2d) - switch dst := dst.(type) { - case *image.RGBA: - switch src := src.(type) { - case *image.Gray: - z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr) - case *image.NRGBA: - z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr) + // sr is the source pixels. If it extends beyond the src bounds, + // we cannot use the type-specific fast paths, as they access + // the Pix fields directly without bounds checking. + if !sr.In(src.Bounds()) { + z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + } else { + switch dst := dst.(type) { case *image.RGBA: - z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr) - case *image.Uniform: - z.transform_RGBA_Uniform(dst, dr, adr, &d2s, src, sr) - case *image.YCbCr: - z.transform_RGBA_YCbCr(dst, dr, adr, &d2s, src, sr) + switch src := src.(type) { + case *image.Gray: + z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr) + case *image.NRGBA: + z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr) + case *image.RGBA: + z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr) + case *image.Uniform: + z.transform_RGBA_Uniform(dst, dr, adr, &d2s, src, sr) + case *image.YCbCr: + z.transform_RGBA_YCbCr(dst, dr, adr, &d2s, src, sr) + default: + z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr) + } default: - z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr) - } - default: - switch src := src.(type) { - default: - z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + switch src := src.(type) { + default: + z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + } } } } @@ -224,6 +231,7 @@ func (nnInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectang d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -244,6 +252,7 @@ func (nnInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectan d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -264,6 +273,7 @@ func (nnInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectang d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -288,6 +298,7 @@ func (nnInterpolator) transform_RGBA_Uniform(dst *image.RGBA, dr, adr image.Rect d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -308,6 +319,7 @@ func (nnInterpolator) transform_RGBA_YCbCr(dst *image.RGBA, dr, adr image.Rectan d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -328,6 +340,7 @@ func (nnInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectan d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -349,6 +362,7 @@ func (nnInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2])) sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5])) if !(image.Point{sx0, sy0}).In(sr) { @@ -409,26 +423,33 @@ func (z ablInterpolator) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr return } d2s := invert(s2d) - switch dst := dst.(type) { - case *image.RGBA: - switch src := src.(type) { - case *image.Gray: - z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr) - case *image.NRGBA: - z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr) + // sr is the source pixels. If it extends beyond the src bounds, + // we cannot use the type-specific fast paths, as they access + // the Pix fields directly without bounds checking. + if !sr.In(src.Bounds()) { + z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + } else { + switch dst := dst.(type) { case *image.RGBA: - z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr) - case *image.Uniform: - z.transform_RGBA_Uniform(dst, dr, adr, &d2s, src, sr) - case *image.YCbCr: - z.transform_RGBA_YCbCr(dst, dr, adr, &d2s, src, sr) + switch src := src.(type) { + case *image.Gray: + z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr) + case *image.NRGBA: + z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr) + case *image.RGBA: + z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr) + case *image.Uniform: + z.transform_RGBA_Uniform(dst, dr, adr, &d2s, src, sr) + case *image.YCbCr: + z.transform_RGBA_YCbCr(dst, dr, adr, &d2s, src, sr) + default: + z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr) + } default: - z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr) - } - default: - switch src := src.(type) { - default: - z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + switch src := src.(type) { + default: + z.transform_Image_Image(dst, dr, adr, &d2s, src, sr) + } } } } @@ -1010,6 +1031,7 @@ func (ablInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectan d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1090,6 +1112,7 @@ func (ablInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Recta d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1170,6 +1193,7 @@ func (ablInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectan d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1266,6 +1290,7 @@ func (ablInterpolator) transform_RGBA_Uniform(dst *image.RGBA, dr, adr image.Rec d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1346,6 +1371,7 @@ func (ablInterpolator) transform_RGBA_YCbCr(dst *image.RGBA, dr, adr image.Recta d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1426,6 +1452,7 @@ func (ablInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Recta d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1507,6 +1534,7 @@ func (ablInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { @@ -1628,8 +1656,53 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr } } -func (z *Kernel) Transform(dst Image, m *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) { - panic("unimplemented") +func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) { + dr := transformRect(s2d, &sr) + // adr is the affected destination pixels, relative to dr.Min. + adr := dst.Bounds().Intersect(dr).Sub(dr.Min) + if adr.Empty() || sr.Empty() { + return + } + d2s := invert(s2d) + + xscale := abs(d2s[0]) + if s := abs(d2s[1]); xscale < s { + xscale = s + } + yscale := abs(d2s[3]) + if s := abs(d2s[4]); yscale < s { + yscale = s + } + + // sr is the source pixels. If it extends beyond the src bounds, + // we cannot use the type-specific fast paths, as they access + // the Pix fields directly without bounds checking. + if !sr.In(src.Bounds()) { + q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale) + } else { + switch dst := dst.(type) { + case *image.RGBA: + switch src := src.(type) { + case *image.Gray: + q.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr, xscale, yscale) + case *image.NRGBA: + q.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr, xscale, yscale) + case *image.RGBA: + q.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr, xscale, yscale) + case *image.Uniform: + q.transform_RGBA_Uniform(dst, dr, adr, &d2s, src, sr, xscale, yscale) + case *image.YCbCr: + q.transform_RGBA_YCbCr(dst, dr, adr, &d2s, src, sr, xscale, yscale) + default: + q.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale) + } + default: + switch src := src.(type) { + default: + q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale) + } + } + } } func (z *kernelScaler) scaleX_Gray(tmp [][4]float64, src *image.Gray, sr image.Rectangle) { @@ -1816,3 +1889,667 @@ func (z *kernelScaler) scaleY_Image(dst Image, dr, adr image.Rectangle, tmp [][4 } } } + +func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pi := src.PixOffset(kx, ky) + pru := uint32(src.Pix[pi+0]) * 0x101 + pgu := uint32(src.Pix[pi+1]) * 0x101 + pbu := uint32(src.Pix[pi+2]) * 0x101 + pau := uint32(src.Pix[pi+3]) * 0x101 + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_Uniform(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_YCbCr(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, xscale, yscale float64) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + dstColorRGBA64 := &color.RGBA64{} + dstColor := color.Color(dstColorRGBA64) + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + // TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)). + sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] + sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) { + continue + } + + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + yWeight := yWeights[ky-iy] + for kx := ix; kx < jx; kx++ { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(pru) * xWeights[kx-ix] * yWeight + pg += float64(pgu) * xWeights[kx-ix] * yWeight + pb += float64(pbu) * xWeights[kx-ix] * yWeight + pa += float64(pau) * xWeights[kx-ix] * yWeight + } + } + dstColorRGBA64.R = fffftou(pr) + dstColorRGBA64.G = fffftou(pg) + dstColorRGBA64.B = fffftou(pb) + dstColorRGBA64.A = fffftou(pa) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor) + } + } +} diff --git a/draw/scale.go b/draw/scale.go index 4f94086..d570f6e 100644 --- a/draw/scale.go +++ b/draw/scale.go @@ -86,21 +86,21 @@ type Kernel struct { } // Scale implements the Scaler interface. -func (k *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { - k.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()).Scale(dst, dr, src, sr, opts) +func (q *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { + q.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()).Scale(dst, dr, src, sr, opts) } // NewScaler returns a Scaler that is optimized for scaling multiple times with // the same fixed destination and source width and height. -func (k *Kernel) NewScaler(dw, dh, sw, sh int) Scaler { +func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler { return &kernelScaler{ - kernel: k, + kernel: q, dw: int32(dw), dh: int32(dh), sw: int32(sw), sh: int32(sh), - horizontal: newDistrib(k, int32(dw), int32(sw)), - vertical: newDistrib(k, int32(dh), int32(sh)), + horizontal: newDistrib(q, int32(dw), int32(sw)), + vertical: newDistrib(q, int32(dh), int32(sh)), } } @@ -181,6 +181,8 @@ type distrib struct { func newDistrib(q *Kernel, dw, sw int32) distrib { scale := float64(sw) / float64(dw) halfWidth, kernelArgScale := q.Support, 1.0 + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. if scale > 1 { halfWidth *= scale kernelArgScale = 1 / scale @@ -199,25 +201,22 @@ func newDistrib(q *Kernel, dw, sw int32) distrib { i = 0 } j := int32(math.Ceil(center + halfWidth)) - if j >= sw { - j = sw - 1 + if j > sw { + j = sw if j < i { j = i } } sources[x] = source{i: i, j: j, invTotalWeight: center} - n += j - i + 1 + n += j - i } contribs := make([]contrib, 0, n) for k, b := range sources { totalWeight := 0.0 l := int32(len(contribs)) - for coord := b.i; coord <= b.j; coord++ { - t := (b.invTotalWeight - float64(coord)) * kernelArgScale - if t < 0 { - t = -t - } + for coord := b.i; coord < b.j; coord++ { + t := abs((b.invTotalWeight - float64(coord)) * kernelArgScale) if t >= q.Support { continue } @@ -240,11 +239,34 @@ func newDistrib(q *Kernel, dw, sw int32) distrib { return distrib{sources, contribs} } +// abs is like math.Abs, but it doesn't care about negative zero, infinities or +// NaNs. +func abs(f float64) float64 { + if f < 0 { + f = -f + } + return f +} + +// ftou converts the range [0.0, 1.0] to [0, 0xffff]. func ftou(f float64) uint16 { i := int32(0xffff*f + 0.5) if i > 0xffff { return 0xffff - } else if i > 0 { + } + if i > 0 { + return uint16(i) + } + return 0 +} + +// fffftou converts the range [0.0, 65535.0] to [0, 0xffff]. +func fffftou(f float64) uint16 { + i := int32(f + 0.5) + if i > 0xffff { + return 0xffff + } + if i > 0 { return uint16(i) } return 0 @@ -275,6 +297,17 @@ func invert(m *f64.Aff3) f64.Aff3 { } } +func matMul(p, q *f64.Aff3) f64.Aff3 { + return f64.Aff3{ + p[3*0+0]*q[3*0+0] + p[3*0+1]*q[3*1+0], + p[3*0+0]*q[3*0+1] + p[3*0+1]*q[3*1+1], + p[3*0+0]*q[3*0+2] + p[3*0+1]*q[3*1+2] + p[3*0+2], + p[3*1+0]*q[3*0+0] + p[3*1+1]*q[3*1+0], + p[3*1+0]*q[3*0+1] + p[3*1+1]*q[3*1+1], + p[3*1+0]*q[3*0+2] + p[3*1+1]*q[3*1+2] + p[3*1+2], + } +} + // transformRect returns a rectangle dr that contains sr transformed by s2d. func transformRect(s2d *f64.Aff3, sr *image.Rectangle) (dr image.Rectangle) { ps := [...]image.Point{ diff --git a/draw/scale_test.go b/draw/scale_test.go index c15810e..4ed60d6 100644 --- a/draw/scale_test.go +++ b/draw/scale_test.go @@ -23,13 +23,25 @@ import ( var genGoldenFiles = flag.Bool("gen_golden_files", false, "whether to generate the TestXxx golden files.") -var transformMatrix = func() *f64.Aff3 { +var transformMatrix = func(tx, ty float64) *f64.Aff3 { const scale, cos30, sin30 = 3.75, 0.866025404, 0.5 return &f64.Aff3{ - +scale * cos30, -scale * sin30, 40, - +scale * sin30, +scale * cos30, 10, + +scale * cos30, -scale * sin30, tx, + +scale * sin30, +scale * cos30, ty, } -}() +} + +func encode(filename string, m image.Image) error { + f, err := os.Create(filename) + if err != nil { + return fmt.Errorf("Create: %v", err) + } + defer f.Close() + if err := png.Encode(f, m); err != nil { + return fmt.Errorf("Encode: %v", err) + } + return nil +} // testInterp tests that interpolating the source image gives the exact // destination image. This is to ensure that any refactoring or optimization of @@ -58,25 +70,14 @@ func testInterp(t *testing.T, w int, h int, direction, srcFilename string) { got := image.NewRGBA(image.Rect(0, 0, w, h)) if direction == "rotate" { - if name == "bl" || name == "cr" { - // TODO: implement Kernel.Transform. - continue - } - q.Transform(got, transformMatrix, src, src.Bounds(), nil) + q.Transform(got, transformMatrix(40, 10), src, src.Bounds(), nil) } else { q.Scale(got, got.Bounds(), src, src.Bounds(), nil) } if *genGoldenFiles { - g, err := os.Create(goldenFilename) - if err != nil { - t.Errorf("Create: %v", err) - continue - } - defer g.Close() - if err := png.Encode(g, got); err != nil { - t.Errorf("Encode: %v", err) - continue + if err := encode(goldenFilename, got); err != nil { + t.Error(err) } continue } @@ -132,11 +133,6 @@ func TestInterpClipCommute(t *testing.T) { } for _, transform := range []bool{false, true} { for _, q := range qs { - if transform && q == CatmullRom { - // TODO: implement Kernel.Transform. - continue - } - dst0 := image.NewRGBA(image.Rect(1, 1, 10, 10)) dst1 := image.NewRGBA(image.Rect(1, 1, 10, 10)) for i := range dst0.Pix { @@ -147,7 +143,7 @@ func TestInterpClipCommute(t *testing.T) { var interp func(dst *image.RGBA) if transform { interp = func(dst *image.RGBA) { - q.Transform(dst, transformMatrix, src, src.Bounds(), nil) + q.Transform(dst, transformMatrix(2, 1), src, src.Bounds(), nil) } } else { interp = func(dst *image.RGBA) { @@ -216,20 +212,32 @@ func TestSrcTranslationInvariance(t *testing.T) { {-8, +8}, {-8, -8}, } + m00 := transformMatrix(0, 0) - for _, q := range qs { - want := image.NewRGBA(image.Rect(0, 0, 200, 200)) - q.Scale(want, want.Bounds(), src, src.Bounds(), nil) - for _, delta := range deltas { - tsrc := &translatedImage{src, delta} - - got := image.NewRGBA(image.Rect(0, 0, 200, 200)) - q.Scale(got, got.Bounds(), tsrc, tsrc.Bounds(), nil) - if !bytes.Equal(got.Pix, want.Pix) { - t.Errorf("pix differ for delta=%v, q=%T", delta, q) + for _, transform := range []bool{false, true} { + for _, q := range qs { + want := image.NewRGBA(image.Rect(0, 0, 200, 200)) + if transform { + q.Transform(want, m00, src, src.Bounds(), nil) + } else { + q.Scale(want, want.Bounds(), src, src.Bounds(), nil) + } + for _, delta := range deltas { + tsrc := &translatedImage{src, delta} + got := image.NewRGBA(image.Rect(0, 0, 200, 200)) + if transform { + m := matMul(m00, &f64.Aff3{ + 1, 0, -float64(delta.X), + 0, 1, -float64(delta.Y), + }) + q.Transform(got, &m, tsrc, tsrc.Bounds(), nil) + } else { + q.Scale(got, got.Bounds(), tsrc, tsrc.Bounds(), nil) + } + if !bytes.Equal(got.Pix, want.Pix) { + t.Errorf("pix differ for delta=%v, transform=%t, q=%T", delta, transform, q) + } } - - // TODO: Transform, once Kernel.Transform is implemented. } } } @@ -285,18 +293,27 @@ func TestFastPaths(t *testing.T) { for _, dr := range drs { for _, src := range srcs { for _, sr := range srs { - for _, q := range qs { - dst0 := image.NewRGBA(drs[0]) - dst1 := image.NewRGBA(drs[0]) - Draw(dst0, dst0.Bounds(), blue, image.Point{}, Src) - Draw(dstWrapper{dst1}, dst1.Bounds(), srcWrapper{blue}, image.Point{}, Src) - q.Scale(dst0, dr, src, sr, nil) - q.Scale(dstWrapper{dst1}, dr, srcWrapper{src}, sr, nil) - if !bytes.Equal(dst0.Pix, dst1.Pix) { - t.Errorf("pix differ for dr=%v, src=%T, sr=%v, q=%T", dr, src, sr, q) - } + for _, transform := range []bool{false, true} { + for _, q := range qs { + dst0 := image.NewRGBA(drs[0]) + dst1 := image.NewRGBA(drs[0]) + Draw(dst0, dst0.Bounds(), blue, image.Point{}, Src) + Draw(dstWrapper{dst1}, dst1.Bounds(), srcWrapper{blue}, image.Point{}, Src) - // TODO: Transform, once Kernel.Transform is implemented. + if transform { + m := transformMatrix(2, 1) + q.Transform(dst0, m, src, sr, nil) + q.Transform(dstWrapper{dst1}, m, srcWrapper{src}, sr, nil) + } else { + q.Scale(dst0, dr, src, sr, nil) + q.Scale(dstWrapper{dst1}, dr, srcWrapper{src}, sr, nil) + } + + if !bytes.Equal(dst0.Pix, dst1.Pix) { + t.Errorf("pix differ for dr=%v, src=%T, sr=%v, transform=%t, q=%T", + dr, src, sr, transform, q) + } + } } } } @@ -385,10 +402,11 @@ func benchTform(b *testing.B, srcf func(image.Rectangle) (image.Image, error), w b.Fatal(err) } sr := src.Bounds() + m := transformMatrix(40, 10) b.ResetTimer() for i := 0; i < b.N; i++ { - q.Transform(dst, transformMatrix, src, sr, nil) + q.Transform(dst, m, src, sr, nil) } } @@ -413,8 +431,14 @@ func BenchmarkScaleSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, A func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) } func BenchmarkScaleSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) } -func BenchmarkTformSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, ApproxBiLinear) } -func BenchmarkTformSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) } -func BenchmarkTformSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) } -func BenchmarkTformSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, ApproxBiLinear) } -func BenchmarkTformSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) } +func BenchmarkTformABSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, ApproxBiLinear) } +func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) } +func BenchmarkTformABSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) } +func BenchmarkTformABSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, ApproxBiLinear) } +func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) } + +func BenchmarkTformCRSrcGray(b *testing.B) { benchTform(b, srcGray, 200, 150, CatmullRom) } +func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, CatmullRom) } +func BenchmarkTformCRSrcRGBA(b *testing.B) { benchTform(b, srcRGBA, 200, 150, CatmullRom) } +func BenchmarkTformCRSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, CatmullRom) } +func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, CatmullRom) } diff --git a/testdata/go-turns-two-rotate-bl.png b/testdata/go-turns-two-rotate-bl.png new file mode 100644 index 0000000..b4e1279 Binary files /dev/null and b/testdata/go-turns-two-rotate-bl.png differ diff --git a/testdata/go-turns-two-rotate-cr.png b/testdata/go-turns-two-rotate-cr.png new file mode 100644 index 0000000..0b64d02 Binary files /dev/null and b/testdata/go-turns-two-rotate-cr.png differ