draw: RGBA src fast path for scaling.
benchmark                 old ns/op     new ns/op     delta
BenchmarkScaleSrcRGBA     15124800      2091946       -86.17%

Change-Id: Id8d3088793ebf1d75b929fcf6945987817e87463
Reviewed-on: https://go-review.googlesource.com/6234
Reviewed-by: Rob Pike <r@golang.org>
This commit is contained in:
parent
2a40e8dacf
commit
b57ddf1b68
36
draw/gen.go
36
draw/gen.go
|
@ -284,10 +284,26 @@ func expnDollar(prefix, dollar, suffix string, d *data) string {
|
||||||
switch d.sType {
|
switch d.sType {
|
||||||
default:
|
default:
|
||||||
log.Fatalf("bad sType %q", d.sType)
|
log.Fatalf("bad sType %q", d.sType)
|
||||||
case "image.Image", "*image.NRGBA", "*image.RGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
|
case "image.Image", "*image.NRGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
|
||||||
fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
|
fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
|
||||||
"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
|
"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
|
||||||
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1])
|
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp,
|
||||||
|
args[0], args[1],
|
||||||
|
)
|
||||||
|
case "*image.RGBA":
|
||||||
|
// TODO: there's no need to multiply by 0x101 if the next thing
|
||||||
|
// we're going to do is shift right by 8.
|
||||||
|
fmt.Fprintf(buf, "%si := src.PixOffset(sp.X + int(%s), sp.Y+int(%s))\n"+
|
||||||
|
"%sr%s := uint32(src.Pix[%si+0]) * 0x101\n"+
|
||||||
|
"%sg%s := uint32(src.Pix[%si+1]) * 0x101\n"+
|
||||||
|
"%sb%s := uint32(src.Pix[%si+2]) * 0x101\n"+
|
||||||
|
"%sa%s := uint32(src.Pix[%si+3]) * 0x101\n",
|
||||||
|
lhs, args[0], args[1],
|
||||||
|
lhs, tmp, lhs,
|
||||||
|
lhs, tmp, lhs,
|
||||||
|
lhs, tmp, lhs,
|
||||||
|
lhs, tmp, lhs,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if dollar == "srcf" {
|
if dollar == "srcf" {
|
||||||
|
@ -399,8 +415,15 @@ const (
|
||||||
if dr.Empty() {
|
if dr.Empty() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
// the Pix fields directly without bounds checking.
|
||||||
|
if sr := (image.Rectangle{sp, sp.Add(image.Point{int(z.sw), int(z.sh)})}); !sr.In(src.Bounds()) {
|
||||||
|
z.scale_Image_Image(dst, dp, dr, src, sp)
|
||||||
|
} else {
|
||||||
$switch z.scale_$dTypeRN_$sTypeRN(dst, dp, dr, src, sp)
|
$switch z.scale_$dTypeRN_$sTypeRN(dst, dp, dr, src, sp)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
`
|
`
|
||||||
|
|
||||||
codeNNLeaf = `
|
codeNNLeaf = `
|
||||||
|
@ -478,7 +501,16 @@ const (
|
||||||
// scaleY distributes the temporary image's rows over the destination image.
|
// scaleY distributes the temporary image's rows over the destination image.
|
||||||
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
||||||
tmp := make([][4]float64, z.dw*z.sh)
|
tmp := make([][4]float64, z.dw*z.sh)
|
||||||
|
|
||||||
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
// the Pix fields directly without bounds checking.
|
||||||
|
if sr := (image.Rectangle{sp, sp.Add(image.Point{int(z.sw), int(z.sh)})}); !sr.In(src.Bounds()) {
|
||||||
|
z.scaleX_Image(tmp, src, sp)
|
||||||
|
} else {
|
||||||
$switchS z.scaleX_$sTypeRN(tmp, src, sp)
|
$switchS z.scaleX_$sTypeRN(tmp, src, sp)
|
||||||
|
}
|
||||||
|
|
||||||
$switchD z.scaleY_$dTypeRN(dst, dp, dr, tmp)
|
$switchD z.scaleY_$dTypeRN(dst, dp, dr, tmp)
|
||||||
}
|
}
|
||||||
`
|
`
|
||||||
|
|
59
draw/impl.go
59
draw/impl.go
|
@ -16,6 +16,12 @@ func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Po
|
||||||
if dr.Empty() {
|
if dr.Empty() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
// the Pix fields directly without bounds checking.
|
||||||
|
if sr := (image.Rectangle{sp, sp.Add(image.Point{int(z.sw), int(z.sh)})}); !sr.In(src.Bounds()) {
|
||||||
|
z.scale_Image_Image(dst, dp, dr, src, sp)
|
||||||
|
} else {
|
||||||
switch dst := dst.(type) {
|
switch dst := dst.(type) {
|
||||||
case *image.RGBA:
|
case *image.RGBA:
|
||||||
switch src := src.(type) {
|
switch src := src.(type) {
|
||||||
|
@ -37,6 +43,7 @@ func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Po
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
|
func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
|
||||||
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
|
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
|
||||||
|
@ -60,7 +67,11 @@ func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rec
|
||||||
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
|
d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
|
||||||
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
|
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
|
||||||
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
|
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
|
||||||
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
|
pi := src.PixOffset(sp.X+int(sx), sp.Y+int(sy))
|
||||||
|
pr := uint32(src.Pix[pi+0]) * 0x101
|
||||||
|
pg := uint32(src.Pix[pi+1]) * 0x101
|
||||||
|
pb := uint32(src.Pix[pi+2]) * 0x101
|
||||||
|
pa := uint32(src.Pix[pi+3]) * 0x101
|
||||||
dst.Pix[d+0] = uint8(uint32(pr) >> 8)
|
dst.Pix[d+0] = uint8(uint32(pr) >> 8)
|
||||||
dst.Pix[d+1] = uint8(uint32(pg) >> 8)
|
dst.Pix[d+1] = uint8(uint32(pg) >> 8)
|
||||||
dst.Pix[d+2] = uint8(uint32(pb) >> 8)
|
dst.Pix[d+2] = uint8(uint32(pb) >> 8)
|
||||||
|
@ -144,6 +155,12 @@ func (z *ablScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.P
|
||||||
if dr.Empty() {
|
if dr.Empty() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
// the Pix fields directly without bounds checking.
|
||||||
|
if sr := (image.Rectangle{sp, sp.Add(image.Point{int(z.sw), int(z.sh)})}); !sr.In(src.Bounds()) {
|
||||||
|
z.scale_Image_Image(dst, dp, dr, src, sp)
|
||||||
|
} else {
|
||||||
switch dst := dst.(type) {
|
switch dst := dst.(type) {
|
||||||
case *image.RGBA:
|
case *image.RGBA:
|
||||||
switch src := src.(type) {
|
switch src := src.(type) {
|
||||||
|
@ -165,6 +182,7 @@ func (z *ablScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.P
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
|
func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
|
||||||
yscale := float64(z.sh) / float64(z.dh)
|
yscale := float64(z.sh) / float64(z.dh)
|
||||||
|
@ -267,12 +285,20 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re
|
||||||
sx1 = sx0
|
sx1 = sx0
|
||||||
xFrac0, xFrac1 = 1, 0
|
xFrac0, xFrac1 = 1, 0
|
||||||
}
|
}
|
||||||
s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
|
s00i := src.PixOffset(sp.X+int(sx0), sp.Y+int(sy0))
|
||||||
|
s00ru := uint32(src.Pix[s00i+0]) * 0x101
|
||||||
|
s00gu := uint32(src.Pix[s00i+1]) * 0x101
|
||||||
|
s00bu := uint32(src.Pix[s00i+2]) * 0x101
|
||||||
|
s00au := uint32(src.Pix[s00i+3]) * 0x101
|
||||||
s00r := float64(s00ru)
|
s00r := float64(s00ru)
|
||||||
s00g := float64(s00gu)
|
s00g := float64(s00gu)
|
||||||
s00b := float64(s00bu)
|
s00b := float64(s00bu)
|
||||||
s00a := float64(s00au)
|
s00a := float64(s00au)
|
||||||
s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
|
s10i := src.PixOffset(sp.X+int(sx1), sp.Y+int(sy0))
|
||||||
|
s10ru := uint32(src.Pix[s10i+0]) * 0x101
|
||||||
|
s10gu := uint32(src.Pix[s10i+1]) * 0x101
|
||||||
|
s10bu := uint32(src.Pix[s10i+2]) * 0x101
|
||||||
|
s10au := uint32(src.Pix[s10i+3]) * 0x101
|
||||||
s10r := float64(s10ru)
|
s10r := float64(s10ru)
|
||||||
s10g := float64(s10gu)
|
s10g := float64(s10gu)
|
||||||
s10b := float64(s10bu)
|
s10b := float64(s10bu)
|
||||||
|
@ -281,12 +307,20 @@ func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Re
|
||||||
s10g = xFrac1*s00g + xFrac0*s10g
|
s10g = xFrac1*s00g + xFrac0*s10g
|
||||||
s10b = xFrac1*s00b + xFrac0*s10b
|
s10b = xFrac1*s00b + xFrac0*s10b
|
||||||
s10a = xFrac1*s00a + xFrac0*s10a
|
s10a = xFrac1*s00a + xFrac0*s10a
|
||||||
s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
|
s01i := src.PixOffset(sp.X+int(sx0), sp.Y+int(sy1))
|
||||||
|
s01ru := uint32(src.Pix[s01i+0]) * 0x101
|
||||||
|
s01gu := uint32(src.Pix[s01i+1]) * 0x101
|
||||||
|
s01bu := uint32(src.Pix[s01i+2]) * 0x101
|
||||||
|
s01au := uint32(src.Pix[s01i+3]) * 0x101
|
||||||
s01r := float64(s01ru)
|
s01r := float64(s01ru)
|
||||||
s01g := float64(s01gu)
|
s01g := float64(s01gu)
|
||||||
s01b := float64(s01bu)
|
s01b := float64(s01bu)
|
||||||
s01a := float64(s01au)
|
s01a := float64(s01au)
|
||||||
s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
|
s11i := src.PixOffset(sp.X+int(sx1), sp.Y+int(sy1))
|
||||||
|
s11ru := uint32(src.Pix[s11i+0]) * 0x101
|
||||||
|
s11gu := uint32(src.Pix[s11i+1]) * 0x101
|
||||||
|
s11bu := uint32(src.Pix[s11i+2]) * 0x101
|
||||||
|
s11au := uint32(src.Pix[s11i+3]) * 0x101
|
||||||
s11r := float64(s11ru)
|
s11r := float64(s11ru)
|
||||||
s11g := float64(s11gu)
|
s11g := float64(s11gu)
|
||||||
s11b := float64(s11bu)
|
s11b := float64(s11bu)
|
||||||
|
@ -607,6 +641,13 @@ func (z *kernelScaler) Scale(dst Image, dp image.Point, src image.Image, sp imag
|
||||||
// scaleY distributes the temporary image's rows over the destination image.
|
// scaleY distributes the temporary image's rows over the destination image.
|
||||||
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
// TODO: is it worth having a sync.Pool for this temporary buffer?
|
||||||
tmp := make([][4]float64, z.dw*z.sh)
|
tmp := make([][4]float64, z.dw*z.sh)
|
||||||
|
|
||||||
|
// sr is the source pixels. If it extends beyond the src bounds,
|
||||||
|
// we cannot use the type-specific fast paths, as they access
|
||||||
|
// the Pix fields directly without bounds checking.
|
||||||
|
if sr := (image.Rectangle{sp, sp.Add(image.Point{int(z.sw), int(z.sh)})}); !sr.In(src.Bounds()) {
|
||||||
|
z.scaleX_Image(tmp, src, sp)
|
||||||
|
} else {
|
||||||
switch src := src.(type) {
|
switch src := src.(type) {
|
||||||
case *image.NRGBA:
|
case *image.NRGBA:
|
||||||
z.scaleX_NRGBA(tmp, src, sp)
|
z.scaleX_NRGBA(tmp, src, sp)
|
||||||
|
@ -619,6 +660,8 @@ func (z *kernelScaler) Scale(dst Image, dp image.Point, src image.Image, sp imag
|
||||||
default:
|
default:
|
||||||
z.scaleX_Image(tmp, src, sp)
|
z.scaleX_Image(tmp, src, sp)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch dst := dst.(type) {
|
switch dst := dst.(type) {
|
||||||
case *image.RGBA:
|
case *image.RGBA:
|
||||||
z.scaleY_RGBA(dst, dp, dr, tmp)
|
z.scaleY_RGBA(dst, dp, dr, tmp)
|
||||||
|
@ -656,7 +699,11 @@ func (z *kernelScaler) scaleX_RGBA(tmp [][4]float64, src *image.RGBA, sp image.P
|
||||||
for _, s := range z.horizontal.sources {
|
for _, s := range z.horizontal.sources {
|
||||||
var pr, pg, pb, pa float64
|
var pr, pg, pb, pa float64
|
||||||
for _, c := range z.horizontal.contribs[s.i:s.j] {
|
for _, c := range z.horizontal.contribs[s.i:s.j] {
|
||||||
pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
|
pi := src.PixOffset(sp.X+int(c.coord), sp.Y+int(y))
|
||||||
|
pru := uint32(src.Pix[pi+0]) * 0x101
|
||||||
|
pgu := uint32(src.Pix[pi+1]) * 0x101
|
||||||
|
pbu := uint32(src.Pix[pi+2]) * 0x101
|
||||||
|
pau := uint32(src.Pix[pi+3]) * 0x101
|
||||||
pr += float64(pru) * c.weight
|
pr += float64(pru) * c.weight
|
||||||
pg += float64(pgu) * c.weight
|
pg += float64(pgu) * c.weight
|
||||||
pb += float64(pbu) * c.weight
|
pb += float64(pbu) * c.weight
|
||||||
|
|
Loading…
Reference in New Issue
Block a user