feat: reduce bounds checks by re-slicing pixel data
diff --git a/resize.go b/resize.go
index 97f498a..0c78e47 100644
--- a/resize.go
+++ b/resize.go
@@ -116,23 +116,25 @@
for y := range ys {
src.scan(0, y, src.w, y+1, scanLine)
j0 := y * dst.Stride
- for x := 0; x < width; x++ {
+ for x := range weights {
var r, g, b, a float64
for _, w := range weights[x] {
i := w.index * 4
- aw := float64(scanLine[i+3]) * w.weight
- r += float64(scanLine[i+0]) * aw
- g += float64(scanLine[i+1]) * aw
- b += float64(scanLine[i+2]) * aw
+ s := scanLine[i : i+4 : i+4]
+ aw := float64(s[3]) * w.weight
+ r += float64(s[0]) * aw
+ g += float64(s[1]) * aw
+ b += float64(s[2]) * aw
a += aw
}
if a != 0 {
aInv := 1 / a
j := j0 + x*4
- dst.Pix[j+0] = clamp(r * aInv)
- dst.Pix[j+1] = clamp(g * aInv)
- dst.Pix[j+2] = clamp(b * aInv)
- dst.Pix[j+3] = clamp(a)
+ d := dst.Pix[j : j+4 : j+4]
+ d[0] = clamp(r * aInv)
+ d[1] = clamp(g * aInv)
+ d[2] = clamp(b * aInv)
+ d[3] = clamp(a)
}
}
}
@@ -148,23 +150,25 @@
scanLine := make([]uint8, src.h*4)
for x := range xs {
src.scan(x, 0, x+1, src.h, scanLine)
- for y := 0; y < height; y++ {
+ for y := range weights {
var r, g, b, a float64
for _, w := range weights[y] {
i := w.index * 4
- aw := float64(scanLine[i+3]) * w.weight
- r += float64(scanLine[i+0]) * aw
- g += float64(scanLine[i+1]) * aw
- b += float64(scanLine[i+2]) * aw
+ s := scanLine[i : i+4 : i+4]
+ aw := float64(s[3]) * w.weight
+ r += float64(s[0]) * aw
+ g += float64(s[1]) * aw
+ b += float64(s[2]) * aw
a += aw
}
if a != 0 {
aInv := 1 / a
j := y*dst.Stride + x*4
- dst.Pix[j+0] = clamp(r * aInv)
- dst.Pix[j+1] = clamp(g * aInv)
- dst.Pix[j+2] = clamp(b * aInv)
- dst.Pix[j+3] = clamp(a)
+ d := dst.Pix[j : j+4 : j+4]
+ d[0] = clamp(r * aInv)
+ d[1] = clamp(g * aInv)
+ d[2] = clamp(b * aInv)
+ d[3] = clamp(a)
}
}
}
diff --git a/scanner.go b/scanner.go
index c4dbfe1..5e76987 100644
--- a/scanner.go
+++ b/scanner.go
@@ -44,10 +44,12 @@
for y := y1; y < y2; y++ {
i := y*img.Stride + x1*8
for x := x1; x < x2; x++ {
- dst[j+0] = img.Pix[i+0]
- dst[j+1] = img.Pix[i+2]
- dst[j+2] = img.Pix[i+4]
- dst[j+3] = img.Pix[i+6]
+ s := img.Pix[i : i+8 : i+8]
+ d := dst[j : j+4 : j+4]
+ d[0] = s[0]
+ d[1] = s[2]
+ d[2] = s[4]
+ d[3] = s[6]
j += 4
i += 8
}
@@ -58,26 +60,28 @@
for y := y1; y < y2; y++ {
i := y*img.Stride + x1*4
for x := x1; x < x2; x++ {
- a := img.Pix[i+3]
+ s := img.Pix[i : i+4 : i+4]
+ d := dst[j : j+4 : j+4]
+ a := s[3]
switch a {
case 0:
- dst[j+0] = 0
- dst[j+1] = 0
- dst[j+2] = 0
+ d[0] = 0
+ d[1] = 0
+ d[2] = 0
case 0xff:
- dst[j+0] = img.Pix[i+0]
- dst[j+1] = img.Pix[i+1]
- dst[j+2] = img.Pix[i+2]
+ d[0] = s[0]
+ d[1] = s[1]
+ d[2] = s[2]
default:
- r16 := uint16(img.Pix[i+0])
- g16 := uint16(img.Pix[i+1])
- b16 := uint16(img.Pix[i+2])
+ r16 := uint16(s[0])
+ g16 := uint16(s[1])
+ b16 := uint16(s[2])
a16 := uint16(a)
- dst[j+0] = uint8(r16 * 0xff / a16)
- dst[j+1] = uint8(g16 * 0xff / a16)
- dst[j+2] = uint8(b16 * 0xff / a16)
+ d[0] = uint8(r16 * 0xff / a16)
+ d[1] = uint8(g16 * 0xff / a16)
+ d[2] = uint8(b16 * 0xff / a16)
}
- dst[j+3] = a
+ d[3] = a
j += 4
i += 4
}
@@ -88,26 +92,28 @@
for y := y1; y < y2; y++ {
i := y*img.Stride + x1*8
for x := x1; x < x2; x++ {
- a := img.Pix[i+6]
+ s := img.Pix[i : i+8 : i+8]
+ d := dst[j : j+4 : j+4]
+ a := s[6]
switch a {
case 0:
- dst[j+0] = 0
- dst[j+1] = 0
- dst[j+2] = 0
+ d[0] = 0
+ d[1] = 0
+ d[2] = 0
case 0xff:
- dst[j+0] = img.Pix[i+0]
- dst[j+1] = img.Pix[i+2]
- dst[j+2] = img.Pix[i+4]
+ d[0] = s[0]
+ d[1] = s[2]
+ d[2] = s[4]
default:
- r32 := uint32(img.Pix[i+0])<<8 | uint32(img.Pix[i+1])
- g32 := uint32(img.Pix[i+2])<<8 | uint32(img.Pix[i+3])
- b32 := uint32(img.Pix[i+4])<<8 | uint32(img.Pix[i+5])
- a32 := uint32(img.Pix[i+6])<<8 | uint32(img.Pix[i+7])
- dst[j+0] = uint8((r32 * 0xffff / a32) >> 8)
- dst[j+1] = uint8((g32 * 0xffff / a32) >> 8)
- dst[j+2] = uint8((b32 * 0xffff / a32) >> 8)
+ r32 := uint32(s[0])<<8 | uint32(s[1])
+ g32 := uint32(s[2])<<8 | uint32(s[3])
+ b32 := uint32(s[4])<<8 | uint32(s[5])
+ a32 := uint32(s[6])<<8 | uint32(s[7])
+ d[0] = uint8((r32 * 0xffff / a32) >> 8)
+ d[1] = uint8((g32 * 0xffff / a32) >> 8)
+ d[2] = uint8((b32 * 0xffff / a32) >> 8)
}
- dst[j+3] = a
+ d[3] = a
j += 4
i += 8
}
@@ -119,10 +125,11 @@
i := y*img.Stride + x1
for x := x1; x < x2; x++ {
c := img.Pix[i]
- dst[j+0] = c
- dst[j+1] = c
- dst[j+2] = c
- dst[j+3] = 0xff
+ d := dst[j : j+4 : j+4]
+ d[0] = c
+ d[1] = c
+ d[2] = c
+ d[3] = 0xff
j += 4
i++
}
@@ -134,10 +141,11 @@
i := y*img.Stride + x1*2
for x := x1; x < x2; x++ {
c := img.Pix[i]
- dst[j+0] = c
- dst[j+1] = c
- dst[j+2] = c
- dst[j+3] = 0xff
+ d := dst[j : j+4 : j+4]
+ d[0] = c
+ d[1] = c
+ d[2] = c
+ d[3] = 0xff
j += 4
i += 2
}
@@ -191,10 +199,11 @@
b = 0
}
- dst[j+0] = uint8(r)
- dst[j+1] = uint8(g)
- dst[j+2] = uint8(b)
- dst[j+3] = 0xff
+ d := dst[j : j+4 : j+4]
+ d[0] = uint8(r)
+ d[1] = uint8(g)
+ d[2] = uint8(b)
+ d[3] = 0xff
iy++
j += 4
@@ -207,10 +216,11 @@
i := y*img.Stride + x1
for x := x1; x < x2; x++ {
c := s.palette[img.Pix[i]]
- dst[j+0] = c.R
- dst[j+1] = c.G
- dst[j+2] = c.B
- dst[j+3] = c.A
+ d := dst[j : j+4 : j+4]
+ d[0] = c.R
+ d[1] = c.G
+ d[2] = c.B
+ d[3] = c.A
j += 4
i++
}
@@ -226,22 +236,23 @@
for y := y1; y < y2; y++ {
for x := x1; x < x2; x++ {
r16, g16, b16, a16 := s.image.At(x, y).RGBA()
+ d := dst[j : j+4 : j+4]
switch a16 {
case 0xffff:
- dst[j+0] = uint8(r16 >> 8)
- dst[j+1] = uint8(g16 >> 8)
- dst[j+2] = uint8(b16 >> 8)
- dst[j+3] = 0xff
+ d[0] = uint8(r16 >> 8)
+ d[1] = uint8(g16 >> 8)
+ d[2] = uint8(b16 >> 8)
+ d[3] = 0xff
case 0:
- dst[j+0] = 0
- dst[j+1] = 0
- dst[j+2] = 0
- dst[j+3] = 0
+ d[0] = 0
+ d[1] = 0
+ d[2] = 0
+ d[3] = 0
default:
- dst[j+0] = uint8(((r16 * 0xffff) / a16) >> 8)
- dst[j+1] = uint8(((g16 * 0xffff) / a16) >> 8)
- dst[j+2] = uint8(((b16 * 0xffff) / a16) >> 8)
- dst[j+3] = uint8(a16 >> 8)
+ d[0] = uint8(((r16 * 0xffff) / a16) >> 8)
+ d[1] = uint8(((g16 * 0xffff) / a16) >> 8)
+ d[2] = uint8(((b16 * 0xffff) / a16) >> 8)
+ d[3] = uint8(a16 >> 8)
}
j += 4
}