draw: avoid FMA (Fused Multiply Add)

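Changing "expr" to "float64(expr)" disables FMA on that expression, even
when expr's type is already nominally float64. Per the Go spec, an
explicit floating-point type conversion rounds to the precision of the
target type, so the compiler may not fuse the wrapped multiplication
with a surrounding addition.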

The output of this package should now be independent of GOARCH.

See the "FMA (Fused Multiply Add) detector" discussion at
https://groups.google.com/g/golang-dev/c/oZv0PaZmUGs

Fixes golang/go#67029

Change-Id: I43cecee0e9c8156eae39f52b5a2338c34d31a65a
Reviewed-on: https://go-review.googlesource.com/c/image/+/596816
Reviewed-by: Nigel Tao (INACTIVE; USE @golang.org INSTEAD) <nigeltao@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
diff --git a/draw/gen.go b/draw/gen.go
index 298d3b3..c975d32 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -283,20 +283,20 @@
 		switch d.sType {
 		default:
 			return argf(args, ""+
-				"$3r = $0*$1r + $2*$3r\n"+
-				"$3g = $0*$1g + $2*$3g\n"+
-				"$3b = $0*$1b + $2*$3b\n"+
-				"$3a = $0*$1a + $2*$3a",
+				"$3r = float64($0*$1r) + float64($2*$3r)\n"+
+				"$3g = float64($0*$1g) + float64($2*$3g)\n"+
+				"$3b = float64($0*$1b) + float64($2*$3b)\n"+
+				"$3a = float64($0*$1a) + float64($2*$3a)",
 			)
 		case "*image.Gray":
 			return argf(args, ""+
-				"$3r = $0*$1r + $2*$3r",
+				"$3r = float64($0*$1r) + float64($2*$3r)",
 			)
 		case "*image.YCbCr":
 			return argf(args, ""+
-				"$3r = $0*$1r + $2*$3r\n"+
-				"$3g = $0*$1g + $2*$3g\n"+
-				"$3b = $0*$1b + $2*$3b",
+				"$3r = float64($0*$1r) + float64($2*$3r)\n"+
+				"$3g = float64($0*$1g) + float64($2*$3g)\n"+
+				"$3b = float64($0*$1b) + float64($2*$3b)",
 			)
 		}
 
@@ -783,34 +783,39 @@
 		}
 
 		if dollar == "srcf" {
+			avoidFMA0, avoidFMA1 := "", "" // FMA is Fused Multiply Add.
+			if extra != "" {
+				avoidFMA0, avoidFMA1 = "float64(", ")"
+			}
+
 			switch d.sType {
 			default:
 				fmt.Fprintf(buf, ""+
-					"%[1]sr %[2]s float64(%[1]sru)%[3]s\n"+
-					"%[1]sg %[2]s float64(%[1]sgu)%[3]s\n"+
-					"%[1]sb %[2]s float64(%[1]sbu)%[3]s\n"+
-					"%[1]sa %[2]s float64(%[1]sau)%[3]s\n",
-					lhs, eqOp, extra,
+					"%[1]sr %[2]s %[4]sfloat64(%[1]sru)%[3]s%[5]s\n"+
+					"%[1]sg %[2]s %[4]sfloat64(%[1]sgu)%[3]s%[5]s\n"+
+					"%[1]sb %[2]s %[4]sfloat64(%[1]sbu)%[3]s%[5]s\n"+
+					"%[1]sa %[2]s %[4]sfloat64(%[1]sau)%[3]s%[5]s\n",
+					lhs, eqOp, extra, avoidFMA0, avoidFMA1,
 				)
 			case "*image.Gray":
 				fmt.Fprintf(buf, ""+
-					"%[1]sr %[2]s float64(%[1]sru)%[3]s\n",
-					lhs, eqOp, extra,
+					"%[1]sr %[2]s %[4]sfloat64(%[1]sru)%[3]s%[5]s\n",
+					lhs, eqOp, extra, avoidFMA0, avoidFMA1,
 				)
 			case "*image.YCbCr":
 				fmt.Fprintf(buf, ""+
-					"%[1]sr %[2]s float64(%[1]sru)%[3]s\n"+
-					"%[1]sg %[2]s float64(%[1]sgu)%[3]s\n"+
-					"%[1]sb %[2]s float64(%[1]sbu)%[3]s\n",
-					lhs, eqOp, extra,
+					"%[1]sr %[2]s %[4]sfloat64(%[1]sru)%[3]s%[5]s\n"+
+					"%[1]sg %[2]s %[4]sfloat64(%[1]sgu)%[3]s%[5]s\n"+
+					"%[1]sb %[2]s %[4]sfloat64(%[1]sbu)%[3]s%[5]s\n",
+					lhs, eqOp, extra, avoidFMA0, avoidFMA1,
 				)
 			case "image.RGBA64Image":
 				fmt.Fprintf(buf, ""+
-					"%[1]sr %[2]s float64(%[1]su.R)%[3]s\n"+
-					"%[1]sg %[2]s float64(%[1]su.G)%[3]s\n"+
-					"%[1]sb %[2]s float64(%[1]su.B)%[3]s\n"+
-					"%[1]sa %[2]s float64(%[1]su.A)%[3]s\n",
-					lhs, eqOp, extra,
+					"%[1]sr %[2]s %[4]sfloat64(%[1]su.R)%[3]s%[5]s\n"+
+					"%[1]sg %[2]s %[4]sfloat64(%[1]su.G)%[3]s%[5]s\n"+
+					"%[1]sb %[2]s %[4]sfloat64(%[1]su.B)%[3]s%[5]s\n"+
+					"%[1]sa %[2]s %[4]sfloat64(%[1]su.A)%[3]s%[5]s\n",
+					lhs, eqOp, extra, avoidFMA0, avoidFMA1,
 				)
 			}
 		}
@@ -1175,8 +1180,8 @@
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					sx0 := int(d2s[0]*dxf + d2s[1]*dyf + d2s[2]) + bias.X
-					sy0 := int(d2s[3]*dxf + d2s[4]*dyf + d2s[5]) + bias.Y
+					sx0 := int(float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]) + bias.X
+					sy0 := int(float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@@ -1197,7 +1202,7 @@
 			$preOuter
 
 			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-				sy := (float64(dy)+0.5)*yscale - 0.5
+				sy := float64((float64(dy)+0.5)*yscale) - 0.5
 				// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 				// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 				// sx, below.
@@ -1215,7 +1220,7 @@
 				$preInner
 
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
-					sx := (float64(dx)+0.5)*xscale - 0.5
+					sx := float64((float64(dx)+0.5)*xscale) - 0.5
 					sx0 := int32(sx)
 					xFrac0 := sx - float64(sx0)
 					xFrac1 := 1 - xFrac0
@@ -1250,8 +1255,8 @@
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+					sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+					sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}
@@ -1458,10 +1463,10 @@
 					var pr, pg, pb, pa float64
 					for _, c := range z.vertical.contribs[s.i:s.j] {
 						p := &tmp[c.coord*z.dw+dx]
-						pr += p[0] * c.weight
-						pg += p[1] * c.weight
-						pb += p[2] * c.weight
-						pa += p[3] * c.weight
+						pr += float64(p[0] * c.weight)
+						pg += float64(p[1] * c.weight)
+						pb += float64(p[2] * c.weight)
+						pa += float64(p[3] * c.weight)
 					}
 					$clampToAlpha
 					$outputf[dr.Min.X + int(dx), dr.Min.Y + int(adr.Min.Y + dy), ftou, p, s.invTotalWeight]
@@ -1495,8 +1500,8 @@
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+					sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+					sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}
diff --git a/draw/impl.go b/draw/impl.go
index 94ee826..fcd1994 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -778,8 +778,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -800,8 +800,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -825,8 +825,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -849,8 +849,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -874,8 +874,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -898,8 +898,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -942,8 +942,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -986,8 +986,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1030,8 +1030,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1074,8 +1074,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1095,8 +1095,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1115,8 +1115,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1136,8 +1136,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1159,8 +1159,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1199,8 +1199,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1241,8 +1241,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1281,8 +1281,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+			sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -1559,7 +1559,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -1577,7 +1577,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -1596,15 +1596,15 @@
 			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
 			s10ru := uint32(src.Pix[s10i]) * 0x101
 			s10r := float64(s10ru)
-			s10r = xFrac1*s00r + xFrac0*s10r
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 			s01ru := uint32(src.Pix[s01i]) * 0x101
 			s01r := float64(s01ru)
 			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
 			s11ru := uint32(src.Pix[s11i]) * 0x101
 			s11r := float64(s11ru)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11r = yFrac1*s10r + yFrac0*s11r
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
 			pr := uint32(s11r)
 			out := uint8(pr >> 8)
 			dst.Pix[d+0] = out
@@ -1623,7 +1623,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -1641,7 +1641,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -1672,10 +1672,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
 			s01au := uint32(src.Pix[s01i+3]) * 0x101
 			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
@@ -1694,14 +1694,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -1723,7 +1723,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -1741,7 +1741,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -1772,10 +1772,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
 			s01au := uint32(src.Pix[s01i+3]) * 0x101
 			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
@@ -1794,14 +1794,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -1822,7 +1822,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -1840,7 +1840,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -1871,10 +1871,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
 			s01ru := uint32(src.Pix[s01i+0]) * 0x101
 			s01gu := uint32(src.Pix[s01i+1]) * 0x101
@@ -1893,14 +1893,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -1922,7 +1922,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -1940,7 +1940,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -1971,10 +1971,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
 			s01ru := uint32(src.Pix[s01i+0]) * 0x101
 			s01gu := uint32(src.Pix[s01i+1]) * 0x101
@@ -1993,14 +1993,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2021,7 +2021,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2039,7 +2039,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2110,9 +2110,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 
@@ -2171,12 +2171,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2196,7 +2196,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2214,7 +2214,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2285,9 +2285,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
 
@@ -2346,12 +2346,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2371,7 +2371,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2389,7 +2389,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2460,9 +2460,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
 
@@ -2521,12 +2521,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2546,7 +2546,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2564,7 +2564,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2635,9 +2635,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
 
@@ -2696,12 +2696,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2721,7 +2721,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2739,7 +2739,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2762,10 +2762,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1))
 			s01r := float64(s01u.R)
 			s01g := float64(s01u.G)
@@ -2776,14 +2776,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			pa1 := (0xffff - uint32(p.A)) * 0x101
 			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8)
@@ -2802,7 +2802,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2820,7 +2820,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2843,10 +2843,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1))
 			s01r := float64(s01u.R)
 			s01g := float64(s01u.G)
@@ -2857,14 +2857,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			dst.Pix[d+0] = uint8(p.R >> 8)
 			dst.Pix[d+1] = uint8(p.G >> 8)
@@ -2882,7 +2882,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2900,7 +2900,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -2923,10 +2923,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
 			s01r := float64(s01ru)
 			s01g := float64(s01gu)
@@ -2937,14 +2937,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -2966,7 +2966,7 @@
 	swMinus1, shMinus1 := sw-1, sh-1
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -2984,7 +2984,7 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -3007,10 +3007,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
 			s01r := float64(s01ru)
 			s01g := float64(s01gu)
@@ -3021,14 +3021,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3052,7 +3052,7 @@
 	dstColorRGBA64 := color.RGBA64{}
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -3069,7 +3069,7 @@
 		}
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -3106,10 +3106,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1))
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
@@ -3134,14 +3134,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
 			if dstMask != nil {
@@ -3172,7 +3172,7 @@
 	dstColorRGBA64 := color.RGBA64{}
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -3189,7 +3189,7 @@
 		}
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -3226,10 +3226,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1))
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
@@ -3254,14 +3254,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			if dstMask != nil {
 				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
@@ -3295,7 +3295,7 @@
 	dstColor := color.Color(dstColorRGBA64)
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -3312,7 +3312,7 @@
 		}
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -3349,10 +3349,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
@@ -3377,14 +3377,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3419,7 +3419,7 @@
 	dstColor := color.Color(dstColorRGBA64)
 
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
+		sy := float64((float64(dy)+0.5)*yscale) - 0.5
 		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
 		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
 		// sx, below.
@@ -3436,7 +3436,7 @@
 		}
 
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
-			sx := (float64(dx)+0.5)*xscale - 0.5
+			sx := float64((float64(dx)+0.5)*xscale) - 0.5
 			sx0 := int32(sx)
 			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
@@ -3473,10 +3473,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
@@ -3501,14 +3501,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3543,8 +3543,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -3583,15 +3583,15 @@
 			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
 			s10ru := uint32(src.Pix[s10i]) * 0x101
 			s10r := float64(s10ru)
-			s10r = xFrac1*s00r + xFrac0*s10r
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
 			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
 			s01ru := uint32(src.Pix[s01i]) * 0x101
 			s01r := float64(s01ru)
 			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
 			s11ru := uint32(src.Pix[s11i]) * 0x101
 			s11r := float64(s11ru)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11r = yFrac1*s10r + yFrac0*s11r
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
 			pr := uint32(s11r)
 			out := uint8(pr >> 8)
 			dst.Pix[d+0] = out
@@ -3608,8 +3608,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -3660,10 +3660,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
 			s01au := uint32(src.Pix[s01i+3]) * 0x101
 			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
@@ -3682,14 +3682,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3709,8 +3709,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -3761,10 +3761,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
 			s01au := uint32(src.Pix[s01i+3]) * 0x101
 			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
@@ -3783,14 +3783,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3809,8 +3809,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -3861,10 +3861,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
 			s01ru := uint32(src.Pix[s01i+0]) * 0x101
 			s01gu := uint32(src.Pix[s01i+1]) * 0x101
@@ -3883,14 +3883,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -3910,8 +3910,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -3962,10 +3962,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
 			s01ru := uint32(src.Pix[s01i+0]) * 0x101
 			s01gu := uint32(src.Pix[s01i+1]) * 0x101
@@ -3984,14 +3984,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4010,8 +4010,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4102,9 +4102,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
 			s01j := (sy1-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)
 
@@ -4163,12 +4163,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4186,8 +4186,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4278,9 +4278,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
 			s01j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
 
@@ -4339,12 +4339,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4362,8 +4362,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4454,9 +4454,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
 			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
 
@@ -4515,12 +4515,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4538,8 +4538,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4630,9 +4630,9 @@
 			s10r := float64(s10ru)
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
 			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
 			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)
 
@@ -4691,12 +4691,12 @@
 			s11r := float64(s11ru)
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4714,8 +4714,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4758,10 +4758,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sx0, sy1)
 			s01r := float64(s01u.R)
 			s01g := float64(s01u.G)
@@ -4772,14 +4772,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			pa1 := (0xffff - uint32(p.A)) * 0x101
 			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8)
@@ -4796,8 +4796,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4840,10 +4840,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sx0, sy1)
 			s01r := float64(s01u.R)
 			s01g := float64(s01u.G)
@@ -4854,14 +4854,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			dst.Pix[d+0] = uint8(p.R >> 8)
 			dst.Pix[d+1] = uint8(p.G >> 8)
@@ -4877,8 +4877,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -4921,10 +4921,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
 			s01r := float64(s01ru)
 			s01g := float64(s01gu)
@@ -4935,14 +4935,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -4962,8 +4962,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -5006,10 +5006,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
 			s01r := float64(s01ru)
 			s01g := float64(s01gu)
@@ -5020,14 +5020,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -5049,8 +5049,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -5107,10 +5107,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sx0, sy1)
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
@@ -5135,14 +5135,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
 			if dstMask != nil {
@@ -5171,8 +5171,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -5229,10 +5229,10 @@
 			s10g := float64(s10u.G)
 			s10b := float64(s10u.B)
 			s10a := float64(s10u.A)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01u := src.RGBA64At(sx0, sy1)
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
@@ -5257,14 +5257,14 @@
 			s11g := float64(s11u.G)
 			s11b := float64(s11u.B)
 			s11a := float64(s11u.A)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
 			if dstMask != nil {
 				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
@@ -5295,8 +5295,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -5353,10 +5353,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
@@ -5381,14 +5381,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -5420,8 +5420,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -5478,10 +5478,10 @@
 			s10g := float64(s10gu)
 			s10b := float64(s10bu)
 			s10a := float64(s10au)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s10a = xFrac1*s00a + xFrac0*s10a
+			s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r)
+			s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g)
+			s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b)
+			s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a)
 			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
 			if srcMask != nil {
 				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
@@ -5506,14 +5506,14 @@
 			s11g := float64(s11gu)
 			s11b := float64(s11bu)
 			s11a := float64(s11au)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11a = xFrac1*s01a + xFrac0*s11a
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			s11a = yFrac1*s10a + yFrac0*s11a
+			s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r)
+			s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g)
+			s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b)
+			s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a)
+			s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r)
+			s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g)
+			s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b)
+			s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a)
 			pr := uint32(s11r)
 			pg := uint32(s11g)
 			pb := uint32(s11b)
@@ -5783,7 +5783,7 @@
 			for _, c := range z.horizontal.contribs[s.i:s.j] {
 				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
 				pru := uint32(src.Pix[pi]) * 0x101
-				pr += float64(pru) * c.weight
+				pr += float64(float64(pru) * c.weight)
 			}
 			pr *= s.invTotalWeightFFFF
 			tmp[t] = [4]float64{
@@ -5808,10 +5808,10 @@
 				pru := uint32(src.Pix[pi+0]) * pau / 0xff
 				pgu := uint32(src.Pix[pi+1]) * pau / 0xff
 				pbu := uint32(src.Pix[pi+2]) * pau / 0xff
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
-				pa += float64(pau) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
+				pa += float64(float64(pau) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -5835,10 +5835,10 @@
 				pgu := uint32(src.Pix[pi+1]) * 0x101
 				pbu := uint32(src.Pix[pi+2]) * 0x101
 				pau := uint32(src.Pix[pi+3]) * 0x101
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
-				pa += float64(pau) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
+				pa += float64(float64(pau) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -5883,9 +5883,9 @@
 					pbu = 0xffff
 				}
 
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -5930,9 +5930,9 @@
 					pbu = 0xffff
 				}
 
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -5977,9 +5977,9 @@
 					pbu = 0xffff
 				}
 
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -6024,9 +6024,9 @@
 					pbu = 0xffff
 				}
 
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -6054,10 +6054,10 @@
 					pu.B = uint16(uint32(pu.B) * ma / 0xffff)
 					pu.A = uint16(uint32(pu.A) * ma / 0xffff)
 				}
-				pr += float64(pu.R) * c.weight
-				pg += float64(pu.G) * c.weight
-				pb += float64(pu.B) * c.weight
-				pa += float64(pu.A) * c.weight
+				pr += float64(float64(pu.R) * c.weight)
+				pg += float64(float64(pu.G) * c.weight)
+				pb += float64(float64(pu.B) * c.weight)
+				pa += float64(float64(pu.A) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -6085,10 +6085,10 @@
 					pbu = pbu * ma / 0xffff
 					pau = pau * ma / 0xffff
 				}
-				pr += float64(pru) * c.weight
-				pg += float64(pgu) * c.weight
-				pb += float64(pbu) * c.weight
-				pa += float64(pau) * c.weight
+				pr += float64(float64(pru) * c.weight)
+				pg += float64(float64(pgu) * c.weight)
+				pb += float64(float64(pbu) * c.weight)
+				pa += float64(float64(pau) * c.weight)
 			}
 			tmp[t] = [4]float64{
 				pr * s.invTotalWeightFFFF,
@@ -6108,10 +6108,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6145,10 +6145,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6179,10 +6179,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6226,10 +6226,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6275,10 +6275,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6322,10 +6322,10 @@
 			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+dx]
-				pr += p[0] * c.weight
-				pg += p[1] * c.weight
-				pb += p[2] * c.weight
-				pa += p[3] * c.weight
+				pr += float64(p[0] * c.weight)
+				pg += float64(p[1] * c.weight)
+				pb += float64(p[2] * c.weight)
+				pa += float64(p[3] * c.weight)
 			}
 
 			if pr > pa {
@@ -6384,8 +6384,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -6447,7 +6447,7 @@
 						if w := xWeights[kx-ix] * yWeight; w != 0 {
 							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
 							pru := uint32(src.Pix[pi]) * 0x101
-							pr += float64(pru) * w
+							pr += float64(float64(pru) * w)
 						}
 					}
 				}
@@ -6483,8 +6483,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -6549,10 +6549,10 @@
 							pru := uint32(src.Pix[pi+0]) * pau / 0xff
 							pgu := uint32(src.Pix[pi+1]) * pau / 0xff
 							pbu := uint32(src.Pix[pi+2]) * pau / 0xff
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -6603,8 +6603,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -6669,10 +6669,10 @@
 							pru := uint32(src.Pix[pi+0]) * pau / 0xff
 							pgu := uint32(src.Pix[pi+1]) * pau / 0xff
 							pbu := uint32(src.Pix[pi+2]) * pau / 0xff
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -6718,8 +6718,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -6784,10 +6784,10 @@
 							pgu := uint32(src.Pix[pi+1]) * 0x101
 							pbu := uint32(src.Pix[pi+2]) * 0x101
 							pau := uint32(src.Pix[pi+3]) * 0x101
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -6838,8 +6838,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -6904,10 +6904,10 @@
 							pgu := uint32(src.Pix[pi+1]) * 0x101
 							pbu := uint32(src.Pix[pi+2]) * 0x101
 							pau := uint32(src.Pix[pi+3]) * 0x101
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -6953,8 +6953,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7040,9 +7040,9 @@
 								pbu = 0xffff
 							}
 
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
 						}
 					}
 				}
@@ -7077,8 +7077,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7164,9 +7164,9 @@
 								pbu = 0xffff
 							}
 
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
 						}
 					}
 				}
@@ -7201,8 +7201,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7288,9 +7288,9 @@
 								pbu = 0xffff
 							}
 
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
 						}
 					}
 				}
@@ -7325,8 +7325,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7412,9 +7412,9 @@
 								pbu = 0xffff
 							}
 
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
 						}
 					}
 				}
@@ -7449,8 +7449,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7511,10 +7511,10 @@
 					for kx := ix; kx < jx; kx++ {
 						if w := xWeights[kx-ix] * yWeight; w != 0 {
 							pu := src.RGBA64At(kx, ky)
-							pr += float64(pu.R) * w
-							pg += float64(pu.G) * w
-							pb += float64(pu.B) * w
-							pa += float64(pu.A) * w
+							pr += float64(float64(pu.R) * w)
+							pg += float64(float64(pu.G) * w)
+							pb += float64(float64(pu.B) * w)
+							pa += float64(float64(pu.A) * w)
 						}
 					}
 				}
@@ -7565,8 +7565,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7627,10 +7627,10 @@
 					for kx := ix; kx < jx; kx++ {
 						if w := xWeights[kx-ix] * yWeight; w != 0 {
 							pu := src.RGBA64At(kx, ky)
-							pr += float64(pu.R) * w
-							pg += float64(pu.G) * w
-							pb += float64(pu.B) * w
-							pa += float64(pu.A) * w
+							pr += float64(float64(pu.R) * w)
+							pg += float64(float64(pu.G) * w)
+							pb += float64(float64(pu.B) * w)
+							pa += float64(float64(pu.A) * w)
 						}
 					}
 				}
@@ -7676,8 +7676,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7738,10 +7738,10 @@
 					for kx := ix; kx < jx; kx++ {
 						if w := xWeights[kx-ix] * yWeight; w != 0 {
 							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -7792,8 +7792,8 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7854,10 +7854,10 @@
 					for kx := ix; kx < jx; kx++ {
 						if w := xWeights[kx-ix] * yWeight; w != 0 {
 							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -7906,8 +7906,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -7975,10 +7975,10 @@
 								pu.B = uint16(uint32(pu.B) * ma / 0xffff)
 								pu.A = uint16(uint32(pu.A) * ma / 0xffff)
 							}
-							pr += float64(pu.R) * w
-							pg += float64(pu.G) * w
-							pb += float64(pu.B) * w
-							pa += float64(pu.A) * w
+							pr += float64(float64(pu.R) * w)
+							pg += float64(float64(pu.G) * w)
+							pb += float64(float64(pu.B) * w)
+							pa += float64(float64(pu.A) * w)
 						}
 					}
 				}
@@ -8041,8 +8041,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -8110,10 +8110,10 @@
 								pu.B = uint16(uint32(pu.B) * ma / 0xffff)
 								pu.A = uint16(uint32(pu.A) * ma / 0xffff)
 							}
-							pr += float64(pu.R) * w
-							pg += float64(pu.G) * w
-							pb += float64(pu.B) * w
-							pa += float64(pu.A) * w
+							pr += float64(float64(pu.R) * w)
+							pg += float64(float64(pu.G) * w)
+							pb += float64(float64(pu.B) * w)
+							pa += float64(float64(pu.A) * w)
 						}
 					}
 				}
@@ -8178,8 +8178,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -8247,10 +8247,10 @@
 								pbu = pbu * ma / 0xffff
 								pau = pau * ma / 0xffff
 							}
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
@@ -8313,8 +8313,8 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2]
+			sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5]
 			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
@@ -8382,10 +8382,10 @@
 								pbu = pbu * ma / 0xffff
 								pau = pau * ma / 0xffff
 							}
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-							pa += float64(pau) * w
+							pr += float64(float64(pru) * w)
+							pg += float64(float64(pgu) * w)
+							pb += float64(float64(pbu) * w)
+							pa += float64(float64(pau) * w)
 						}
 					}
 				}
diff --git a/draw/scale.go b/draw/scale.go
index ba1bdf3..aef200b 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -182,9 +182,9 @@
 	// Computer Graphics", Computer Graphics, Vol. 22, No. 4, pp. 221-228.
 	CatmullRom = &Kernel{2, func(t float64) float64 {
 		if t < 1 {
-			return (1.5*t-2.5)*t*t + 1
+			return float64((float64(1.5*t)-2.5)*t*t) + 1
 		}
-		return ((-0.5*t+2.5)*t-4)*t + 2
+		return float64((float64(float64(float64(-0.5*t)+2.5)*t)-4)*t) + 2
 	}}
 
 	// TODO: a Kaiser-Bessel kernel?
@@ -247,7 +247,7 @@
 	// source column or row.
 	n, sources := int32(0), make([]source, dw)
 	for x := range sources {
-		center := (float64(x)+0.5)*scale - 0.5
+		center := float64((float64(x)+0.5)*scale) - 0.5
 		i := int32(math.Floor(center - halfWidth))
 		if i < 0 {
 			i = 0
@@ -302,7 +302,7 @@
 
 // ftou converts the range [0.0, 1.0] to [0, 0xffff].
 func ftou(f float64) uint16 {
-	i := int32(0xffff*f + 0.5)
+	i := int32(float64(0xffff*f) + 0.5)
 	if i > 0xffff {
 		return 0xffff
 	}
@@ -332,12 +332,12 @@
 func invert(m *f64.Aff3) f64.Aff3 {
 	m00 := +m[3*1+1]
 	m01 := -m[3*0+1]
-	m02 := +m[3*1+2]*m[3*0+1] - m[3*1+1]*m[3*0+2]
+	m02 := +float64(m[3*1+2]*m[3*0+1]) - float64(m[3*1+1]*m[3*0+2])
 	m10 := -m[3*1+0]
 	m11 := +m[3*0+0]
-	m12 := +m[3*1+0]*m[3*0+2] - m[3*1+2]*m[3*0+0]
+	m12 := +float64(m[3*1+0]*m[3*0+2]) - float64(m[3*1+2]*m[3*0+0])
 
-	det := m00*m11 - m10*m01
+	det := float64(m00*m11) - float64(m10*m01)
 
 	return f64.Aff3{
 		m00 / det,
@@ -351,12 +351,12 @@
 
 func matMul(p, q *f64.Aff3) f64.Aff3 {
 	return f64.Aff3{
-		p[3*0+0]*q[3*0+0] + p[3*0+1]*q[3*1+0],
-		p[3*0+0]*q[3*0+1] + p[3*0+1]*q[3*1+1],
-		p[3*0+0]*q[3*0+2] + p[3*0+1]*q[3*1+2] + p[3*0+2],
-		p[3*1+0]*q[3*0+0] + p[3*1+1]*q[3*1+0],
-		p[3*1+0]*q[3*0+1] + p[3*1+1]*q[3*1+1],
-		p[3*1+0]*q[3*0+2] + p[3*1+1]*q[3*1+2] + p[3*1+2],
+		float64(p[3*0+0]*q[3*0+0]) + float64(p[3*0+1]*q[3*1+0]),
+		float64(p[3*0+0]*q[3*0+1]) + float64(p[3*0+1]*q[3*1+1]),
+		float64(p[3*0+0]*q[3*0+2]) + float64(p[3*0+1]*q[3*1+2]) + p[3*0+2],
+		float64(p[3*1+0]*q[3*0+0]) + float64(p[3*1+1]*q[3*1+0]),
+		float64(p[3*1+0]*q[3*0+1]) + float64(p[3*1+1]*q[3*1+1]),
+		float64(p[3*1+0]*q[3*0+2]) + float64(p[3*1+1]*q[3*1+2]) + p[3*1+2],
 	}
 }
 
@@ -371,8 +371,8 @@
 	for i, p := range ps {
 		sxf := float64(p.X)
 		syf := float64(p.Y)
-		dx := int(math.Floor(s2d[0]*sxf + s2d[1]*syf + s2d[2]))
-		dy := int(math.Floor(s2d[3]*sxf + s2d[4]*syf + s2d[5]))
+		dx := int(math.Floor(float64(s2d[0]*sxf) + float64(s2d[1]*syf) + s2d[2]))
+		dy := int(math.Floor(float64(s2d[3]*sxf) + float64(s2d[4]*syf) + s2d[5]))
 
 		// The +1 adjustments below are because an image.Rectangle is inclusive
 		// on the low end but exclusive on the high end.
@@ -428,8 +428,8 @@
 				d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 					dxf := float64(dr.Min.X+int(dx)) + 0.5
-					sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-					sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+					sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+					sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@@ -450,8 +450,8 @@
 				dyf := float64(dr.Min.Y+int(dy)) + 0.5
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 					dxf := float64(dr.Min.X+int(dx)) + 0.5
-					sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-					sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+					sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+					sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@@ -479,8 +479,8 @@
 				d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 					dxf := float64(dr.Min.X+int(dx)) + 0.5
-					sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-					sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+					sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+					sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@@ -505,8 +505,8 @@
 				dyf := float64(dr.Min.Y+int(dy)) + 0.5
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 					dxf := float64(dr.Min.X+int(dx)) + 0.5
-					sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-					sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+					sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X
+					sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}