Fix integer cube map sample HLSL calculation

This patch fixes two issues in the HLSL generated to sample an integer cube map.
The first issue was that an inappropriate major axis was selected when sampling
from corners of the cube map. In particular, the added test case demonstrates a
situation where a major axis of length 0 was selected, which led to an infinite
LOD being calculated. The fix was to adjust the inequalities so that exactly one
of xMajor, yMajor and zMajor is always true.
The second issue was that the derivative functions ddx and ddy were being used
on values that depended on the choice of major axis, which is not continuous at
the corners of a cube map. This led to a finite but incorrect LOD being
calculated. The fix was to make sure the major axis choice is not included when
estimating the scaling factor.

Bug: angleproject:3442
Change-Id: Ia3eb8c89f47d1bfdadc18aec989e8cbebc088ec0
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1601515
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/compiler/translator/TextureFunctionHLSL.cpp b/src/compiler/translator/TextureFunctionHLSL.cpp
index 4fc1fce..2ae9d17 100644
--- a/src/compiler/translator/TextureFunctionHLSL.cpp
+++ b/src/compiler/translator/TextureFunctionHLSL.cpp
@@ -627,8 +627,8 @@
         out << "    " << textureReference
             << ".GetDimensions(baseLevel + mip, width, height, layers, levels);\n";
 
-        out << "    bool xMajor = abs(t.x) > abs(t.y) && abs(t.x) > abs(t.z);\n";
-        out << "    bool yMajor = abs(t.y) > abs(t.z) && abs(t.y) > abs(t.x);\n";
+        out << "    bool xMajor = abs(t.x) >= abs(t.y) && abs(t.x) >= abs(t.z);\n";
+        out << "    bool yMajor = abs(t.y) >= abs(t.z) && abs(t.y) > abs(t.x);\n";
         out << "    bool zMajor = abs(t.z) > abs(t.x) && abs(t.z) > abs(t.y);\n";
         out << "    bool negative = (xMajor && t.x < 0.0f) || (yMajor && t.y < 0.0f) || "
                "(zMajor && t.z < 0.0f);\n";
@@ -645,6 +645,7 @@
         out << "    float v = yMajor ? t.z : (negative ? t.y : -t.y);\n";
         out << "    float m = xMajor ? t.x : (yMajor ? t.y : t.z);\n";
 
+        out << "    float3 r = any(t) ? t : float3(1, 0, 0);\n";
         out << "    t.x = (u * 0.5f / m) + 0.5f;\n";
         out << "    t.y = (v * 0.5f / m) + 0.5f;\n";
 
@@ -655,10 +656,76 @@
         {
             if (textureFunction.method == TextureFunctionHLSL::TextureFunction::IMPLICIT)
             {
-                out << "    float2 tSized = float2(t.x * width, t.y * height);\n"
-                       "    float2 dx = ddx(tSized);\n"
-                       "    float2 dy = ddy(tSized);\n"
-                       "    float lod = 0.5f * log2(max(dot(dx, dx), dot(dy, dy)));\n";
+                // We would like to calculate the maximum of how many texels we move in the major
+                // face's texture as we move across the screen in any direction. Namely, we want the
+                // length of the directional derivative of the function p (defined below), maximized
+                // over screen space directions. (For short: we want the norm of Dp.) For
+                // simplicity, assume that z-axis is the major axis. By symmetry, we can assume that
+                // the positive z direction is major. (The calculated value will be the same even if
+                // this is false.) Let r denote the function from screen position to cube texture
+                // coordinates. Then p can be written as p = s . P . r, where P(r) = (r.x, r.y)/r.z
+                // is the projection onto the major cube face, and s = diag(width, height)/2. (s
+                // linearly maps from the cube face into texture space, so that p(r) is in units of
+                // texels.) The derivative is
+                // Dp(r) = s |1 0 -r.x/r.z|
+                //           |0 1 -r.y/r.z| |ddx(r) ddy(r)| / r.z
+                //       = |dot(a, ddx(r)) dot(a, ddy(r))|
+                //         |dot(b, ddx(r)) dot(b, ddy(r))| / (2 r.z)
+                // where a = w * vec3(1, 0, -r.x/r.z)
+                //       b = h * vec3(0, 1, -r.y/r.z)
+                // We would like to know max(L(x)) over unit vectors x, where L(x) = |Dp(r) x|^2.
+                // Since ddx(r) and ddy(r) are unknown, the best we can do is to sample L in some
+                // directions and take the maximum across the samples.
+                //
+                // Some implementations use max(L(n1), L(n2)) where n1 = vec2(1,0) and n2 =
+                // vec2(0,1).
+                //
+                // Some implementations use max(L(n1), L(n2), L(n3), L(n4)),
+                // where n3 = (n1 + n2) / |n1 + n2| = (n1 + n2)/sqrt(2)
+                //       n4 = (n1 - n2) / |n1 - n2| = (n1 - n2)/sqrt(2).
+                // In other words, two samples along the diagonal screen space directions have been
+                // added, giving a strictly better estimate of the true maximum.
+                //
+                // It turns out we can get twice the sample count very cheaply.
+                // We can use the linearity of Dp(r) to get these extra samples of L cheaply in
+                // terms of the already taken samples, L(n1) and L(n2):
+                // Denoting
+                // dpx = Dp(r)n1
+                // dpy = Dp(r)n2
+                // dpxx = dot(dpx, dpx)
+                // dpyy = dot(dpy, dpy)
+                // dpxy = dot(dpx, dpy)
+                // we obtain
+                // L(n3) = |Dp(r)n1 + Dp(r)n2|^2/2 = (dpxx + dpyy)/2 + dpxy
+                // L(n4) = |Dp(r)n1 - Dp(r)n2|^2/2 = (dpxx + dpyy)/2 - dpxy
+                // max(L(n1), L(n2), L(n3), L(n4))
+                // = max(max(L(n1), L(n2)), max(L(n3), L(n4)))
+                // = max(max(dpxx, dpyy), (dpxx + dpyy)/2 + abs(dpxy))
+                // So the extra cost is: one dot, one abs, one add, one multiply-add and one max.
+                // (All scalar.)
+                //
+                // In section 3.8.10.1, the OpenGL ES 3 specification defines the "scale factor",
+                // rho. In our terminology, this definition works out to taking sqrt(max(L(n1),
+                // L(n2))). Some implementations will use this estimate, here we use the strictly
+                // better sqrt(max(L(n1), L(n2), L(n3), L(n4))), since it's not much more expensive
+                // to calculate.
+
+                // Swap coordinates such that we can assume that the positive z-axis is major, in
+                // what follows.
+                out << "    float3 ddxr = xMajor ? ddx(r).yzx : yMajor ? ddx(r).zxy : ddx(r).xyz;\n"
+                       "    float3 ddyr = xMajor ? ddy(r).yzx : yMajor ? ddy(r).zxy : ddy(r).xyz;\n"
+                       "    r = xMajor ? r.yzx : yMajor ? r.zxy : r.xyz;\n";
+
+                out << "    float2 s = 0.5*float2(width, height);\n"
+                       "    float2 dpx = s * (ddxr.xy - ddxr.z*r.xy/r.z)/r.z;\n"
+                       "    float2 dpy = s * (ddyr.xy - ddyr.z*r.xy/r.z)/r.z;\n"
+                       "    float dpxx = dot(dpx, dpx);\n"
+                       "    float dpyy = dot(dpy, dpy);\n"
+                       "    float dpxy = dot(dpx, dpy);\n"
+                       "    float ma = max(dpxx, dpyy);\n"
+                       "    float mb = 0.5 * (dpxx + dpyy) + abs(dpxy);\n"
+                       "    float mab = max(ma, mb);\n"
+                       "    float lod = 0.5f * log2(mab);\n";
             }
             else if (textureFunction.method == TextureFunctionHLSL::TextureFunction::GRAD)
             {
diff --git a/src/tests/gl_tests/TextureTest.cpp b/src/tests/gl_tests/TextureTest.cpp
index f57be09..e747bf8 100644
--- a/src/tests/gl_tests/TextureTest.cpp
+++ b/src/tests/gl_tests/TextureTest.cpp
@@ -1231,6 +1231,38 @@
     GLint mTextureCubeUniformLocation;
 };
 
+class TextureCubeIntegerEdgeTestES3 : public TextureCubeIntegerTestES3
+{
+  protected:
+    TextureCubeIntegerEdgeTestES3() : TextureCubeIntegerTestES3() {}
+
+    const char *getVertexShaderSource() override
+    {
+        return "#version 300 es\n"
+               "out vec2 texcoord;\n"
+               "in vec4 position;\n"
+               "void main()\n"
+               "{\n"
+               "    gl_Position = vec4(position.xy, 0.0, 1.0);\n"
+               "    texcoord = position.xy;\n"
+               "}\n";
+    }
+
+    const char *getFragmentShaderSource() override
+    {
+        return "#version 300 es\n"
+               "precision highp float;\n"
+               "precision highp usamplerCube;\n"
+               "uniform usamplerCube texCube;\n"
+               "in vec2 texcoord;\n"
+               "out vec4 fragColor;\n"
+               "void main()\n"
+               "{\n"
+               "    fragColor = vec4(texture(texCube, vec3(texcoord, 0)))/255.0;\n"
+               "}\n";
+    }
+};
+
 TEST_P(Texture2DTest, NegativeAPISubImage)
 {
     glBindTexture(GL_TEXTURE_2D, mTexture2D);
@@ -4581,6 +4613,46 @@
     EXPECT_PIXEL_COLOR_EQ(width - 1, height - 1, color);
 }
 
+// This test sets up a cube map with four distinctly colored MIP levels.
+// The size of the texture and the geometry is chosen such that levels 1 or 2 should be chosen at
+// the corners of the screen.
+TEST_P(TextureCubeIntegerEdgeTestES3, IntegerCubeTextureCorner)
+{
+    glActiveTexture(GL_TEXTURE0);
+
+    glBindTexture(GL_TEXTURE_CUBE_MAP, mTextureCube);
+    int width  = getWindowWidth();
+    int height = getWindowHeight();
+    ASSERT_EQ(width, height);
+    GLColor color[4] = {GLColor::white, GLColor::green, GLColor::blue, GLColor::red};
+    for (GLint level = 0; level < 4; level++)
+    {
+        for (GLenum faceIndex = 0; faceIndex < 6; faceIndex++)
+        {
+            int levelWidth  = (2 * width) >> level;
+            int levelHeight = (2 * height) >> level;
+            std::vector<GLColor> pixels(levelWidth * levelHeight, color[level]);
+            glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + faceIndex, level, GL_RGBA8UI, levelWidth,
+                         levelHeight, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, pixels.data());
+            EXPECT_GL_NO_ERROR();
+        }
+    }
+    glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST);
+    glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, 3);
+
+    glUseProgram(mProgram);
+    glUniform1i(mTextureCubeUniformLocation, 0);
+    drawQuad(mProgram, "position", 0.5f);
+
+    ASSERT_GL_NO_ERROR();
+    // Check that we do not read from levels 0 or 3. Levels 1 and 2 are both acceptable.
+    EXPECT_EQ(ReadColor(0, 0).R, 0);
+    EXPECT_EQ(ReadColor(width - 1, 0).R, 0);
+    EXPECT_EQ(ReadColor(0, height - 1).R, 0);
+    EXPECT_EQ(ReadColor(width - 1, height - 1).R, 0);
+}
+
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these
 // tests should be run against.
 ANGLE_INSTANTIATE_TEST(Texture2DTest,
@@ -4682,5 +4754,6 @@
 ANGLE_INSTANTIATE_TEST(TextureCubeTestES3, ES3_D3D11(), ES3_OPENGL(), ES3_OPENGLES());
 ANGLE_INSTANTIATE_TEST(Texture2DIntegerTestES3, ES3_D3D11(), ES3_OPENGL());
 ANGLE_INSTANTIATE_TEST(TextureCubeIntegerTestES3, ES3_D3D11(), ES3_OPENGL());
+ANGLE_INSTANTIATE_TEST(TextureCubeIntegerEdgeTestES3, ES3_D3D11(), ES3_OPENGL());
 
 }  // anonymous namespace