Vulkan: Add support for D32F_S8 depth_texture

Test: angle_deqp_khr_gles3_tests --deqp-egl-display-type=angle-vulkan --gtest_filter=dEQP.KHR_GLES3/packed_depth_stencil_verify_read_pixels_depth24_stencil8
Test: angle_deqp_khr_gles3_tests --deqp-egl-display-type=angle-vulkan --gtest_filter=dEQP.KHR_GLES3/packed_depth_stencil_clear_buffer_depth32f_stencil8

Full passing results are blocked by suspected driver issues:
Bug: angleproject:3683
Bug: angleproject:3689

Bug: angleproject:3457
Change-Id: I7ce1a7824802ebca2c0479a3467fac26013829eb
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1704791
Commit-Queue: Courtney Goeltzenleuchter <courtneygo@google.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/image_util/loadimage.cpp b/src/image_util/loadimage.cpp
index adcd016..287bbf3 100644
--- a/src/image_util/loadimage.cpp
+++ b/src/image_util/loadimage.cpp
@@ -1292,6 +1292,58 @@
     }
 }
 
+// Gathers one stencil byte per texel from a source whose texels each span two 32-bit words.
+void LoadX32S8ToS8(size_t width,
+                   size_t height,
+                   size_t depth,
+                   const uint8_t *input,
+                   size_t inputRowPitch,
+                   size_t inputDepthPitch,
+                   uint8_t *output,
+                   size_t outputRowPitch,
+                   size_t outputDepthPitch)
+{
+    for (size_t slice = 0; slice < depth; ++slice)
+    {
+        for (size_t row = 0; row < height; ++row)
+        {
+            const uint8_t *srcRow = input + (row * inputRowPitch) + (slice * inputDepthPitch);
+            uint8_t *dstStencil   = output + (row * outputRowPitch) + (slice * outputDepthPitch);
+            const uint32_t *srcWords = reinterpret_cast<const uint32_t *>(srcRow);
+            for (size_t col = 0; col < width; ++col)
+            {
+                dstStencil[col] = srcWords[(col * 2) + 1] & 0xFF;  // low byte of the 2nd word
+            }
+        }
+    }
+}
+
+void LoadD32FS8X24ToD32F(size_t width,
+                         size_t height,
+                         size_t depth,
+                         const uint8_t *input,
+                         size_t inputRowPitch,
+                         size_t inputDepthPitch,
+                         uint8_t *output,
+                         size_t outputRowPitch,
+                         size_t outputDepthPitch)
+{
+    for (size_t k = 0; k < depth; ++k)
+    {
+        for (size_t j = 0; j < height; ++j)
+        {
+            const float *srcTexels =
+                priv::OffsetDataPointer<float>(input, j, k, inputRowPitch, inputDepthPitch);
+            float *dstDepth =
+                priv::OffsetDataPointer<float>(output, j, k, outputRowPitch, outputDepthPitch);
+            for (size_t i = 0; i < width; ++i)
+            {
+                dstDepth[i] = gl::clamp01(srcTexels[i * 2]);  // 1st float of each 2-float texel
+            }
+        }
+    }
+}
+
 void LoadD32FS8X24ToD32FS8X24(size_t width,
                               size_t height,
                               size_t depth,
diff --git a/src/image_util/loadimage.h b/src/image_util/loadimage.h
index 5542e78..f5a3a01 100644
--- a/src/image_util/loadimage.h
+++ b/src/image_util/loadimage.h
@@ -425,6 +425,26 @@
                    size_t outputRowPitch,
                    size_t outputDepthPitch);
 
+void LoadX32S8ToS8(size_t width,
+                   size_t height,
+                   size_t depth,
+                   const uint8_t *input,  // read as pairs of 32-bit words, one pair per texel
+                   size_t inputRowPitch,
+                   size_t inputDepthPitch,
+                   uint8_t *output,  // receives one byte per texel (low byte of the 2nd word)
+                   size_t outputRowPitch,
+                   size_t outputDepthPitch);
+
+void LoadD32FS8X24ToD32F(size_t width,
+                         size_t height,
+                         size_t depth,
+                         const uint8_t *input,  // read as float pairs; 1st of each pair is used
+                         size_t inputRowPitch,
+                         size_t inputDepthPitch,
+                         uint8_t *output,  // receives one float per texel
+                         size_t outputRowPitch,
+                         size_t outputDepthPitch);
+
 void LoadD32FS8X24ToD32FS8X24(size_t width,
                               size_t height,
                               size_t depth,
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.cpp b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
index 1f8eb7c..1d60608 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
@@ -2082,6 +2082,9 @@
     uint32_t bufferImageHeight;
     size_t allocationSize;
 
+    LoadImageFunctionInfo loadFunctionInfo = vkFormat.textureLoadFunctions(type);
+    LoadImageFunction stencilLoadFunction  = nullptr;
+
     if (storageFormat.isBlock)
     {
         const gl::InternalFormat &storageFormatInfo = vkFormat.getInternalFormatInfo(type);
@@ -2117,7 +2120,24 @@
     {
         ASSERT(storageFormat.pixelBytes != 0);
 
-        outputRowPitch   = storageFormat.pixelBytes * glExtents.width;
+        if (storageFormat.id == angle::FormatID::D24_UNORM_S8_UINT)
+        {
+            stencilLoadFunction = angle::LoadX24S8ToS8;
+        }
+        if (storageFormat.id == angle::FormatID::D32_FLOAT_S8X24_UINT)
+        {
+            // If depth is D32FLOAT_S8, we must pack D32F tightly (no stencil) for CopyBufferToImage
+            outputRowPitch = sizeof(float) * glExtents.width;
+
+            // The generic load functions don't handle tightly packing D32FS8 to D32F & S8 so call
+            // special case load functions.
+            loadFunctionInfo.loadFunction = angle::LoadD32FS8X24ToD32F;
+            stencilLoadFunction           = angle::LoadX32S8ToS8;
+        }
+        else
+        {
+            outputRowPitch = storageFormat.pixelBytes * glExtents.width;
+        }
         outputDepthPitch = outputRowPitch * glExtents.height;
 
         bufferRowLength   = glExtents.width;
@@ -2146,11 +2166,9 @@
 
     const uint8_t *source = pixels + static_cast<ptrdiff_t>(inputSkipBytes);
 
-    LoadImageFunctionInfo loadFunction = vkFormat.textureLoadFunctions(type);
-
-    loadFunction.loadFunction(glExtents.width, glExtents.height, glExtents.depth, source,
-                              inputRowPitch, inputDepthPitch, stagingPointer, outputRowPitch,
-                              outputDepthPitch);
+    loadFunctionInfo.loadFunction(glExtents.width, glExtents.height, glExtents.depth, source,
+                                  inputRowPitch, inputDepthPitch, stagingPointer, outputRowPitch,
+                                  outputDepthPitch);
 
     VkBufferImageCopy copy         = {};
     VkImageAspectFlags aspectFlags = GetFormatAspectFlags(vkFormat.imageFormat());
@@ -2189,9 +2207,10 @@
         outputRowPitch   = glExtents.width;
         outputDepthPitch = outputRowPitch * glExtents.height;
 
-        angle::LoadX24S8ToS8(glExtents.width, glExtents.height, glExtents.depth, source,
-                             inputRowPitch, inputDepthPitch, stagingPointer, outputRowPitch,
-                             outputDepthPitch);
+        ASSERT(stencilLoadFunction != nullptr);
+        stencilLoadFunction(glExtents.width, glExtents.height, glExtents.depth, source,
+                            inputRowPitch, inputDepthPitch, stagingPointer, outputRowPitch,
+                            outputDepthPitch);
 
         VkBufferImageCopy stencilCopy = {};
 
diff --git a/src/tests/deqp_support/deqp_khr_gles3_test_expectations.txt b/src/tests/deqp_support/deqp_khr_gles3_test_expectations.txt
index 8290449..93a1fe2 100644
--- a/src/tests/deqp_support/deqp_khr_gles3_test_expectations.txt
+++ b/src/tests/deqp_support/deqp_khr_gles3_test_expectations.txt
@@ -37,8 +37,7 @@
 
 // Depth/stencil related failures.
 3457 VULKAN ANDROID : KHR-GLES3.packed_depth_stencil.*.depth24_stencil8 = FAIL
-3457 VULKAN : KHR-GLES3.packed_depth_stencil.verify_read_pixels.depth32f_stencil8 = FAIL
-3457 VULKAN : KHR-GLES3.packed_depth_stencil.clear_buffer.depth32f_stencil8 = FAIL
+3457 VULKAN ANDROID : KHR-GLES3.packed_depth_stencil.*.depth32f_stencil8 = FAIL
 
 // CopyTexImage conversion missing 2D Array and 3D texture support.
 3458 VULKAN : KHR-GLES3.copy_tex_image_conversions.required.texture_array_texture2d = SKIP