Vulkan: Shader path for texture copy when image is not initialized

This change implements image/texture copies through a staging image when
the destination image is not yet fully initialized.  With this change, CPU
readback for glCopyTex[Sub]Image2D and glCopy[Sub]TextureCHROMIUM should
happen only if the texture formats don't allow a fragment-shader based
copy, or if the forceCpuPathForCubeMapCopy workaround applies.

Bug: angleproject:2958
Change-Id: I04087e14ea8fb6fbc731598c5493e44651c22c01
Reviewed-on: https://chromium-review.googlesource.com/c/1393909
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/CommandGraph.cpp b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
index c22e1bc..6008574 100644
--- a/src/libANGLE/renderer/vulkan/CommandGraph.cpp
+++ b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
@@ -205,7 +205,7 @@
     CommandGraphNode *readingNode = readingResource->mCurrentWritingNode;
     ASSERT(readingNode);
 
-    if (hasChildlessWritingNode())
+    if (mCurrentWritingNode)
     {
         // Ensure 'readingNode' happens after the current writing node.
         CommandGraphNode::SetHappensBeforeDependency(mCurrentWritingNode, readingNode);
diff --git a/src/libANGLE/renderer/vulkan/RenderbufferVk.cpp b/src/libANGLE/renderer/vulkan/RenderbufferVk.cpp
index f92b34f..c5ac794 100644
--- a/src/libANGLE/renderer/vulkan/RenderbufferVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RenderbufferVk.cpp
@@ -70,7 +70,7 @@
             (isDepthOrStencilFormat ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0);
 
         gl::Extents extents(static_cast<int>(width), static_cast<int>(height), 1);
-        ANGLE_TRY(mImage.init(contextVk, gl::TextureType::_2D, extents, vkFormat, 1, usage, 1));
+        ANGLE_TRY(mImage.init(contextVk, gl::TextureType::_2D, extents, vkFormat, 1, usage, 1, 1));
 
         VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
         ANGLE_TRY(mImage.initMemory(contextVk, renderer->getMemoryProperties(), flags));
diff --git a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
index 4f9e252..317a058 100644
--- a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
+++ b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
@@ -93,7 +93,7 @@
                                                            : kSurfaceVKColorImageUsageFlags;
 
     gl::Extents extents(static_cast<int>(width), static_cast<int>(height), 1);
-    ANGLE_TRY(image.init(displayVk, gl::TextureType::_2D, extents, vkFormat, 1, usage, 1));
+    ANGLE_TRY(image.init(displayVk, gl::TextureType::_2D, extents, vkFormat, 1, usage, 1, 1));
 
     VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     ANGLE_TRY(image.initMemory(displayVk, renderer->getMemoryProperties(), flags));
@@ -513,7 +513,7 @@
         const VkImageUsageFlags dsUsage = kSurfaceVKDepthStencilImageUsageFlags;
 
         ANGLE_TRY(mDepthStencilImage.init(displayVk, gl::TextureType::_2D, extents, dsFormat, 1,
-                                          dsUsage, 1));
+                                          dsUsage, 1, 1));
         ANGLE_TRY(mDepthStencilImage.initMemory(displayVk, renderer->getMemoryProperties(),
                                                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
 
diff --git a/src/libANGLE/renderer/vulkan/TextureVk.cpp b/src/libANGLE/renderer/vulkan/TextureVk.cpp
index 36422e4..2047646 100644
--- a/src/libANGLE/renderer/vulkan/TextureVk.cpp
+++ b/src/libANGLE/renderer/vulkan/TextureVk.cpp
@@ -22,9 +22,12 @@
 namespace
 {
 constexpr VkBufferUsageFlags kStagingBufferFlags =
-    (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+    VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
 constexpr size_t kStagingBufferSize = 1024 * 16;
 
+constexpr VkImageUsageFlags kStagingImageFlags =
+    VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
 constexpr VkFormatFeatureFlags kBlitFeatureFlags =
     VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
 
@@ -37,6 +40,37 @@
            renderer->hasTextureFormatFeatureBits(destFormat.vkTextureFormat,
                                                  VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT);
 }
+
+// Returns true when the copy must take the CPU path regardless of format support: |image| has
+// more than one layer and the forceCpuPathForCubeMapCopy feature is enabled.
+bool ForceCpuPathForCopy(RendererVk *renderer, vk::ImageHelper *image)
+{
+    return image->getLayerCount() > 1 && renderer->getFeatures().forceCpuPathForCubeMapCopy;
+}
+
+// Maps an image's layer and sample counts to the matching 2D-family texture type:
+//
+//   layerCount > 1:  _2DMultisampleArray if samples > 1, otherwise _2DArray
+//   otherwise:       _2DMultisample      if samples > 1, otherwise _2D
+gl::TextureType Get2DTextureType(uint32_t layerCount, GLint samples)
+{
+    const bool isLayered      = layerCount > 1;
+    const bool isMultisampled = samples > 1;
+
+    if (isLayered && isMultisampled)
+    {
+        return gl::TextureType::_2DMultisampleArray;
+    }
+    if (isLayered)
+    {
+        return gl::TextureType::_2DArray;
+    }
+    if (isMultisampled)
+    {
+        return gl::TextureType::_2DMultisample;
+    }
+    return gl::TextureType::_2D;
+}
 }  // anonymous namespace
 
 // StagingStorage implementation.
@@ -52,21 +86,34 @@
 
 void PixelBuffer::release(RendererVk *renderer)
 {
+    // Release every update still pending; image-sourced ones free their staging image here.
+    for (SubresourceUpdate &update : mSubresourceUpdates)
+    {
+        update.release(renderer);
+    }
     mStagingBuffer.release(renderer);
+    mSubresourceUpdates.clear();  // forget the entries released above
 }
 
-void PixelBuffer::removeStagedUpdates(const gl::ImageIndex &index)
+void PixelBuffer::removeStagedUpdates(RendererVk *renderer, const gl::ImageIndex &index)
 {
     // Find any staged updates for this index and removes them from the pending list.
-    uint32_t levelIndex    = static_cast<uint32_t>(index.getLevelIndex());
-    uint32_t layerIndex    = static_cast<uint32_t>(index.getLayerIndex());
-    auto removeIfStatement = [levelIndex, layerIndex](SubresourceUpdate &update) {
-        return update.copyRegion.imageSubresource.mipLevel == levelIndex &&
-               update.copyRegion.imageSubresource.baseArrayLayer == layerIndex;
-    };
-    mSubresourceUpdates.erase(
-        std::remove_if(mSubresourceUpdates.begin(), mSubresourceUpdates.end(), removeIfStatement),
-        mSubresourceUpdates.end());
+    uint32_t levelIndex = index.getLevelIndex();
+    uint32_t layerIndex = index.hasLayer() ? index.getLayerIndex() : 0;
+    // Iterate by position because erase() invalidates iterators from the erased element on.
+    for (size_t updateIndex = 0; updateIndex < mSubresourceUpdates.size();)
+    {
+        auto update = mSubresourceUpdates.begin() + updateIndex;
+        if (update->isUpdateToLayerLevel(layerIndex, levelIndex))
+        {
+            update->release(renderer);  // frees the staging image of image-sourced updates
+            mSubresourceUpdates.erase(update);
+        }
+        else
+        {
+            updateIndex++;  // only advance when nothing was erased from this slot
+        }
+    }
 }
 
 angle::Result PixelBuffer::stageSubresourceUpdate(ContextVk *contextVk,
@@ -232,6 +279,24 @@
     return angle::Result::Continue;
 }
 
+void PixelBuffer::stageSubresourceUpdateFromImage(vk::ImageHelper *image,
+                                                  const gl::ImageIndex &index,
+                                                  const gl::Offset &destOffset,
+                                                  const gl::Extents &extents)
+{
+    VkImageCopy copyToImage                   = {};  // src level, layer and offsets stay zero
+    copyToImage.srcSubresource.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT;
+    copyToImage.srcSubresource.layerCount     = index.getLayerCount();
+    copyToImage.dstSubresource.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT;
+    copyToImage.dstSubresource.mipLevel       = index.getLevelIndex();
+    copyToImage.dstSubresource.baseArrayLayer = index.hasLayer() ? index.getLayerIndex() : 0;
+    copyToImage.dstSubresource.layerCount     = index.getLayerCount();
+    gl_vk::GetOffset(destOffset, &copyToImage.dstOffset);
+    gl_vk::GetExtent(extents, &copyToImage.extent);
+    // The queued update keeps |image|; SubresourceUpdate::release() releases and deletes it.
+    mSubresourceUpdates.emplace_back(image, copyToImage);
+}
+
 angle::Result PixelBuffer::allocate(ContextVk *contextVk,
                                     size_t sizeInBytes,
                                     uint8_t **ptrOut,
@@ -253,15 +318,21 @@
         return angle::Result::Continue;
     }
 
+    RendererVk *renderer = contextVk->getRenderer();
+
     ANGLE_TRY(mStagingBuffer.flush(contextVk));
 
     std::vector<SubresourceUpdate> updatesToKeep;
 
-    for (const SubresourceUpdate &update : mSubresourceUpdates)
+    for (SubresourceUpdate &update : mSubresourceUpdates)
     {
-        ASSERT(update.bufferHandle != VK_NULL_HANDLE);
+        ASSERT((update.updateSource == SubresourceUpdate::UpdateSource::Buffer &&
+                update.buffer.bufferHandle != VK_NULL_HANDLE) ||
+               (update.updateSource == SubresourceUpdate::UpdateSource::Image &&
+                update.image.image != nullptr && update.image.image->valid()));
 
-        const uint32_t updateMipLevel = update.copyRegion.imageSubresource.mipLevel;
+        const uint32_t updateMipLevel = update.dstSubresource().mipLevel;
+
         // It's possible we've accumulated updates that are no longer applicable if the image has
         // never been flushed but the image description has changed. Check if this level exist for
         // this image.
@@ -273,14 +344,36 @@
 
         // Conservatively flush all writes to the image. We could use a more restricted barrier.
         // Do not move this above the for loop, otherwise multiple updates can have race conditions
-        // and not be applied correctly as seen i:
+        // and not be applied correctly as seen in:
         // dEQP-gles2.functional_texture_specification_texsubimage2d_align_2d* tests on Windows AMD
         image->changeLayoutWithStages(
             VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, commandBuffer);
 
-        commandBuffer->copyBufferToImage(update.bufferHandle, image->getImage(),
-                                         image->getCurrentLayout(), 1, &update.copyRegion);
+        if (update.updateSource == SubresourceUpdate::UpdateSource::Buffer)
+        {
+            commandBuffer->copyBufferToImage(update.buffer.bufferHandle, image->getImage(),
+                                             image->getCurrentLayout(), 1,
+                                             &update.buffer.copyRegion);
+        }
+        else
+        {
+            // Note: currently, the staging images are only made through color attachment writes. If
+            // they were written to otherwise in the future, the src stage of this transition should
+            // be adjusted appropriately.
+            update.image.image->changeLayoutWithStages(
+                VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                commandBuffer);
+
+            update.image.image->addReadDependency(image);
+
+            commandBuffer->copyImage(update.image.image->getImage(),
+                                     update.image.image->getCurrentLayout(), image->getImage(),
+                                     image->getCurrentLayout(), 1, &update.image.copyRegion);
+        }
+
+        update.release(renderer);
     }
 
     // Only remove the updates that were actually applied to the image.
@@ -292,7 +385,7 @@
     }
     else
     {
-        WARN() << "Internal Vulkan bufffer could not be released. This is likely due to having "
+        WARN() << "Internal Vulkan buffer could not be released. This is likely due to having "
                   "extra images defined in the Texture.";
     }
 
@@ -381,14 +474,48 @@
     return angle::Result::Continue;
 }
 
-PixelBuffer::SubresourceUpdate::SubresourceUpdate() : bufferHandle(VK_NULL_HANDLE) {}
+PixelBuffer::SubresourceUpdate::SubresourceUpdate()
+    : updateSource(UpdateSource::Buffer), buffer{VK_NULL_HANDLE}
+{}  // empty buffer update; buffer.copyRegion is zero-initialized
 
 PixelBuffer::SubresourceUpdate::SubresourceUpdate(VkBuffer bufferHandleIn,
                                                   const VkBufferImageCopy &copyRegionIn)
-    : bufferHandle(bufferHandleIn), copyRegion(copyRegionIn)
+    : updateSource(UpdateSource::Buffer), buffer{bufferHandleIn, copyRegionIn}
 {}
 
-PixelBuffer::SubresourceUpdate::SubresourceUpdate(const SubresourceUpdate &other) = default;
+PixelBuffer::SubresourceUpdate::SubresourceUpdate(vk::ImageHelper *imageIn,
+                                                  const VkImageCopy &copyRegionIn)
+    : updateSource(UpdateSource::Image), image{imageIn, copyRegionIn}
+{}  // stores the pointer only; release() later releases and deletes the image
+
+PixelBuffer::SubresourceUpdate::SubresourceUpdate(const SubresourceUpdate &other)
+    : updateSource(other.updateSource)
+{
+    if (updateSource == UpdateSource::Buffer)  // copy only the union member that is active
+    {
+        buffer = other.buffer;
+    }
+    else
+    {
+        image = other.image;  // shallow copy: both updates refer to the same ImageHelper
+    }
+}
+
+void PixelBuffer::SubresourceUpdate::release(RendererVk *renderer)
+{
+    if (updateSource == UpdateSource::Image)  // buffer updates have nothing to clean up here
+    {
+        image.image->release(renderer);
+        SafeDelete(image.image);  // the staging ImageHelper itself is deleted with the update
+    }
+}
+
+bool PixelBuffer::SubresourceUpdate::isUpdateToLayerLevel(uint32_t layerIndex,
+                                                          uint32_t levelIndex) const
+{
+    // True when this update's destination is exactly the given base layer and mip level.
+    return dstSubresource().baseArrayLayer == layerIndex && dstSubresource().mipLevel == levelIndex;
+}
 
 // TextureVk implementation.
 TextureVk::TextureVk(const gl::TextureState &state, RendererVk *renderer)
@@ -584,13 +711,10 @@
     const vk::Format &srcFormat  = framebufferVk->getColorReadRenderTarget()->getImageFormat();
     const vk::Format &destFormat = renderer->getFormat(internalFormat.sizedInternalFormat);
 
-    // TODO(syoussefi): Support draw path for when !mImage.valid().  http://anglebug.com/2958
-    bool canDraw = mImage.valid() && CanCopyWithDraw(renderer, srcFormat, destFormat);
-    bool forceCpuPath =
-        mImage.getLayerCount() > 1 && renderer->getFeatures().forceCpuPathForCubeMapCopy;
+    bool forceCpuPath = ForceCpuPathForCopy(renderer, &mImage);
 
     // If it's possible to perform the copy with a draw call, do that.
-    if (canDraw && !forceCpuPath)
+    if (CanCopyWithDraw(renderer, srcFormat, destFormat) && !forceCpuPath)
     {
         RenderTargetVk *colorReadRT = framebufferVk->getColorReadRenderTarget();
         bool isViewportFlipY        = contextVk->isViewportFlipEnabledForDrawFBO();
@@ -599,10 +723,9 @@
         ASSERT(index.getLayerCount() == 1);
 
         ANGLE_TRY(copySubImageImplWithDraw(
-            contextVk, index, modifiedDestOffset, 0, clippedSourceArea, isViewportFlipY, false,
-            false, false, &colorReadRT->getImage(), colorReadRT->getReadImageView()));
+            contextVk, index, modifiedDestOffset, destFormat, 0, clippedSourceArea, isViewportFlipY,
+            false, false, false, &colorReadRT->getImage(), colorReadRT->getReadImageView()));
 
-        framebufferVk->getFramebuffer()->addReadDependency(&mImage);
         return angle::Result::Continue;
     }
 
@@ -635,20 +758,16 @@
     const vk::Format &sourceVkFormat = source->getImage().getFormat();
     const vk::Format &destVkFormat   = renderer->getFormat(destFormat.sizedInternalFormat);
 
-    // TODO(syoussefi): Support draw path for when !mImage.valid().  http://anglebug.com/2958
-    bool canDraw = mImage.valid() && CanCopyWithDraw(renderer, sourceVkFormat, destVkFormat);
-    bool forceCpuPath =
-        mImage.getLayerCount() > 1 && renderer->getFeatures().forceCpuPathForCubeMapCopy;
+    bool forceCpuPath = ForceCpuPathForCopy(renderer, &mImage);
 
     // If it's possible to perform the copy with a draw call, do that.
-    if (canDraw && !forceCpuPath)
+    if (CanCopyWithDraw(renderer, sourceVkFormat, destVkFormat) && !forceCpuPath)
     {
-        ANGLE_TRY(copySubImageImplWithDraw(contextVk, index, destOffset, sourceLevel, sourceArea,
-                                           false, unpackFlipY, unpackPremultiplyAlpha,
+        ANGLE_TRY(copySubImageImplWithDraw(contextVk, index, destOffset, destVkFormat, sourceLevel,
+                                           sourceArea, false, unpackFlipY, unpackPremultiplyAlpha,
                                            unpackUnmultiplyAlpha, &source->getImage(),
                                            &source->getReadImageView()));
 
-        source->getImage().addReadDependency(&mImage);
         return angle::Result::Continue;
     }
 
@@ -707,6 +826,7 @@
 angle::Result TextureVk::copySubImageImplWithDraw(ContextVk *contextVk,
                                                   const gl::ImageIndex &index,
                                                   const gl::Offset &destOffset,
+                                                  const vk::Format &destFormat,
                                                   size_t sourceLevel,
                                                   const gl::Rectangle &sourceArea,
                                                   bool isSrcFlipY,
@@ -716,9 +836,9 @@
                                                   vk::ImageHelper *srcImage,
                                                   const vk::ImageView *srcView)
 {
-    ANGLE_TRY(ensureImageInitialized(contextVk));
-
-    UtilsVk &utilsVk = contextVk->getRenderer()->getUtils();
+    RendererVk *renderer      = contextVk->getRenderer();
+    UtilsVk &utilsVk          = renderer->getUtils();
+    Serial currentQueueSerial = renderer->getCurrentQueueSerial();
 
     UtilsVk::CopyImageParameters params;
     params.srcOffset[0]        = sourceArea.x;
@@ -738,14 +858,62 @@
     uint32_t baseLayer  = index.hasLayer() ? index.getLayerIndex() : 0;
     uint32_t layerCount = index.getLayerCount();
 
-    for (uint32_t layerIndex = 0; layerIndex < layerCount; ++layerIndex)
+    // If destination is valid, copy the source directly into it.
+    if (mImage.valid())
     {
-        params.srcLayer = layerIndex;
+        // Make sure any updates to the image are already flushed.
+        ANGLE_TRY(ensureImageInitialized(contextVk));
 
-        vk::ImageView *destView;
-        ANGLE_TRY(getLayerLevelDrawImageView(contextVk, baseLayer + layerIndex, level, &destView));
+        for (uint32_t layerIndex = 0; layerIndex < layerCount; ++layerIndex)
+        {
+            params.srcLayer = layerIndex;
 
-        ANGLE_TRY(utilsVk.copyImage(contextVk, &mImage, destView, srcImage, srcView, params));
+            vk::ImageView *destView;
+            ANGLE_TRY(
+                getLayerLevelDrawImageView(contextVk, baseLayer + layerIndex, level, &destView));
+
+            ANGLE_TRY(utilsVk.copyImage(contextVk, &mImage, destView, srcImage, srcView, params));
+        }
+    }
+    else
+    {
+        std::unique_ptr<vk::ImageHelper> stagingImage;
+
+        GLint samples                      = srcImage->getSamples();
+        gl::TextureType stagingTextureType = Get2DTextureType(layerCount, samples);
+
+        // Create a temporary image to stage the copy
+        stagingImage = std::make_unique<vk::ImageHelper>();
+
+        ANGLE_TRY(stagingImage->init2DStaging(contextVk, renderer->getMemoryProperties(),
+                                              gl::Extents(sourceArea.width, sourceArea.height, 1),
+                                              destFormat, kStagingImageFlags, layerCount));
+
+        params.destOffset[0] = 0;
+        params.destOffset[1] = 0;
+
+        for (uint32_t layerIndex = 0; layerIndex < layerCount; ++layerIndex)
+        {
+            params.srcLayer = layerIndex;
+
+            // Create a temporary view for this layer.
+            vk::ImageView stagingView;
+            ANGLE_TRY(stagingImage->initLayerImageView(
+                contextVk, stagingTextureType, VK_IMAGE_ASPECT_COLOR_BIT, gl::SwizzleState(),
+                &stagingView, 0, 1, layerIndex, 1));
+
+            ANGLE_TRY(utilsVk.copyImage(contextVk, stagingImage.get(), &stagingView, srcImage,
+                                        srcView, params));
+
+            // Queue the resource for cleanup as soon as the copy above is finished.  There's no
+            // need to keep it around.
+            renderer->releaseObject(currentQueueSerial, &stagingView);
+        }
+
+        // Stage the copy for when the image storage is actually created.
+        mPixelBuffer.stageSubresourceUpdateFromImage(
+            stagingImage.release(), index, destOffset,
+            gl::Extents(sourceArea.width, sourceArea.height, 1));
     }
 
     return angle::Result::Continue;
@@ -799,7 +967,7 @@
 
     // If there is any staged changes for this index, we can remove them since we're going to
     // override them with this call.
-    mPixelBuffer.removeStagedUpdates(index);
+    mPixelBuffer.removeStagedUpdates(renderer, index);
 
     if (mImage.valid())
     {
@@ -874,7 +1042,7 @@
     ContextVk *contextVk = vk::GetImpl(context);
 
     const gl::Extents baseLevelExtents = mImage.getExtents();
-    uint32_t imageLayerCount           = GetImageLayerCount(mState.getType());
+    uint32_t imageLayerCount           = mImage.getLayerCount();
 
     uint8_t *imageData = nullptr;
     gl::Rectangle imageArea(0, 0, baseLevelExtents.width, baseLevelExtents.height);
@@ -1163,7 +1331,8 @@
         (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
          VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
 
-    ANGLE_TRY(mImage.init(contextVk, mState.getType(), extents, format, 1, usage, levelCount));
+    ANGLE_TRY(mImage.init(contextVk, mState.getType(), extents, format, 1, usage, levelCount,
+                          mState.getType() == gl::TextureType::CubeMap ? gl::kCubeFaceCount : 1));
 
     const VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
 
diff --git a/src/libANGLE/renderer/vulkan/TextureVk.h b/src/libANGLE/renderer/vulkan/TextureVk.h
index 8fd1315..65584a9 100644
--- a/src/libANGLE/renderer/vulkan/TextureVk.h
+++ b/src/libANGLE/renderer/vulkan/TextureVk.h
@@ -26,7 +26,7 @@
 
     void release(RendererVk *renderer);
 
-    void removeStagedUpdates(const gl::ImageIndex &index);
+    void removeStagedUpdates(RendererVk *renderer, const gl::ImageIndex &index);
 
     angle::Result stageSubresourceUpdate(ContextVk *contextVk,
                                          const gl::ImageIndex &index,
@@ -52,6 +52,11 @@
                                                         const gl::InternalFormat &formatInfo,
                                                         FramebufferVk *framebufferVk);
 
+    void stageSubresourceUpdateFromImage(vk::ImageHelper *image,
+                                         const gl::ImageIndex &index,
+                                         const gl::Offset &destOffset,
+                                         const gl::Extents &extents);
+
     // This will use the underlying dynamic buffer to allocate some memory to be used as a src or
     // dst.
     angle::Result allocate(ContextVk *contextVk,
@@ -73,10 +78,40 @@
     {
         SubresourceUpdate();
         SubresourceUpdate(VkBuffer bufferHandle, const VkBufferImageCopy &copyRegion);
+        SubresourceUpdate(vk::ImageHelper *image, const VkImageCopy &copyRegion);
         SubresourceUpdate(const SubresourceUpdate &other);
 
-        VkBuffer bufferHandle;
-        VkBufferImageCopy copyRegion;
+        void release(RendererVk *renderer);
+
+        const VkImageSubresourceLayers &dstSubresource() const
+        {
+            return updateSource == UpdateSource::Buffer ? buffer.copyRegion.imageSubresource
+                                                        : image.copyRegion.dstSubresource;
+        }
+        bool isUpdateToLayerLevel(uint32_t layerIndex, uint32_t levelIndex) const;
+
+        enum class UpdateSource
+        {
+            Buffer,
+            Image,
+        };
+        struct BufferUpdate
+        {
+            VkBuffer bufferHandle;
+            VkBufferImageCopy copyRegion;
+        };
+        struct ImageUpdate
+        {
+            vk::ImageHelper *image;
+            VkImageCopy copyRegion;
+        };
+
+        UpdateSource updateSource;
+        union
+        {
+            BufferUpdate buffer;
+            ImageUpdate image;
+        };
     };
 
     vk::DynamicBuffer mStagingBuffer;
@@ -258,6 +293,7 @@
     angle::Result copySubImageImplWithDraw(ContextVk *contextVk,
                                            const gl::ImageIndex &index,
                                            const gl::Offset &destOffset,
+                                           const vk::Format &destFormat,
                                            size_t sourceLevel,
                                            const gl::Rectangle &sourceArea,
                                            bool isSrcFlipY,
diff --git a/src/libANGLE/renderer/vulkan/UtilsVk.cpp b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
index 5fae062..a59c2cf 100644
--- a/src/libANGLE/renderer/vulkan/UtilsVk.cpp
+++ b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
@@ -716,12 +716,16 @@
     renderArea.height = params.srcExtents[1];
 
     // Change source layout outside render pass
-    vk::CommandBuffer *srcLayoutChange;
-    ANGLE_TRY(src->recordCommands(context, &srcLayoutChange));
+    if (src->getCurrentLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
+    {
+        vk::CommandBuffer *srcLayoutChange;
+        ANGLE_TRY(src->recordCommands(context, &srcLayoutChange));
 
-    src->changeLayoutWithStages(VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
-                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, srcLayoutChange);
+        src->changeLayoutWithStages(VK_IMAGE_ASPECT_COLOR_BIT,
+                                    VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, srcLayoutChange);
+    }
 
     // Change destination layout outside render pass as well
     vk::CommandBuffer *destLayoutChange;
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.cpp b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
index acf9e8b..8f62394 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
@@ -28,22 +28,6 @@
 // This is an arbitrary max. We can change this later if necessary.
 constexpr uint32_t kDefaultDescriptorPoolMaxSets = 2048;
 
-VkImageUsageFlags GetStagingImageUsageFlags(StagingUsage usage)
-{
-    switch (usage)
-    {
-        case StagingUsage::Read:
-            return VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-        case StagingUsage::Write:
-            return VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
-        case StagingUsage::Both:
-            return (VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
-        default:
-            UNREACHABLE();
-            return 0;
-    }
-}
-
 // Gets access flags based on layout.
 VkAccessFlags GetSrcLayoutAccessFlags(VkImageLayout layout)
 {
@@ -1119,14 +1103,21 @@
                                 const Format &format,
                                 GLint samples,
                                 VkImageUsageFlags usage,
-                                uint32_t mipLevels)
+                                uint32_t mipLevels,
+                                uint32_t layerCount)
 {
     ASSERT(!valid());
 
+    // Validate that the input layerCount is compatible with the texture type
+    ASSERT(textureType != gl::TextureType::_3D || layerCount == 1);
+    ASSERT(textureType != gl::TextureType::External || layerCount == 1);
+    ASSERT(textureType != gl::TextureType::Rectangle || layerCount == 1);
+    ASSERT(textureType != gl::TextureType::CubeMap || layerCount == gl::kCubeFaceCount);
+
     mExtents    = extents;
     mFormat     = &format;
     mSamples    = samples;
-    mLayerCount = GetImageLayerCount(textureType);
+    mLayerCount = layerCount;
     mLevelCount = mipLevels;
 
     VkImageCreateInfo imageInfo     = {};
@@ -1251,22 +1242,20 @@
 
 angle::Result ImageHelper::init2DStaging(Context *context,
                                          const MemoryProperties &memoryProperties,
-                                         const Format &format,
                                          const gl::Extents &extents,
-                                         StagingUsage usage)
+                                         const Format &format,
+                                         VkImageUsageFlags usage,
+                                         uint32_t layerCount)
 {
     ASSERT(!valid());
 
     mExtents    = extents;
     mFormat     = &format;
     mSamples    = 1;
-    mLayerCount = 1;
+    mLayerCount = layerCount;
     mLevelCount = 1;
 
-    // Use Preinitialized for writable staging images - in these cases we want to map the memory
-    // before we do a copy. For readback images, use an undefined layout.
-    mCurrentLayout =
-        usage == StagingUsage::Read ? VK_IMAGE_LAYOUT_UNDEFINED : VK_IMAGE_LAYOUT_PREINITIALIZED;
+    mCurrentLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
     VkImageCreateInfo imageInfo     = {};
     imageInfo.sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
@@ -1277,10 +1266,10 @@
     imageInfo.extent.height         = static_cast<uint32_t>(extents.height);
     imageInfo.extent.depth          = 1;
     imageInfo.mipLevels             = 1;
-    imageInfo.arrayLayers           = 1;
+    imageInfo.arrayLayers           = mLayerCount;
     imageInfo.samples               = gl_vk::GetSamples(mSamples);
-    imageInfo.tiling                = VK_IMAGE_TILING_LINEAR;
-    imageInfo.usage                 = GetStagingImageUsageFlags(usage);
+    imageInfo.tiling                = VK_IMAGE_TILING_OPTIMAL;
+    imageInfo.usage                 = usage;
     imageInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
     imageInfo.queueFamilyIndexCount = 0;
     imageInfo.pQueueFamilyIndices   = nullptr;
@@ -1288,13 +1277,8 @@
 
     ANGLE_VK_TRY(context, mImage.init(context->getDevice(), imageInfo));
 
-    // Allocate and bind host visible and coherent Image memory.
-    // TODO(ynovikov): better approach would be to request just visible memory,
-    // and call vkInvalidateMappedMemoryRanges if the allocated memory is not coherent.
-    // This would solve potential issues of:
-    // 1) not having (enough) coherent memory and 2) coherent memory being slower
-    VkMemoryPropertyFlags memoryPropertyFlags =
-        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+    // Allocate and bind device-local memory.
+    VkMemoryPropertyFlags memoryPropertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     ANGLE_TRY(initMemory(context, memoryProperties, memoryPropertyFlags));
 
     return angle::Result::Continue;
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.h b/src/libANGLE/renderer/vulkan/vk_helpers.h
index 8e87cc1..389b386 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.h
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.h
@@ -475,7 +475,8 @@
                        const Format &format,
                        GLint samples,
                        VkImageUsageFlags usage,
-                       uint32_t mipLevels);
+                       uint32_t mipLevels,
+                       uint32_t layerCount);
     angle::Result initMemory(Context *context,
                              const MemoryProperties &memoryProperties,
                              VkMemoryPropertyFlags flags);
@@ -494,11 +495,16 @@
                                 const gl::SwizzleState &swizzleMap,
                                 ImageView *imageViewOut,
                                 uint32_t levelCount);
+    // Create a 2D[Array] for staging purposes.  Used by:
+    //
+    // - TextureVk::copySubImageImplWithDraw
+    //
     angle::Result init2DStaging(Context *context,
                                 const MemoryProperties &memoryProperties,
-                                const Format &format,
                                 const gl::Extents &extent,
-                                StagingUsage usage);
+                                const Format &format,
+                                VkImageUsageFlags usage,
+                                uint32_t layerCount);
 
     void release(RendererVk *renderer);
 
diff --git a/src/libANGLE/renderer/vulkan/vk_utils.cpp b/src/libANGLE/renderer/vulkan/vk_utils.cpp
index 925e055..eb64a0b 100644
--- a/src/libANGLE/renderer/vulkan/vk_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_utils.cpp
@@ -138,18 +138,6 @@
     return angle::Result::Continue;
 }
 
-uint32_t GetImageLayerCount(gl::TextureType textureType)
-{
-    if (textureType == gl::TextureType::CubeMap)
-    {
-        return gl::kCubeFaceCount;
-    }
-    else
-    {
-        return 1;
-    }
-}
-
 const char *g_VkLoaderLayersPathEnv = "VK_LAYER_PATH";
 const char *g_VkICDPathEnv          = "VK_ICD_FILENAMES";
 
@@ -1484,9 +1472,13 @@
     switch (textureType)
     {
         case gl::TextureType::_2D:
-            return VK_IMAGE_TYPE_2D;
+        case gl::TextureType::_2DArray:
+        case gl::TextureType::_2DMultisample:
+        case gl::TextureType::_2DMultisampleArray:
         case gl::TextureType::CubeMap:
             return VK_IMAGE_TYPE_2D;
+        case gl::TextureType::_3D:
+            return VK_IMAGE_TYPE_3D;
         default:
             // We will need to implement all the texture types for ES3+.
             UNIMPLEMENTED();
@@ -1499,7 +1491,13 @@
     switch (textureType)
     {
         case gl::TextureType::_2D:
+        case gl::TextureType::_2DMultisample:
             return VK_IMAGE_VIEW_TYPE_2D;
+        case gl::TextureType::_2DArray:
+        case gl::TextureType::_2DMultisampleArray:
+            return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+        case gl::TextureType::_3D:
+            return VK_IMAGE_VIEW_TYPE_3D;
         case gl::TextureType::CubeMap:
             return VK_IMAGE_VIEW_TYPE_CUBE;
         default:
diff --git a/src/libANGLE/renderer/vulkan/vk_utils.h b/src/libANGLE/renderer/vulkan/vk_utils.h
index b7d48b3..fa28994 100644
--- a/src/libANGLE/renderer/vulkan/vk_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_utils.h
@@ -77,8 +77,6 @@
                                   const char *const **enabledLayerNames,
                                   uint32_t *enabledLayerCount);
 
-uint32_t GetImageLayerCount(gl::TextureType textureType);
-
 extern const char *g_VkLoaderLayersPathEnv;
 extern const char *g_VkICDPathEnv;