Vulkan: Store reference to context command buffer.

This frees us from checking the framebuffer on every draw, which
slightly reduces the time spent in all draw methods. The improvement
shows up in the draw call overhead tests: scores went from
28.17 ns/draw to 26.76 ns/draw on my machine.

In a future improvement we could make this command buffer a dirty bit.
Currently it's a bit slower to call a handler function due to the
dispatch table. Likely we could optimize this by reverting back to a
dirty bit switch and inlining the handler functions. That is left for
future work.

Vulkan is happy enough to run multiple RenderPasses and bind different
Pipelines in the same command buffer. However, ANGLE defers RenderPass
init until we submit our work. Thus we can only support one RenderPass
per secondary command buffer.

Test: angle_perftests DrawCall*/vulkan_null
Bug: angleproject:3014
Change-Id: I89fd0d9e0822400a5c5a16acb5a9c400a0e71ab5
Reviewed-on: https://chromium-review.googlesource.com/c/1393905
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libANGLE/Display.h b/src/libANGLE/Display.h
index 10f9d0b..06b32ea 100644
--- a/src/libANGLE/Display.h
+++ b/src/libANGLE/Display.h
@@ -175,6 +175,9 @@
 
     const DisplayState &getState() const { return mState; }
 
+    typedef std::set<gl::Context *> ContextSet;
+    const ContextSet &getContextSet() { return mContextSet; }
+
   private:
     Display(EGLenum platform, EGLNativeDisplayType displayId, Device *eglDevice);
 
@@ -193,7 +196,6 @@
 
     ConfigSet mConfigSet;
 
-    typedef std::set<gl::Context *> ContextSet;
     ContextSet mContextSet;
 
     typedef std::set<Image *> ImageSet;
diff --git a/src/libANGLE/renderer/vulkan/CommandGraph.cpp b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
index 03f4ff1..979b1e7 100644
--- a/src/libANGLE/renderer/vulkan/CommandGraph.cpp
+++ b/src/libANGLE/renderer/vulkan/CommandGraph.cpp
@@ -11,6 +11,7 @@
 
 #include <iostream>
 
+#include "libANGLE/renderer/vulkan/ContextVk.h"
 #include "libANGLE/renderer/vulkan/RenderTargetVk.h"
 #include "libANGLE/renderer/vulkan/RendererVk.h"
 #include "libANGLE/renderer/vulkan/vk_format_utils.h"
@@ -138,7 +139,7 @@
     return mCurrentWritingNode->getRenderPassRenderArea();
 }
 
-angle::Result RecordableGraphResource::beginRenderPass(Context *context,
+angle::Result RecordableGraphResource::beginRenderPass(ContextVk *contextVk,
                                                        const Framebuffer &framebuffer,
                                                        const gl::Rectangle &renderArea,
                                                        const RenderPassDesc &renderPassDesc,
@@ -148,14 +149,16 @@
     // If a barrier has been inserted in the meantime, stop the command buffer.
     if (!hasChildlessWritingNode())
     {
-        startNewCommands(context->getRenderer());
+        startNewCommands(contextVk->getRenderer());
     }
 
     // Hard-code RenderPass to clear the first render target to the current clear value.
     // TODO(jmadill): Proper clear value implementation. http://anglebug.com/2361
     mCurrentWritingNode->storeRenderPassInfo(framebuffer, renderArea, renderPassDesc, clearValues);
 
-    return mCurrentWritingNode->beginInsideRenderPassRecording(context, commandBufferOut);
+    mCurrentWritingNode->setCommandBufferOwner(contextVk);
+
+    return mCurrentWritingNode->beginInsideRenderPassRecording(contextVk, commandBufferOut);
 }
 
 void RecordableGraphResource::addWriteDependency(RecordableGraphResource *writingResource)
@@ -263,7 +266,8 @@
       mHasChildren(false),
       mVisitedState(VisitedState::Unvisited),
       mGlobalMemoryBarrierSrcAccess(0),
-      mGlobalMemoryBarrierDstAccess(0)
+      mGlobalMemoryBarrierDstAccess(0),
+      mCommandBufferOwner(nullptr)
 {}
 
 CommandGraphNode::~CommandGraphNode()
diff --git a/src/libANGLE/renderer/vulkan/CommandGraph.h b/src/libANGLE/renderer/vulkan/CommandGraph.h
index 40d0a7e..840f6bd 100644
--- a/src/libANGLE/renderer/vulkan/CommandGraph.h
+++ b/src/libANGLE/renderer/vulkan/CommandGraph.h
@@ -42,6 +42,21 @@
     WriteTimestamp,
 };
 
+// Receives notifications when a command buffer is no longer able to record. Can be used with
+// inheritance. Faster than using an interface class since it has inlined methods. Could be used
+// with composition by adding a getCommandBuffer method.
+class CommandBufferOwner
+{
+  public:
+    CommandBufferOwner() = default;
+    virtual ~CommandBufferOwner() {}
+
+    ANGLE_INLINE void onCommandBufferFinished() { mCommandBuffer = nullptr; }
+
+  protected:
+    vk::CommandBuffer *mCommandBuffer = nullptr;
+};
+
 // Only used internally in the command graph. Kept in the header for better inlining performance.
 class CommandGraphNode final : angle::NonCopyable
 {
@@ -119,8 +134,22 @@
         mGlobalMemoryBarrierDstAccess |= dstAccess;
     }
 
+    // This can only be set for RenderPass nodes. Each RenderPass node can have at most one owner.
+    void setCommandBufferOwner(CommandBufferOwner *owner)
+    {
+        ASSERT(mCommandBufferOwner == nullptr);
+        mCommandBufferOwner = owner;
+    }
+
   private:
-    void setHasChildren() { mHasChildren = true; }
+    ANGLE_INLINE void setHasChildren()
+    {
+        mHasChildren = true;
+        if (mCommandBufferOwner)
+        {
+            mCommandBufferOwner->onCommandBufferFinished();
+        }
+    }
 
     // Used for testing only.
     bool isChildOf(CommandGraphNode *parent);
@@ -158,6 +187,9 @@
     // For global memory barriers.
     VkFlags mGlobalMemoryBarrierSrcAccess;
     VkFlags mGlobalMemoryBarrierDstAccess;
+
+    // Command buffer notifications.
+    CommandBufferOwner *mCommandBufferOwner;
 };
 
 // This is a helper class for back-end objects used in Vk command buffers. It records a serial
@@ -227,7 +259,7 @@
 
     // Begins a command buffer on the current graph node for in-RenderPass rendering.
     // Called from FramebufferVk::startNewRenderPass and UtilsVk functions.
-    angle::Result beginRenderPass(Context *context,
+    angle::Result beginRenderPass(ContextVk *contextVk,
                                   const Framebuffer &framebuffer,
                                   const gl::Rectangle &renderArea,
                                   const RenderPassDesc &renderPassDesc,
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index 3ffc47d..9af3932 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -212,13 +212,7 @@
         mGraphicsPipelineDesc->updateTopology(&mGraphicsPipelineTransition, mCurrentDrawMode);
     }
 
-    if (!mDrawFramebuffer->appendToStartedRenderPass(mRenderer->getCurrentQueueSerial(),
-                                                     commandBufferOut))
-    {
-        ANGLE_TRY(mDrawFramebuffer->startNewRenderPass(this, commandBufferOut));
-        mDirtyBits |= mNewCommandBufferDirtyBits;
-    }
-
+    // Must be called before the command buffer is started. Can call finish.
     if (context->getStateCache().hasAnyActiveClientAttrib())
     {
         ANGLE_TRY(mVertexArray->updateClientAttribs(context, firstVertex, vertexOrIndexCount,
@@ -226,6 +220,27 @@
         mDirtyBits.set(DIRTY_BIT_VERTEX_BUFFERS);
     }
 
+    // This could be improved using a dirty bit. But currently it's slower to use a handler
+    // function than an inlined if. We should probably replace the dirty bit dispatch table
+    // with a switch with inlined handler functions.
+    // TODO(jmadill): Use dirty bit. http://anglebug.com/3014
+    if (!mCommandBuffer)
+    {
+        if (!mDrawFramebuffer->appendToStartedRenderPass(mRenderer->getCurrentQueueSerial(),
+                                                         &mCommandBuffer))
+        {
+            ANGLE_TRY(mDrawFramebuffer->startNewRenderPass(this, &mCommandBuffer));
+            mDirtyBits |= mNewCommandBufferDirtyBits;
+        }
+    }
+
+    // We keep a local copy of the command buffer. It's possible that some state changes could
+    // trigger a command buffer invalidation. The local copy ensures we retain the reference.
+    // Command buffers are pool allocated and only deleted after submit. Thus we know the
+    // command buffer will still be valid for the duration of this API call.
+    *commandBufferOut = mCommandBuffer;
+    ASSERT(*commandBufferOut);
+
     if (mProgram->dirtyUniforms())
     {
         ANGLE_TRY(mProgram->updateUniforms(this));
@@ -791,6 +806,13 @@
                 break;
             case gl::State::DIRTY_BIT_DRAW_FRAMEBUFFER_BINDING:
             {
+                // FramebufferVk::syncState signals that we should start a new command buffer. But
+                // changing the binding can skip FramebufferVk::syncState if the Framebuffer has no
+                // dirty bits. Thus we need to explicitly clear the current command buffer to
+                // ensure we start a new one. Note that we need a new command buffer because a
+                // command graph node can only support one RenderPass configuration at a time.
+                onCommandBufferFinished();
+
                 mDrawFramebuffer = vk::GetImpl(glState.getDrawFramebuffer());
                 updateFlipViewportDrawFramebuffer(glState);
                 updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(),
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h
index 450beda..b4eaa30 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -26,7 +26,7 @@
 {
 class RendererVk;
 
-class ContextVk : public ContextImpl, public vk::Context
+class ContextVk : public ContextImpl, public vk::Context, public vk::CommandBufferOwner
 {
   public:
     ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk *renderer);
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index 43689c0..ede6f91 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -17,10 +17,12 @@
 #include "common/debug.h"
 #include "common/platform.h"
 #include "common/system_utils.h"
+#include "libANGLE/Context.h"
 #include "libANGLE/Display.h"
 #include "libANGLE/renderer/driver_utils.h"
 #include "libANGLE/renderer/vulkan/CommandGraph.h"
 #include "libANGLE/renderer/vulkan/CompilerVk.h"
+#include "libANGLE/renderer/vulkan/ContextVk.h"
 #include "libANGLE/renderer/vulkan/DisplayVk.h"
 #include "libANGLE/renderer/vulkan/FramebufferVk.h"
 #include "libANGLE/renderer/vulkan/GlslangWrapper.h"
@@ -570,8 +572,7 @@
     mDeviceLost = true;
 
     mCommandGraph.clear();
-    mLastSubmittedQueueSerial = mCurrentQueueSerial;
-    mCurrentQueueSerial       = mQueueSerialFactory.generate();
+    nextSerial();
     freeAllInFlightResources();
 
     mDisplay->notifyDeviceLost();
@@ -1312,10 +1313,7 @@
     // InterleavedAttributeDataBenchmark perf test for example issues a large number of flushes.
     ASSERT(mInFlightCommands.size() <= kInFlightCommandsLimit);
 
-    // Increment the queue serial. If this fails, we should restart ANGLE.
-    // TODO(jmadill): Overflow check.
-    mLastSubmittedQueueSerial = mCurrentQueueSerial;
-    mCurrentQueueSerial       = mQueueSerialFactory.generate();
+    nextSerial();
 
     ANGLE_TRY(checkCompletedCommands(context));
 
@@ -1338,6 +1336,25 @@
     return angle::Result::Continue;
 }
 
+void RendererVk::nextSerial()
+{
+    // Increment the queue serial. If this fails, we should restart ANGLE.
+    mLastSubmittedQueueSerial = mCurrentQueueSerial;
+    mCurrentQueueSerial       = mQueueSerialFactory.generate();
+
+    // Notify the Contexts that they should be starting new command buffers.
+    // We use one command pool per serial/submit associated with this VkQueue. We can also
+    // have multiple Contexts sharing one VkQueue. In ContextVk::setupDraw we don't explicitly
+    // check for a new serial when starting a new command buffer. We just check that the current
+    // recording command buffer is valid. Thus we need to explicitly notify every other Context
+    // using this VkQueue that their current command buffer is no longer valid.
+    for (gl::Context *context : mDisplay->getContextSet())
+    {
+        ContextVk *contextVk = vk::GetImpl(context);
+        contextVk->onCommandBufferFinished();
+    }
+}
+
 bool RendererVk::isSerialInUse(Serial serial) const
 {
     return serial > mLastCompletedQueueSerial;
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.h b/src/libANGLE/renderer/vulkan/RendererVk.h
index d93b28c..8bd247e 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -230,6 +230,8 @@
     template <VkFormatFeatureFlags VkFormatProperties::*features>
     bool hasFormatFeatureBits(VkFormat format, const VkFormatFeatureFlags featureBits);
 
+    void nextSerial();
+
     egl::Display *mDisplay;
 
     mutable bool mCapsInitialized;
diff --git a/src/libANGLE/renderer/vulkan/UtilsVk.cpp b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
index 875d624..0d403a5 100644
--- a/src/libANGLE/renderer/vulkan/UtilsVk.cpp
+++ b/src/libANGLE/renderer/vulkan/UtilsVk.cpp
@@ -571,17 +571,17 @@
     return angle::Result::Continue;
 }
 
-angle::Result UtilsVk::startRenderPass(vk::Context *context,
+angle::Result UtilsVk::startRenderPass(ContextVk *contextVk,
                                        vk::ImageHelper *image,
                                        const vk::ImageView *imageView,
                                        const vk::RenderPassDesc &renderPassDesc,
                                        const gl::Rectangle &renderArea,
                                        vk::CommandBuffer **commandBufferOut)
 {
-    RendererVk *renderer = context->getRenderer();
+    RendererVk *renderer = contextVk->getRenderer();
 
     vk::RenderPass *renderPass = nullptr;
-    ANGLE_TRY(renderer->getCompatibleRenderPass(context, renderPassDesc, &renderPass));
+    ANGLE_TRY(renderer->getCompatibleRenderPass(contextVk, renderPassDesc, &renderPass));
 
     VkFramebufferCreateInfo framebufferInfo = {};
 
@@ -595,14 +595,14 @@
     framebufferInfo.layers          = 1;
 
     vk::Framebuffer framebuffer;
-    ANGLE_VK_TRY(context, framebuffer.init(context->getDevice(), framebufferInfo));
+    ANGLE_VK_TRY(contextVk, framebuffer.init(contextVk->getDevice(), framebufferInfo));
 
     // TODO(jmadill): Proper clear value implementation. http://anglebug.com/2361
     std::vector<VkClearValue> clearValues = {{}};
     ASSERT(clearValues.size() == 1);
 
-    ANGLE_TRY(image->beginRenderPass(context, framebuffer, renderArea, renderPassDesc, clearValues,
-                                     commandBufferOut));
+    ANGLE_TRY(image->beginRenderPass(contextVk, framebuffer, renderArea, renderPassDesc,
+                                     clearValues, commandBufferOut));
 
     renderer->releaseObject(renderer->getCurrentQueueSerial(), &framebuffer);
 
@@ -658,16 +658,16 @@
     return angle::Result::Continue;
 }
 
-angle::Result UtilsVk::copyImage(vk::Context *context,
+angle::Result UtilsVk::copyImage(ContextVk *contextVk,
                                  vk::ImageHelper *dest,
                                  const vk::ImageView *destView,
                                  vk::ImageHelper *src,
                                  const vk::ImageView *srcView,
                                  const CopyImageParameters &params)
 {
-    RendererVk *renderer = context->getRenderer();
+    RendererVk *renderer = contextVk->getRenderer();
 
-    ANGLE_TRY(ensureImageCopyResourcesInitialized(context));
+    ANGLE_TRY(ensureImageCopyResourcesInitialized(contextVk));
 
     const vk::Format &srcFormat  = src->getFormat();
     const vk::Format &destFormat = dest->getFormat();
@@ -707,9 +707,9 @@
     VkDescriptorSet descriptorSet;
     vk::SharedDescriptorPoolBinding descriptorPoolBinding;
     ANGLE_TRY(mDescriptorPools[Function::ImageCopy].allocateSets(
-        context, mDescriptorSetLayouts[Function::ImageCopy][kSetIndex].get().ptr(), 1,
+        contextVk, mDescriptorSetLayouts[Function::ImageCopy][kSetIndex].get().ptr(), 1,
         &descriptorPoolBinding, &descriptorSet));
-    descriptorPoolBinding.get().updateSerial(context->getRenderer()->getCurrentQueueSerial());
+    descriptorPoolBinding.get().updateSerial(contextVk->getRenderer()->getCurrentQueueSerial());
 
     vk::RenderPassDesc renderPassDesc;
     renderPassDesc.setSamples(dest->getSamples());
@@ -736,8 +736,7 @@
     if (src->getCurrentLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
     {
         vk::CommandBuffer *srcLayoutChange;
-        ANGLE_TRY(src->recordCommands(context, &srcLayoutChange));
-
+        ANGLE_TRY(src->recordCommands(contextVk, &srcLayoutChange));
         src->changeLayoutWithStages(VK_IMAGE_ASPECT_COLOR_BIT,
                                     VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                     VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
@@ -746,7 +745,7 @@
 
     // Change destination layout outside render pass as well
     vk::CommandBuffer *destLayoutChange;
-    ANGLE_TRY(dest->recordCommands(context, &destLayoutChange));
+    ANGLE_TRY(dest->recordCommands(contextVk, &destLayoutChange));
 
     dest->changeLayoutWithStages(VK_IMAGE_ASPECT_COLOR_BIT,
                                  VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
@@ -754,7 +753,8 @@
                                  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, destLayoutChange);
 
     vk::CommandBuffer *commandBuffer;
-    ANGLE_TRY(startRenderPass(context, dest, destView, renderPassDesc, renderArea, &commandBuffer));
+    ANGLE_TRY(
+        startRenderPass(contextVk, dest, destView, renderPassDesc, renderArea, &commandBuffer));
 
     // Source's layout change should happen before rendering
     src->addReadDependency(dest);
@@ -771,15 +771,15 @@
     writeInfo.descriptorType       = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
     writeInfo.pImageInfo           = &imageInfo;
 
-    vkUpdateDescriptorSets(context->getDevice(), 1, &writeInfo, 0, nullptr);
+    vkUpdateDescriptorSets(contextVk->getDevice(), 1, &writeInfo, 0, nullptr);
 
     vk::ShaderLibrary &shaderLibrary                    = renderer->getShaderLibrary();
     vk::RefCounted<vk::ShaderAndSerial> *vertexShader   = nullptr;
     vk::RefCounted<vk::ShaderAndSerial> *fragmentShader = nullptr;
-    ANGLE_TRY(shaderLibrary.getFullScreenQuad_vert(context, 0, &vertexShader));
-    ANGLE_TRY(shaderLibrary.getImageCopy_frag(context, flags, &fragmentShader));
+    ANGLE_TRY(shaderLibrary.getFullScreenQuad_vert(contextVk, 0, &vertexShader));
+    ANGLE_TRY(shaderLibrary.getImageCopy_frag(contextVk, flags, &fragmentShader));
 
-    ANGLE_TRY(setupProgram(context, Function::ImageCopy, fragmentShader, vertexShader,
+    ANGLE_TRY(setupProgram(contextVk, Function::ImageCopy, fragmentShader, vertexShader,
                            &mImageCopyPrograms[flags], &pipelineDesc, descriptorSet, &shaderParams,
                            sizeof(shaderParams), commandBuffer));
 
diff --git a/src/libANGLE/renderer/vulkan/UtilsVk.h b/src/libANGLE/renderer/vulkan/UtilsVk.h
index db6ab74..574b468 100644
--- a/src/libANGLE/renderer/vulkan/UtilsVk.h
+++ b/src/libANGLE/renderer/vulkan/UtilsVk.h
@@ -101,7 +101,7 @@
                              FramebufferVk *framebuffer,
                              const ClearImageParameters &params);
 
-    angle::Result copyImage(vk::Context *context,
+    angle::Result copyImage(ContextVk *contextVk,
                             vk::ImageHelper *dest,
                             const vk::ImageView *destView,
                             vk::ImageHelper *src,
@@ -213,7 +213,7 @@
     angle::Result ensureImageClearResourcesInitialized(vk::Context *context);
     angle::Result ensureImageCopyResourcesInitialized(vk::Context *context);
 
-    angle::Result startRenderPass(vk::Context *context,
+    angle::Result startRenderPass(ContextVk *contextVk,
                                   vk::ImageHelper *image,
                                   const vk::ImageView *imageView,
                                   const vk::RenderPassDesc &renderPassDesc,