Vulkan: revamp present semaphore management
See doc/PresentSemaphores.md for details.
Bug: angleproject:3450
Bug: angleproject:3670
Change-Id: I52d5bd13a4af25f224d386c9584525c182af6f17
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1776880
Reviewed-by: Tim Van Patten <timvp@google.com>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index c6b5b4c..e789575 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -1569,7 +1569,7 @@
}
else
{
- mFenceRecycler.fetch(mDevice, &fence);
+ mFenceRecycler.fetch(&fence);
ANGLE_VK_TRY(context, fence.reset(mDevice));
}
sharedFenceOut->assign(mDevice, std::move(fence));
diff --git a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
index 1d8a4d6..c380f19 100644
--- a/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
+++ b/src/libANGLE/renderer/vulkan/SurfaceVk.cpp
@@ -321,40 +321,77 @@
return &mColorAttachment.image;
}
-WindowSurfaceVk::SwapchainImage::SwapchainImage() = default;
-WindowSurfaceVk::SwapchainImage::~SwapchainImage() = default;
-
-WindowSurfaceVk::SwapchainImage::SwapchainImage(SwapchainImage &&other)
- : image(std::move(other.image)),
- imageView(std::move(other.imageView)),
- framebuffer(std::move(other.framebuffer))
-{}
-
-WindowSurfaceVk::SwapHistory::SwapHistory() = default;
-
-WindowSurfaceVk::SwapHistory::~SwapHistory() = default;
-
-void WindowSurfaceVk::SwapHistory::destroy(RendererVk *renderer)
+namespace impl
{
- if (swapchain != VK_NULL_HANDLE)
+SwapchainCleanupData::SwapchainCleanupData() = default;
+SwapchainCleanupData::~SwapchainCleanupData()
+{
+ ASSERT(swapchain == VK_NULL_HANDLE);
+ ASSERT(semaphores.empty());
+}
+
+SwapchainCleanupData::SwapchainCleanupData(SwapchainCleanupData &&other)
+ : swapchain(other.swapchain), semaphores(std::move(other.semaphores))
+{
+ other.swapchain = VK_NULL_HANDLE;
+}
+
+void SwapchainCleanupData::destroy(VkDevice device, vk::Recycler<vk::Semaphore> *semaphoreRecycler)
+{
+ if (swapchain)
{
- vkDestroySwapchainKHR(renderer->getDevice(), swapchain, nullptr);
+ vkDestroySwapchainKHR(device, swapchain, nullptr);
swapchain = VK_NULL_HANDLE;
}
- renderer->resetSharedFence(&sharedFence);
- presentImageSemaphore.destroy(renderer->getDevice());
+ for (vk::Semaphore &semaphore : semaphores)
+ {
+ semaphoreRecycler->recycle(std::move(semaphore));
+ }
+ semaphores.clear();
}
-angle::Result WindowSurfaceVk::SwapHistory::waitFence(ContextVk *contextVk)
+ImagePresentHistory::ImagePresentHistory() = default;
+ImagePresentHistory::~ImagePresentHistory()
{
- if (sharedFence.isReferenced())
- {
- ANGLE_VK_TRY(contextVk, sharedFence.get().wait(contextVk->getDevice(),
- std::numeric_limits<uint64_t>::max()));
- }
+ ASSERT(!semaphore.valid());
+ ASSERT(oldSwapchains.empty());
+}
+
+ImagePresentHistory::ImagePresentHistory(ImagePresentHistory &&other)
+ : semaphore(std::move(other.semaphore)), oldSwapchains(std::move(other.oldSwapchains))
+{}
+
+SwapchainImage::SwapchainImage() = default;
+SwapchainImage::~SwapchainImage() = default;
+
+SwapchainImage::SwapchainImage(SwapchainImage &&other)
+ : image(std::move(other.image)),
+ imageView(std::move(other.imageView)),
+ framebuffer(std::move(other.framebuffer)),
+ presentHistory(std::move(other.presentHistory)),
+ currentPresentHistoryIndex(other.currentPresentHistoryIndex)
+{}
+
+SwapHistory::SwapHistory() = default;
+
+SwapHistory::~SwapHistory() = default;
+
+void SwapHistory::destroy(RendererVk *renderer)
+{
+ renderer->resetSharedFence(&sharedFence);
+}
+
+angle::Result SwapHistory::waitFence(ContextVk *contextVk)
+{
+ ASSERT(sharedFence.isReferenced());
+ ANGLE_VK_TRY(contextVk, sharedFence.get().wait(contextVk->getDevice(),
+ std::numeric_limits<uint64_t>::max()));
return angle::Result::Continue;
}
+} // namespace impl
+
+using namespace impl;
WindowSurfaceVk::WindowSurfaceVk(const egl::SurfaceState &surfaceState,
EGLNativeWindowType window,
@@ -370,8 +407,8 @@
mMinImageCount(0),
mPreTransform(VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR),
mCompositeAlpha(VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR),
- mCurrentSwapchainImageIndex(0),
- mCurrentSwapHistoryIndex(0)
+ mCurrentSwapHistoryIndex(0),
+ mCurrentSwapchainImageIndex(0)
{
// Initialize the color render target with the multisampled targets. If not multisampled, the
// render target will be updated to refer to a swapchain image on every acquire.
@@ -408,6 +445,12 @@
mSwapchain = VK_NULL_HANDLE;
}
+ for (SwapchainCleanupData &oldSwapchain : mOldSwapchains)
+ {
+ oldSwapchain.destroy(device, &mPresentSemaphoreRecycler);
+ }
+ mOldSwapchains.clear();
+
if (mSurface)
{
vkDestroySurfaceKHR(instance, mSurface, nullptr);
@@ -415,6 +458,7 @@
}
mAcquireImageSemaphore.destroy(device);
+ mPresentSemaphoreRecycler.destroy(device);
}
egl::Error WindowSurfaceVk::initialize(const egl::Display *display)
@@ -538,27 +582,131 @@
const gl::Extents &extents,
uint32_t swapHistoryIndex)
{
- VkSwapchainKHR oldSwapchain = mSwapchain;
- mSwapchain = VK_NULL_HANDLE;
+ // If mOldSwapchains is not empty, it means that a new swapchain was created, but before
+ // any of its images were presented, it's asked to be recreated. In this case, we can destroy
+ // the current swapchain immediately (although the old swapchains still need to be kept to be
+ // scheduled for destruction). This can happen for example if vkQueuePresentKHR returns
+ // OUT_OF_DATE, the swapchain is recreated and the following vkAcquireNextImageKHR again
+ // returns OUT_OF_DATE.
+ //
+ // Otherwise, keep the current swapchain as the old swapchain to be scheduled for destruction
+ // and create a new one.
- if (oldSwapchain)
+ VkSwapchainKHR swapchainToDestroy = VK_NULL_HANDLE;
+
+ if (!mOldSwapchains.empty())
{
- // Note: the old swapchain must be destroyed regardless of whether creating the new
- // swapchain succeeds. We can only destroy the swapchain once rendering to all its images
- // have finished. We therefore store the handle to the swapchain being destroyed in the
- // swap history (alongside the serial of the last submission) so it can be destroyed once we
- // wait on that serial as part of the CPU throttling.
- mSwapHistory[swapHistoryIndex].swapchain = oldSwapchain;
+ // Keep the old swapchain, destroy the current (never-used) swapchain.
+ swapchainToDestroy = mSwapchain;
+
+ // Recycle present semaphores.
+ for (SwapchainImage &swapchainImage : mSwapchainImages)
+ {
+ for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
+ {
+ ASSERT(presentHistory.semaphore.valid());
+ ASSERT(presentHistory.oldSwapchains.empty());
+
+ mPresentSemaphoreRecycler.recycle(std::move(presentHistory.semaphore));
+ }
+ }
}
+ else
+ {
+ SwapchainCleanupData cleanupData;
+
+ // Remember the current swapchain to be scheduled for destruction later.
+ cleanupData.swapchain = mSwapchain;
+
+ // Accumulate the semaphores to be destroyed at the same time as the swapchain.
+ for (SwapchainImage &swapchainImage : mSwapchainImages)
+ {
+ for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
+ {
+ ASSERT(presentHistory.semaphore.valid());
+ cleanupData.semaphores.emplace_back(std::move(presentHistory.semaphore));
+
+ // Accumulate any previous swapchains that are pending destruction too.
+ for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
+ {
+ mOldSwapchains.emplace_back(std::move(oldSwapchain));
+ }
+ presentHistory.oldSwapchains.clear();
+ }
+ }
+
+ // If too many old swapchains have accumulated, wait idle and destroy them. This is to
+ // prevent failures due to too many swapchains allocated.
+ //
+ // Note: Nvidia has been observed to fail creation of swapchains after 20 are allocated on
+ // desktop, or less than 10 on Quadro P400.
+ static constexpr size_t kMaxOldSwapchains = 5;
+ if (mOldSwapchains.size() > kMaxOldSwapchains)
+ {
+ ANGLE_TRY(contextVk->getRenderer()->queueWaitIdle(contextVk));
+ for (SwapchainCleanupData &oldSwapchain : mOldSwapchains)
+ {
+ oldSwapchain.destroy(contextVk->getDevice(), &mPresentSemaphoreRecycler);
+ }
+ mOldSwapchains.clear();
+ }
+
+ mOldSwapchains.emplace_back(std::move(cleanupData));
+ }
+
+ // Recreate the swapchain based on the most recent one.
+ VkSwapchainKHR lastSwapchain = mSwapchain;
+ mSwapchain = VK_NULL_HANDLE;
releaseSwapchainImages(contextVk);
- return createSwapChain(contextVk, extents, oldSwapchain);
+ angle::Result result = createSwapChain(contextVk, extents, lastSwapchain);
+
+ // If the most recent swapchain was never used, destroy it right now.
+ if (swapchainToDestroy)
+ {
+ vkDestroySwapchainKHR(contextVk->getDevice(), swapchainToDestroy, nullptr);
+ }
+
+ return result;
+}
+
+angle::Result WindowSurfaceVk::newPresentSemaphore(vk::Context *context,
+ vk::Semaphore *semaphoreOut)
+{
+ if (mPresentSemaphoreRecycler.empty())
+ {
+ ANGLE_VK_TRY(context, semaphoreOut->init(context->getDevice()));
+ }
+ else
+ {
+ mPresentSemaphoreRecycler.fetch(semaphoreOut);
+ }
+ return angle::Result::Continue;
+}
+
+angle::Result WindowSurfaceVk::resizeSwapchainImages(vk::Context *context, uint32_t imageCount)
+{
+ mSwapchainImages.resize(imageCount);
+
+ // At this point, if there was a previous swapchain, the previous present semaphores have all
+ // been moved to mOldSwapchains to be scheduled for destruction, so all semaphore handles in
+ // mSwapchainImages should be invalid.
+ for (SwapchainImage &swapchainImage : mSwapchainImages)
+ {
+ for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
+ {
+ ASSERT(!presentHistory.semaphore.valid());
+ ANGLE_TRY(newPresentSemaphore(context, &presentHistory.semaphore));
+ }
+ }
+
+ return angle::Result::Continue;
}
angle::Result WindowSurfaceVk::createSwapChain(vk::Context *context,
const gl::Extents &extents,
- VkSwapchainKHR oldSwapchain)
+ VkSwapchainKHR lastSwapchain)
{
ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::createSwapchain");
@@ -592,7 +740,7 @@
swapchainInfo.compositeAlpha = mCompositeAlpha;
swapchainInfo.presentMode = mDesiredSwapchainPresentMode;
swapchainInfo.clipped = VK_TRUE;
- swapchainInfo.oldSwapchain = oldSwapchain;
+ swapchainInfo.oldSwapchain = lastSwapchain;
// TODO(syoussefi): Once EGL_SWAP_BEHAVIOR_PRESERVED_BIT is supported, the contents of the old
// swapchain need to carry over to the new one. http://anglebug.com/2942
@@ -632,7 +780,7 @@
mColorImageMS.stageClearIfEmulatedFormat(gl::ImageIndex::Make2D(0), format);
}
- mSwapchainImages.resize(imageCount);
+ ANGLE_TRY(resizeSwapchainImages(context, imageCount));
for (uint32_t imageIndex = 0; imageIndex < imageCount; ++imageIndex)
{
@@ -765,14 +913,14 @@
swapchainImage.image.resetImageWeakReference();
swapchainImage.image.destroy(contextVk->getDevice());
- if (swapchainImage.imageView.valid())
- {
- contextVk->releaseObject(imageSerial, &swapchainImage.imageView);
- }
+ contextVk->releaseObject(imageSerial, &swapchainImage.imageView);
+ contextVk->releaseObject(imageSerial, &swapchainImage.framebuffer);
- if (swapchainImage.framebuffer.valid())
+ // present history must have already been taken care of.
+ for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
{
- contextVk->releaseObject(imageSerial, &swapchainImage.framebuffer);
+ ASSERT(!presentHistory.semaphore.valid());
+ ASSERT(presentHistory.oldSwapchains.empty());
}
}
@@ -802,25 +950,39 @@
mFramebufferMS.dumpResources(&garbageObjects);
}
+ VkDevice device = displayVk->getDevice();
+
for (vk::GarbageObjectBase &garbage : garbageObjects)
{
- garbage.destroy(displayVk->getDevice());
+ garbage.destroy(device);
}
for (SwapchainImage &swapchainImage : mSwapchainImages)
{
// We don't own the swapchain image handles, so we just remove our reference to it.
swapchainImage.image.resetImageWeakReference();
- swapchainImage.image.destroy(displayVk->getDevice());
+ swapchainImage.image.destroy(device);
if (swapchainImage.imageView.valid())
{
- swapchainImage.imageView.destroy(displayVk->getDevice());
+ swapchainImage.imageView.destroy(device);
}
if (swapchainImage.framebuffer.valid())
{
- swapchainImage.framebuffer.destroy(displayVk->getDevice());
+ swapchainImage.framebuffer.destroy(device);
+ }
+
+ for (ImagePresentHistory &presentHistory : swapchainImage.presentHistory)
+ {
+ ASSERT(presentHistory.semaphore.valid());
+
+ mPresentSemaphoreRecycler.recycle(std::move(presentHistory.semaphore));
+ for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
+ {
+ oldSwapchain.destroy(device, &mPresentSemaphoreRecycler);
+ }
+ presentHistory.oldSwapchains.clear();
}
}
@@ -861,8 +1023,11 @@
SwapHistory &swap = mSwapHistory[mCurrentSwapHistoryIndex];
{
ANGLE_TRACE_EVENT0("gpu.angle", "WindowSurfaceVk::present: Throttle CPU");
- ANGLE_TRY(swap.waitFence(contextVk));
- swap.destroy(contextVk->getRenderer());
+ if (swap.sharedFence.isReferenced())
+ {
+ ANGLE_TRY(swap.waitFence(contextVk));
+ swap.destroy(contextVk->getRenderer());
+ }
}
SwapchainImage &image = mSwapchainImages[mCurrentSwapchainImageIndex];
@@ -899,14 +1064,34 @@
}
image.image.changeLayout(VK_IMAGE_ASPECT_COLOR_BIT, vk::ImageLayout::Present, swapCommands);
- ANGLE_VK_TRY(contextVk, swap.presentImageSemaphore.init(contextVk->getDevice()));
+ // Knowing that the kSwapHistorySize'th submission ago has finished, we can know that the
+ // (kSwapHistorySize+1)'th present ago of this image is definitely finished and so its wait
+ // semaphore can be reused. See doc/PresentSemaphores.md for details.
+ //
+ // This also means the swapchain(s) scheduled to be deleted at the same time can be deleted.
+ ImagePresentHistory &presentHistory = image.presentHistory[image.currentPresentHistoryIndex];
+ vk::Semaphore *presentSemaphore = &presentHistory.semaphore;
+ ASSERT(presentSemaphore->valid());
- ANGLE_TRY(contextVk->flushImpl(&swap.presentImageSemaphore));
+ for (SwapchainCleanupData &oldSwapchain : presentHistory.oldSwapchains)
+ {
+ oldSwapchain.destroy(contextVk->getDevice(), &mPresentSemaphoreRecycler);
+ }
+ presentHistory.oldSwapchains.clear();
+
+ // Schedule pending old swapchains to be destroyed at the same time the semaphore for this
+ // present can be destroyed.
+ presentHistory.oldSwapchains = std::move(mOldSwapchains);
+
+ image.currentPresentHistoryIndex =
+ (image.currentPresentHistoryIndex + 1) % image.presentHistory.size();
+
+ ANGLE_TRY(contextVk->flushImpl(presentSemaphore));
VkPresentInfoKHR presentInfo = {};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
presentInfo.waitSemaphoreCount = 1;
- presentInfo.pWaitSemaphores = swap.presentImageSemaphore.ptr();
+ presentInfo.pWaitSemaphores = presentSemaphore->ptr();
presentInfo.swapchainCount = 1;
presentInfo.pSwapchains = &mSwapchain;
presentInfo.pImageIndices = &mCurrentSwapchainImageIndex;
@@ -956,7 +1141,7 @@
// If OUT_OF_DATE is returned, it's ok, we just need to recreate the swapchain before
// continuing.
- // If VK_SUBOPTIMAL_KHR is returned we it's because the device orientation changed and we should
+ // If VK_SUBOPTIMAL_KHR is returned it's because the device orientation changed and we should
// recreate the swapchain with a new window orientation. We aren't quite ready for that so just
// ignore for now.
// TODO: Check for preRotation: http://anglebug.com/3502
diff --git a/src/libANGLE/renderer/vulkan/SurfaceVk.h b/src/libANGLE/renderer/vulkan/SurfaceVk.h
index 1198f5c..0949e3b 100644
--- a/src/libANGLE/renderer/vulkan/SurfaceVk.h
+++ b/src/libANGLE/renderer/vulkan/SurfaceVk.h
@@ -99,6 +99,78 @@
AttachmentImage mDepthStencilAttachment;
};
+// Data structures used in WindowSurfaceVk
+namespace impl
+{
+// The submission fence of the context used to throttle the CPU.
+struct SwapHistory : angle::NonCopyable
+{
+ SwapHistory();
+ SwapHistory(SwapHistory &&other) = delete;
+ SwapHistory &operator=(SwapHistory &&other) = delete;
+ ~SwapHistory();
+
+ void destroy(RendererVk *renderer);
+
+ angle::Result waitFence(ContextVk *contextVk);
+
+ // Fence associated with the last submitted work to render to this swapchain image.
+ vk::Shared<vk::Fence> sharedFence;
+};
+static constexpr size_t kSwapHistorySize = 2;
+
+// Old swapchain and associated present semaphores that need to be scheduled for destruction when
+// appropriate.
+struct SwapchainCleanupData : angle::NonCopyable
+{
+ SwapchainCleanupData();
+ SwapchainCleanupData(SwapchainCleanupData &&other);
+ ~SwapchainCleanupData();
+
+ void destroy(VkDevice device, vk::Recycler<vk::Semaphore> *semaphoreRecycler);
+
+ // The swapchain to be destroyed.
+ VkSwapchainKHR swapchain = VK_NULL_HANDLE;
+ // Any present semaphores that were pending destruction at the time the swapchain was
+ // recreated will be scheduled for destruction at the same time as the swapchain.
+ std::vector<vk::Semaphore> semaphores;
+};
+
+// A circular buffer per image stores the semaphores used for presenting that image. Taking the
+// swap history into account, only the oldest semaphore is guaranteed to be no longer in use by the
+// presentation engine. See doc/PresentSemaphores.md for details.
+//
+// Old swapchains are scheduled to be destroyed at the same time as the first semaphore used to
+// present an image of the new swapchain. This is to ensure that the presentation engine is no
+// longer presenting an image from the old swapchain.
+struct ImagePresentHistory : angle::NonCopyable
+{
+ ImagePresentHistory();
+ ImagePresentHistory(ImagePresentHistory &&other);
+ ~ImagePresentHistory();
+
+ vk::Semaphore semaphore;
+ std::vector<SwapchainCleanupData> oldSwapchains;
+};
+
+// Swapchain images and their associated objects.
+struct SwapchainImage : angle::NonCopyable
+{
+ SwapchainImage();
+ SwapchainImage(SwapchainImage &&other);
+ ~SwapchainImage();
+
+ vk::ImageHelper image;
+ vk::ImageView imageView;
+ vk::Framebuffer framebuffer;
+
+ // A circular array of semaphores used for presenting this image.
+ static constexpr size_t kPresentHistorySize = kSwapHistorySize + 1;
+ std::array<ImagePresentHistory, kPresentHistorySize> presentHistory;
+ size_t currentPresentHistoryIndex = 0;
+};
+} // namespace impl
+
class WindowSurfaceVk : public SurfaceVk
{
public:
@@ -163,6 +235,7 @@
angle::Result checkForOutOfDateSwapchain(ContextVk *contextVk,
uint32_t swapHistoryIndex,
bool presentOutOfDate);
+ angle::Result resizeSwapchainImages(vk::Context *context, uint32_t imageCount);
void releaseSwapchainImages(ContextVk *contextVk);
void destroySwapChainImages(DisplayVk *displayVk);
VkResult nextSwapchainImage(vk::Context *context);
@@ -173,6 +246,8 @@
angle::Result swapImpl(const gl::Context *context, EGLint *rects, EGLint n_rects);
+ angle::Result newPresentSemaphore(vk::Context *context, vk::Semaphore *semaphoreOut);
+
bool isMultiSampled() const;
VkSurfaceCapabilitiesKHR mSurfaceCaps;
@@ -186,46 +261,25 @@
VkSurfaceTransformFlagBitsKHR mPreTransform;
VkCompositeAlphaFlagBitsKHR mCompositeAlpha;
+ // A circular buffer that stores the submission fence of the context on every swap. The CPU is
+ // throttled by waiting for the fence of the 2nd previous submission to be signaled.
+ std::array<impl::SwapHistory, impl::kSwapHistorySize> mSwapHistory;
+ size_t mCurrentSwapHistoryIndex;
+
+ // The previous swapchain which needs to be scheduled for destruction when appropriate. This
+ // will be done when the first image of the current swapchain is presented. If there were
+ // older swapchains pending destruction when the swapchain is recreated, they will accumulate
+ // and be destroyed with the previous swapchain.
+ //
+ // Note that if the user resizes the window such that the swapchain is recreated every frame,
+ // this array can grow indefinitely.
+ std::vector<impl::SwapchainCleanupData> mOldSwapchains;
+
+ std::vector<impl::SwapchainImage> mSwapchainImages;
+ vk::Semaphore mAcquireImageSemaphore;
uint32_t mCurrentSwapchainImageIndex;
- struct SwapchainImage : angle::NonCopyable
- {
- SwapchainImage();
- SwapchainImage(SwapchainImage &&other);
- ~SwapchainImage();
-
- vk::ImageHelper image;
- vk::ImageView imageView;
- vk::Framebuffer framebuffer;
- };
-
- std::vector<SwapchainImage> mSwapchainImages;
- vk::Semaphore mAcquireImageSemaphore;
-
- // A circular buffer that stores the serial of the renderer on every swap. The CPU is
- // throttled by waiting for the 2nd previous serial to finish. Old swapchains are scheduled to
- // be destroyed at the same time.
- struct SwapHistory : angle::NonCopyable
- {
- SwapHistory();
- SwapHistory(SwapHistory &&other) = delete;
- SwapHistory &operator=(SwapHistory &&other) = delete;
- ~SwapHistory();
-
- void destroy(RendererVk *renderer);
-
- angle::Result waitFence(ContextVk *contextVk);
-
- // Fence associated with the last submitted work to render to this swapchain image.
- vk::Shared<vk::Fence> sharedFence;
-
- vk::Semaphore presentImageSemaphore;
-
- VkSwapchainKHR swapchain = VK_NULL_HANDLE;
- };
- static constexpr size_t kSwapHistorySize = 2;
- std::array<SwapHistory, kSwapHistorySize> mSwapHistory;
- size_t mCurrentSwapHistoryIndex;
+ vk::Recycler<vk::Semaphore> mPresentSemaphoreRecycler;
// Depth/stencil image. Possibly multisampled.
vk::ImageHelper mDepthStencilImage;
diff --git a/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md b/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md
new file mode 100644
index 0000000..5adc2f7
--- /dev/null
+++ b/src/libANGLE/renderer/vulkan/doc/PresentSemaphores.md
@@ -0,0 +1,151 @@
+# Queue Present Wait Semaphore Management
+
+The following shorthand notations are used throughout this document:
+
+- PE: Presentation Engine
+- ANI: vkAcquireNextImageKHR
+- QS: vkQueueSubmit
+- QP: vkQueuePresentKHR
+- W: Wait
+- S: Signal
+- R: Render
+- P: Present
+- SN: Semaphore N
+- IN: Swapchain image N
+- FN: Fence N
+
+---
+
+## Introduction
+
+Vulkan requires the application (ANGLE in this case) to acquire swapchain images and queue them for
+presentation, synchronizing GPU submissions with semaphores. A single frame looks like the
+following:
+
+ CPU: ANI ... QS ... QP
+ S:S1 W:S1 W:S2
+ S:S2
+ GPU: <------------ R ----------->
+ PE: <-------- P ------>
+
+That is, the GPU starts rendering after submission, and the presentation is done when rendering is
+finished. With multiple frames, the pipeline looks different based on present mode. Let's focus on
+FIFO (the arguments in this document translate to all modes) with 3 images:
+
+ CPU: QS QP QS QP QS QP QS QP
+ I1 I1 I2 I2 I3 I3 I1 I1
+ GPU: <---- R I1 ----><---- R I2 ----><---- R I3 ----><---- R I1 ---->
+ PE: <----- P I1 -----><----- P I2 -----><----- P I3 -----><----- P I1 ----->
+
+First, an issue is evident here. The CPU is submitting jobs and queuing images for presentation
+faster than the GPU can render them or the PE can view them. This causes the length of the PE queue
+to grow indefinitely, resulting in larger and larger input lag.
+
+To address this issue, ANGLE paces the CPU such that the length of the PE queue is kept at a maximum
+of 1 image (i.e. one image is being presented, and another one is in queue):
+
+ CPU: QS QS W:F1 QS W:F2 QS
+ I1 I2 I3 I1
+ S:F1 S:F2 S:F3 S:F4
+ GPU: <---- R I1 ----><---- R I2 ----><---- R I3 ----><---- R I1 ---->
+
+> Note: While this works in heavy applications (as the rendering time is almost as long as the frame
+> (i.e. present time), in which case pacing the submissions similarly paces the presentation), it's
+> not technically keeping the PE queue length 1, but rather below n+2 where n is the number of
+> swapchain images.
+>
+> To understand why, imagine a FIFO swapchain with 1000 images and submissions that are
+> infinitesimally short. In this case, the CPU pacing is effectively a no-op (as the GPU instantly
+> finishes jobs) for the first 1002 submissions. The 1003rd submission waits for F1001 (which uses
+> I1). However, the 1001st submission will not start until the PE is finished presenting I1 (at the
+> next V-Sync). The CPU then waits for V-Sync before the 1003rd submission. The CPU waits for one
+> V-Sync for every subsequent submission, keeping the length of the queue 1002.
+> [`VK_GOOGLE_display_timing`][DisplayTimingGOOGLE] is likely a solution to this problem.
+
+Associated with each QP operation is a semaphore signaled by the preceding QS and waited on by the
+PE before the image can be presented. Currently, there's no feedback from Vulkan (See [internal
+Khronos issue][VulkanIssue1060]) regarding _when_ the PE has actually finished waiting on the
+semaphore! This means that the application cannot generally know when to destroy the corresponding
+semaphore. However, taking ANGLE's CPU pacing into account, we are able to destroy (or rather
+reuse) semaphores when they are provably unused.
+
+The interested reader may follow the discussion in this abandoned [gerrit CL][CL1757018] for more
+background and ideas.
+
+[DisplayTimingGOOGLE]: https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VK_GOOGLE_display_timing.html
+[VulkanIssue1060]: https://gitlab.khronos.org/vulkan/vulkan/issues/1060
+[CL1757018]: https://chromium-review.googlesource.com/c/angle/angle/+/1757018
+
+## Determining When a QP Semaphore is Waited On
+
+Let's combine the above diagrams with all the details:
+
+ CPU: ANI | QS | QP | ANI | QS | QP | ANI | W:F1 | QS | QP | ANI | W:F2 | QS | QP
+ I1 | I1 | I1 | I2 | I2 | I2 | I3 | | I3 | I3 | I1 | | I1 | I1
+ S:SA1 | W:SA1 | | S:SA2 | W:SA2 | | S:SA3 | | W:SA3 | | S:SA4 | | W:SA4 |
+ | S:SP1 | W:SP1 | | S:SP2 | W:SP2 | | | S:SP3 | W:SP3 | | | S:SP4 | W:SP4
+ | S:F1 | | | S:F2 | | | | S:F3 | | | | S:F4 |
+
+Let's focus only on sequences that return the same image:
+
+ CPU: ANI | W:F(X-2) | QS | QP | ... | ANI | W:F(Y-2) | QS | QP
+ I1 | | I1 | I1 | | I1 | | I1 | I1
+ S:SAX | | W:SAX | | | S:SAY | | W:SAY |
+ | | S:SPX | W:SPX | | | | S:SPY | W:SPY
+ | | S:FX | | | | | S:FY |
+
+Note that X and Y are arbitrarily distanced (including possibly being sequential).
+
+Say we are at frame Y+2. There's therefore a wait on FY. The following holds:
+
+ FY is signaled
+ => SAY is signaled
+ => Previous presentation of I1 (corresponding to SPX) is finished
+ => SPX is waited
+
+At this point, we can destroy SPX. In other words, in frame Y+2, we can destroy SPX (note that 2 is
+the number of frames the CPU pacing code uses). If frame Y+1 is not using I1, this means the
+history of present semaphores for I1 would be `{SPX, SPY}` and we can destroy the oldest semaphore
+in this list. If frame Y+1 is also using I1, we should still destroy SPX in frame Y+2, but the
+history of the present semaphores for I1 would be `{SPX, SPY, SP(Y+1)}`.
+
+In the Vulkan backend, we simplify destruction of semaphores by always keeping a history of 3
+present semaphores for each image (again, 3 is H+1 where H is the swap history size used in CPU
+pacing) and always reusing (instead of destroying) the oldest semaphore of the image that is about to be
+presented.
+
+To summarize, we use the completion of a submission using an image to prove that the *previous*
+presentation of that image has finished.
+
+## Swapchain recreation
+
+When recreating the swapchain, all images are freed and new ones are created, possibly with a
+different count and present mode. For the old swapchain, we can no longer rely on the completion of
+a future submission to know when a previous presentation is done, as there won't be any more
+submissions using images from the old swapchain.
+
+> For example, imagine the old swapchain was created in FIFO mode, and one image is being presented
+> until the next V-Sync. Furthermore, imagine the new swapchain is created in MAILBOX mode. Since
+> the old swapchain's image will remain presented until V-Sync, the new MAILBOX swapchain can
+> perform an arbitrarily large number of (throw-away) presentations. The old swapchain (and its
+> associated present semaphores) cannot be destroyed until V-Sync; a signal that's not captured by
+> Vulkan.
+
+ANGLE resolves this issue by deferring the destruction of the old swapchain and its remaining
+present semaphores to the time when the semaphore corresponding to the first present of the new
+swapchain can be destroyed. In the example in the previous section, if SPX is the present semaphore
+of the first QP done on the new swapchain, at frame Y+2, when we know SPX can be destroyed, we know
+that the first image of the new swapchain has already been presented. This proves that all previous
+presentations of the old swapchain have finished.
+
+> Note: the swapchain can potentially be destroyed much earlier, but with no feedback from the
+> presentation engine, we cannot know that. This delay means that the swapchain could be recreated
+> while there are pending old swapchains to be destroyed. The destruction of both old swapchains
+> must now be deferred to when the first present of the new swapchain has finished. If an
+> application resizes the window constantly and at a high rate, ANGLE would keep accumulating old
+> swapchains and not free them until it stops. While a user will likely not be able to do this (as
+> the rate of window system events is lower than the framerate), this can be programmatically done
+> (as indeed done in EGL dEQP tests). Nvidia for example fails creation of a new swapchain if there
+> are already 20 allocated (on desktop, or less than ten on Quadro). If the backlog of old
+> swapchains gets larger than a threshold, ANGLE calls `vkQueueWaitIdle()` and destroys the
+> swapchains.
diff --git a/src/libANGLE/renderer/vulkan/vk_utils.h b/src/libANGLE/renderer/vulkan/vk_utils.h
index c9f8bef..f4f29b2 100644
--- a/src/libANGLE/renderer/vulkan/vk_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_utils.h
@@ -493,7 +493,7 @@
if (!mRefCounted->isReferenced())
{
ASSERT(mRefCounted->get().valid());
- recycler->recyle(std::move(mRefCounted->get()));
+ recycler->recycle(std::move(mRefCounted->get()));
SafeDelete(mRefCounted);
}
@@ -530,9 +530,9 @@
public:
Recycler() = default;
- void recyle(T &&garbageObject) { mObjectFreeList.emplace_back(std::move(garbageObject)); }
+ void recycle(T &&garbageObject) { mObjectFreeList.emplace_back(std::move(garbageObject)); }
- void fetch(VkDevice device, T *outObject)
+ void fetch(T *outObject)
{
ASSERT(!empty());
*outObject = std::move(mObjectFreeList.back());