| // |
| // Copyright 2016 The ANGLE Project Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // RendererVk.cpp: |
| // Implements the class methods for RendererVk. |
| // |
| |
| #include "libANGLE/renderer/vulkan/RendererVk.h" |
| |
| // Placing this first seems to solve an intellisense bug. |
| #include "libANGLE/renderer/vulkan/vk_utils.h" |
| |
| #include <EGL/eglext.h> |
| |
| #include "common/debug.h" |
| #include "common/platform.h" |
| #include "common/system_utils.h" |
| #include "libANGLE/Context.h" |
| #include "libANGLE/Display.h" |
| #include "libANGLE/renderer/driver_utils.h" |
| #include "libANGLE/renderer/vulkan/CommandGraph.h" |
| #include "libANGLE/renderer/vulkan/CompilerVk.h" |
| #include "libANGLE/renderer/vulkan/ContextVk.h" |
| #include "libANGLE/renderer/vulkan/DisplayVk.h" |
| #include "libANGLE/renderer/vulkan/FramebufferVk.h" |
| #include "libANGLE/renderer/vulkan/GlslangWrapper.h" |
| #include "libANGLE/renderer/vulkan/ProgramVk.h" |
| #include "libANGLE/renderer/vulkan/VertexArrayVk.h" |
| #include "libANGLE/renderer/vulkan/vk_caps_utils.h" |
| #include "libANGLE/renderer/vulkan/vk_format_utils.h" |
| #include "platform/Platform.h" |
| #include "third_party/trace_event/trace_event.h" |
| |
| // Consts |
| namespace |
| { |
| const uint32_t kMockVendorID = 0xba5eba11; |
| const uint32_t kMockDeviceID = 0xf005ba11; |
| constexpr char kMockDeviceName[] = "Vulkan Mock Device"; |
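| // Maximum number of command batches expected to be in flight at once; submitFrame() asserts |
| // that swap-based CPU throttling keeps us under this limit. |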
| constexpr size_t kInFlightCommandsLimit = 100u; |
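| // Sentinel value used to mark cached format properties as not yet queried from the ICD. |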
| constexpr VkFormatFeatureFlags kInvalidFormatFeatureFlags = static_cast<VkFormatFeatureFlags>(-1); |
| } // anonymous namespace |
| |
| namespace rx |
| { |
| |
| namespace |
| { |
| // We currently only allocate 2 uniform buffers per descriptor set, one for the fragment shader |
| // and one for the vertex shader. |
| constexpr size_t kUniformBufferDescriptorsPerDescriptorSet = 2; |
| // Update the pipeline cache every this many swaps (at 60fps, this means every 10 minutes). |
| constexpr uint32_t kPipelineCacheVkUpdatePeriod = 10 * 60 * 60; |
| // Wait a maximum of 10s. If that times out, we declare it a failure. |
| constexpr uint64_t kMaxFenceWaitTimeNs = 10'000'000'000llu; |
| // Per the Vulkan specification, as long as Vulkan 1.1+ is returned by vkEnumerateInstanceVersion, |
| // ANGLE must indicate the highest version of Vulkan functionality that it uses. The Vulkan |
| // validation layers will issue messages for any core functionality that requires a higher version. |
| // This value must be increased whenever ANGLE starts using functionality from a newer core |
| // version of Vulkan. |
| constexpr uint32_t kPreferredVulkanAPIVersion = VK_API_VERSION_1_1; |
| |
| bool ShouldEnableMockICD(const egl::AttributeMap &attribs) |
| { |
| #if !defined(ANGLE_PLATFORM_ANDROID) |
| // Mock ICD does not currently run on Android |
| return (attribs.get(EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE, |
| EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE) == |
| EGL_PLATFORM_ANGLE_DEVICE_TYPE_NULL_ANGLE); |
| #else |
| return false; |
| #endif // !defined(ANGLE_PLATFORM_ANDROID) |
| } |
| |
| bool StrLess(const char *a, const char *b) |
| { |
| return strcmp(a, b) < 0; |
| } |
| |
| VkResult VerifyExtensionsPresent(const RendererVk::ExtensionNameList &haystack, |
| const RendererVk::ExtensionNameList &needles) |
| { |
| // NOTE: The lists must be sorted. |
| return std::includes(haystack.begin(), haystack.end(), needles.begin(), needles.end(), StrLess) |
| ? VK_SUCCESS |
| : VK_ERROR_EXTENSION_NOT_PRESENT; |
| } |
| |
| bool ExtensionFound(const char *needle, const RendererVk::ExtensionNameList &haystack) |
| { |
| // NOTE: The list must be sorted. |
| return std::binary_search(haystack.begin(), haystack.end(), needle, StrLess); |
| } |
| |
| // Array of validation error/warning messages that will be ignored; each entry should include |
| // the associated bug ID. |
| constexpr const char *kSkippedMessages[] = { |
| // http://anglebug.com/2866 |
| "UNASSIGNED-CoreValidation-Shader-OutputNotConsumed", |
| // http://anglebug.com/2796 |
| "UNASSIGNED-CoreValidation-Shader-PointSizeMissing", |
| }; |
| |
| // Suppress validation errors that are known. Returns "true" if the given code/prefix/message |
| // is known (and should be ignored), else returns "false". |
| bool IsIgnoredDebugMessage(const char *message) |
| { |
| if (!message) |
| { |
| return false; |
| } |
| for (const char *msg : kSkippedMessages) |
| { |
| if (strstr(message, msg) != nullptr) |
| { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| const char *GetVkObjectTypeName(VkObjectType type) |
| { |
| switch (type) |
| { |
| case VK_OBJECT_TYPE_UNKNOWN: |
| return "Unknown"; |
| case VK_OBJECT_TYPE_INSTANCE: |
| return "Instance"; |
| case VK_OBJECT_TYPE_PHYSICAL_DEVICE: |
| return "Physical Device"; |
| case VK_OBJECT_TYPE_DEVICE: |
| return "Device"; |
| case VK_OBJECT_TYPE_QUEUE: |
| return "Queue"; |
| case VK_OBJECT_TYPE_SEMAPHORE: |
| return "Semaphore"; |
| case VK_OBJECT_TYPE_COMMAND_BUFFER: |
| return "Command Buffer"; |
| case VK_OBJECT_TYPE_FENCE: |
| return "Fence"; |
| case VK_OBJECT_TYPE_DEVICE_MEMORY: |
| return "Device Memory"; |
| case VK_OBJECT_TYPE_BUFFER: |
| return "Buffer"; |
| case VK_OBJECT_TYPE_IMAGE: |
| return "Image"; |
| case VK_OBJECT_TYPE_EVENT: |
| return "Event"; |
| case VK_OBJECT_TYPE_QUERY_POOL: |
| return "Query Pool"; |
| case VK_OBJECT_TYPE_BUFFER_VIEW: |
| return "Buffer View"; |
| case VK_OBJECT_TYPE_IMAGE_VIEW: |
| return "Image View"; |
| case VK_OBJECT_TYPE_SHADER_MODULE: |
| return "Shader Module"; |
| case VK_OBJECT_TYPE_PIPELINE_CACHE: |
| return "Pipeline Cache"; |
| case VK_OBJECT_TYPE_PIPELINE_LAYOUT: |
| return "Pipeline Layout"; |
| case VK_OBJECT_TYPE_RENDER_PASS: |
| return "Render Pass"; |
| case VK_OBJECT_TYPE_PIPELINE: |
| return "Pipeline"; |
| case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: |
| return "Descriptor Set Layout"; |
| case VK_OBJECT_TYPE_SAMPLER: |
| return "Sampler"; |
| case VK_OBJECT_TYPE_DESCRIPTOR_POOL: |
| return "Descriptor Pool"; |
| case VK_OBJECT_TYPE_DESCRIPTOR_SET: |
| return "Descriptor Set"; |
| case VK_OBJECT_TYPE_FRAMEBUFFER: |
| return "Framebuffer"; |
| case VK_OBJECT_TYPE_COMMAND_POOL: |
| return "Command Pool"; |
| case VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION: |
| return "Sampler YCbCr Conversion"; |
| case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE: |
| return "Descriptor Update Template"; |
| case VK_OBJECT_TYPE_SURFACE_KHR: |
| return "Surface"; |
| case VK_OBJECT_TYPE_SWAPCHAIN_KHR: |
| return "Swapchain"; |
| case VK_OBJECT_TYPE_DISPLAY_KHR: |
| return "Display"; |
| case VK_OBJECT_TYPE_DISPLAY_MODE_KHR: |
| return "Display Mode"; |
| case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: |
| return "Debug Report Callback"; |
| case VK_OBJECT_TYPE_OBJECT_TABLE_NVX: |
| return "Object Table"; |
| case VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX: |
| return "Indirect Commands Layout"; |
| case VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT: |
| return "Debug Utils Messenger"; |
| case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT: |
| return "Validation Cache"; |
| case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NVX: |
| return "Acceleration Structure"; |
| default: |
| return "<Unrecognized>"; |
| } |
| } |
| |
| VKAPI_ATTR VkBool32 VKAPI_CALL |
| DebugUtilsMessenger(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, |
| VkDebugUtilsMessageTypeFlagsEXT messageTypes, |
| const VkDebugUtilsMessengerCallbackDataEXT *callbackData, |
| void *userData) |
| { |
| constexpr VkDebugUtilsMessageSeverityFlagsEXT kSeveritiesToLog = |
| VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | |
| VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; |
| |
| // Check if we even care about this message. |
| if ((messageSeverity & kSeveritiesToLog) == 0) |
| { |
| return VK_FALSE; |
| } |
| |
| // See if it's an issue we are aware of and don't want to be spammed about. |
| if (IsIgnoredDebugMessage(callbackData->pMessageIdName)) |
| { |
| return VK_FALSE; |
| } |
| |
| std::ostringstream log; |
| if (callbackData->pMessageIdName) |
| { |
| log << "[ " << callbackData->pMessageIdName << " ] "; |
| } |
| log << callbackData->pMessage << std::endl; |
| |
| // Aesthetic value based on length of the function name, line number, etc. |
| constexpr size_t kStartIndent = 28; |
| |
| // Output the debug marker hierarchy under which this error has occurred. |
| size_t indent = kStartIndent; |
| if (callbackData->queueLabelCount > 0) |
| { |
| log << std::string(indent++, ' ') << "<Queue Label Hierarchy:>" << std::endl; |
| for (uint32_t i = 0; i < callbackData->queueLabelCount; ++i) |
| { |
| log << std::string(indent++, ' ') << callbackData->pQueueLabels[i].pLabelName |
| << std::endl; |
| } |
| } |
| if (callbackData->cmdBufLabelCount > 0) |
| { |
| log << std::string(indent++, ' ') << "<Command Buffer Label Hierarchy:>" << std::endl; |
| for (uint32_t i = 0; i < callbackData->cmdBufLabelCount; ++i) |
| { |
| log << std::string(indent++, ' ') << callbackData->pCmdBufLabels[i].pLabelName |
| << std::endl; |
| } |
| } |
| // Output the objects involved in this error message. |
| if (callbackData->objectCount > 0) |
| { |
| for (uint32_t i = 0; i < callbackData->objectCount; ++i) |
| { |
| const char *objectName = callbackData->pObjects[i].pObjectName; |
| const char *objectType = GetVkObjectTypeName(callbackData->pObjects[i].objectType); |
| uint64_t objectHandle = callbackData->pObjects[i].objectHandle; |
| log << std::string(indent, ' ') << "Object: "; |
| if (objectHandle == 0) |
| { |
| log << "VK_NULL_HANDLE"; |
| } |
| else |
| { |
| log << "0x" << std::hex << objectHandle << std::dec; |
| } |
| log << " (type = " << objectType << "(" << callbackData->pObjects[i].objectType << "))"; |
| if (objectName) |
| { |
| log << " [" << objectName << "]"; |
| } |
| log << std::endl; |
| } |
| } |
| |
| bool isError = (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) != 0; |
| std::string msg = log.str(); |
| |
| if (isError) |
| { |
| ERR() << msg; |
| } |
| else |
| { |
| WARN() << msg; |
| } |
| |
| return VK_FALSE; |
| } |
| |
| VKAPI_ATTR VkBool32 VKAPI_CALL DebugReportCallback(VkDebugReportFlagsEXT flags, |
| VkDebugReportObjectTypeEXT objectType, |
| uint64_t object, |
| size_t location, |
| int32_t messageCode, |
| const char *layerPrefix, |
| const char *message, |
| void *userData) |
| { |
| if (IsIgnoredDebugMessage(message)) |
| { |
| return VK_FALSE; |
| } |
| if ((flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) != 0) |
| { |
| ERR() << message; |
| #if !defined(NDEBUG) |
| // Abort the call in Debug builds. |
| return VK_TRUE; |
| #endif |
| } |
| else if ((flags & VK_DEBUG_REPORT_WARNING_BIT_EXT) != 0) |
| { |
| WARN() << message; |
| } |
| else |
| { |
| // Uncomment this if you want Vulkan spam. |
| // WARN() << message; |
| } |
| |
| return VK_FALSE; |
| } |
| |
| // If we're loading the validation layers, we could be running from any random directory. |
| // Change to the executable directory so we can find the layers, then change back to the |
| // previous directory to make sure we don't disrupt the application. |
| class ScopedVkLoaderEnvironment : angle::NonCopyable |
| { |
| public: |
| ScopedVkLoaderEnvironment(bool enableValidationLayers, bool enableMockICD) |
| : mEnableValidationLayers(enableValidationLayers), |
| mEnableMockICD(enableMockICD), |
| mChangedCWD(false), |
| mChangedICDPath(false) |
| { |
| // Changing CWD and setting environment variables makes no sense on Android, |
| // since this code is part of a Java application there. |
| // Android Vulkan loader doesn't need this either. |
| #if !defined(ANGLE_PLATFORM_ANDROID) && !defined(ANGLE_PLATFORM_FUCHSIA) |
| if (enableMockICD) |
| { |
| // Override environment variable to use built Mock ICD |
| // ANGLE_VK_ICD_JSON gets set to the built mock ICD in BUILD.gn |
| mPreviousICDPath = angle::GetEnvironmentVar(g_VkICDPathEnv); |
| mChangedICDPath = angle::SetEnvironmentVar(g_VkICDPathEnv, ANGLE_VK_ICD_JSON); |
| if (!mChangedICDPath) |
| { |
| ERR() << "Error setting Path for Mock/Null Driver."; |
| mEnableMockICD = false; |
| } |
| } |
| if (mEnableValidationLayers || mEnableMockICD) |
| { |
| const auto &cwd = angle::GetCWD(); |
| if (!cwd.valid()) |
| { |
| ERR() << "Error getting CWD for Vulkan layers init."; |
| mEnableValidationLayers = false; |
| mEnableMockICD = false; |
| } |
| else |
| { |
| mPreviousCWD = cwd.value(); |
| std::string exeDir = angle::GetExecutableDirectory(); |
| mChangedCWD = angle::SetCWD(exeDir.c_str()); |
| if (!mChangedCWD) |
| { |
| ERR() << "Error setting CWD for Vulkan layers init."; |
| mEnableValidationLayers = false; |
| mEnableMockICD = false; |
| } |
| } |
| } |
| |
| // Override environment variable to use the ANGLE layers. |
| if (mEnableValidationLayers) |
| { |
| if (!angle::PrependPathToEnvironmentVar(g_VkLoaderLayersPathEnv, ANGLE_VK_DATA_DIR)) |
| { |
| ERR() << "Error setting environment for Vulkan layers init."; |
| mEnableValidationLayers = false; |
| } |
| } |
| #endif // !defined(ANGLE_PLATFORM_ANDROID) && !defined(ANGLE_PLATFORM_FUCHSIA) |
| } |
| |
| ~ScopedVkLoaderEnvironment() |
| { |
| if (mChangedCWD) |
| { |
| #if !defined(ANGLE_PLATFORM_ANDROID) |
| ASSERT(mPreviousCWD.valid()); |
| angle::SetCWD(mPreviousCWD.value().c_str()); |
| #endif // !defined(ANGLE_PLATFORM_ANDROID) |
| } |
| if (mChangedICDPath) |
| { |
| if (mPreviousICDPath.value().empty()) |
| { |
| angle::UnsetEnvironmentVar(g_VkICDPathEnv); |
| } |
| else |
| { |
| angle::SetEnvironmentVar(g_VkICDPathEnv, mPreviousICDPath.value().c_str()); |
| } |
| } |
| } |
| |
| bool canEnableValidationLayers() const { return mEnableValidationLayers; } |
| |
| bool canEnableMockICD() const { return mEnableMockICD; } |
| |
| private: |
| bool mEnableValidationLayers; |
| bool mEnableMockICD; |
| bool mChangedCWD; |
| Optional<std::string> mPreviousCWD; |
| bool mChangedICDPath; |
| Optional<std::string> mPreviousICDPath; |
| }; |
| |
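| // Selects the mock ICD's physical device when requested (matching the vendor/device IDs and |
| // name above); otherwise falls back to the first enumerated device. |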
| void ChoosePhysicalDevice(const std::vector<VkPhysicalDevice> &physicalDevices, |
| bool preferMockICD, |
| VkPhysicalDevice *physicalDeviceOut, |
| VkPhysicalDeviceProperties *physicalDevicePropertiesOut) |
| { |
| ASSERT(!physicalDevices.empty()); |
| if (preferMockICD) |
| { |
| for (const VkPhysicalDevice &physicalDevice : physicalDevices) |
| { |
| vkGetPhysicalDeviceProperties(physicalDevice, physicalDevicePropertiesOut); |
| if ((kMockVendorID == physicalDevicePropertiesOut->vendorID) && |
| (kMockDeviceID == physicalDevicePropertiesOut->deviceID) && |
| (strcmp(kMockDeviceName, physicalDevicePropertiesOut->deviceName) == 0)) |
| { |
| *physicalDeviceOut = physicalDevice; |
| return; |
| } |
| } |
| WARN() << "Vulkan Mock Driver was requested but Mock Device was not found. Using default " |
| "physicalDevice instead."; |
| } |
| |
| // Fall back to first device. |
| *physicalDeviceOut = physicalDevices[0]; |
| vkGetPhysicalDeviceProperties(*physicalDeviceOut, physicalDevicePropertiesOut); |
| } |
| |
| // Initially dumping the command graphs is disabled. |
| constexpr bool kEnableCommandGraphDiagnostics = false; |
| |
| } // anonymous namespace |
| |
| // CommandBatch implementation. |
| RendererVk::CommandBatch::CommandBatch() = default; |
| |
| RendererVk::CommandBatch::~CommandBatch() = default; |
| |
| RendererVk::CommandBatch::CommandBatch(CommandBatch &&other) |
| : commandPool(std::move(other.commandPool)), fence(std::move(other.fence)), serial(other.serial) |
| {} |
| |
| RendererVk::CommandBatch &RendererVk::CommandBatch::operator=(CommandBatch &&other) |
| { |
| std::swap(commandPool, other.commandPool); |
| std::swap(fence, other.fence); |
| std::swap(serial, other.serial); |
| return *this; |
| } |
| |
| void RendererVk::CommandBatch::destroy(VkDevice device) |
| { |
| commandPool.destroy(device); |
| fence.destroy(device); |
| } |
| |
| // RendererVk implementation. |
| RendererVk::RendererVk() |
| : mDisplay(nullptr), |
| mCapsInitialized(false), |
| mFeaturesInitialized(false), |
| mInstance(VK_NULL_HANDLE), |
| mEnableValidationLayers(false), |
| mEnableMockICD(false), |
| mDebugUtilsMessenger(VK_NULL_HANDLE), |
| mDebugReportCallback(VK_NULL_HANDLE), |
| mPhysicalDevice(VK_NULL_HANDLE), |
| mQueue(VK_NULL_HANDLE), |
| mCurrentQueueFamilyIndex(std::numeric_limits<uint32_t>::max()), |
| mMaxVertexAttribDivisor(1), |
| mDevice(VK_NULL_HANDLE), |
| mLastCompletedQueueSerial(mQueueSerialFactory.generate()), |
| mCurrentQueueSerial(mQueueSerialFactory.generate()), |
| mDeviceLost(false), |
| mPipelineCacheVkUpdateTimeout(kPipelineCacheVkUpdatePeriod), |
| mCommandGraph(kEnableCommandGraphDiagnostics), |
| mGpuEventsEnabled(false), |
| mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()}, |
| mGpuEventTimestampOrigin(0) |
| { |
| VkFormatProperties invalid = {0, 0, kInvalidFormatFeatureFlags}; |
| mFormatProperties.fill(invalid); |
| } |
| |
| RendererVk::~RendererVk() {} |
| |
| void RendererVk::onDestroy(vk::Context *context) |
| { |
| if (!mInFlightCommands.empty() || !mGarbage.empty()) |
| { |
| // TODO(jmadill): Not nice to pass nullptr here, but shouldn't be a problem. |
| (void)finish(context); |
| } |
| |
| mUtils.destroy(mDevice); |
| |
| mPipelineLayoutCache.destroy(mDevice); |
| mDescriptorSetLayoutCache.destroy(mDevice); |
| |
| mRenderPassCache.destroy(mDevice); |
| mPipelineCache.destroy(mDevice); |
| mSubmitSemaphorePool.destroy(mDevice); |
| mShaderLibrary.destroy(mDevice); |
| mGpuEventQueryPool.destroy(mDevice); |
| |
| GlslangWrapper::Release(); |
| |
| if (mCommandPool.valid()) |
| { |
| mCommandPool.destroy(mDevice); |
| } |
| |
| if (mDevice) |
| { |
| vkDestroyDevice(mDevice, nullptr); |
| mDevice = VK_NULL_HANDLE; |
| } |
| |
| if (mDebugUtilsMessenger) |
| { |
| ASSERT(mInstance && vkDestroyDebugUtilsMessengerEXT); |
| vkDestroyDebugUtilsMessengerEXT(mInstance, mDebugUtilsMessenger, nullptr); |
| |
| ASSERT(mDebugReportCallback == VK_NULL_HANDLE); |
| } |
| else if (mDebugReportCallback) |
| { |
| ASSERT(mInstance && vkDestroyDebugReportCallbackEXT); |
| vkDestroyDebugReportCallbackEXT(mInstance, mDebugReportCallback, nullptr); |
| } |
| |
| if (mInstance) |
| { |
| vkDestroyInstance(mInstance, nullptr); |
| mInstance = VK_NULL_HANDLE; |
| } |
| |
| mMemoryProperties.destroy(); |
| mPhysicalDevice = VK_NULL_HANDLE; |
| } |
| |
| void RendererVk::notifyDeviceLost() |
| { |
| mDeviceLost = true; |
| |
| mCommandGraph.clear(); |
| nextSerial(); |
| freeAllInFlightResources(); |
| |
| mDisplay->notifyDeviceLost(); |
| } |
| |
| bool RendererVk::isDeviceLost() const |
| { |
| return mDeviceLost; |
| } |
| |
| angle::Result RendererVk::initialize(DisplayVk *displayVk, |
| egl::Display *display, |
| const char *wsiExtension, |
| const char *wsiLayer) |
| { |
| mDisplay = display; |
| const egl::AttributeMap &attribs = mDisplay->getAttributeMap(); |
| ScopedVkLoaderEnvironment scopedEnvironment(ShouldUseDebugLayers(attribs), |
| ShouldEnableMockICD(attribs)); |
| mEnableValidationLayers = scopedEnvironment.canEnableValidationLayers(); |
| mEnableMockICD = scopedEnvironment.canEnableMockICD(); |
| |
| // Gather global layer properties. |
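| // Standard Vulkan two-call idiom: query the count first, then fill the array. |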
| uint32_t instanceLayerCount = 0; |
| ANGLE_VK_TRY(displayVk, vkEnumerateInstanceLayerProperties(&instanceLayerCount, nullptr)); |
| |
| std::vector<VkLayerProperties> instanceLayerProps(instanceLayerCount); |
| if (instanceLayerCount > 0) |
| { |
| ANGLE_VK_TRY(displayVk, vkEnumerateInstanceLayerProperties(&instanceLayerCount, |
| instanceLayerProps.data())); |
| } |
| |
| VulkanLayerVector enabledInstanceLayerNames; |
| if (mEnableValidationLayers) |
| { |
| bool layersRequested = |
| (attribs.get(EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED_ANGLE, EGL_DONT_CARE) == EGL_TRUE); |
| mEnableValidationLayers = GetAvailableValidationLayers(instanceLayerProps, layersRequested, |
| &enabledInstanceLayerNames); |
| } |
| |
| if (wsiLayer) |
| { |
| enabledInstanceLayerNames.push_back(wsiLayer); |
| } |
| |
| // Enumerate instance extensions that are provided by the vulkan |
| // implementation and implicit layers. |
| uint32_t instanceExtensionCount = 0; |
| ANGLE_VK_TRY(displayVk, |
| vkEnumerateInstanceExtensionProperties(nullptr, &instanceExtensionCount, nullptr)); |
| |
| std::vector<VkExtensionProperties> instanceExtensionProps(instanceExtensionCount); |
| if (instanceExtensionCount > 0) |
| { |
| ANGLE_VK_TRY(displayVk, |
| vkEnumerateInstanceExtensionProperties(nullptr, &instanceExtensionCount, |
| instanceExtensionProps.data())); |
| } |
| |
| // Enumerate instance extensions that are provided by explicit layers. |
| for (const char *layerName : enabledInstanceLayerNames) |
| { |
| uint32_t previousExtensionCount = static_cast<uint32_t>(instanceExtensionProps.size()); |
| uint32_t instanceLayerExtensionCount = 0; |
| ANGLE_VK_TRY(displayVk, vkEnumerateInstanceExtensionProperties( |
| layerName, &instanceLayerExtensionCount, nullptr)); |
| instanceExtensionProps.resize(previousExtensionCount + instanceLayerExtensionCount); |
| ANGLE_VK_TRY(displayVk, vkEnumerateInstanceExtensionProperties( |
| layerName, &instanceLayerExtensionCount, |
| instanceExtensionProps.data() + previousExtensionCount)); |
| } |
| |
| ExtensionNameList instanceExtensionNames; |
| if (!instanceExtensionProps.empty()) |
| { |
| for (const VkExtensionProperties &i : instanceExtensionProps) |
| { |
| instanceExtensionNames.push_back(i.extensionName); |
| } |
| std::sort(instanceExtensionNames.begin(), instanceExtensionNames.end(), StrLess); |
| } |
| |
| ExtensionNameList enabledInstanceExtensions; |
| enabledInstanceExtensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); |
| enabledInstanceExtensions.push_back(wsiExtension); |
| |
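| // Prefer the newer VK_EXT_debug_utils extension, falling back to VK_EXT_debug_report. |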
| bool enableDebugUtils = |
| mEnableValidationLayers && |
| ExtensionFound(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, instanceExtensionNames); |
| bool enableDebugReport = |
| mEnableValidationLayers && !enableDebugUtils && |
| ExtensionFound(VK_EXT_DEBUG_REPORT_EXTENSION_NAME, instanceExtensionNames); |
| |
| if (enableDebugUtils) |
| { |
| enabledInstanceExtensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); |
| } |
| else if (enableDebugReport) |
| { |
| enabledInstanceExtensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME); |
| } |
| |
| // Verify the required extensions are in the extension names set. Fail if not. |
| std::sort(enabledInstanceExtensions.begin(), enabledInstanceExtensions.end(), StrLess); |
| ANGLE_VK_TRY(displayVk, |
| VerifyExtensionsPresent(instanceExtensionNames, enabledInstanceExtensions)); |
| |
| // Enable VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME if available. |
| if (ExtensionFound(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, |
| instanceExtensionNames)) |
| { |
| enabledInstanceExtensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); |
| } |
| |
| VkApplicationInfo applicationInfo = {}; |
| applicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; |
| applicationInfo.pApplicationName = "ANGLE"; |
| applicationInfo.applicationVersion = 1; |
| applicationInfo.pEngineName = "ANGLE"; |
| applicationInfo.engineVersion = 1; |
| |
| auto enumerateInstanceVersion = reinterpret_cast<PFN_vkEnumerateInstanceVersion>( |
| vkGetInstanceProcAddr(mInstance, "vkEnumerateInstanceVersion")); |
| if (!enumerateInstanceVersion) |
| { |
| applicationInfo.apiVersion = VK_API_VERSION_1_0; |
| } |
| else |
| { |
| uint32_t apiVersion = VK_API_VERSION_1_0; |
| ANGLE_VK_TRY(displayVk, enumerateInstanceVersion(&apiVersion)); |
| if ((VK_VERSION_MAJOR(apiVersion) > 1) || (VK_VERSION_MINOR(apiVersion) >= 1)) |
| { |
| // This is the highest version of core Vulkan functionality that ANGLE uses. |
| applicationInfo.apiVersion = kPreferredVulkanAPIVersion; |
| } |
| else |
| { |
| // Since only 1.0 instance-level functionality is available, this must be set to 1.0. |
| applicationInfo.apiVersion = VK_API_VERSION_1_0; |
| } |
| } |
| |
| VkInstanceCreateInfo instanceInfo = {}; |
| instanceInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; |
| instanceInfo.flags = 0; |
| instanceInfo.pApplicationInfo = &applicationInfo; |
| |
| // Enable requested layers and extensions. |
| instanceInfo.enabledExtensionCount = static_cast<uint32_t>(enabledInstanceExtensions.size()); |
| instanceInfo.ppEnabledExtensionNames = |
| enabledInstanceExtensions.empty() ? nullptr : enabledInstanceExtensions.data(); |
| instanceInfo.enabledLayerCount = static_cast<uint32_t>(enabledInstanceLayerNames.size()); |
| instanceInfo.ppEnabledLayerNames = enabledInstanceLayerNames.data(); |
| |
| ANGLE_VK_TRY(displayVk, vkCreateInstance(&instanceInfo, nullptr, &mInstance)); |
| |
| if (enableDebugUtils) |
| { |
| // Try to use the newer EXT_debug_utils if it exists. |
| InitDebugUtilsEXTFunctions(mInstance); |
| |
| // Create the messenger callback. |
| VkDebugUtilsMessengerCreateInfoEXT messengerInfo = {}; |
| |
| messengerInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; |
| messengerInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | |
| VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; |
| messengerInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | |
| VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | |
| VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; |
| messengerInfo.pfnUserCallback = &DebugUtilsMessenger; |
| messengerInfo.pUserData = this; |
| |
| ANGLE_VK_TRY(displayVk, vkCreateDebugUtilsMessengerEXT(mInstance, &messengerInfo, nullptr, |
| &mDebugUtilsMessenger)); |
| } |
| else if (enableDebugReport) |
| { |
| // Fallback to EXT_debug_report. |
| InitDebugReportEXTFunctions(mInstance); |
| |
| VkDebugReportCallbackCreateInfoEXT debugReportInfo = {}; |
| |
| debugReportInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT; |
| debugReportInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT; |
| debugReportInfo.pfnCallback = &DebugReportCallback; |
| debugReportInfo.pUserData = this; |
| |
| ANGLE_VK_TRY(displayVk, vkCreateDebugReportCallbackEXT(mInstance, &debugReportInfo, nullptr, |
| &mDebugReportCallback)); |
| } |
| |
| // Compare with strcmp rather than pointer equality; identical string literals are not |
| // guaranteed to share an address. |
| if (std::find_if(enabledInstanceExtensions.begin(), enabledInstanceExtensions.end(), |
| [](const char *name) { |
| return strcmp(name, VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME) == 0; |
| }) != enabledInstanceExtensions.end()) |
| { |
| InitGetPhysicalDeviceProperties2KHRFunctions(mInstance); |
| ASSERT(vkGetPhysicalDeviceProperties2KHR); |
| } |
| |
| uint32_t physicalDeviceCount = 0; |
| ANGLE_VK_TRY(displayVk, vkEnumeratePhysicalDevices(mInstance, &physicalDeviceCount, nullptr)); |
| ANGLE_VK_CHECK(displayVk, physicalDeviceCount > 0, VK_ERROR_INITIALIZATION_FAILED); |
| |
| // TODO(jmadill): Handle multiple physical devices. For now, use the first device. |
| std::vector<VkPhysicalDevice> physicalDevices(physicalDeviceCount); |
| ANGLE_VK_TRY(displayVk, vkEnumeratePhysicalDevices(mInstance, &physicalDeviceCount, |
| physicalDevices.data())); |
| ChoosePhysicalDevice(physicalDevices, mEnableMockICD, &mPhysicalDevice, |
| &mPhysicalDeviceProperties); |
| |
| vkGetPhysicalDeviceFeatures(mPhysicalDevice, &mPhysicalDeviceFeatures); |
| |
| // Ensure we can find a graphics queue family. |
| uint32_t queueCount = 0; |
| vkGetPhysicalDeviceQueueFamilyProperties(mPhysicalDevice, &queueCount, nullptr); |
| |
| ANGLE_VK_CHECK(displayVk, queueCount > 0, VK_ERROR_INITIALIZATION_FAILED); |
| |
| mQueueFamilyProperties.resize(queueCount); |
| vkGetPhysicalDeviceQueueFamilyProperties(mPhysicalDevice, &queueCount, |
| mQueueFamilyProperties.data()); |
| |
| size_t graphicsQueueFamilyCount = 0; |
| uint32_t firstGraphicsQueueFamily = 0; |
| constexpr VkQueueFlags kGraphicsAndCompute = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; |
| for (uint32_t familyIndex = 0; familyIndex < queueCount; ++familyIndex) |
| { |
| const auto &queueInfo = mQueueFamilyProperties[familyIndex]; |
| if ((queueInfo.queueFlags & kGraphicsAndCompute) == kGraphicsAndCompute) |
| { |
| ASSERT(queueInfo.queueCount > 0); |
| graphicsQueueFamilyCount++; |
| if (firstGraphicsQueueFamily == 0) |
| { |
| firstGraphicsQueueFamily = familyIndex; |
| } |
| } |
| } |
| |
| ANGLE_VK_CHECK(displayVk, graphicsQueueFamilyCount > 0, VK_ERROR_INITIALIZATION_FAILED); |
| |
| // If only one queue family, go ahead and initialize the device. If there is more than one |
| // queue family, we'll have to wait until we see a WindowSurface to know which supports present. |
| if (graphicsQueueFamilyCount == 1) |
| { |
| ANGLE_TRY(initializeDevice(displayVk, firstGraphicsQueueFamily)); |
| } |
| |
| // Store the physical device memory properties so we can find the right memory pools. |
| mMemoryProperties.init(mPhysicalDevice); |
| |
| GlslangWrapper::Initialize(); |
| |
| // Initialize the format table. |
| mFormatTable.initialize(this, &mNativeTextureCaps, &mNativeCaps.compressedTextureFormats); |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::initializeDevice(DisplayVk *displayVk, uint32_t queueFamilyIndex) |
| { |
| uint32_t deviceLayerCount = 0; |
| ANGLE_VK_TRY(displayVk, |
| vkEnumerateDeviceLayerProperties(mPhysicalDevice, &deviceLayerCount, nullptr)); |
| |
| std::vector<VkLayerProperties> deviceLayerProps(deviceLayerCount); |
| if (deviceLayerCount > 0) |
| { |
| ANGLE_VK_TRY(displayVk, vkEnumerateDeviceLayerProperties(mPhysicalDevice, &deviceLayerCount, |
| deviceLayerProps.data())); |
| } |
| |
| VulkanLayerVector enabledDeviceLayerNames; |
| if (mEnableValidationLayers) |
| { |
| mEnableValidationLayers = |
| GetAvailableValidationLayers(deviceLayerProps, false, &enabledDeviceLayerNames); |
| } |
| |
| const char *wsiLayer = displayVk->getWSILayer(); |
| if (wsiLayer) |
| { |
| enabledDeviceLayerNames.push_back(wsiLayer); |
| } |
| |
| // Enumerate device extensions that are provided by the vulkan |
| // implementation and implicit layers. |
| uint32_t deviceExtensionCount = 0; |
| ANGLE_VK_TRY(displayVk, vkEnumerateDeviceExtensionProperties(mPhysicalDevice, nullptr, |
| &deviceExtensionCount, nullptr)); |
| |
| std::vector<VkExtensionProperties> deviceExtensionProps(deviceExtensionCount); |
| if (deviceExtensionCount > 0) |
| { |
| ANGLE_VK_TRY(displayVk, vkEnumerateDeviceExtensionProperties(mPhysicalDevice, nullptr, |
| &deviceExtensionCount, |
| deviceExtensionProps.data())); |
| } |
| |
| // Enumerate device extensions that are provided by explicit layers. |
| for (const char *layerName : enabledDeviceLayerNames) |
| { |
| uint32_t previousExtensionCount = static_cast<uint32_t>(deviceExtensionProps.size()); |
| uint32_t deviceLayerExtensionCount = 0; |
| ANGLE_VK_TRY(displayVk, |
| vkEnumerateDeviceExtensionProperties(mPhysicalDevice, layerName, |
| &deviceLayerExtensionCount, nullptr)); |
| deviceExtensionProps.resize(previousExtensionCount + deviceLayerExtensionCount); |
| ANGLE_VK_TRY(displayVk, vkEnumerateDeviceExtensionProperties( |
| mPhysicalDevice, layerName, &deviceLayerExtensionCount, |
| deviceExtensionProps.data() + previousExtensionCount)); |
| } |
| |
| ExtensionNameList deviceExtensionNames; |
| if (!deviceExtensionProps.empty()) |
| { |
| ASSERT(deviceExtensionNames.size() <= deviceExtensionProps.size()); |
| for (const VkExtensionProperties &prop : deviceExtensionProps) |
| { |
| deviceExtensionNames.push_back(prop.extensionName); |
| } |
| std::sort(deviceExtensionNames.begin(), deviceExtensionNames.end(), StrLess); |
| } |
| |
| ExtensionNameList enabledDeviceExtensions; |
| enabledDeviceExtensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| |
| initFeatures(deviceExtensionNames); |
| mFeaturesInitialized = true; |
| |
| // Selectively enable KHR_MAINTENANCE1 to support viewport flipping. |
| if ((getFeatures().flipViewportY) && |
| (mPhysicalDeviceProperties.apiVersion < VK_MAKE_VERSION(1, 1, 0))) |
| { |
| enabledDeviceExtensions.push_back(VK_KHR_MAINTENANCE1_EXTENSION_NAME); |
| } |
| if (getFeatures().supportsIncrementalPresent) |
| { |
| enabledDeviceExtensions.push_back(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME); |
| } |
| |
| std::sort(enabledDeviceExtensions.begin(), enabledDeviceExtensions.end(), StrLess); |
| ANGLE_VK_TRY(displayVk, VerifyExtensionsPresent(deviceExtensionNames, enabledDeviceExtensions)); |
| |
| // Select additional features to be enabled |
| VkPhysicalDeviceFeatures2KHR enabledFeatures = {}; |
| enabledFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| enabledFeatures.features.inheritedQueries = mPhysicalDeviceFeatures.inheritedQueries; |
| enabledFeatures.features.robustBufferAccess = mPhysicalDeviceFeatures.robustBufferAccess; |
| |
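| // Request the vertex attribute divisor feature; it is only chained into pNext below if the |
| // corresponding extension is actually available. |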
| VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT divisorFeatures = {}; |
| divisorFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; |
| divisorFeatures.vertexAttributeInstanceRateDivisor = true; |
| |
| float zeroPriority = 0.0f; |
| VkDeviceQueueCreateInfo queueCreateInfo = {}; |
| queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; |
| queueCreateInfo.flags = 0; |
| queueCreateInfo.queueFamilyIndex = queueFamilyIndex; |
| queueCreateInfo.queueCount = 1; |
| queueCreateInfo.pQueuePriorities = &zeroPriority; |
| |
| // Initialize the device |
| VkDeviceCreateInfo createInfo = {}; |
| |
| createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; |
| createInfo.flags = 0; |
| createInfo.queueCreateInfoCount = 1; |
| createInfo.pQueueCreateInfos = &queueCreateInfo; |
| createInfo.enabledLayerCount = static_cast<uint32_t>(enabledDeviceLayerNames.size()); |
| createInfo.ppEnabledLayerNames = enabledDeviceLayerNames.data(); |
| |
| if (vkGetPhysicalDeviceProperties2KHR && |
| ExtensionFound(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, deviceExtensionNames)) |
| { |
| enabledDeviceExtensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); |
| enabledFeatures.pNext = &divisorFeatures; |
| |
| VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT divisorProperties = {}; |
| divisorProperties.sType = |
| VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; |
| |
| VkPhysicalDeviceProperties2 deviceProperties = {}; |
| deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; |
| deviceProperties.pNext = &divisorProperties; |
| |
| vkGetPhysicalDeviceProperties2KHR(mPhysicalDevice, &deviceProperties); |
| mMaxVertexAttribDivisor = divisorProperties.maxVertexAttribDivisor; |
| |
| createInfo.pNext = &enabledFeatures; |
| } |
| else |
| { |
| createInfo.pEnabledFeatures = &enabledFeatures.features; |
| } |
| |
| createInfo.enabledExtensionCount = static_cast<uint32_t>(enabledDeviceExtensions.size()); |
| createInfo.ppEnabledExtensionNames = |
| enabledDeviceExtensions.empty() ? nullptr : enabledDeviceExtensions.data(); |
| |
| ANGLE_VK_TRY(displayVk, vkCreateDevice(mPhysicalDevice, &createInfo, nullptr, &mDevice)); |
| |
| mCurrentQueueFamilyIndex = queueFamilyIndex; |
| |
| vkGetDeviceQueue(mDevice, mCurrentQueueFamilyIndex, 0, &mQueue); |
| |
| // Initialize the command pool now that we know the queue family index. |
| VkCommandPoolCreateInfo commandPoolInfo = {}; |
| commandPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; |
| commandPoolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; |
| commandPoolInfo.queueFamilyIndex = mCurrentQueueFamilyIndex; |
| |
| ANGLE_VK_TRY(displayVk, mCommandPool.init(mDevice, commandPoolInfo)); |
| |
| // Initialize the vulkan pipeline cache. |
| ANGLE_TRY(initPipelineCache(displayVk)); |
| |
| // Initialize the submission semaphore pool. |
| ANGLE_TRY(mSubmitSemaphorePool.init(displayVk, vk::kDefaultSemaphorePoolSize)); |
| |
| #if ANGLE_ENABLE_VULKAN_GPU_TRACE_EVENTS |
| angle::PlatformMethods *platform = ANGLEPlatformCurrent(); |
| ASSERT(platform); |
| |
| // GPU tracing workaround for anglebug.com/2927. The renderer should not emit gpu events during |
| // platform discovery. |
| const unsigned char *gpuEventsEnabled = |
| platform->getTraceCategoryEnabledFlag(platform, "gpu.angle.gpu"); |
| mGpuEventsEnabled = gpuEventsEnabled && *gpuEventsEnabled; |
| #endif |
| |
| if (mGpuEventsEnabled) |
| { |
| // Calculate the difference between CPU and GPU clocks for GPU event reporting. |
| ANGLE_TRY(mGpuEventQueryPool.init(displayVk, VK_QUERY_TYPE_TIMESTAMP, |
| vk::kDefaultTimestampQueryPoolSize)); |
| ANGLE_TRY(synchronizeCpuGpuTime(displayVk)); |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::selectPresentQueueForSurface(DisplayVk *displayVk, |
| VkSurfaceKHR surface, |
| uint32_t *presentQueueOut) |
| { |
| // We've already initialized a device, and can't re-create it unless it's never been used. |
| // TODO(jmadill): Handle the re-creation case if necessary. |
| if (mDevice != VK_NULL_HANDLE) |
| { |
| ASSERT(mCurrentQueueFamilyIndex != std::numeric_limits<uint32_t>::max()); |
| |
| // Check if the current device supports present on this surface. |
| VkBool32 supportsPresent = VK_FALSE; |
| ANGLE_VK_TRY(displayVk, |
| vkGetPhysicalDeviceSurfaceSupportKHR(mPhysicalDevice, mCurrentQueueFamilyIndex, |
| surface, &supportsPresent)); |
| |
| if (supportsPresent == VK_TRUE) |
| { |
| *presentQueueOut = mCurrentQueueFamilyIndex; |
| return angle::Result::Continue; |
| } |
| } |
| |
| // Find a graphics and present queue. |
| Optional<uint32_t> newPresentQueue; |
| uint32_t queueCount = static_cast<uint32_t>(mQueueFamilyProperties.size()); |
| constexpr VkQueueFlags kGraphicsAndCompute = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; |
| for (uint32_t queueIndex = 0; queueIndex < queueCount; ++queueIndex) |
| { |
| const auto &queueInfo = mQueueFamilyProperties[queueIndex]; |
| if ((queueInfo.queueFlags & kGraphicsAndCompute) == kGraphicsAndCompute) |
| { |
| VkBool32 supportsPresent = VK_FALSE; |
| ANGLE_VK_TRY(displayVk, vkGetPhysicalDeviceSurfaceSupportKHR( |
| mPhysicalDevice, queueIndex, surface, &supportsPresent)); |
| |
| if (supportsPresent == VK_TRUE) |
| { |
| newPresentQueue = queueIndex; |
| break; |
| } |
| } |
| } |
| |
| ANGLE_VK_CHECK(displayVk, newPresentQueue.valid(), VK_ERROR_INITIALIZATION_FAILED); |
| ANGLE_TRY(initializeDevice(displayVk, newPresentQueue.value())); |
| |
| *presentQueueOut = newPresentQueue.value(); |
| return angle::Result::Continue; |
| } |
| |
| std::string RendererVk::getVendorString() const |
| { |
| return GetVendorString(mPhysicalDeviceProperties.vendorID); |
| } |
| |
| std::string RendererVk::getRendererDescription() const |
| { |
| std::stringstream strstr; |
| |
| uint32_t apiVersion = mPhysicalDeviceProperties.apiVersion; |
| |
| strstr << "Vulkan "; |
| strstr << VK_VERSION_MAJOR(apiVersion) << "."; |
| strstr << VK_VERSION_MINOR(apiVersion) << "."; |
| strstr << VK_VERSION_PATCH(apiVersion); |
| |
| strstr << "("; |
| |
| // In the case of NVIDIA, deviceName does not necessarily contain "NVIDIA". Add "NVIDIA" so that |
| // Vulkan end2end tests can be selectively disabled on NVIDIA. TODO(jmadill): should not be |
| // needed after http://anglebug.com/1874 is fixed and end2end_tests use more sophisticated |
| // driver detection. |
| if (mPhysicalDeviceProperties.vendorID == VENDOR_ID_NVIDIA) |
| { |
| strstr << GetVendorString(mPhysicalDeviceProperties.vendorID) << " "; |
| } |
| |
| strstr << mPhysicalDeviceProperties.deviceName; |
| strstr << " (" << gl::FmtHex(mPhysicalDeviceProperties.deviceID) << ")"; |
| |
| strstr << ")"; |
| |
| return strstr.str(); |
| } |
| |
| gl::Version RendererVk::getMaxSupportedESVersion() const |
| { |
| // Current highest supported version |
| // TODO: Update this to support ES 3.0. http://crbug.com/angleproject/2950 |
| gl::Version maxVersion = gl::Version(2, 0); |
| |
| // Vulkan inherited queries are required to support any GL query type; cap the version at |
| // ES 2.0 if they are unavailable. |
| if (!mPhysicalDeviceFeatures.inheritedQueries) |
| { |
| maxVersion = std::min(maxVersion, gl::Version(2, 0)); |
| } |
| |
| return maxVersion; |
| } |
| |
| void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames) |
| { |
| // Use OpenGL line rasterization rules by default. |
| // TODO(jmadill): Fix Android support. http://anglebug.com/2830 |
| #if defined(ANGLE_PLATFORM_ANDROID) |
| mFeatures.basicGLLineRasterization = false; |
| #else |
| mFeatures.basicGLLineRasterization = true; |
| #endif // defined(ANGLE_PLATFORM_ANDROID) |
| |
| if ((mPhysicalDeviceProperties.apiVersion >= VK_MAKE_VERSION(1, 1, 0)) || |
| ExtensionFound(VK_KHR_MAINTENANCE1_EXTENSION_NAME, deviceExtensionNames)) |
| { |
| // TODO(lucferron): Currently disabled on Intel only since many tests are failing and need |
| // investigation. http://anglebug.com/2728 |
| mFeatures.flipViewportY = !IsIntel(mPhysicalDeviceProperties.vendorID); |
| } |
| |
| #ifdef ANGLE_PLATFORM_WINDOWS |
| // http://anglebug.com/2838 |
| mFeatures.extraCopyBufferRegion = IsIntel(mPhysicalDeviceProperties.vendorID); |
| |
| // http://anglebug.com/3055 |
| mFeatures.forceCpuPathForCubeMapCopy = IsIntel(mPhysicalDeviceProperties.vendorID); |
| #endif |
| |
| angle::PlatformMethods *platform = ANGLEPlatformCurrent(); |
| platform->overrideFeaturesVk(platform, &mFeatures); |
| |
| // Work around incorrect NVIDIA point size range clamping. |
| // TODO(jmadill): Narrow driver range once fixed. http://anglebug.com/2970 |
| if (IsNvidia(mPhysicalDeviceProperties.vendorID)) |
| { |
| mFeatures.clampPointSize = true; |
| } |
| |
| // We also need to clamp point size on several Android drivers. |
| // TODO(jmadill): Remove suppression once fixed. http://anglebug.com/2599 |
| if (IsAndroid()) |
| { |
| mFeatures.clampPointSize = true; |
| } |
| |
| #if defined(ANGLE_PLATFORM_ANDROID) |
| // Work around ineffective compute-graphics barriers on Nexus 5X. |
| // TODO(syoussefi): Figure out which other vendors and driver versions are affected. |
| // http://anglebug.com/3019 |
| mFeatures.flushAfterVertexConversion = |
| IsNexus5X(mPhysicalDeviceProperties.vendorID, mPhysicalDeviceProperties.deviceID); |
| #endif |
| |
| if (ExtensionFound(VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, deviceExtensionNames)) |
| { |
| mFeatures.supportsIncrementalPresent = true; |
| } |
| } |
| |
| void RendererVk::initPipelineCacheVkKey() |
| { |
| std::ostringstream hashStream("ANGLE Pipeline Cache: ", std::ios_base::ate); |
| // Add the pipeline cache UUID to make sure the blob cache always gives a compatible pipeline |
| // cache. It's not particularly necessary to write it as a hex number as done here, so long as |
| // there is no '\0' in the result. |
| for (const uint32_t c : mPhysicalDeviceProperties.pipelineCacheUUID) |
| { |
| hashStream << std::hex << c; |
| } |
| // Add the vendor and device id too for good measure. |
| hashStream << std::hex << mPhysicalDeviceProperties.vendorID; |
| hashStream << std::hex << mPhysicalDeviceProperties.deviceID; |
| |
| const std::string &hashString = hashStream.str(); |
| angle::base::SHA1HashBytes(reinterpret_cast<const unsigned char *>(hashString.c_str()), |
| hashString.length(), mPipelineCacheVkBlobKey.data()); |
| } |
| |
| angle::Result RendererVk::initPipelineCache(DisplayVk *display) |
| { |
| initPipelineCacheVkKey(); |
| |
| egl::BlobCache::Value initialData; |
| bool success = display->getBlobCache()->get(display->getScratchBuffer(), |
| mPipelineCacheVkBlobKey, &initialData); |
| |
| VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {}; |
| |
| pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; |
| pipelineCacheCreateInfo.flags = 0; |
| pipelineCacheCreateInfo.initialDataSize = success ? initialData.size() : 0; |
| pipelineCacheCreateInfo.pInitialData = success ? initialData.data() : nullptr; |
| |
| ANGLE_VK_TRY(display, mPipelineCache.init(mDevice, pipelineCacheCreateInfo)); |
| return angle::Result::Continue; |
| } |
| |
| void RendererVk::getSubmitWaitSemaphores( |
| vk::Context *context, |
| angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> *waitSemaphores, |
| angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> *waitStageMasks) |
| { |
| if (mSubmitLastSignaledSemaphore.getSemaphore()) |
| { |
| waitSemaphores->push_back(mSubmitLastSignaledSemaphore.getSemaphore()->getHandle()); |
| waitStageMasks->push_back(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); |
| |
| // Return the semaphore to the pool (which will remain valid and unused until the |
| // queue it's about to be waited on has finished execution). |
| mSubmitSemaphorePool.freeSemaphore(context, &mSubmitLastSignaledSemaphore); |
| } |
| |
| for (vk::SemaphoreHelper &semaphore : mSubmitWaitSemaphores) |
| { |
| waitSemaphores->push_back(semaphore.getSemaphore()->getHandle()); |
| waitStageMasks->push_back(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); |
| |
| mSubmitSemaphorePool.freeSemaphore(context, &semaphore); |
| } |
| mSubmitWaitSemaphores.clear(); |
| } |
| |
| const gl::Caps &RendererVk::getNativeCaps() const |
| { |
| ensureCapsInitialized(); |
| return mNativeCaps; |
| } |
| |
| const gl::TextureCapsMap &RendererVk::getNativeTextureCaps() const |
| { |
| ensureCapsInitialized(); |
| return mNativeTextureCaps; |
| } |
| |
| const gl::Extensions &RendererVk::getNativeExtensions() const |
| { |
| ensureCapsInitialized(); |
| return mNativeExtensions; |
| } |
| |
| const gl::Limitations &RendererVk::getNativeLimitations() const |
| { |
| ensureCapsInitialized(); |
| return mNativeLimitations; |
| } |
| |
| uint32_t RendererVk::getMaxActiveTextures() |
| { |
| // TODO(lucferron): expose this limitation to GL in Context Caps |
| return std::min<uint32_t>(mPhysicalDeviceProperties.limits.maxPerStageDescriptorSamplers, |
| gl::IMPLEMENTATION_MAX_ACTIVE_TEXTURES); |
| } |
| |
| const vk::CommandPool &RendererVk::getCommandPool() const |
| { |
| return mCommandPool; |
| } |
| |
| angle::Result RendererVk::finish(vk::Context *context) |
| { |
| if (!mCommandGraph.empty()) |
| { |
| TRACE_EVENT0("gpu.angle", "RendererVk::finish"); |
| |
| vk::Scoped<vk::CommandBuffer> commandBatch(mDevice); |
| ANGLE_TRY(flushCommandGraph(context, &commandBatch.get())); |
| |
| angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores; |
| angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks; |
| getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks); |
| |
| VkSubmitInfo submitInfo = {}; |
| submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; |
| submitInfo.waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size()); |
| submitInfo.pWaitSemaphores = waitSemaphores.data(); |
| submitInfo.pWaitDstStageMask = waitStageMasks.data(); |
| submitInfo.commandBufferCount = 1; |
| submitInfo.pCommandBuffers = commandBatch.get().ptr(); |
| submitInfo.signalSemaphoreCount = 0; |
| submitInfo.pSignalSemaphores = nullptr; |
| |
| ANGLE_TRY(submitFrame(context, submitInfo, std::move(commandBatch.get()))); |
| } |
| |
| ASSERT(mQueue != VK_NULL_HANDLE); |
| ANGLE_VK_TRY(context, vkQueueWaitIdle(mQueue)); |
| freeAllInFlightResources(); |
| |
| if (mGpuEventsEnabled) |
| { |
| // This loop should in practice execute once since the queue is already idle. |
| while (mInFlightGpuEventQueries.size() > 0) |
| { |
| ANGLE_TRY(checkCompletedGpuEvents(context)); |
| } |
| // Recalculate the CPU/GPU time difference to account for clock drifting. Avoid unnecessary |
| // synchronization if there is no event to be adjusted (happens when finish() gets called |
| // multiple times towards the end of the application). |
| if (mGpuEvents.size() > 0) |
| { |
| ANGLE_TRY(synchronizeCpuGpuTime(context)); |
| } |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| void RendererVk::freeAllInFlightResources() |
| { |
| for (CommandBatch &batch : mInFlightCommands) |
| { |
| // On device loss we need to wait for fence to be signaled before destroying it |
| if (mDeviceLost) |
| { |
| VkResult status = batch.fence.wait(mDevice, kMaxFenceWaitTimeNs); |
| // If wait times out, it is probably not possible to recover from lost device |
| ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST); |
| } |
| batch.fence.destroy(mDevice); |
| batch.commandPool.destroy(mDevice); |
| } |
| mInFlightCommands.clear(); |
| |
| for (auto &garbage : mGarbage) |
| { |
| garbage.destroy(mDevice); |
| } |
| mGarbage.clear(); |
| |
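| // All submitted work is now either finished or abandoned (on device loss), so advance the |
| // completed serial to the last submitted one. |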
| mLastCompletedQueueSerial = mLastSubmittedQueueSerial; |
| } |
| |
| angle::Result RendererVk::checkCompletedCommands(vk::Context *context) |
| { |
| int finishedCount = 0; |
| |
| for (CommandBatch &batch : mInFlightCommands) |
| { |
| VkResult result = batch.fence.getStatus(mDevice); |
| if (result == VK_NOT_READY) |
| { |
| break; |
| } |
| ANGLE_VK_TRY(context, result); |
| |
| ASSERT(batch.serial > mLastCompletedQueueSerial); |
| mLastCompletedQueueSerial = batch.serial; |
| |
| batch.fence.destroy(mDevice); |
| TRACE_EVENT0("gpu.angle", "commandPool.destroy"); |
| batch.commandPool.destroy(mDevice); |
| ++finishedCount; |
| } |
| |
| mInFlightCommands.erase(mInFlightCommands.begin(), mInFlightCommands.begin() + finishedCount); |
| |
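| // Garbage is stored in submission order, so we can stop at the first entry that is not yet |
| // complete. |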
| size_t freeIndex = 0; |
| for (; freeIndex < mGarbage.size(); ++freeIndex) |
| { |
| if (!mGarbage[freeIndex].destroyIfComplete(mDevice, mLastCompletedQueueSerial)) |
| break; |
| } |
| |
| // Remove the entries from the garbage list - they should be ready to go. |
| if (freeIndex > 0) |
| { |
| mGarbage.erase(mGarbage.begin(), mGarbage.begin() + freeIndex); |
| } |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::submitFrame(vk::Context *context, |
| const VkSubmitInfo &submitInfo, |
| vk::CommandBuffer &&commandBuffer) |
| { |
| TRACE_EVENT0("gpu.angle", "RendererVk::submitFrame"); |
| VkFenceCreateInfo fenceInfo = {}; |
| fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; |
| fenceInfo.flags = 0; |
| |
| vk::Scoped<CommandBatch> scopedBatch(mDevice); |
| CommandBatch &batch = scopedBatch.get(); |
| ANGLE_VK_TRY(context, batch.fence.init(mDevice, fenceInfo)); |
| |
| ANGLE_VK_TRY(context, vkQueueSubmit(mQueue, 1, &submitInfo, batch.fence.getHandle())); |
| |
| // Store this command buffer in the in-flight list. |
| batch.commandPool = std::move(mCommandPool); |
| batch.serial = mCurrentQueueSerial; |
| |
| mInFlightCommands.emplace_back(scopedBatch.release()); |
| |
| // The CPU should be throttled to keep mInFlightCommands from growing too fast. That is done on |
| // swap() though, and there could be multiple submissions in between (through glFlush() calls), |
| // so the limit is larger than the expected number of images. The |
| // InterleavedAttributeDataBenchmark perf test for example issues a large number of flushes. |
| ASSERT(mInFlightCommands.size() <= kInFlightCommandsLimit); |
| |
| nextSerial(); |
| |
| ANGLE_TRY(checkCompletedCommands(context)); |
| |
| if (mGpuEventsEnabled) |
| { |
| ANGLE_TRY(checkCompletedGpuEvents(context)); |
| } |
| |
| // Simply null out the command buffer here - it was allocated using the command pool. |
| commandBuffer.releaseHandle(); |
| |
| // Reallocate the command pool for next frame. |
| // TODO(jmadill): Consider reusing command pools. |
| VkCommandPoolCreateInfo poolInfo = {}; |
| poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; |
| poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; |
| poolInfo.queueFamilyIndex = mCurrentQueueFamilyIndex; |
| |
| ANGLE_VK_TRY(context, mCommandPool.init(mDevice, poolInfo)); |
| return angle::Result::Continue; |
| } |
| |
| void RendererVk::nextSerial() |
| { |
| // Increment the queue serial. If this fails, we should restart ANGLE. |
| mLastSubmittedQueueSerial = mCurrentQueueSerial; |
| mCurrentQueueSerial = mQueueSerialFactory.generate(); |
| |
| // Notify the Contexts that they should be starting new command buffers. |
| // We use one command pool per serial/submit associated with this VkQueue. We can also |
| // have multiple Contexts sharing one VkQueue. In ContextVk::setupDraw we don't explicitly |
| // check for a new serial when starting a new command buffer. We just check that the current |
| // recording command buffer is valid. Thus we need to explicitly notify every other Context |
| // using this VkQueue that their current command buffer is no longer valid. |
| for (gl::Context *context : mDisplay->getContextSet()) |
| { |
| ContextVk *contextVk = vk::GetImpl(context); |
| contextVk->onCommandBufferFinished(); |
| } |
| } |
| |
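| // A serial is considered in use until the GPU has completed all work up to and including it. |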
| bool RendererVk::isSerialInUse(Serial serial) const |
| { |
| return serial > mLastCompletedQueueSerial; |
| } |
| |
| angle::Result RendererVk::finishToSerial(vk::Context *context, Serial serial) |
| { |
| bool timedOut = false; |
| angle::Result result = finishToSerialOrTimeout(context, serial, kMaxFenceWaitTimeNs, &timedOut); |
| |
| // Don't tolerate timeout. If such a large wait time results in timeout, something's wrong. |
| if (timedOut) |
| { |
| result = angle::Result::Stop; |
| } |
| return result; |
| } |
| |
| angle::Result RendererVk::finishToSerialOrTimeout(vk::Context *context, |
| Serial serial, |
| uint64_t timeout, |
| bool *outTimedOut) |
| { |
| *outTimedOut = false; |
| |
| if (!isSerialInUse(serial) || mInFlightCommands.empty()) |
| { |
| return angle::Result::Continue; |
| } |
| |
| // Find the first batch with a serial equal to or greater than the given serial (note that |
| // the batch serials are unique; otherwise an upper-bound search would have been necessary). |
| size_t batchIndex = mInFlightCommands.size() - 1; |
| for (size_t i = 0; i < mInFlightCommands.size(); ++i) |
| { |
| if (mInFlightCommands[i].serial >= serial) |
| { |
| batchIndex = i; |
| break; |
| } |
| } |
| const CommandBatch &batch = mInFlightCommands[batchIndex]; |
| |
| // Wait for it to finish. |
| VkResult status = batch.fence.wait(mDevice, timeout); |
| |
| // If timed out, report it as such. |
| if (status == VK_TIMEOUT) |
| { |
| *outTimedOut = true; |
| return angle::Result::Continue; |
| } |
| |
| ANGLE_VK_TRY(context, status); |
| |
| // Clean up finished batches. |
| return checkCompletedCommands(context); |
| } |
| |
| angle::Result RendererVk::getCompatibleRenderPass(vk::Context *context, |
| const vk::RenderPassDesc &desc, |
| vk::RenderPass **renderPassOut) |
| { |
| return mRenderPassCache.getCompatibleRenderPass(context, mCurrentQueueSerial, desc, |
| renderPassOut); |
| } |
| |
| angle::Result RendererVk::getRenderPassWithOps(vk::Context *context, |
| const vk::RenderPassDesc &desc, |
| const vk::AttachmentOpsArray &ops, |
| vk::RenderPass **renderPassOut) |
| { |
| return mRenderPassCache.getRenderPassWithOps(context, mCurrentQueueSerial, desc, ops, |
| renderPassOut); |
| } |
| |
| vk::CommandGraph *RendererVk::getCommandGraph() |
| { |
| return &mCommandGraph; |
| } |
| |
| angle::Result RendererVk::flushCommandGraph(vk::Context *context, vk::CommandBuffer *commandBatch) |
| { |
| return mCommandGraph.submitCommands(context, mCurrentQueueSerial, &mRenderPassCache, |
| &mCommandPool, commandBatch); |
| } |
| |
| angle::Result RendererVk::flush(vk::Context *context) |
| { |
| if (mCommandGraph.empty()) |
| { |
| return angle::Result::Continue; |
| } |
| |
| TRACE_EVENT0("gpu.angle", "RendererVk::flush"); |
| |
| vk::Scoped<vk::CommandBuffer> commandBatch(mDevice); |
| ANGLE_TRY(flushCommandGraph(context, &commandBatch.get())); |
| |
| angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores; |
| angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks; |
| getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks); |
| |
| // On every flush, create a semaphore to be signaled. On the next submission, this semaphore |
| // will be waited on. |
| ANGLE_TRY(mSubmitSemaphorePool.allocateSemaphore(context, &mSubmitLastSignaledSemaphore)); |
| |
| VkSubmitInfo submitInfo = {}; |
| submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; |
| submitInfo.waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size()); |
| submitInfo.pWaitSemaphores = waitSemaphores.data(); |
| submitInfo.pWaitDstStageMask = waitStageMasks.data(); |
| submitInfo.commandBufferCount = 1; |
| submitInfo.pCommandBuffers = commandBatch.get().ptr(); |
| submitInfo.signalSemaphoreCount = 1; |
| submitInfo.pSignalSemaphores = mSubmitLastSignaledSemaphore.getSemaphore()->ptr(); |
| |
| ANGLE_TRY(submitFrame(context, submitInfo, commandBatch.release())); |
| |
| return angle::Result::Continue; |
| } |
| |
| Serial RendererVk::issueShaderSerial() |
| { |
| return mShaderSerialFactory.generate(); |
| } |
| |
| angle::Result RendererVk::getDescriptorSetLayout( |
| vk::Context *context, |
| const vk::DescriptorSetLayoutDesc &desc, |
| vk::BindingPointer<vk::DescriptorSetLayout> *descriptorSetLayoutOut) |
| { |
| return mDescriptorSetLayoutCache.getDescriptorSetLayout(context, desc, descriptorSetLayoutOut); |
| } |
| |
| angle::Result RendererVk::getPipelineLayout( |
| vk::Context *context, |
| const vk::PipelineLayoutDesc &desc, |
| const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts, |
| vk::BindingPointer<vk::PipelineLayout> *pipelineLayoutOut) |
| { |
| return mPipelineLayoutCache.getPipelineLayout(context, desc, descriptorSetLayouts, |
| pipelineLayoutOut); |
| } |
| |
| angle::Result RendererVk::syncPipelineCacheVk(DisplayVk *displayVk) |
| { |
| ASSERT(mPipelineCache.valid()); |
| |
| if (--mPipelineCacheVkUpdateTimeout > 0) |
| { |
| return angle::Result::Continue; |
| } |
| |
| mPipelineCacheVkUpdateTimeout = kPipelineCacheVkUpdatePeriod; |
| |
    // Get the size of the cache.  With a null data pointer, vkGetPipelineCacheData returns the
    // required size in pipelineCacheSize.
| size_t pipelineCacheSize = 0; |
| VkResult result = mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, nullptr); |
| if (result != VK_INCOMPLETE) |
| { |
| ANGLE_VK_TRY(displayVk, result); |
| } |
| |
| angle::MemoryBuffer *pipelineCacheData = nullptr; |
| ANGLE_VK_CHECK_ALLOC(displayVk, |
| displayVk->getScratchBuffer(pipelineCacheSize, &pipelineCacheData)); |
| |
| size_t originalPipelineCacheSize = pipelineCacheSize; |
| result = mPipelineCache.getCacheData(mDevice, &pipelineCacheSize, pipelineCacheData->data()); |
    // Note: VK_INCOMPLETE is not accepted here because it's not expected (the full size of the
    // cache was determined just above), so receiving it hints at an implementation bug we would
    // want to know about early.
| ASSERT(result != VK_INCOMPLETE); |
| ANGLE_VK_TRY(displayVk, result); |
| |
| // If vkGetPipelineCacheData ends up writing fewer bytes than requested, zero out the rest of |
| // the buffer to avoid leaking garbage memory. |
| ASSERT(pipelineCacheSize <= originalPipelineCacheSize); |
| if (pipelineCacheSize < originalPipelineCacheSize) |
| { |
| memset(pipelineCacheData->data() + pipelineCacheSize, 0, |
| originalPipelineCacheSize - pipelineCacheSize); |
| } |
| |
| displayVk->getBlobCache()->putApplication(mPipelineCacheVkBlobKey, *pipelineCacheData); |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::allocateSubmitWaitSemaphore(vk::Context *context, |
| const vk::Semaphore **outSemaphore) |
| { |
    // mSubmitWaitSemaphores has fixed capacity; guard against overflowing it.
    ASSERT(mSubmitWaitSemaphores.size() < mSubmitWaitSemaphores.max_size());
| |
| vk::SemaphoreHelper semaphore; |
| ANGLE_TRY(mSubmitSemaphorePool.allocateSemaphore(context, &semaphore)); |
| |
| mSubmitWaitSemaphores.push_back(std::move(semaphore)); |
| *outSemaphore = mSubmitWaitSemaphores.back().getSemaphore(); |
| |
| return angle::Result::Continue; |
| } |
| |
| const vk::Semaphore *RendererVk::getSubmitLastSignaledSemaphore(vk::Context *context) |
| { |
| const vk::Semaphore *semaphore = mSubmitLastSignaledSemaphore.getSemaphore(); |
| |
    // The caller is about to wait on this semaphore.  Return it to the pool now; pooled
    // semaphores remain valid and unused until the queue submission that waits on them has
    // finished execution.
| mSubmitSemaphorePool.freeSemaphore(context, &mSubmitLastSignaledSemaphore); |
| |
| return semaphore; |
| } |
| |
| angle::Result RendererVk::getTimestamp(vk::Context *context, uint64_t *timestampOut) |
| { |
| // The intent of this function is to query the timestamp without stalling the GPU. Currently, |
| // that seems impossible, so instead, we are going to make a small submission with just a |
| // timestamp query. First, the disjoint timer query extension says: |
| // |
| // > This will return the GL time after all previous commands have reached the GL server but |
| // have not yet necessarily executed. |
| // |
| // The previous commands are stored in the command graph at the moment and are not yet flushed. |
| // The wording allows us to make a submission to get the timestamp without performing a flush. |
| // |
| // Second: |
| // |
| // > By using a combination of this synchronous get command and the asynchronous timestamp query |
| // object target, applications can measure the latency between when commands reach the GL server |
| // and when they are realized in the framebuffer. |
| // |
    // This fits with the above strategy as well, although it does risk introducing a GPU bubble.
    // This function directly generates a command buffer and submits it instead of using the other
    // member functions, so as to avoid changing any state, such as the queue serial.
| |
| // Create a query used to receive the GPU timestamp |
| vk::Scoped<vk::DynamicQueryPool> timestampQueryPool(mDevice); |
| vk::QueryHelper timestampQuery; |
| ANGLE_TRY(timestampQueryPool.get().init(context, VK_QUERY_TYPE_TIMESTAMP, 1)); |
| ANGLE_TRY(timestampQueryPool.get().allocateQuery(context, ×tampQuery)); |
| |
| // Record the command buffer |
| vk::Scoped<vk::CommandBuffer> commandBatch(mDevice); |
| vk::CommandBuffer &commandBuffer = commandBatch.get(); |
| |
| VkCommandBufferAllocateInfo commandBufferInfo = {}; |
| commandBufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; |
| commandBufferInfo.commandPool = mCommandPool.getHandle(); |
| commandBufferInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; |
| commandBufferInfo.commandBufferCount = 1; |
| |
| ANGLE_VK_TRY(context, commandBuffer.init(mDevice, commandBufferInfo)); |
| |
| VkCommandBufferBeginInfo beginInfo = {}; |
| beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; |
| beginInfo.flags = 0; |
| beginInfo.pInheritanceInfo = nullptr; |
| |
| ANGLE_VK_TRY(context, commandBuffer.begin(beginInfo)); |
| |
| commandBuffer.resetQueryPool(timestampQuery.getQueryPool()->getHandle(), |
| timestampQuery.getQuery(), 1); |
| commandBuffer.writeTimestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, |
| timestampQuery.getQueryPool()->getHandle(), |
| timestampQuery.getQuery()); |
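    // Since this command buffer contains only the query reset and the timestamp write, the
    // BOTTOM_OF_PIPE timestamp effectively captures "now" on the GPU timeline.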
| |
| ANGLE_VK_TRY(context, commandBuffer.end()); |
| |
| // Create fence for the submission |
| VkFenceCreateInfo fenceInfo = {}; |
| fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; |
| fenceInfo.flags = 0; |
| |
| vk::Scoped<vk::Fence> fence(mDevice); |
| ANGLE_VK_TRY(context, fence.get().init(mDevice, fenceInfo)); |
| |
| // Submit the command buffer |
| VkSubmitInfo submitInfo = {}; |
| submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; |
| submitInfo.waitSemaphoreCount = 0; |
| submitInfo.pWaitSemaphores = nullptr; |
| submitInfo.pWaitDstStageMask = nullptr; |
| submitInfo.commandBufferCount = 1; |
| submitInfo.pCommandBuffers = commandBuffer.ptr(); |
| submitInfo.signalSemaphoreCount = 0; |
| submitInfo.pSignalSemaphores = nullptr; |
| |
| ANGLE_VK_TRY(context, vkQueueSubmit(mQueue, 1, &submitInfo, fence.get().getHandle())); |
| |
    // Wait for the submission to finish.  Since the submission waits on no semaphores, it may
    // execute in parallel with whatever is already running on the GPU.
| ANGLE_VK_TRY(context, fence.get().wait(mDevice, kMaxFenceWaitTimeNs)); |
| |
| // Get the query results |
| constexpr VkQueryResultFlags queryFlags = VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT; |
| |
| ANGLE_VK_TRY(context, timestampQuery.getQueryPool()->getResults( |
| mDevice, timestampQuery.getQuery(), 1, sizeof(*timestampOut), |
| timestampOut, sizeof(*timestampOut), queryFlags)); |
| |
| timestampQueryPool.get().freeQuery(context, ×tampQuery); |
| |
| // Convert results to nanoseconds. |
| *timestampOut = static_cast<uint64_t>( |
| *timestampOut * static_cast<double>(mPhysicalDeviceProperties.limits.timestampPeriod)); |
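    // For example, on a hypothetical device where timestampPeriod is 52.08 ns/tick, a raw
    // counter value of 1000 ticks would convert to roughly 52'080 ns.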
| |
| return angle::Result::Continue; |
| } |
| |
// These functions check the mandatory format support table first, and fall back to querying the
// device (if necessary) to test the availability of the requested feature bits.
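//
// For example, code holding a RendererVk could test whether a format supports sampled images
// with optimal tiling:
//
//     if (renderer->hasTextureFormatFeatureBits(VK_FORMAT_R8G8B8A8_UNORM,
//                                               VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
//     { /* the format is sampleable */ }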
| bool RendererVk::hasLinearTextureFormatFeatureBits(VkFormat format, |
| const VkFormatFeatureFlags featureBits) |
| { |
| return hasFormatFeatureBits<&VkFormatProperties::linearTilingFeatures>(format, featureBits); |
| } |
| |
| bool RendererVk::hasTextureFormatFeatureBits(VkFormat format, |
| const VkFormatFeatureFlags featureBits) |
| { |
| return hasFormatFeatureBits<&VkFormatProperties::optimalTilingFeatures>(format, featureBits); |
| } |
| |
| bool RendererVk::hasBufferFormatFeatureBits(VkFormat format, const VkFormatFeatureFlags featureBits) |
| { |
| return hasFormatFeatureBits<&VkFormatProperties::bufferFeatures>(format, featureBits); |
| } |
| |
| angle::Result RendererVk::synchronizeCpuGpuTime(vk::Context *context) |
| { |
| ASSERT(mGpuEventsEnabled); |
| |
| angle::PlatformMethods *platform = ANGLEPlatformCurrent(); |
| ASSERT(platform); |
| |
| // To synchronize CPU and GPU times, we need to get the CPU timestamp as close as possible to |
| // the GPU timestamp. The process of getting the GPU timestamp is as follows: |
| // |
| // CPU GPU |
| // |
| // Record command buffer |
| // with timestamp query |
| // |
| // Submit command buffer |
| // |
| // Post-submission work Begin execution |
| // |
| // ???? Write timstamp Tgpu |
| // |
| // ???? End execution |
| // |
| // ???? Return query results |
| // |
| // ???? |
| // |
| // Get query results |
| // |
    // The areas of unknown work (????) on the CPU indicate that the CPU may or may not have
    // finished post-submission work while the GPU is executing in parallel. With no further work,
    // querying CPU timestamps before submission and after getting query results gives bounds on
    // Tgpu, but the resulting range could be quite large.
| // |
| // Using VkEvents, the GPU can be made to wait for the CPU and vice versa, in an effort to |
| // reduce this range. This function implements the following procedure: |
| // |
| // CPU GPU |
| // |
| // Record command buffer |
| // with timestamp query |
| // |
| // Submit command buffer |
| // |
| // Post-submission work Begin execution |
| // |
| // ???? Set Event GPUReady |
| // |
| // Wait on Event GPUReady Wait on Event CPUReady |
| // |
| // Get CPU Time Ts Wait on Event CPUReady |
| // |
| // Set Event CPUReady Wait on Event CPUReady |
| // |
| // Get CPU Time Tcpu Get GPU Time Tgpu |
| // |
| // Wait on Event GPUDone Set Event GPUDone |
| // |
| // Get CPU Time Te End Execution |
| // |
| // Idle Return query results |
| // |
| // Get query results |
| // |
| // If Te-Ts > epsilon, a GPU or CPU interruption can be assumed and the operation can be |
    // retried.  Once Te-Ts < epsilon, Tcpu can be taken to presumably match Tgpu.  Since finding
    // an epsilon that's valid for all devices may be difficult, the loop is instead performed a
    // limited number of times and the Tcpu,Tgpu pair corresponding to the smallest Te-Ts is used
    // for calibration.
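    //
    // As a concrete (hypothetical) example: with Ts = 10.0000s and Te = 10.0004s, the
    // uncertainty window Te-Ts is 0.4ms, so the Tcpu sampled between them matches Tgpu to
    // within at most that much error.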
| // |
| // Note: Once VK_EXT_calibrated_timestamps is ubiquitous, this should be redone. |
| |
| // Make sure nothing is running |
| ASSERT(mCommandGraph.empty()); |
| |
| TRACE_EVENT0("gpu.angle", "RendererVk::synchronizeCpuGpuTime"); |
| |
| // Create a query used to receive the GPU timestamp |
| vk::QueryHelper timestampQuery; |
| ANGLE_TRY(mGpuEventQueryPool.allocateQuery(context, ×tampQuery)); |
| |
| // Create the three events |
| VkEventCreateInfo eventCreateInfo = {}; |
| eventCreateInfo.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; |
| eventCreateInfo.flags = 0; |
| |
| vk::Scoped<vk::Event> cpuReady(mDevice), gpuReady(mDevice), gpuDone(mDevice); |
| ANGLE_VK_TRY(context, cpuReady.get().init(mDevice, eventCreateInfo)); |
| ANGLE_VK_TRY(context, gpuReady.get().init(mDevice, eventCreateInfo)); |
| ANGLE_VK_TRY(context, gpuDone.get().init(mDevice, eventCreateInfo)); |
| |
| constexpr uint32_t kRetries = 10; |
| |
    // Suffix S denotes seconds; suffix Cycles denotes GPU timestamp clock cycles.
    double tightestRangeS = 1e6;
| double TcpuS = 0; |
| uint64_t TgpuCycles = 0; |
| for (uint32_t i = 0; i < kRetries; ++i) |
| { |
| // Reset the events |
| ANGLE_VK_TRY(context, cpuReady.get().reset(mDevice)); |
| ANGLE_VK_TRY(context, gpuReady.get().reset(mDevice)); |
| ANGLE_VK_TRY(context, gpuDone.get().reset(mDevice)); |
| |
| // Record the command buffer |
| vk::Scoped<vk::CommandBuffer> commandBatch(mDevice); |
| vk::CommandBuffer &commandBuffer = commandBatch.get(); |
| |
| VkCommandBufferAllocateInfo commandBufferInfo = {}; |
| commandBufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; |
| commandBufferInfo.commandPool = mCommandPool.getHandle(); |
| commandBufferInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; |
| commandBufferInfo.commandBufferCount = 1; |
| |
| ANGLE_VK_TRY(context, commandBuffer.init(mDevice, commandBufferInfo)); |
| |
| VkCommandBufferBeginInfo beginInfo = {}; |
| beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; |
| beginInfo.flags = 0; |
| beginInfo.pInheritanceInfo = nullptr; |
| |
| ANGLE_VK_TRY(context, commandBuffer.begin(beginInfo)); |
| |
| commandBuffer.setEvent(gpuReady.get().getHandle(), VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); |
| commandBuffer.waitEvents(1, cpuReady.get().ptr(), VK_PIPELINE_STAGE_HOST_BIT, |
| VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, 0, nullptr, 0, |
| nullptr); |
| |
| commandBuffer.resetQueryPool(timestampQuery.getQueryPool()->getHandle(), |
| timestampQuery.getQuery(), 1); |
| commandBuffer.writeTimestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, |
| timestampQuery.getQueryPool()->getHandle(), |
| timestampQuery.getQuery()); |
| |
| commandBuffer.setEvent(gpuDone.get().getHandle(), VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT); |
| |
| ANGLE_VK_TRY(context, commandBuffer.end()); |
| |
| // Submit the command buffer |
| angle::FixedVector<VkSemaphore, kMaxWaitSemaphores> waitSemaphores; |
| angle::FixedVector<VkPipelineStageFlags, kMaxWaitSemaphores> waitStageMasks; |
| getSubmitWaitSemaphores(context, &waitSemaphores, &waitStageMasks); |
| |
| VkSubmitInfo submitInfo = {}; |
| submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; |
| submitInfo.waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size()); |
| submitInfo.pWaitSemaphores = waitSemaphores.data(); |
| submitInfo.pWaitDstStageMask = waitStageMasks.data(); |
| submitInfo.commandBufferCount = 1; |
| submitInfo.pCommandBuffers = commandBuffer.ptr(); |
| submitInfo.signalSemaphoreCount = 0; |
| submitInfo.pSignalSemaphores = nullptr; |
| |
| ANGLE_TRY(submitFrame(context, submitInfo, std::move(commandBuffer))); |
| |
| // Wait for GPU to be ready. This is a short busy wait. |
| VkResult result = VK_EVENT_RESET; |
| do |
| { |
| result = gpuReady.get().getStatus(mDevice); |
| if (result != VK_EVENT_SET && result != VK_EVENT_RESET) |
| { |
| ANGLE_VK_TRY(context, result); |
| } |
| } while (result == VK_EVENT_RESET); |
| |
| double TsS = platform->monotonicallyIncreasingTime(platform); |
| |
| // Tell the GPU to go ahead with the timestamp query. |
| ANGLE_VK_TRY(context, cpuReady.get().set(mDevice)); |
| double cpuTimestampS = platform->monotonicallyIncreasingTime(platform); |
| |
| // Wait for GPU to be done. Another short busy wait. |
| do |
| { |
| result = gpuDone.get().getStatus(mDevice); |
| if (result != VK_EVENT_SET && result != VK_EVENT_RESET) |
| { |
| ANGLE_VK_TRY(context, result); |
| } |
| } while (result == VK_EVENT_RESET); |
| |
| double TeS = platform->monotonicallyIncreasingTime(platform); |
| |
| // Get the query results |
| ANGLE_TRY(finishToSerial(context, getLastSubmittedQueueSerial())); |
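        // finishToSerial above guarantees the submission has completed, so the WAIT flag below
        // should not block for long.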
| |
| constexpr VkQueryResultFlags queryFlags = VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT; |
| |
| uint64_t gpuTimestampCycles = 0; |
| ANGLE_VK_TRY(context, timestampQuery.getQueryPool()->getResults( |
| mDevice, timestampQuery.getQuery(), 1, sizeof(gpuTimestampCycles), |
| &gpuTimestampCycles, sizeof(gpuTimestampCycles), queryFlags)); |
| |
| // Use the first timestamp queried as origin. |
| if (mGpuEventTimestampOrigin == 0) |
| { |
| mGpuEventTimestampOrigin = gpuTimestampCycles; |
| } |
| |
| // Take these CPU and GPU timestamps if there is better confidence. |
| double confidenceRangeS = TeS - TsS; |
| if (confidenceRangeS < tightestRangeS) |
| { |
| tightestRangeS = confidenceRangeS; |
| TcpuS = cpuTimestampS; |
| TgpuCycles = gpuTimestampCycles; |
| } |
| } |
| |
| mGpuEventQueryPool.freeQuery(context, ×tampQuery); |
| |
| // timestampPeriod gives nanoseconds/cycle. |
| double TgpuS = (TgpuCycles - mGpuEventTimestampOrigin) * |
| static_cast<double>(mPhysicalDeviceProperties.limits.timestampPeriod) / |
| 1'000'000'000.0; |
| |
| flushGpuEvents(TgpuS, TcpuS); |
| |
| mGpuClockSync.gpuTimestampS = TgpuS; |
| mGpuClockSync.cpuTimestampS = TcpuS; |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::traceGpuEventImpl(vk::Context *context, |
| vk::CommandBuffer *commandBuffer, |
| char phase, |
| const char *name) |
| { |
| ASSERT(mGpuEventsEnabled); |
| |
| GpuEventQuery event; |
| |
    // The phase character follows the trace-event convention (e.g. 'B' for begin, 'E' for end).
    event.name = name;
    event.phase = phase;
    event.serial = mCurrentQueueSerial;
| |
| ANGLE_TRY(mGpuEventQueryPool.allocateQuery(context, &event.queryPoolIndex, &event.queryIndex)); |
| |
| commandBuffer->resetQueryPool( |
| mGpuEventQueryPool.getQueryPool(event.queryPoolIndex)->getHandle(), event.queryIndex, 1); |
| commandBuffer->writeTimestamp( |
| VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, |
| mGpuEventQueryPool.getQueryPool(event.queryPoolIndex)->getHandle(), event.queryIndex); |
| |
| mInFlightGpuEventQueries.push_back(std::move(event)); |
| |
| return angle::Result::Continue; |
| } |
| |
| angle::Result RendererVk::checkCompletedGpuEvents(vk::Context *context) |
| { |
| ASSERT(mGpuEventsEnabled); |
| |
| angle::PlatformMethods *platform = ANGLEPlatformCurrent(); |
| ASSERT(platform); |
| |
| int finishedCount = 0; |
| |
| for (GpuEventQuery &eventQuery : mInFlightGpuEventQueries) |
| { |
| // Only check the timestamp query if the submission has finished. |
| if (eventQuery.serial > mLastCompletedQueueSerial) |
| { |
| break; |
| } |
| |
| // See if the results are available. |
| uint64_t gpuTimestampCycles = 0; |
| VkResult result = mGpuEventQueryPool.getQueryPool(eventQuery.queryPoolIndex) |
| ->getResults(mDevice, eventQuery.queryIndex, 1, |
| sizeof(gpuTimestampCycles), &gpuTimestampCycles, |
| sizeof(gpuTimestampCycles), VK_QUERY_RESULT_64_BIT); |
| if (result == VK_NOT_READY) |
| { |
| break; |
| } |
| ANGLE_VK_TRY(context, result); |
| |
| mGpuEventQueryPool.freeQuery(context, eventQuery.queryPoolIndex, eventQuery.queryIndex); |
| |
| GpuEvent event; |
| event.gpuTimestampCycles = gpuTimestampCycles; |
| event.name = eventQuery.name; |
| event.phase = eventQuery.phase; |
| |
| mGpuEvents.emplace_back(event); |
| |
| ++finishedCount; |
| } |
| |
    // The in-flight list is submission-ordered, so the finished queries form a prefix of it.
    mInFlightGpuEventQueries.erase(mInFlightGpuEventQueries.begin(),
                                   mInFlightGpuEventQueries.begin() + finishedCount);
| |
| return angle::Result::Continue; |
| } |
| |
| void RendererVk::flushGpuEvents(double nextSyncGpuTimestampS, double nextSyncCpuTimestampS) |
| { |
    if (mGpuEvents.empty())
| { |
| return; |
| } |
| |
| angle::PlatformMethods *platform = ANGLEPlatformCurrent(); |
| ASSERT(platform); |
| |
| // Find the slope of the clock drift for adjustment |
| double lastGpuSyncTimeS = mGpuClockSync.gpuTimestampS; |
| double lastGpuSyncDiffS = mGpuClockSync.cpuTimestampS - mGpuClockSync.gpuTimestampS; |
| double gpuSyncDriftSlope = 0; |
| |
| double nextGpuSyncTimeS = nextSyncGpuTimestampS; |
| double nextGpuSyncDiffS = nextSyncCpuTimestampS - nextSyncGpuTimestampS; |
| |
    // No GPU trace events should have been generated before the clock sync, so if there is no
    // "previous" clock sync, there should be no GPU events (i.e. the function returns early above).
| ASSERT(mGpuClockSync.gpuTimestampS != std::numeric_limits<double>::max() && |
| mGpuClockSync.cpuTimestampS != std::numeric_limits<double>::max()); |
| |
| gpuSyncDriftSlope = |
| (nextGpuSyncDiffS - lastGpuSyncDiffS) / (nextGpuSyncTimeS - lastGpuSyncTimeS); |
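
    // Worked (hypothetical) example: if the previous sync measured (gpu = 1.0s, cpu = 5.00s)
    // and this sync measures (gpu = 2.0s, cpu = 6.10s), the offsets are 4.00s and 4.10s, so
    // the slope is (4.10 - 4.00) / (2.0 - 1.0) = 0.1.  The adjustment below then maps an
    // event at gpu = 1.5s to 1.5 + 4.00 + 0.1 * (1.5 - 1.0) = 5.55s on the CPU clock.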
| |
| for (const GpuEvent &event : mGpuEvents) |
| { |
| double gpuTimestampS = |
| (event.gpuTimestampCycles - mGpuEventTimestampOrigin) * |
| static_cast<double>(mPhysicalDeviceProperties.limits.timestampPeriod) * 1e-9; |
| |
| // Account for clock drift. |
| gpuTimestampS += lastGpuSyncDiffS + gpuSyncDriftSlope * (gpuTimestampS - lastGpuSyncTimeS); |
| |
| // Generate the trace now that the GPU timestamp is available and clock drifts are accounted |
| // for. |
| static long long eventId = 1; |
| static const unsigned char *categoryEnabled = |
| TRACE_EVENT_API_GET_CATEGORY_ENABLED("gpu.angle.gpu"); |
| platform->addTraceEvent(platform, event.phase, categoryEnabled, event.name, eventId++, |
| gpuTimestampS, 0, nullptr, nullptr, nullptr, TRACE_EVENT_FLAG_NONE); |
| } |
| |
| mGpuEvents.clear(); |
| } |
| |
| template <VkFormatFeatureFlags VkFormatProperties::*features> |
| bool RendererVk::hasFormatFeatureBits(VkFormat format, const VkFormatFeatureFlags featureBits) |
| { |
| ASSERT(static_cast<uint32_t>(format) < vk::kNumVkFormats); |
| VkFormatProperties &deviceProperties = mFormatProperties[format]; |
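    // bufferFeatures doubles as the "not yet queried" sentinel here; mFormatProperties entries
    // are presumably initialized to kInvalidFormatFeatureFlags before first use.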
| |
| if (deviceProperties.bufferFeatures == kInvalidFormatFeatureFlags) |
| { |
| // If we don't have the actual device features, see if the requested features are mandatory. |
| // If so, there's no need to query the device. |
| const VkFormatProperties &mandatoryProperties = vk::GetMandatoryFormatSupport(format); |
| if (IsMaskFlagSet(mandatoryProperties.*features, featureBits)) |
| { |
| return true; |
| } |
| |
| // Otherwise query the format features and cache it. |
| vkGetPhysicalDeviceFormatProperties(mPhysicalDevice, format, &deviceProperties); |
| } |
| |
| return IsMaskFlagSet(deviceProperties.*features, featureBits); |
| } |
| |
| uint32_t GetUniformBufferDescriptorCount() |
| { |
| return kUniformBufferDescriptorsPerDescriptorSet; |
| } |
| |
| } // namespace rx |