| // |
| // Copyright 2019 The ANGLE Project Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // VulkanBarriersPerf: |
| // Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency. |
| // |
| |
| #include <sstream> |
| |
| #include "ANGLEPerfTest.h" |
| #include "test_utils/gl_raii.h" |
| #include "util/shader_utils.h" |
| |
| using namespace angle; |
| |
| namespace |
| { |
// Number of loop iterations issued by every benchmark step; copied into
// RenderTestParams::iterationsPerStep by the VulkanBarriersPerfParams constructor.
constexpr unsigned int kIterationsPerStep = 10;
| |
| struct VulkanBarriersPerfParams final : public RenderTestParams |
| { |
| VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS) |
| { |
| iterationsPerStep = kIterationsPerStep; |
| |
| // Common default parameters |
| eglParameters = egl_platform::VULKAN(); |
| majorVersion = 3; |
| minorVersion = 0; |
| windowWidth = 256; |
| windowHeight = 256; |
| trackGpuTime = true; |
| |
| doBufferCopy = bufferCopy; |
| doLargeTransfers = largeTransfers; |
| doSlowFragmentShaders = slowFS; |
| } |
| |
| std::string story() const override; |
| |
| // Static parameters |
| static constexpr int kImageSizes[3] = {256, 512, 4096}; |
| static constexpr int kBufferSize = 4096 * 4096; |
| |
| bool doBufferCopy; |
| bool doLargeTransfers; |
| bool doSlowFragmentShaders; |
| }; |
| |
| constexpr int VulkanBarriersPerfParams::kImageSizes[]; |
| |
| std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams ¶ms) |
| { |
| os << params.backendAndStory().substr(1); |
| return os; |
| } |
| |
| class VulkanBarriersPerfBenchmark : public ANGLERenderTest, |
| public ::testing::WithParamInterface<VulkanBarriersPerfParams> |
| { |
| public: |
| VulkanBarriersPerfBenchmark(); |
| |
| void initializeBenchmark() override; |
| void destroyBenchmark() override; |
| void drawBenchmark() override; |
| |
| private: |
| void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed); |
| void createUniformBuffer(); |
| void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex); |
| void createResources(); |
| |
| // Handle to the program object |
| GLProgram mProgram; |
| |
| // Attribute locations |
| GLint mPositionLoc; |
| GLint mTexCoordLoc; |
| |
| // Sampler location |
| GLint mSamplerLoc; |
| |
| // Texture handles |
| GLTexture mTextures[4]; |
| |
| // Uniform buffer handles |
| GLBuffer mUniformBuffers[2]; |
| |
| // Framebuffer handles |
| GLFramebuffer mFbos[2]; |
| |
| // Buffer handle |
| GLBuffer mVertexBuffer; |
| GLBuffer mIndexBuffer; |
| |
| static constexpr size_t kSmallFboIndex = 0; |
| static constexpr size_t kLargeFboIndex = 1; |
| |
| static constexpr size_t kUniformBuffer1Index = 0; |
| static constexpr size_t kUniformBuffer2Index = 1; |
| |
| static constexpr size_t kSmallTextureIndex = 0; |
| static constexpr size_t kLargeTextureIndex = 1; |
| static constexpr size_t kTransferTexture1Index = 2; |
| static constexpr size_t kTransferTexture2Index = 3; |
| |
| static constexpr size_t kSmallSizeIndex = 0; |
| static constexpr size_t kLargeSizeIndex = 1; |
| static constexpr size_t kHugeSizeIndex = 2; |
| }; |
| |
| std::string VulkanBarriersPerfParams::story() const |
| { |
| std::ostringstream sout; |
| |
| sout << RenderTestParams::story(); |
| |
| if (doBufferCopy) |
| { |
| sout << "_buffer_copy"; |
| } |
| if (doLargeTransfers) |
| { |
| sout << "_transfer"; |
| } |
| if (doSlowFragmentShaders) |
| { |
| sout << "_slowfs"; |
| } |
| |
| return sout.str(); |
| } |
| |
| VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark() |
| : ANGLERenderTest("VulkanBarriersPerf", GetParam()), |
| mPositionLoc(-1), |
| mTexCoordLoc(-1), |
| mSamplerLoc(-1) |
| { |
| if (IsNVIDIA() && IsWindows7()) |
| { |
| skipTest( |
| "http://crbug.com/1096510 Fails on Windows7 NVIDIA Vulkan, presumably due to old " |
| "drivers"); |
| } |
| } |
| |
// Vertex shader: forwards the position and texture coordinate attributes unchanged.
constexpr char kVS[] = R"(attribute vec4 a_position;
attribute vec2 a_texCoord;
varying vec2 v_texCoord;
void main()
{
    gl_Position = a_position;
    v_texCoord = a_texCoord;
})";
| |
// Fast fragment shader: a single texture fetch per fragment.
constexpr char kShortFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    gl_FragColor = texture2D(s_texture, v_texCoord);
})";
| |
// Slow fragment shader: fragments with v_texCoord.x < 0.2 accumulate 100 texture fetches, all
// other fragments output zero.
constexpr char kSlowFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    vec4 outColor = vec4(0);
    if (v_texCoord.x < 0.2)
    {
        for (int i = 0; i < 100; ++i)
        {
            outColor += texture2D(s_texture, v_texCoord);
        }
    }
    gl_FragColor = outColor;
})";
| |
| void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex, |
| uint32_t sizeIndex, |
| bool compressed) |
| { |
| const auto ¶ms = GetParam(); |
| |
| // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan |
| // backend. http://anglebug.com/42261682 |
| |
| glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]); |
| glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex], |
| params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); |
| |
| // Disable mipmapping |
| glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
| } |
| |
| void VulkanBarriersPerfBenchmark::createUniformBuffer() |
| { |
| const auto ¶ms = GetParam(); |
| |
| glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]); |
| glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY); |
| glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]); |
| glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY); |
| glBindBuffer(GL_UNIFORM_BUFFER, 0); |
| } |
| |
| void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex, |
| uint32_t textureIndex, |
| uint32_t sizeIndex) |
| { |
| createTexture(textureIndex, sizeIndex, false); |
| |
| glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]); |
| glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| mTextures[textureIndex], 0); |
| } |
| |
| void VulkanBarriersPerfBenchmark::createResources() |
| { |
| const auto ¶ms = GetParam(); |
| |
| mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS); |
| ASSERT_TRUE(mProgram.valid()); |
| |
| // Get the attribute locations |
| mPositionLoc = glGetAttribLocation(mProgram, "a_position"); |
| mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord"); |
| |
| // Get the sampler location |
| mSamplerLoc = glGetUniformLocation(mProgram, "s_texture"); |
| |
| // Build the vertex buffer |
| GLfloat vertices[] = { |
| -0.5f, 0.5f, 0.0f, // Position 0 |
| 0.0f, 0.0f, // TexCoord 0 |
| -0.5f, -0.5f, 0.0f, // Position 1 |
| 0.0f, 1.0f, // TexCoord 1 |
| 0.5f, -0.5f, 0.0f, // Position 2 |
| 1.0f, 1.0f, // TexCoord 2 |
| 0.5f, 0.5f, 0.0f, // Position 3 |
| 1.0f, 0.0f // TexCoord 3 |
| }; |
| |
| glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer); |
| glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); |
| |
| GLushort indices[] = {0, 1, 2, 0, 2, 3}; |
| glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer); |
| glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW); |
| |
| // Use tightly packed data |
| glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
| |
| // Create four textures. Two of them are going to be framebuffers, and two are used for large |
| // transfers. |
| createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex); |
| createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex); |
| createUniformBuffer(); |
| |
| if (params.doLargeTransfers) |
| { |
| createTexture(kTransferTexture1Index, kHugeSizeIndex, true); |
| createTexture(kTransferTexture2Index, kHugeSizeIndex, true); |
| } |
| } |
| |
| void VulkanBarriersPerfBenchmark::initializeBenchmark() |
| { |
| createResources(); |
| |
| glClearColor(0.0f, 0.0f, 0.0f, 0.0f); |
| |
| ASSERT_GL_NO_ERROR(); |
| } |
| |
| void VulkanBarriersPerfBenchmark::destroyBenchmark() {} |
| |
| void VulkanBarriersPerfBenchmark::drawBenchmark() |
| { |
| const auto ¶ms = GetParam(); |
| |
| glUseProgram(mProgram); |
| |
| // Bind the buffers |
| glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer); |
| glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer); |
| |
| // Load the vertex position |
| glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0); |
| // Load the texture coordinate |
| glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), |
| reinterpret_cast<void *>(3 * sizeof(GLfloat))); |
| |
| glEnableVertexAttribArray(mPositionLoc); |
| glEnableVertexAttribArray(mTexCoordLoc); |
| |
| // Set the texture sampler to texture unit to 0 |
| glUniform1i(mSamplerLoc, 0); |
| |
| /* |
| * The perf benchmark does the following: |
| * |
| * - Alternately clear and draw from fbo 1 into fbo 2 and back. This would use the color |
| * attachment and shader read-only layouts in the fragment shader and color attachment stages. |
| * |
| * - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts |
| * in the transfer stage. |
| * |
| * Once compressed texture copies are supported, alternately copy large chunks of data from |
| * texture 1 into texture 2 and back. This would use the transfer layouts in the transfer |
| * stage. |
| * |
| * Once compute shader support is added, another independent set of operations could be a few |
| * dispatches. This would use the general and shader read-only layouts in the compute stage. |
| * |
| * The idea is to create independent pipelines of operations that would run in parallel on the |
| * GPU. Regressions or inefficiencies in the barrier implementation could result in |
| * serialization of these jobs, resulting in a hit in performance. |
| * |
| * The above operations for example should ideally run on the GPU threads in parallel: |
| * |
| * + |---draw---||---draw---||---draw---||---draw---||---draw---| |
| * + |----buffer copy----||----buffer copy----||----buffer copy----| |
| * + |-----------texture copy------------||-----------texture copy------------| |
| * + |-----dispatch------||------dispatch------||------dispatch------| |
| * |
| * If barriers are too restrictive, situations like this could happen (draw is blocking |
| * copy): |
| * |
| * + |---draw---||---draw---||---draw---||---draw---||---draw---| |
| * + |------------copy------------||-----------copy------------| |
| * |
| * Or like this (copy is blocking draw): |
| * |
| * + |---draw---| |---draw---| |---draw---| |
| * + |--------------copy-------------||-------------copy--------------| |
| * |
| * Or like this (draw and copy blocking each other): |
| * |
| * + |---draw---| |---draw---| |
| * + |------------copy---------------| |------------copy------------| |
| * |
| * The idea of doing slow FS calls is to make the second case above slower (by making the draw |
| * slower than the transfer): |
| * |
| * + |------------------draw------------------| |-...draw...-| |
| * + |--------------copy----------------| |-------------copy-------------| |
| */ |
| |
| startGpuTimer(); |
| for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration) |
| { |
| bool altEven = iteration % 2 == 0; |
| |
| const int fboDestIndex = altEven ? kLargeFboIndex : kSmallFboIndex; |
| const int fboTexSrcIndex = altEven ? kSmallTextureIndex : kLargeTextureIndex; |
| const int fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex; |
| const int uniformBufferReadIndex = altEven ? kUniformBuffer1Index : kUniformBuffer2Index; |
| const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index; |
| |
| if (params.doBufferCopy) |
| { |
| // Transfer data between the 2 Uniform buffers |
| glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]); |
| glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]); |
| glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0, |
| params.kBufferSize); |
| } |
| |
| // Bind the framebuffer |
| glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]); |
| |
| // Set the viewport |
| glViewport(0, 0, params.kImageSizes[fboDestSizeIndex], |
| params.kImageSizes[fboDestSizeIndex]); |
| |
| // Clear the color buffer |
| glClear(GL_COLOR_BUFFER_BIT); |
| |
| // Bind the texture |
| glActiveTexture(GL_TEXTURE0); |
| glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]); |
| |
| ASSERT_GL_NO_ERROR(); |
| |
| glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); |
| } |
| stopGpuTimer(); |
| |
| ASSERT_GL_NO_ERROR(); |
| } |
| |
| } // namespace |
| |
| TEST_P(VulkanBarriersPerfBenchmark, Run) |
| { |
| run(); |
| } |
| |
| GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark); |
| ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark, |
| VulkanBarriersPerfParams(false, false, false), |
| VulkanBarriersPerfParams(true, false, false), |
| VulkanBarriersPerfParams(false, true, false), |
| VulkanBarriersPerfParams(false, true, true)); |