D3D11: Translate uniform blocks to StructuredBuffer when necessary

fxc exhibits slow compile performance with dynamic cbuffer indexing.
So when a uniform block contains only one large array member, which is
an array of structures, translate this uniform block to
a StructuredBuffer instead.

Bug: angleproject:3682
TEST=angle_end2end_tests.UniformBufferTest.*
Change-Id: Ife80dba8aae65b761737e095895e00a570230f88
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1782046
Commit-Queue: Xinghua Cao <xinghua.cao@intel.com>
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
diff --git a/include/GLSLANG/ShaderLang.h b/include/GLSLANG/ShaderLang.h
index 5adb91a..db7cfab 100644
--- a/include/GLSLANG/ShaderLang.h
+++ b/include/GLSLANG/ShaderLang.h
@@ -26,7 +26,7 @@
 
 // Version number for shader translation API.
 // It is incremented every time the API changes.
-#define ANGLE_SH_VERSION 220
+#define ANGLE_SH_VERSION 221
 
 enum ShShaderSpec
 {
@@ -314,6 +314,10 @@
 // This works around the dynamic lvalue indexing of swizzled vectors on various platforms.
 const ShCompileOptions SH_REMOVE_DYNAMIC_INDEXING_OF_SWIZZLED_VECTOR = UINT64_C(1) << 49;
 
+// This flag works around a driver bug that fails to allocate a ShaderResourceView for a
+// StructuredBuffer on old Windows systems with AMD drivers.
+const ShCompileOptions SH_DONT_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER = UINT64_C(1) << 50;
+
 // Defines alternate strategies for implementing array index clamping.
 enum ShArrayIndexClampingStrategy
 {
@@ -674,6 +678,9 @@
                              const std::string &uniformBlockName,
                              unsigned int *indexOut);
 
+bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle,
+                                           const std::string &uniformBlockName);
+
 // Gives a map from uniform names to compiler-assigned registers in the default uniform block.
 // Note that the map contains also registers of samplers that have been extracted from structs.
 const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle handle);
diff --git a/include/platform/FeaturesD3D.h b/include/platform/FeaturesD3D.h
index 6cf00c3..35f0d1f 100644
--- a/include/platform/FeaturesD3D.h
+++ b/include/platform/FeaturesD3D.h
@@ -206,6 +206,13 @@
         "allow_clear_for_robust_resource_init", FeatureCategory::D3DWorkarounds,
         "Some drivers corrupt texture data when clearing for robust resource initialization.",
         &members, "http://crbug.com/941620"};
+
+    // Don't translate uniform blocks to StructuredBuffer on old Windows systems with AMD
+    // drivers. This works around a bug in the AMD D3D driver that fails to allocate a
+    // ShaderResourceView for a StructuredBuffer.
+    Feature dontTranslateUniformBlockToStructuredBuffer = {
+        "dont_translate_uniform_block_to_structured_buffer", FeatureCategory::D3DWorkarounds,
+        "Fails to allocate ShaderResourceView for StructuredBuffer on some drivers", &members};
 };
 
 inline FeaturesD3D::FeaturesD3D()  = default;
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index e154c50..02ba156 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -344,7 +344,8 @@
 
     unsigned int firstUniformRegister =
         ((compileOptions & SH_SKIP_D3D_CONSTANT_REGISTER_ZERO) != 0) ? 1u : 0u;
-    mResourcesHLSL = new ResourcesHLSL(mStructureHLSL, outputType, uniforms, firstUniformRegister);
+    mResourcesHLSL = new ResourcesHLSL(mStructureHLSL, outputType, compileOptions, uniforms,
+                                       firstUniformRegister);
 
     if (mOutputType == SH_HLSL_3_0_OUTPUT)
     {
@@ -431,6 +432,11 @@
     return mResourcesHLSL->getUniformBlockRegisterMap();
 }
 
+const std::map<std::string, bool> &OutputHLSL::getUniformBlockUseStructuredBufferMap() const
+{
+    return mResourcesHLSL->getUniformBlockUseStructuredBufferMap();
+}
+
 const std::map<std::string, unsigned int> &OutputHLSL::getUniformRegisterMap() const
 {
     return mResourcesHLSL->getUniformRegisterMap();
diff --git a/src/compiler/translator/OutputHLSL.h b/src/compiler/translator/OutputHLSL.h
index 976ef56..f0b90a1 100644
--- a/src/compiler/translator/OutputHLSL.h
+++ b/src/compiler/translator/OutputHLSL.h
@@ -58,6 +58,7 @@
 
     const std::map<std::string, unsigned int> &getShaderStorageBlockRegisterMap() const;
     const std::map<std::string, unsigned int> &getUniformBlockRegisterMap() const;
+    const std::map<std::string, bool> &getUniformBlockUseStructuredBufferMap() const;
     const std::map<std::string, unsigned int> &getUniformRegisterMap() const;
     unsigned int getReadonlyImage2DRegisterIndex() const;
     unsigned int getImage2DRegisterIndex() const;
diff --git a/src/compiler/translator/ResourcesHLSL.cpp b/src/compiler/translator/ResourcesHLSL.cpp
index 33d7b70..b39d59e 100644
--- a/src/compiler/translator/ResourcesHLSL.cpp
+++ b/src/compiler/translator/ResourcesHLSL.cpp
@@ -24,6 +24,11 @@
 {
 
 constexpr const ImmutableString kAngleDecorString("angle_");
+// D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT = 128;
+const unsigned int kMaxInputResourceSlotCount = 128u;
+// If the uniform block member's array size is at least kMinArraySizeUseStructuredBuffer,
+// then we translate the uniform block to a StructuredBuffer to improve compile performance.
+const unsigned int kMinArraySizeUseStructuredBuffer = 50u;
 
 static const char *UniformRegisterPrefix(const TType &type)
 {
@@ -37,7 +42,9 @@
     }
 }
 
-static TString InterfaceBlockFieldTypeString(const TField &field, TLayoutBlockStorage blockStorage)
+static TString InterfaceBlockFieldTypeString(const TField &field,
+                                             TLayoutBlockStorage blockStorage,
+                                             bool usedStructuredbuffer)
 {
     const TType &fieldType                   = *field.type();
     const TLayoutMatrixPacking matrixPacking = fieldType.getLayoutQualifier().matrixPacking;
@@ -53,9 +60,12 @@
     }
     else if (structure)
     {
+        // If the uniform block's layout is std140 and it is translated to a StructuredBuffer,
+        // the structure must be padded at the end in order to fit the API buffer.
+        bool forcePackingEnd = usedStructuredbuffer && (blockStorage == EbsStd140);
         // Use HLSL row-major packing for GLSL column-major matrices
         return QualifiedStructNameString(*structure, matrixPacking == EmpColumnMajor,
-                                         blockStorage == EbsStd140);
+                                         blockStorage == EbsStd140, forcePackingEnd);
     }
     else
     {
@@ -98,15 +108,17 @@
 
 ResourcesHLSL::ResourcesHLSL(StructureHLSL *structureHLSL,
                              ShShaderOutput outputType,
+                             ShCompileOptions compileOptions,
                              const std::vector<ShaderVariable> &uniforms,
                              unsigned int firstUniformRegister)
     : mUniformRegister(firstUniformRegister),
       mUniformBlockRegister(0),
-      mTextureRegister(0),
+      mSRVRegister(0),
       mUAVRegister(0),
       mSamplerCount(0),
       mStructureHLSL(structureHLSL),
       mOutputType(outputType),
+      mCompileOptions(compileOptions),
       mUniforms(uniforms)
 {}
 
@@ -144,7 +156,7 @@
     if (IsSampler(type.getBasicType()) ||
         (IsImage(type.getBasicType()) && type.getMemoryQualifier().readonly))
     {
-        registerIndex = mTextureRegister;
+        registerIndex = mSRVRegister;
     }
     else if (IsImage(type.getBasicType()))
     {
@@ -187,7 +199,7 @@
     if (IsSampler(type.getBasicType()) ||
         (IsImage(type.getBasicType()) && type.getMemoryQualifier().readonly))
     {
-        mTextureRegister += registerCount;
+        mSRVRegister += registerCount;
     }
     else if (IsImage(type.getBasicType()))
     {
@@ -210,10 +222,10 @@
 {
     // Sampler that is a field of a uniform structure.
     ASSERT(IsSampler(type.getBasicType()));
-    unsigned int registerIndex                     = mTextureRegister;
+    unsigned int registerIndex                     = mSRVRegister;
     mUniformRegisterMap[std::string(name.c_str())] = registerIndex;
     unsigned int registerCount = type.isArray() ? type.getArraySizeProduct() : 1u;
-    mTextureRegister += registerCount;
+    mSRVRegister += registerCount;
     if (outRegisterCount)
     {
         *outRegisterCount = registerCount;
@@ -240,7 +252,7 @@
         unsigned int registerCount;
 
         // The uniform might be just a regular sampler or one extracted from a struct.
-        unsigned int samplerArrayIndex = 0u;
+        unsigned int samplerArrayIndex      = 0u;
         const ShaderVariable *uniformByName = findUniformByName(name);
         if (uniformByName)
         {
@@ -386,7 +398,7 @@
     // nameless structs in ES, as nameless structs cannot be used anywhere that layout qualifiers
     // are permitted.
     const TString &typeName = ((structure && structure->symbolType() != SymbolType::Empty)
-                                   ? QualifiedStructNameString(*structure, false, false)
+                                   ? QualifiedStructNameString(*structure, false, false, false)
                                    : TypeString(type));
 
     const TString &registerString =
@@ -549,9 +561,9 @@
         mSamplerCount = groupTextureRegisterIndex;
 
         // Reserve t type register for readonly image2D variables.
-        mReadonlyImage2DRegisterIndex = mTextureRegister;
+        mReadonlyImage2DRegisterIndex = mSRVRegister;
         groupTextureRegisterIndex += reservedReadonlyImageRegisterCount;
-        mTextureRegister += reservedReadonlyImageRegisterCount;
+        mSRVRegister += reservedReadonlyImageRegisterCount;
 
         for (int groupId = HLSL_TEXTURE_MIN; groupId < HLSL_TEXTURE_MAX; ++groupId)
         {
@@ -642,6 +654,21 @@
             interfaceBlocks += uniformBlockStructString(interfaceBlock);
         }
 
+        // To avoid slow shader compilation, translate the uniform block to a structured
+        // buffer. See anglebug.com/3682.
+        // TODO(anglebug.com/4205): Support uniform block with an instance name.
+        if (instanceVariable == nullptr &&
+            shouldTranslateUniformBlockToStructuredBuffer(interfaceBlock))
+        {
+            unsigned int structuredBufferRegister = mSRVRegister;
+            interfaceBlocks +=
+                uniformBlockWithOneLargeArrayMemberString(interfaceBlock, structuredBufferRegister);
+            mUniformBlockRegisterMap[interfaceBlock.name().data()] = structuredBufferRegister;
+            mUniformBlockUseStructuredBufferMap[interfaceBlock.name().data()] = true;
+            mSRVRegister += 1u;
+            continue;
+        }
+
         unsigned int activeRegister                            = mUniformBlockRegister;
         mUniformBlockRegisterMap[interfaceBlock.name().data()] = activeRegister;
 
@@ -729,6 +756,22 @@
     return hlsl;
 }
 
+TString ResourcesHLSL::uniformBlockWithOneLargeArrayMemberString(
+    const TInterfaceBlock &interfaceBlock,
+    unsigned int registerIndex)
+{
+    TString hlsl, typeString;
+
+    const TField &field                    = *interfaceBlock.fields()[0];
+    const TLayoutBlockStorage blockStorage = interfaceBlock.blockStorage();
+    typeString = InterfaceBlockFieldTypeString(field, blockStorage, true);
+
+    hlsl += "StructuredBuffer <" + typeString + "> " + Decorate(field.name()) + " : register(t" +
+            str(registerIndex) + ");\n";
+
+    return hlsl;
+}
+
 TString ResourcesHLSL::shaderStorageBlockString(const TInterfaceBlock &interfaceBlock,
                                                 const TVariable *instanceVariable,
                                                 unsigned int registerIndex,
@@ -780,7 +823,7 @@
             hlsl += padHelper.prePaddingString(fieldType);
         }
 
-        hlsl += "    " + InterfaceBlockFieldTypeString(field, blockStorage) + " " +
+        hlsl += "    " + InterfaceBlockFieldTypeString(field, blockStorage, false) + " " +
                 Decorate(field.name()) + ArrayString(fieldType).data() + ";\n";
 
         // must pad out after matrices and arrays, where HLSL usually allows itself room to pack
@@ -789,7 +832,7 @@
         {
             const bool useHLSLRowMajorPacking =
                 (fieldType.getLayoutQualifier().matrixPacking == EmpColumnMajor);
-            hlsl += padHelper.postPaddingString(fieldType, useHLSLRowMajorPacking);
+            hlsl += padHelper.postPaddingString(fieldType, useHLSLRowMajorPacking, false);
         }
     }
 
@@ -805,4 +848,17 @@
            "{\n" +
            uniformBlockMembersString(interfaceBlock, blockStorage) + "};\n\n";
 }
+
+bool ResourcesHLSL::shouldTranslateUniformBlockToStructuredBuffer(
+    const TInterfaceBlock &interfaceBlock)
+{
+    const TType &fieldType = *interfaceBlock.fields()[0]->type();
+
+    // TODO(anglebug.com/4206): Support uniform blocks that contain only a matrix array member,
+    // and fix the row-major/column-major conversion issue.
+    return (mCompileOptions & SH_DONT_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER) == 0 &&
+           mSRVRegister < kMaxInputResourceSlotCount && interfaceBlock.fields().size() == 1u &&
+           fieldType.getStruct() != nullptr && fieldType.getNumArraySizes() == 1u &&
+           fieldType.getOutermostArraySize() >= kMinArraySizeUseStructuredBuffer;
+}
 }  // namespace sh
diff --git a/src/compiler/translator/ResourcesHLSL.h b/src/compiler/translator/ResourcesHLSL.h
index 6413724..9764129 100644
--- a/src/compiler/translator/ResourcesHLSL.h
+++ b/src/compiler/translator/ResourcesHLSL.h
@@ -24,6 +24,7 @@
   public:
     ResourcesHLSL(StructureHLSL *structureHLSL,
                   ShShaderOutput outputType,
+                  ShCompileOptions compileOptions,
                   const std::vector<ShaderVariable> &uniforms,
                   unsigned int firstUniformRegister);
 
@@ -54,6 +55,12 @@
     {
         return mUniformBlockRegisterMap;
     }
+
+    const std::map<std::string, bool> &getUniformBlockUseStructuredBufferMap() const
+    {
+        return mUniformBlockUseStructuredBufferMap;
+    }
+
     const std::map<std::string, unsigned int> &getUniformRegisterMap() const
     {
         return mUniformRegisterMap;
@@ -67,6 +74,8 @@
                                const TVariable *instanceVariable,
                                unsigned int registerIndex,
                                unsigned int arrayIndex);
+    TString uniformBlockWithOneLargeArrayMemberString(const TInterfaceBlock &interfaceBlock,
+                                                      unsigned int registerIndex);
 
     TString shaderStorageBlockString(const TInterfaceBlock &interfaceBlock,
                                      const TVariable *instanceVariable,
@@ -116,21 +125,24 @@
                                      const HLSLRWTextureGroup textureGroup,
                                      const TVector<const TVariable *> &group,
                                      unsigned int *groupTextureRegisterIndex);
+    bool shouldTranslateUniformBlockToStructuredBuffer(const TInterfaceBlock &interfaceBlock);
 
     unsigned int mUniformRegister;
     unsigned int mUniformBlockRegister;
-    unsigned int mTextureRegister;
+    unsigned int mSRVRegister;
     unsigned int mUAVRegister;
     unsigned int mSamplerCount;
     unsigned int mReadonlyImageCount;
     unsigned int mImageCount;
     StructureHLSL *mStructureHLSL;
     ShShaderOutput mOutputType;
+    ShCompileOptions mCompileOptions;
 
     const std::vector<ShaderVariable> &mUniforms;
     std::map<std::string, unsigned int> mUniformBlockRegisterMap;
     std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
     std::map<std::string, unsigned int> mUniformRegisterMap;
+    std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
     unsigned int mReadonlyImage2DRegisterIndex;
     unsigned int mImage2DRegisterIndex;
 };
diff --git a/src/compiler/translator/ShaderLang.cpp b/src/compiler/translator/ShaderLang.cpp
index 8bd08b1..dea80cc 100644
--- a/src/compiler/translator/ShaderLang.cpp
+++ b/src/compiler/translator/ShaderLang.cpp
@@ -546,6 +546,19 @@
 #endif  // ANGLE_ENABLE_HLSL
 }
 
+bool ShouldUniformBlockUseStructuredBuffer(const ShHandle handle,
+                                           const std::string &uniformBlockName)
+{
+#ifdef ANGLE_ENABLE_HLSL
+    TranslatorHLSL *translator = GetTranslatorHLSLFromHandle(handle);
+    ASSERT(translator);
+
+    return translator->shouldUniformBlockUseStructuredBuffer(uniformBlockName);
+#else
+    return false;
+#endif  // ANGLE_ENABLE_HLSL
+}
+
 const std::map<std::string, unsigned int> *GetUniformRegisterMap(const ShHandle handle)
 {
 #ifdef ANGLE_ENABLE_HLSL
diff --git a/src/compiler/translator/StructureHLSL.cpp b/src/compiler/translator/StructureHLSL.cpp
index 6423434..622ae97 100644
--- a/src/compiler/translator/StructureHLSL.cpp
+++ b/src/compiler/translator/StructureHLSL.cpp
@@ -23,12 +23,13 @@
 TString Define(const TStructure &structure,
                bool useHLSLRowMajorPacking,
                bool useStd140Packing,
+               bool forcePackingEnd,
                Std140PaddingHelper *padHelper)
 {
-    const TFieldList &fields = structure.fields();
-    const bool isNameless    = (structure.symbolType() == SymbolType::Empty);
-    const TString &structName =
-        QualifiedStructNameString(structure, useHLSLRowMajorPacking, useStd140Packing);
+    const TFieldList &fields    = structure.fields();
+    const bool isNameless       = (structure.symbolType() == SymbolType::Empty);
+    const TString &structName   = QualifiedStructNameString(structure, useHLSLRowMajorPacking,
+                                                          useStd140Packing, forcePackingEnd);
     const TString declareString = (isNameless ? "struct" : "struct " + structName);
 
     TString string;
@@ -36,15 +37,17 @@
               "\n"
               "{\n";
 
+    size_t memberSize = fields.size();
     for (const TField *field : fields)
     {
+        memberSize--;
         const TType &fieldType = *field->type();
         if (!IsSampler(fieldType.getBasicType()))
         {
             const TStructure *fieldStruct = fieldType.getStruct();
             const TString &fieldTypeString =
                 fieldStruct ? QualifiedStructNameString(*fieldStruct, useHLSLRowMajorPacking,
-                                                        useStd140Packing)
+                                                        useStd140Packing, false)
                             : TypeString(fieldType);
 
             if (padHelper)
@@ -57,7 +60,8 @@
 
             if (padHelper)
             {
-                string += padHelper->postPaddingString(fieldType, useHLSLRowMajorPacking);
+                string += padHelper->postPaddingString(fieldType, useHLSLRowMajorPacking,
+                                                       memberSize == 0 && forcePackingEnd);
             }
         }
     }
@@ -165,9 +169,11 @@
     return padding;
 }
 
-TString Std140PaddingHelper::postPaddingString(const TType &type, bool useHLSLRowMajorPacking)
+TString Std140PaddingHelper::postPaddingString(const TType &type,
+                                               bool useHLSLRowMajorPacking,
+                                               bool forcePadding)
 {
-    if (!type.isMatrix() && !type.isArray() && type.getBasicType() != EbtStruct)
+    if (!forcePadding && !type.isMatrix() && !type.isArray() && type.getBasicType() != EbtStruct)
     {
         return "";
     }
@@ -191,7 +197,7 @@
     else if (structure)
     {
         const TString &structName =
-            QualifiedStructNameString(*structure, useHLSLRowMajorPacking, true);
+            QualifiedStructNameString(*structure, useHLSLRowMajorPacking, true, false);
         numComponents = mStructElementIndexes->find(structName)->second;
 
         if (numComponents == 0)
@@ -222,22 +228,24 @@
 
 TString StructureHLSL::defineQualified(const TStructure &structure,
                                        bool useHLSLRowMajorPacking,
-                                       bool useStd140Packing)
+                                       bool useStd140Packing,
+                                       bool forcePackingEnd)
 {
     if (useStd140Packing)
     {
         Std140PaddingHelper padHelper = getPaddingHelper();
-        return Define(structure, useHLSLRowMajorPacking, useStd140Packing, &padHelper);
+        return Define(structure, useHLSLRowMajorPacking, useStd140Packing, forcePackingEnd,
+                      &padHelper);
     }
     else
     {
-        return Define(structure, useHLSLRowMajorPacking, useStd140Packing, nullptr);
+        return Define(structure, useHLSLRowMajorPacking, useStd140Packing, false, nullptr);
     }
 }
 
 TString StructureHLSL::defineNameless(const TStructure &structure)
 {
-    return Define(structure, false, false, nullptr);
+    return Define(structure, false, false, false, nullptr);
 }
 
 StructureHLSL::DefinedStructs::iterator StructureHLSL::defineVariants(const TStructure &structure,
@@ -260,24 +268,33 @@
     storeStd140ElementIndex(structure, false);
     storeStd140ElementIndex(structure, true);
 
-    const TString &structString = defineQualified(structure, false, false);
+    const TString &structString = defineQualified(structure, false, false, false);
 
     ASSERT(std::find(mStructDeclarations.begin(), mStructDeclarations.end(), structString) ==
            mStructDeclarations.end());
     // Add row-major packed struct for interface blocks
     TString rowMajorString = "#pragma pack_matrix(row_major)\n" +
-                             defineQualified(structure, true, false) +
+                             defineQualified(structure, true, false, false) +
                              "#pragma pack_matrix(column_major)\n";
 
-    TString std140String         = defineQualified(structure, false, true);
+    TString std140String         = defineQualified(structure, false, true, false);
     TString std140RowMajorString = "#pragma pack_matrix(row_major)\n" +
-                                   defineQualified(structure, true, true) +
+                                   defineQualified(structure, true, true, false) +
                                    "#pragma pack_matrix(column_major)\n";
 
+    // The StructuredBuffer element type must use the end-padded structure variant if the
+    // structure's layout qualifier is std140.
+    TString std140PackingEndString         = defineQualified(structure, false, true, true);
+    TString std140RowMajorPackEndingString = "#pragma pack_matrix(row_major)\n" +
+                                             defineQualified(structure, true, true, true) +
+                                             "#pragma pack_matrix(column_major)\n";
+
     mStructDeclarations.push_back(structString);
     mStructDeclarations.push_back(rowMajorString);
     mStructDeclarations.push_back(std140String);
     mStructDeclarations.push_back(std140RowMajorString);
+    mStructDeclarations.push_back(std140PackingEndString);
+    mStructDeclarations.push_back(std140RowMajorPackEndingString);
     return addedStruct;
 }
 
@@ -584,7 +601,8 @@
 
     // Add remaining element index to the global map, for use with nested structs in standard
     // layouts
-    const TString &structName = QualifiedStructNameString(structure, useHLSLRowMajorPacking, true);
+    const TString &structName =
+        QualifiedStructNameString(structure, useHLSLRowMajorPacking, true, false);
     mStd140StructElementIndexes[structName] = padHelper.elementIndex();
 }
 
diff --git a/src/compiler/translator/StructureHLSL.h b/src/compiler/translator/StructureHLSL.h
index 67102d4..b8e74f7 100644
--- a/src/compiler/translator/StructureHLSL.h
+++ b/src/compiler/translator/StructureHLSL.h
@@ -34,7 +34,7 @@
     int elementIndex() const { return mElementIndex; }
     int prePadding(const TType &type);
     TString prePaddingString(const TType &type);
-    TString postPaddingString(const TType &type, bool useHLSLRowMajorPacking);
+    TString postPaddingString(const TType &type, bool useHLSLRowMajorPacking, bool forcePadding);
 
   private:
     TString next();
@@ -90,7 +90,8 @@
     void storeStd140ElementIndex(const TStructure &structure, bool useHLSLRowMajorPacking);
     TString defineQualified(const TStructure &structure,
                             bool useHLSLRowMajorPacking,
-                            bool useStd140Packing);
+                            bool useStd140Packing,
+                            bool forcePackingEnd);
     DefinedStructs::iterator defineVariants(const TStructure &structure, const TString &name);
 };
 }  // namespace sh
diff --git a/src/compiler/translator/TranslatorHLSL.cpp b/src/compiler/translator/TranslatorHLSL.cpp
index 3a8e57f..bff2539 100644
--- a/src/compiler/translator/TranslatorHLSL.cpp
+++ b/src/compiler/translator/TranslatorHLSL.cpp
@@ -203,12 +203,13 @@
 
     outputHLSL.output(root, getInfoSink().obj);
 
-    mShaderStorageBlockRegisterMap = outputHLSL.getShaderStorageBlockRegisterMap();
-    mUniformBlockRegisterMap       = outputHLSL.getUniformBlockRegisterMap();
-    mUniformRegisterMap            = outputHLSL.getUniformRegisterMap();
-    mReadonlyImage2DRegisterIndex  = outputHLSL.getReadonlyImage2DRegisterIndex();
-    mImage2DRegisterIndex          = outputHLSL.getImage2DRegisterIndex();
-    mUsedImage2DFunctionNames      = outputHLSL.getUsedImage2DFunctionNames();
+    mShaderStorageBlockRegisterMap      = outputHLSL.getShaderStorageBlockRegisterMap();
+    mUniformBlockRegisterMap            = outputHLSL.getUniformBlockRegisterMap();
+    mUniformBlockUseStructuredBufferMap = outputHLSL.getUniformBlockUseStructuredBufferMap();
+    mUniformRegisterMap                 = outputHLSL.getUniformRegisterMap();
+    mReadonlyImage2DRegisterIndex       = outputHLSL.getReadonlyImage2DRegisterIndex();
+    mImage2DRegisterIndex               = outputHLSL.getImage2DRegisterIndex();
+    mUsedImage2DFunctionNames           = outputHLSL.getUsedImage2DFunctionNames();
 
     return true;
 }
@@ -262,4 +263,12 @@
     return &mUsedImage2DFunctionNames;
 }
 
+bool TranslatorHLSL::shouldUniformBlockUseStructuredBuffer(
+    const std::string &uniformBlockName) const
+{
+    auto uniformBlockIter = mUniformBlockUseStructuredBufferMap.find(uniformBlockName);
+    return uniformBlockIter != mUniformBlockUseStructuredBufferMap.end() &&
+           uniformBlockIter->second;
+}
+
 }  // namespace sh
diff --git a/src/compiler/translator/TranslatorHLSL.h b/src/compiler/translator/TranslatorHLSL.h
index eb48e6b..9f9de96 100644
--- a/src/compiler/translator/TranslatorHLSL.h
+++ b/src/compiler/translator/TranslatorHLSL.h
@@ -23,6 +23,7 @@
 
     bool hasUniformBlock(const std::string &interfaceBlockName) const;
     unsigned int getUniformBlockRegister(const std::string &interfaceBlockName) const;
+    bool shouldUniformBlockUseStructuredBuffer(const std::string &uniformBlockName) const;
 
     const std::map<std::string, unsigned int> *getUniformRegisterMap() const;
     unsigned int getReadonlyImage2DRegisterIndex() const;
@@ -40,6 +41,7 @@
 
     std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
     std::map<std::string, unsigned int> mUniformBlockRegisterMap;
+    std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
     std::map<std::string, unsigned int> mUniformRegisterMap;
     unsigned int mReadonlyImage2DRegisterIndex;
     unsigned int mImage2DRegisterIndex;
diff --git a/src/compiler/translator/UtilsHLSL.cpp b/src/compiler/translator/UtilsHLSL.cpp
index 089ae13..7853763 100644
--- a/src/compiler/translator/UtilsHLSL.cpp
+++ b/src/compiler/translator/UtilsHLSL.cpp
@@ -1008,7 +1008,8 @@
 
 TString QualifiedStructNameString(const TStructure &structure,
                                   bool useHLSLRowMajorPacking,
-                                  bool useStd140Packing)
+                                  bool useStd140Packing,
+                                  bool forcePackingEnd)
 {
     if (structure.symbolType() == SymbolType::Empty)
     {
@@ -1030,6 +1031,11 @@
         prefix += "rm_";
     }
 
+    if (forcePackingEnd)
+    {
+        prefix += "pe_";
+    }
+
     return prefix + StructNameString(structure);
 }
 
diff --git a/src/compiler/translator/UtilsHLSL.h b/src/compiler/translator/UtilsHLSL.h
index e95454d..88520a7 100644
--- a/src/compiler/translator/UtilsHLSL.h
+++ b/src/compiler/translator/UtilsHLSL.h
@@ -123,7 +123,8 @@
 TString StructNameString(const TStructure &structure);
 TString QualifiedStructNameString(const TStructure &structure,
                                   bool useHLSLRowMajorPacking,
-                                  bool useStd140Packing);
+                                  bool useStd140Packing,
+                                  bool forcePackingEnd);
 const char *InterpolationString(TQualifier qualifier);
 const char *QualifierString(TQualifier qualifier);
 // Parameters may need to be included in function names to disambiguate between overloaded
diff --git a/src/libANGLE/ProgramLinkedResources.cpp b/src/libANGLE/ProgramLinkedResources.cpp
index f463cb4..1f9e6c6 100644
--- a/src/libANGLE/ProgramLinkedResources.cpp
+++ b/src/libANGLE/ProgramLinkedResources.cpp
@@ -1138,6 +1138,8 @@
         blockIndexes.push_back(static_cast<unsigned int>(blockMemberIndex));
     }
 
+    unsigned int firstFieldArraySize = interfaceBlock.fields[0].getArraySizeProduct();
+
     for (unsigned int arrayElement = 0; arrayElement < interfaceBlock.elementCount();
          ++arrayElement)
     {
@@ -1161,7 +1163,8 @@
         int blockBinding =
             (interfaceBlock.binding == -1 ? 0 : interfaceBlock.binding + arrayElement);
         InterfaceBlock block(interfaceBlock.name, interfaceBlock.mappedName,
-                             interfaceBlock.isArray(), arrayElement, blockBinding);
+                             interfaceBlock.isArray(), arrayElement, firstFieldArraySize,
+                             blockBinding);
         block.memberIndexes = blockIndexes;
         block.setActive(shaderType, interfaceBlock.active);
 
diff --git a/src/libANGLE/Uniform.cpp b/src/libANGLE/Uniform.cpp
index dbe119b..614f501 100644
--- a/src/libANGLE/Uniform.cpp
+++ b/src/libANGLE/Uniform.cpp
@@ -87,11 +87,11 @@
 LinkedUniform &LinkedUniform::operator=(const LinkedUniform &uniform)
 {
     sh::ShaderVariable::operator=(uniform);
-    ActiveVariable::operator=(uniform);
-    typeInfo                = uniform.typeInfo;
-    bufferIndex             = uniform.bufferIndex;
-    blockInfo               = uniform.blockInfo;
-    outerArraySizes         = uniform.outerArraySizes;
+    ActiveVariable::operator    =(uniform);
+    typeInfo                    = uniform.typeInfo;
+    bufferIndex                 = uniform.bufferIndex;
+    blockInfo                   = uniform.blockInfo;
+    outerArraySizes             = uniform.outerArraySizes;
     return *this;
 }
 
@@ -134,8 +134,13 @@
                                const std::string &mappedNameIn,
                                bool isArrayIn,
                                unsigned int arrayElementIn,
+                               unsigned int firstFieldArraySizeIn,
                                int bindingIn)
-    : name(nameIn), mappedName(mappedNameIn), isArray(isArrayIn), arrayElement(arrayElementIn)
+    : name(nameIn),
+      mappedName(mappedNameIn),
+      isArray(isArrayIn),
+      arrayElement(arrayElementIn),
+      firstFieldArraySize(firstFieldArraySizeIn)
 {
     binding = bindingIn;
 }
diff --git a/src/libANGLE/Uniform.h b/src/libANGLE/Uniform.h
index ec2b4a6..f7e1af6 100644
--- a/src/libANGLE/Uniform.h
+++ b/src/libANGLE/Uniform.h
@@ -119,6 +119,7 @@
                    const std::string &mappedNameIn,
                    bool isArrayIn,
                    unsigned int arrayElementIn,
+                   unsigned int firstFieldArraySizeIn,
                    int bindingIn);
 
     std::string nameWithArrayIndex() const;
@@ -128,6 +129,7 @@
     std::string mappedName;
     bool isArray;
     unsigned int arrayElement;
+    unsigned int firstFieldArraySize;
 };
 
 }  // namespace gl
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
index a85c280..b229d6b 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
@@ -372,6 +372,15 @@
 
 D3DInterfaceBlock::D3DInterfaceBlock(const D3DInterfaceBlock &other) = default;
 
+D3DUniformBlock::D3DUniformBlock()
+{
+    mUseStructuredBuffers.fill(false);
+    mByteWidths.fill(0u);
+    mStructureByteStrides.fill(0u);
+}
+
+D3DUniformBlock::D3DUniformBlock(const D3DUniformBlock &other) = default;
+
 // D3DVarying Implementation
 
 D3DVarying::D3DVarying() : semanticIndex(0), componentCount(0), outputSlot(0) {}
@@ -1081,10 +1090,13 @@
     ASSERT(mD3DUniformBlocks.empty());
     for (unsigned int blockIndex = 0; blockIndex < blockCount; ++blockIndex)
     {
-        D3DInterfaceBlock uniformBlock;
+        D3DUniformBlock uniformBlock;
         for (gl::ShaderType shaderType : gl::AllShaderTypes())
         {
             stream->readInt(&uniformBlock.mShaderRegisterIndexes[shaderType]);
+            stream->readBool(&uniformBlock.mUseStructuredBuffers[shaderType]);
+            stream->readInt(&uniformBlock.mByteWidths[shaderType]);
+            stream->readInt(&uniformBlock.mStructureByteStrides[shaderType]);
         }
         mD3DUniformBlocks.push_back(uniformBlock);
     }
@@ -1365,11 +1377,14 @@
     }
 
     stream->writeInt(mD3DUniformBlocks.size());
-    for (const D3DInterfaceBlock &uniformBlock : mD3DUniformBlocks)
+    for (const D3DUniformBlock &uniformBlock : mD3DUniformBlocks)
     {
         for (gl::ShaderType shaderType : gl::AllShaderTypes())
         {
             stream->writeIntOrNegOne(uniformBlock.mShaderRegisterIndexes[shaderType]);
+            stream->writeInt(uniformBlock.mUseStructuredBuffers[shaderType]);
+            stream->writeInt(uniformBlock.mByteWidths[shaderType]);
+            stream->writeInt(uniformBlock.mStructureByteStrides[shaderType]);
         }
     }
 
@@ -2143,7 +2158,7 @@
     {
         unsigned int uniformBlockElement = uniformBlock.isArray ? uniformBlock.arrayElement : 0;
 
-        D3DInterfaceBlock d3dUniformBlock;
+        D3DUniformBlock d3dUniformBlock;
 
         for (gl::ShaderType shaderType : gl::AllShaderTypes())
         {
@@ -2154,6 +2169,18 @@
                     shadersD3D[shaderType]->getUniformBlockRegister(uniformBlock.name);
                 d3dUniformBlock.mShaderRegisterIndexes[shaderType] =
                     baseRegister + uniformBlockElement;
+                bool useStructuredBuffer =
+                    shadersD3D[shaderType]->shouldUniformBlockUseStructuredBuffer(
+                        uniformBlock.name);
+                if (useStructuredBuffer)
+                {
+                    d3dUniformBlock.mUseStructuredBuffers[shaderType] = true;
+                    d3dUniformBlock.mByteWidths[shaderType]           = uniformBlock.dataSize;
+                    d3dUniformBlock.mStructureByteStrides[shaderType] =
+                        uniformBlock.firstFieldArraySize == 0u
+                            ? uniformBlock.dataSize
+                            : uniformBlock.dataSize / uniformBlock.firstFieldArraySize;
+                }
             }
         }
 
@@ -2213,9 +2240,7 @@
     }
 }
 
-void ProgramD3D::updateUniformBufferCache(
-    const gl::Caps &caps,
-    const gl::ShaderMap<unsigned int> &reservedShaderRegisterIndexes)
+void ProgramD3D::updateUniformBufferCache(const gl::Caps &caps)
 {
     if (mState.getUniformBlocks().empty())
     {
@@ -2225,13 +2250,14 @@
     for (gl::ShaderType shaderType : gl::AllShaderTypes())
     {
         mShaderUBOCaches[shaderType].clear();
+        mShaderUBOCachesUseSB[shaderType].clear();
     }
 
     for (unsigned int uniformBlockIndex = 0; uniformBlockIndex < mD3DUniformBlocks.size();
          uniformBlockIndex++)
     {
-        const D3DInterfaceBlock &uniformBlock = mD3DUniformBlocks[uniformBlockIndex];
-        GLuint blockBinding                   = mState.getUniformBlockBinding(uniformBlockIndex);
+        const D3DUniformBlock &uniformBlock = mD3DUniformBlocks[uniformBlockIndex];
+        GLuint blockBinding                 = mState.getUniformBlockBinding(uniformBlockIndex);
 
         // Unnecessary to apply an unreferenced standard or shared UBO
         for (gl::ShaderType shaderType : gl::AllShaderTypes())
@@ -2241,21 +2267,36 @@
                 continue;
             }
 
-            unsigned int registerIndex = uniformBlock.mShaderRegisterIndexes[shaderType] -
-                                         reservedShaderRegisterIndexes[shaderType];
-            ASSERT(registerIndex <
-                   static_cast<unsigned int>(caps.maxShaderUniformBlocks[shaderType]));
-
-            std::vector<int> &shaderUBOcache = mShaderUBOCaches[shaderType];
-            if (shaderUBOcache.size() <= registerIndex)
+            bool useStructuredBuffer   = uniformBlock.mUseStructuredBuffers[shaderType];
+            unsigned int registerIndex = uniformBlock.mShaderRegisterIndexes[shaderType];
+            if (useStructuredBuffer)
             {
-                shaderUBOcache.resize(registerIndex + 1, -1);
+                D3DUBOCacheUseSB cacheUseSB;
+                cacheUseSB.registerIndex       = registerIndex;
+                cacheUseSB.binding             = blockBinding;
+                cacheUseSB.byteWidth           = uniformBlock.mByteWidths[shaderType];
+                cacheUseSB.structureByteStride = uniformBlock.mStructureByteStrides[shaderType];
+                mShaderUBOCachesUseSB[shaderType].push_back(cacheUseSB);
             }
-
-            ASSERT(shaderUBOcache[registerIndex] == -1);
-            shaderUBOcache[registerIndex] = blockBinding;
+            else
+            {
+                ASSERT(registerIndex <
+                       static_cast<unsigned int>(caps.maxShaderUniformBlocks[shaderType]));
+                D3DUBOCache cache;
+                cache.registerIndex = registerIndex;
+                cache.binding       = blockBinding;
+                mShaderUBOCaches[shaderType].push_back(cache);
+            }
         }
     }
+
+    for (gl::ShaderType shaderType : gl::AllShaderTypes())
+    {
+        GLuint uniformBlockCount = static_cast<GLuint>(mShaderUBOCaches[shaderType].size() +
+                                                       mShaderUBOCachesUseSB[shaderType].size());
+        ASSERT(uniformBlockCount <=
+               static_cast<unsigned int>(caps.maxShaderUniformBlocks[shaderType]));
+    }
 }
 
 unsigned int ProgramD3D::getAtomicCounterBufferRegisterIndex(GLuint binding,
@@ -2276,11 +2317,18 @@
     return mD3DShaderStorageBlocks[blockIndex].mShaderRegisterIndexes[shaderType];
 }
 
-const std::vector<GLint> &ProgramD3D::getShaderUniformBufferCache(gl::ShaderType shaderType) const
+const std::vector<D3DUBOCache> &ProgramD3D::getShaderUniformBufferCache(
+    gl::ShaderType shaderType) const
 {
     return mShaderUBOCaches[shaderType];
 }
 
+const std::vector<D3DUBOCacheUseSB> &ProgramD3D::getShaderUniformBufferCacheUseSB(
+    gl::ShaderType shaderType) const
+{
+    return mShaderUBOCachesUseSB[shaderType];
+}
+
 void ProgramD3D::dirtyAllUniforms()
 {
     mShaderUniformsDirty = mState.getLinkedShaderStages();
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.h b/src/libANGLE/renderer/d3d/ProgramD3D.h
index 81d6139..1de0ef0 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.h
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.h
@@ -96,6 +96,28 @@
     gl::ShaderMap<unsigned int> mShaderRegisterIndexes;
 };
 
+struct D3DUniformBlock : D3DInterfaceBlock
+{
+    D3DUniformBlock();
+    D3DUniformBlock(const D3DUniformBlock &other);
+
+    gl::ShaderMap<bool> mUseStructuredBuffers;          // per stage: bind as a StructuredBuffer
+    gl::ShaderMap<unsigned int> mByteWidths;            // per stage: byte width of that buffer
+    gl::ShaderMap<unsigned int> mStructureByteStrides;  // per stage: bytes per buffer element
+};
+
+struct D3DUBOCache
+{
+    unsigned int registerIndex = 0;   // shader register (slot) the buffer is applied to
+    int binding                = -1;  // GL uniform buffer binding; -1 entries are skipped
+};
+
+struct D3DUBOCacheUseSB : D3DUBOCache
+{
+    unsigned int byteWidth           = 0;  // byte width of the structured buffer
+    unsigned int structureByteStride = 0;  // bytes per structured buffer element
+};
+
 struct D3DVarying final
 {
     D3DVarying();
@@ -219,15 +241,16 @@
                                  GLint components,
                                  const GLfloat *coeffs) override;
 
-    void updateUniformBufferCache(const gl::Caps &caps,
-                                  const gl::ShaderMap<unsigned int> &reservedShaderRegisterIndexes);
+    void updateUniformBufferCache(const gl::Caps &caps);
 
     unsigned int getAtomicCounterBufferRegisterIndex(GLuint binding,
                                                      gl::ShaderType shaderType) const;
 
     unsigned int getShaderStorageBufferRegisterIndex(GLuint blockIndex,
                                                      gl::ShaderType shaderType) const;
-    const std::vector<GLint> &getShaderUniformBufferCache(gl::ShaderType shaderType) const;
+    const std::vector<D3DUBOCache> &getShaderUniformBufferCache(gl::ShaderType shaderType) const;
+    const std::vector<D3DUBOCacheUseSB> &getShaderUniformBufferCacheUseSB(
+        gl::ShaderType shaderType) const;
 
     void dirtyAllUniforms();
 
@@ -552,7 +575,8 @@
 
     unsigned int mSerial;
 
-    gl::ShaderMap<std::vector<int>> mShaderUBOCaches;
+    gl::ShaderMap<std::vector<D3DUBOCache>> mShaderUBOCaches;
+    gl::ShaderMap<std::vector<D3DUBOCacheUseSB>> mShaderUBOCachesUseSB;
     VertexExecutable::Signature mCachedVertexSignature;
     gl::InputLayout mCachedInputLayout;
     Optional<size_t> mCachedVertexExecutableIndex;
@@ -561,7 +585,7 @@
     std::vector<D3DUniform *> mD3DUniforms;
     std::map<std::string, int> mImageBindingMap;
     std::map<std::string, int> mAtomicBindingMap;
-    std::vector<D3DInterfaceBlock> mD3DUniformBlocks;
+    std::vector<D3DUniformBlock> mD3DUniformBlocks;
     std::vector<D3DInterfaceBlock> mD3DShaderStorageBlocks;
     std::array<unsigned int, gl::IMPLEMENTATION_MAX_ATOMIC_COUNTER_BUFFERS>
         mComputeAtomicCounterBufferRegisterIndices;
diff --git a/src/libANGLE/renderer/d3d/ShaderD3D.cpp b/src/libANGLE/renderer/d3d/ShaderD3D.cpp
index a3cd5e4..14038a3 100644
--- a/src/libANGLE/renderer/d3d/ShaderD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ShaderD3D.cpp
@@ -123,6 +123,10 @@
     {
         mAdditionalOptions |= SH_FORCE_ATOMIC_VALUE_RESOLUTION;
     }
+    if (features.dontTranslateUniformBlockToStructuredBuffer.enabled)
+    {
+        mAdditionalOptions |= SH_DONT_TRANSLATE_UNIFORM_BLOCK_TO_STRUCTUREDBUFFER;
+    }
     if (extensions.multiview || extensions.multiview2)
     {
         mAdditionalOptions |= SH_INITIALIZE_BUILTINS_FOR_INSTANCED_MULTIVIEW;
@@ -207,6 +211,12 @@
     return mUniformBlockRegisterMap.find(blockName)->second;
 }
 
+bool ShaderD3D::shouldUniformBlockUseStructuredBuffer(const std::string &blockName) const
+{
+    const auto entry = mUniformBlockUseStructuredBufferMap.find(blockName);
+    ASSERT(entry != mUniformBlockUseStructuredBufferMap.end());
+    return entry->second;
+}
+
 unsigned int ShaderD3D::getShaderStorageBlockRegister(const std::string &blockName) const
 {
     ASSERT(mShaderStorageBlockRegisterMap.count(blockName) > 0);
@@ -310,8 +320,11 @@
                 bool blockRegisterResult =
                     sh::GetUniformBlockRegister(compilerHandle, interfaceBlock.name, &index);
                 ASSERT(blockRegisterResult);
+                bool useStructuredBuffer =
+                    sh::ShouldUniformBlockUseStructuredBuffer(compilerHandle, interfaceBlock.name);
 
-                mUniformBlockRegisterMap[interfaceBlock.name] = index;
+                mUniformBlockRegisterMap[interfaceBlock.name]            = index;
+                mUniformBlockUseStructuredBufferMap[interfaceBlock.name] = useStructuredBuffer;
             }
         }
 
diff --git a/src/libANGLE/renderer/d3d/ShaderD3D.h b/src/libANGLE/renderer/d3d/ShaderD3D.h
index badb742..b7637ba 100644
--- a/src/libANGLE/renderer/d3d/ShaderD3D.h
+++ b/src/libANGLE/renderer/d3d/ShaderD3D.h
@@ -54,6 +54,7 @@
     unsigned int getUniformRegister(const std::string &uniformName) const;
 
     unsigned int getUniformBlockRegister(const std::string &blockName) const;
+    bool shouldUniformBlockUseStructuredBuffer(const std::string &blockName) const;
     unsigned int getShaderStorageBlockRegister(const std::string &blockName) const;
     unsigned int getReadonlyImage2DRegisterIndex() const { return mReadonlyImage2DRegisterIndex; }
     unsigned int getImage2DRegisterIndex() const { return mImage2DRegisterIndex; }
@@ -102,6 +103,7 @@
     mutable std::string mDebugInfo;
     std::map<std::string, unsigned int> mUniformRegisterMap;
     std::map<std::string, unsigned int> mUniformBlockRegisterMap;
+    std::map<std::string, bool> mUniformBlockUseStructuredBufferMap;
     std::map<std::string, unsigned int> mShaderStorageBlockRegisterMap;
     unsigned int mReadonlyImage2DRegisterIndex;
     unsigned int mImage2DRegisterIndex;
diff --git a/src/libANGLE/renderer/d3d/d3d11/Buffer11.cpp b/src/libANGLE/renderer/d3d/d3d11/Buffer11.cpp
index 8b3df6c..726fcf0 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Buffer11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/Buffer11.cpp
@@ -84,7 +84,9 @@
     else if (writeBit && !readBit)
     {
         // Special case for uniform storage - we only allow full buffer updates.
-        return usage == BUFFER_USAGE_UNIFORM ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE;
+        return usage == BUFFER_USAGE_UNIFORM || usage == BUFFER_USAGE_STRUCTURED
+                   ? D3D11_MAP_WRITE_DISCARD
+                   : D3D11_MAP_WRITE;
     }
     else if (writeBit && readBit)
     {
@@ -137,6 +139,8 @@
                           size_t offset,
                           size_t size);
 
+    void setStructureByteStride(unsigned int structureByteStride);
+
   protected:
     BufferStorage(Renderer11 *renderer, BufferUsage usage);
 
@@ -182,6 +186,10 @@
                             unsigned int size,
                             d3d11::UnorderedAccessView **uavOut);
 
+  protected:
+    d3d11::Buffer mBuffer;
+    const angle::Subject *mOnStorageChanged;
+
   private:
     static void FillBufferDesc(D3D11_BUFFER_DESC *bufferDesc,
                                Renderer11 *renderer,
@@ -190,12 +198,30 @@
     void clearSRVs();
     void clearUAVs();
 
-    d3d11::Buffer mBuffer;
-    const angle::Subject *mOnStorageChanged;
     std::map<DXGI_FORMAT, d3d11::ShaderResourceView> mBufferResourceViews;
     std::map<std::pair<unsigned int, unsigned int>, d3d11::UnorderedAccessView> mBufferRawUAVs;
 };
 
+class Buffer11::StructuredBufferStorage : public Buffer11::NativeStorage
+{
+  public:
+    StructuredBufferStorage(Renderer11 *renderer,
+                            BufferUsage usage,
+                            const angle::Subject *onStorageChanged);
+    ~StructuredBufferStorage() override;
+    angle::Result resizeStructuredBuffer(const gl::Context *context,
+                                         unsigned int size,
+                                         unsigned int structureByteStride);
+    angle::Result getStructuredBufferRangeSRV(const gl::Context *context,
+                                              unsigned int offset,  // unused by the implementation
+                                              unsigned int size,
+                                              unsigned int structureByteStride,
+                                              const d3d11::ShaderResourceView **bufferOut);
+
+  private:
+    d3d11::ShaderResourceView mStructuredBufferResourceView;  // lazily created, reset on resize
+};
+
 // A emulated indexed buffer storage represents an underlying D3D11 buffer for data
 // that has been expanded to match the indices list used. This storage is only
 // used for FL9_3 pointsprite rendering emulation.
@@ -322,7 +348,9 @@
       mDeallocThresholds({}),
       mIdleness({}),
       mConstantBufferStorageAdditionalSize(0),
-      mMaxConstantBufferLruCount(0)
+      mMaxConstantBufferLruCount(0),
+      mStructuredBufferStorageAdditionalSize(0),
+      mMaxStructuredBufferLruCount(0)
 {}
 
 Buffer11::~Buffer11()
@@ -337,6 +365,11 @@
         SafeDelete(p.second.storage);
     }
 
+    for (auto &p : mStructuredBufferRangeStoragesCache)
+    {
+        SafeDelete(p.second.storage);
+    }
+
     mRenderer->onBufferDelete(this);
 }
 
@@ -800,6 +833,8 @@
         case BUFFER_USAGE_INDEX:
         case BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK:
             return new NativeStorage(mRenderer, usage, this);
+        case BUFFER_USAGE_STRUCTURED:
+            return new StructuredBufferStorage(mRenderer, usage, nullptr);
         default:
             return new NativeStorage(mRenderer, usage, nullptr);
     }
@@ -811,11 +846,10 @@
                                                       Buffer11::NativeStorage **storageOut)
 {
     BufferStorage *newStorage;
-
     {
         // Keep the cacheEntry in a limited scope because it may be invalidated later in the code if
         // we need to reclaim some space.
-        ConstantBufferCacheEntry *cacheEntry = &mConstantBufferRangeStoragesCache[offset];
+        BufferCacheEntry *cacheEntry = &mConstantBufferRangeStoragesCache[offset];
 
         if (!cacheEntry->storage)
         {
@@ -837,12 +871,12 @@
 
         while (mConstantBufferStorageAdditionalSize + sizeDelta > maximumAllowedAdditionalSize)
         {
-            auto iter = std::min_element(std::begin(mConstantBufferRangeStoragesCache),
-                                         std::end(mConstantBufferRangeStoragesCache),
-                                         [](const ConstantBufferCache::value_type &a,
-                                            const ConstantBufferCache::value_type &b) {
-                                             return a.second.lruCount < b.second.lruCount;
-                                         });
+            auto iter = std::min_element(
+                std::begin(mConstantBufferRangeStoragesCache),
+                std::end(mConstantBufferRangeStoragesCache),
+                [](const BufferCache::value_type &a, const BufferCache::value_type &b) {
+                    return a.second.lruCount < b.second.lruCount;
+                });
 
             ASSERT(iter->second.storage != newStorage);
             ASSERT(mConstantBufferStorageAdditionalSize >= iter->second.storage->getSize());
@@ -867,6 +901,74 @@
     return angle::Result::Continue;
 }
 
+angle::Result Buffer11::getStructuredBufferRangeSRV(const gl::Context *context,
+                                                    unsigned int offset,
+                                                    unsigned int size,
+                                                    unsigned int structureByteStride,
+                                                    const d3d11::ShaderResourceView **srvOut)
+{
+    // Returns an SRV for the structured-buffer storage cached under (offset, stride).
+    BufferStorage *newStorage;
+    {
+        // Keep the cacheEntry in a limited scope because it may be invalidated later in the code if
+        // we need to reclaim some space.
+        StructuredBufferKey structuredBufferKey = StructuredBufferKey(offset, structureByteStride);
+        BufferCacheEntry *cacheEntry = &mStructuredBufferRangeStoragesCache[structuredBufferKey];
+
+        if (!cacheEntry->storage)
+        {
+            // First use of this key: create the storage. The LRU stamp is assigned below.
+            cacheEntry->storage = allocateStorage(BUFFER_USAGE_STRUCTURED);
+        }
+
+        cacheEntry->lruCount = ++mMaxStructuredBufferLruCount;
+        newStorage           = cacheEntry->storage;
+    }
+
+    StructuredBufferStorage *structuredBufferStorage = GetAs<StructuredBufferStorage>(newStorage);
+
+    markBufferUsage(BUFFER_USAGE_STRUCTURED);
+
+    if (newStorage->getSize() < static_cast<size_t>(size))
+    {
+        size_t maximumAllowedAdditionalSize = 2 * getSize();
+
+        size_t sizeDelta = static_cast<size_t>(size) - newStorage->getSize();
+
+        while (mStructuredBufferStorageAdditionalSize + sizeDelta > maximumAllowedAdditionalSize)
+        {
+            auto iter = std::min_element(std::begin(mStructuredBufferRangeStoragesCache),
+                                         std::end(mStructuredBufferRangeStoragesCache),
+                                         [](const StructuredBufferCache::value_type &a,
+                                            const StructuredBufferCache::value_type &b) {
+                                             return a.second.lruCount < b.second.lruCount;
+                                         });
+
+            ASSERT(iter->second.storage != newStorage);
+            ASSERT(mStructuredBufferStorageAdditionalSize >= iter->second.storage->getSize());
+
+            mStructuredBufferStorageAdditionalSize -= iter->second.storage->getSize();
+            SafeDelete(iter->second.storage);
+            mStructuredBufferRangeStoragesCache.erase(iter);
+        }
+
+        ANGLE_TRY(
+            structuredBufferStorage->resizeStructuredBuffer(context, size, structureByteStride));
+        mStructuredBufferStorageAdditionalSize += sizeDelta;
+
+        // We don't copy the old data when resizing the structured buffer because the data may be
+        // out-of-date therefore we reset the data revision and let updateBufferStorage() handle the
+        // copy.
+        newStorage->setDataRevision(0);
+    }
+
+    ANGLE_TRY(updateBufferStorage(context, newStorage, offset, static_cast<size_t>(size)));
+    ANGLE_TRY(garbageCollection(context, BUFFER_USAGE_STRUCTURED));
+    ANGLE_TRY(structuredBufferStorage->getStructuredBufferRangeSRV(context, offset, size,
+                                                                   structureByteStride, srvOut));
+    return angle::Result::Continue;
+}
+
 angle::Result Buffer11::updateBufferStorage(const gl::Context *context,
                                             BufferStorage *storage,
                                             size_t sourceOffset,
@@ -1029,7 +1131,8 @@
         return (mUsage == BUFFER_USAGE_STAGING);
     }
     ASSERT((access & GL_MAP_WRITE_BIT) != 0);
-    return (mUsage == BUFFER_USAGE_STAGING || mUsage == BUFFER_USAGE_UNIFORM);
+    return (mUsage == BUFFER_USAGE_STAGING || mUsage == BUFFER_USAGE_UNIFORM ||
+            mUsage == BUFFER_USAGE_STRUCTURED);
 }
 
 // Returns true if it recreates the direct buffer
@@ -1332,8 +1435,85 @@
     mBufferRawUAVs.clear();
 }
 
-// Buffer11::EmulatedIndexStorage implementation
+Buffer11::StructuredBufferStorage::StructuredBufferStorage(Renderer11 *renderer,
+                                                           BufferUsage usage,
+                                                           const angle::Subject *onStorageChanged)
+    : NativeStorage(renderer, usage, onStorageChanged), mStructuredBufferResourceView()
+{}
 
+Buffer11::StructuredBufferStorage::~StructuredBufferStorage()
+{
+    mStructuredBufferResourceView.reset();
+}
+
+angle::Result Buffer11::StructuredBufferStorage::resizeStructuredBuffer(
+    const gl::Context *context,
+    unsigned int size,
+    unsigned int structureByteStride)
+{
+    // Replaces mBuffer with a new structured buffer of |size| bytes; contents are not copied.
+    if (size == 0)
+    {
+        mBuffer.reset();
+        mBufferSize = 0;
+        mStructuredBufferResourceView.reset();  // the view referred to the released buffer
+        return angle::Result::Continue;
+    }
+
+    D3D11_BUFFER_DESC bufferDesc;
+    bufferDesc.ByteWidth           = size;
+    bufferDesc.MiscFlags           = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
+    bufferDesc.StructureByteStride = structureByteStride;
+    bufferDesc.Usage               = D3D11_USAGE_DYNAMIC;
+    bufferDesc.BindFlags           = D3D11_BIND_SHADER_RESOURCE;
+    bufferDesc.CPUAccessFlags      = D3D11_CPU_ACCESS_WRITE;
+
+    d3d11::Buffer newBuffer;
+    ANGLE_TRY(
+        mRenderer->allocateResource(SafeGetImplAs<Context11>(context), bufferDesc, &newBuffer));
+    newBuffer.setDebugName("Buffer11::StructuredBufferStorage");
+
+    // No longer need the old buffer, nor the view that was created over it.
+    mBuffer     = std::move(newBuffer);
+    mBufferSize = static_cast<size_t>(bufferDesc.ByteWidth);
+    mStructuredBufferResourceView.reset();
+
+    // Notify that the storage has changed.
+    if (mOnStorageChanged)
+    {
+        mOnStorageChanged->onStateChange(angle::SubjectMessage::SubjectChanged);
+    }
+
+    return angle::Result::Continue;
+}
+
+angle::Result Buffer11::StructuredBufferStorage::getStructuredBufferRangeSRV(
+    const gl::Context *context,
+    unsigned int offset,
+    unsigned int size,
+    unsigned int structureByteStride,
+    const d3d11::ShaderResourceView **srvOut)
+{
+    // The view is created on first request and then reused for as long as it stays valid.
+    if (!mStructuredBufferResourceView.valid())
+    {
+        // A zero stride yields a single-element view; otherwise one element per stride.
+        const unsigned int elementCount = structureByteStride == 0u ? 1 : size / structureByteStride;
+        D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc = {};
+        viewDesc.Format                          = DXGI_FORMAT_UNKNOWN;
+        viewDesc.ViewDimension                   = D3D11_SRV_DIMENSION_BUFFEREX;
+        viewDesc.Buffer.FirstElement             = 0;
+        viewDesc.Buffer.NumElements              = elementCount;
+
+        ANGLE_TRY(mRenderer->allocateResource(GetImplAs<Context11>(context), viewDesc,
+                                              mBuffer.get(), &mStructuredBufferResourceView));
+    }
+
+    *srvOut = &mStructuredBufferResourceView;
+    return angle::Result::Continue;
+}
+
+// Buffer11::EmulatedIndexedStorage implementation
 Buffer11::EmulatedIndexedStorage::EmulatedIndexedStorage(Renderer11 *renderer)
     : BufferStorage(renderer, BUFFER_USAGE_EMULATED_INDEXED_VERTEX), mBuffer()
 {}
diff --git a/src/libANGLE/renderer/d3d/d3d11/Buffer11.h b/src/libANGLE/renderer/d3d/d3d11/Buffer11.h
index bcf8826..97c9a2c 100644
--- a/src/libANGLE/renderer/d3d/d3d11/Buffer11.h
+++ b/src/libANGLE/renderer/d3d/d3d11/Buffer11.h
@@ -39,6 +39,7 @@
     BUFFER_USAGE_PIXEL_UNPACK,
     BUFFER_USAGE_PIXEL_PACK,
     BUFFER_USAGE_UNIFORM,
+    BUFFER_USAGE_STRUCTURED,
     BUFFER_USAGE_EMULATED_INDEXED_VERTEX,
     BUFFER_USAGE_RAW_UAV,
 
@@ -67,6 +68,11 @@
                                          const d3d11::Buffer **bufferOut,
                                          UINT *firstConstantOut,
                                          UINT *numConstantsOut);
+    angle::Result getStructuredBufferRangeSRV(const gl::Context *context,
+                                              unsigned int offset,
+                                              unsigned int size,
+                                              unsigned int structureByteStride,
+                                              const d3d11::ShaderResourceView **srvOut);
     angle::Result getSRV(const gl::Context *context,
                          DXGI_FORMAT srvFormat,
                          const d3d11::ShaderResourceView **srvOut);
@@ -120,15 +126,30 @@
     class NativeStorage;
     class PackStorage;
     class SystemMemoryStorage;
+    class StructuredBufferStorage;
 
-    struct ConstantBufferCacheEntry
+    struct BufferCacheEntry
     {
-        ConstantBufferCacheEntry() : storage(nullptr), lruCount(0) {}
+        BufferCacheEntry() : storage(nullptr), lruCount(0) {}
 
         BufferStorage *storage;
         unsigned int lruCount;
     };
 
+    struct StructuredBufferKey
+    {
+        StructuredBufferKey(unsigned int offsetIn, unsigned int structureByteStrideIn)
+            : offset(offsetIn), structureByteStride(structureByteStrideIn)
+        {}
+        bool operator<(const StructuredBufferKey &rhs) const  // orders by offset, then stride
+        {
+            return offset != rhs.offset ? offset < rhs.offset
+                                        : structureByteStride < rhs.structureByteStride;
+        }
+        unsigned int offset;
+        unsigned int structureByteStride;
+    };
+
     void markBufferUsage(BufferUsage usage);
     angle::Result markBufferUsage(const gl::Context *context, BufferUsage usage);
     angle::Result garbageCollection(const gl::Context *context, BufferUsage currentUsage);
@@ -189,10 +210,15 @@
     // Cache of D3D11 constant buffer for specific ranges of buffer data.
     // This is used to emulate UBO ranges on 11.0 devices.
     // Constant buffers are indexed by there start offset.
-    typedef std::map<GLintptr /*offset*/, ConstantBufferCacheEntry> ConstantBufferCache;
-    ConstantBufferCache mConstantBufferRangeStoragesCache;
+    typedef std::map<GLintptr /*offset*/, BufferCacheEntry> BufferCache;
+    BufferCache mConstantBufferRangeStoragesCache;
     size_t mConstantBufferStorageAdditionalSize;
     unsigned int mMaxConstantBufferLruCount;
+
+    typedef std::map<StructuredBufferKey, BufferCacheEntry> StructuredBufferCache;
+    StructuredBufferCache mStructuredBufferRangeStoragesCache;
+    size_t mStructuredBufferStorageAdditionalSize;
+    unsigned int mMaxStructuredBufferLruCount;
 };
 
 }  // namespace rx
diff --git a/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp b/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
index f86ff42..b23e831 100644
--- a/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/StateManager11.cpp
@@ -3481,24 +3481,21 @@
 angle::Result StateManager11::syncUniformBuffersForShader(const gl::Context *context,
                                                           gl::ShaderType shaderType)
 {
-    gl::ShaderMap<unsigned int> shaderReservedUBOs = mRenderer->getReservedShaderUniformBuffers();
-
     const auto &glState                  = context->getState();
     ID3D11DeviceContext *deviceContext   = mRenderer->getDeviceContext();
     ID3D11DeviceContext1 *deviceContext1 = mRenderer->getDeviceContext1IfSupported();
 
     const auto &shaderUniformBuffers = mProgramD3D->getShaderUniformBufferCache(shaderType);
-    const unsigned int reservedUBOs  = shaderReservedUBOs[shaderType];
 
     for (size_t bufferIndex = 0; bufferIndex < shaderUniformBuffers.size(); ++bufferIndex)
     {
-        const GLint binding = shaderUniformBuffers[bufferIndex];
-        if (binding == -1)
+        const D3DUBOCache cache = shaderUniformBuffers[bufferIndex];
+        if (cache.binding == -1)
         {
             continue;
         }
 
-        const auto &uniformBuffer          = glState.getIndexedUniformBuffer(binding);
+        const auto &uniformBuffer          = glState.getIndexedUniformBuffer(cache.binding);
         const GLintptr uniformBufferOffset = uniformBuffer.getOffset();
         const GLsizeiptr uniformBufferSize = uniformBuffer.getSize();
 
@@ -3517,7 +3514,6 @@
                                                         &firstConstant, &numConstants));
         ASSERT(constantBuffer);
 
-        const unsigned int appliedIndex = reservedUBOs + static_cast<unsigned int>(bufferIndex);
         switch (shaderType)
         {
             case gl::ShaderType::Vertex:
@@ -3532,19 +3528,19 @@
                 if (firstConstant != 0 && uniformBufferSize != 0)
                 {
                     ASSERT(numConstants != 0);
-                    deviceContext1->VSSetConstantBuffers1(appliedIndex, 1,
+                    deviceContext1->VSSetConstantBuffers1(cache.registerIndex, 1,
                                                           constantBuffer->getPointer(),
                                                           &firstConstant, &numConstants);
                 }
                 else
                 {
-                    deviceContext->VSSetConstantBuffers(appliedIndex, 1,
+                    deviceContext->VSSetConstantBuffers(cache.registerIndex, 1,
                                                         constantBuffer->getPointer());
                 }
 
-                mCurrentConstantBufferVS[appliedIndex]       = constantBuffer->getSerial();
-                mCurrentConstantBufferVSOffset[appliedIndex] = uniformBufferOffset;
-                mCurrentConstantBufferVSSize[appliedIndex]   = uniformBufferSize;
+                mCurrentConstantBufferVS[cache.registerIndex]       = constantBuffer->getSerial();
+                mCurrentConstantBufferVSOffset[cache.registerIndex] = uniformBufferOffset;
+                mCurrentConstantBufferVSSize[cache.registerIndex]   = uniformBufferSize;
                 break;
             }
 
@@ -3559,19 +3555,19 @@
 
                 if (firstConstant != 0 && uniformBufferSize != 0)
                 {
-                    deviceContext1->PSSetConstantBuffers1(appliedIndex, 1,
+                    deviceContext1->PSSetConstantBuffers1(cache.registerIndex, 1,
                                                           constantBuffer->getPointer(),
                                                           &firstConstant, &numConstants);
                 }
                 else
                 {
-                    deviceContext->PSSetConstantBuffers(appliedIndex, 1,
+                    deviceContext->PSSetConstantBuffers(cache.registerIndex, 1,
                                                         constantBuffer->getPointer());
                 }
 
-                mCurrentConstantBufferPS[appliedIndex]       = constantBuffer->getSerial();
-                mCurrentConstantBufferPSOffset[appliedIndex] = uniformBufferOffset;
-                mCurrentConstantBufferPSSize[appliedIndex]   = uniformBufferSize;
+                mCurrentConstantBufferPS[cache.registerIndex]       = constantBuffer->getSerial();
+                mCurrentConstantBufferPSOffset[cache.registerIndex] = uniformBufferOffset;
+                mCurrentConstantBufferPSSize[cache.registerIndex]   = uniformBufferSize;
                 break;
             }
 
@@ -3586,19 +3582,19 @@
 
                 if (firstConstant != 0 && uniformBufferSize != 0)
                 {
-                    deviceContext1->CSSetConstantBuffers1(appliedIndex, 1,
+                    deviceContext1->CSSetConstantBuffers1(cache.registerIndex, 1,
                                                           constantBuffer->getPointer(),
                                                           &firstConstant, &numConstants);
                 }
                 else
                 {
-                    deviceContext->CSSetConstantBuffers(appliedIndex, 1,
+                    deviceContext->CSSetConstantBuffers(cache.registerIndex, 1,
                                                         constantBuffer->getPointer());
                 }
 
-                mCurrentConstantBufferCS[appliedIndex]       = constantBuffer->getSerial();
-                mCurrentConstantBufferCSOffset[appliedIndex] = uniformBufferOffset;
-                mCurrentConstantBufferCSSize[appliedIndex]   = uniformBufferSize;
+                mCurrentConstantBufferCS[cache.registerIndex]       = constantBuffer->getSerial();
+                mCurrentConstantBufferCSOffset[cache.registerIndex] = uniformBufferOffset;
+                mCurrentConstantBufferCSSize[cache.registerIndex]   = uniformBufferSize;
                 break;
             }
 
@@ -3612,6 +3608,33 @@
         }
     }
 
+    const auto &shaderUniformBuffersUseSB =
+        mProgramD3D->getShaderUniformBufferCacheUseSB(shaderType);
+    for (size_t bufferIndex = 0; bufferIndex < shaderUniformBuffersUseSB.size(); ++bufferIndex)
+    {
+        const D3DUBOCacheUseSB cache = shaderUniformBuffersUseSB[bufferIndex];
+        if (cache.binding == -1)
+        {
+            continue;
+        }
+
+        const auto &uniformBuffer = glState.getIndexedUniformBuffer(cache.binding);
+        if (uniformBuffer.get() == nullptr)
+        {
+            continue;
+        }
+        const GLintptr uniformBufferOffset = uniformBuffer.getOffset();
+
+        Buffer11 *bufferStorage                    = GetImplAs<Buffer11>(uniformBuffer.get());
+        const d3d11::ShaderResourceView *bufferSRV = nullptr;
+        ANGLE_TRY(bufferStorage->getStructuredBufferRangeSRV(
+            context, static_cast<unsigned int>(uniformBufferOffset), cache.byteWidth,
+            cache.structureByteStride, &bufferSRV));
+
+        ASSERT(bufferSRV->valid());
+        setShaderResourceInternal(shaderType, cache.registerIndex, bufferSRV);
+    }
+
     return angle::Result::Continue;
 }
 
@@ -3693,8 +3716,7 @@
 
 angle::Result StateManager11::syncUniformBuffers(const gl::Context *context)
 {
-    gl::ShaderMap<unsigned int> shaderReservedUBOs = mRenderer->getReservedShaderUniformBuffers();
-    mProgramD3D->updateUniformBufferCache(context->getCaps(), shaderReservedUBOs);
+    mProgramD3D->updateUniformBufferCache(context->getCaps());
 
     if (mProgramD3D->hasShaderStage(gl::ShaderType::Compute))
     {
diff --git a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
index 63a4723..4f9e52c 100644
--- a/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
+++ b/src/libANGLE/renderer/d3d/d3d11/renderer11_utils.cpp
@@ -9,6 +9,7 @@
 
 #include "libANGLE/renderer/d3d/d3d11/renderer11_utils.h"
 
+#include <versionhelpers.h>
 #include <algorithm>
 
 #include "common/debug.h"
@@ -2444,6 +2445,12 @@
     // Never clear for robust resource init.  This matches Chrome's texture clearning behaviour.
     ANGLE_FEATURE_CONDITION(features, allowClearForRobustResourceInit, false);
 
+    // Don't translate uniform blocks to StructuredBuffer on pre-Windows 10 systems with an AMD
+    // driver. This works around a bug in the AMD D3D driver, which fails to allocate a
+    // ShaderResourceView for a StructuredBuffer.
+    ANGLE_FEATURE_CONDITION(features, dontTranslateUniformBlockToStructuredBuffer,
+                            isAMD && !IsWindows10OrGreater());
+
     // Call platform hooks for testing overrides.
     auto *platform = ANGLEPlatformCurrent();
     platform->overrideWorkaroundsD3D(platform, features);
diff --git a/src/tests/gl_tests/UniformBufferTest.cpp b/src/tests/gl_tests/UniformBufferTest.cpp
index 0549f2f..4264ff1 100644
--- a/src/tests/gl_tests/UniformBufferTest.cpp
+++ b/src/tests/gl_tests/UniformBufferTest.cpp
@@ -6,6 +6,7 @@
 
 #include "test_utils/ANGLETest.h"
 #include "test_utils/gl_raii.h"
+#include "util/random_utils.h"
 
 using namespace angle;
 
@@ -1596,6 +1597,560 @@
     EXPECT_PIXEL_COLOR_EQ(width / 2 + 5, height / 2 + 5, GLColor::green);
 }
 
+// Compile a uniform block whose single member is a large (800-element) array of structs.
+TEST_P(UniformBufferTest, LargeArrayOfStructs)
+{
+    constexpr char kVertexShader[] = R"(#version 300 es
+        struct InstancingData
+        {
+            mat4 transformation;
+        };
+
+        #define MAX_INSTANCE_COUNT 800
+
+        layout(std140) uniform InstanceBlock
+        {
+            InstancingData instances[MAX_INSTANCE_COUNT];
+        };
+
+        void main()
+        {
+            gl_Position = vec4(1.0) * instances[gl_InstanceID].transformation;
+        })";
+    // The fragment shader is trivial; only the vertex shader reads the uniform block.
+    constexpr char kFragmentShader[] = R"(#version 300 es
+        precision mediump float;
+        out vec4 outFragColor;
+        void main()
+        {
+            outFragColor = vec4(0.0);
+        })";
+    // No pixel checks follow: the test only needs the program to build and a draw to be issued.
+    ANGLE_GL_PROGRAM(program, kVertexShader, kFragmentShader);
+    // Add a draw call for the sake of the Vulkan backend, which currently only really builds
+    // shaders at draw time.
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+}
+
+// Test a uniform block whose only member is a large array of structs, where the struct itself
+// contains a single mat4 member.
+TEST_P(UniformBufferTest, UniformBlockWithOneLargeStructArray)
+{
+    GLint64 maxUniformBlockSize;
+    glGetInteger64v(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUniformBlockSize);
+    std::ostringstream stream;
+    GLuint arraySize;
+    // Ensure that the shader uniform block does not exceed the MAX_UNIFORM_BLOCK_SIZE limit.
+    if (maxUniformBlockSize >= 16384 && maxUniformBlockSize < 32768)
+    {
+        arraySize = 128;
+        stream << "const uint arraySize = 128u;\n"
+                  "const uint divisor1 = 128u;\n"
+                  "const uint divisor2 = 32u;\n";
+    }
+    else if (maxUniformBlockSize >= 32768 && maxUniformBlockSize < 65536)
+    {
+        arraySize = 256;
+        stream << "const uint arraySize = 256u;\n"
+                  "const uint divisor1 = 64u;\n"
+                  "const uint divisor2 = 16u;\n";
+    }
+    else
+    {
+        arraySize = 512;
+        stream << "const uint arraySize = 512u;\n"
+                  "const uint divisor1 = 32u;\n"
+                  "const uint divisor2 = 8u;\n";
+    }
+    // The shader maps each pixel (row-major, 128 pixels per row) to one mat4 column of s[].
+    const std::string &kFS =
+        "#version 300 es\n"
+        "precision highp float;\n" +
+        stream.str() +
+        "out vec4 my_FragColor;\n"
+        "struct S { mat4 color;};\n"
+        "layout(std140) uniform buffer { S s[arraySize]; };\n"
+        "void main()\n"
+        "{\n"
+        "    uvec2 coord = uvec2(floor(gl_FragCoord.xy));\n"
+        "    uint index = coord.x +  coord.y * 128u;\n"
+        "    uint index_x = index / divisor1;\n"
+        "    uint index_y = (index % divisor1) / divisor2;\n"
+        "    my_FragColor = s[index_x].color[index_y];\n"
+        "}\n";
+
+    GLint blockSize;
+    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS.c_str());
+    GLint uniformBufferIndex = glGetUniformBlockIndex(program, "buffer");
+    glGetActiveUniformBlockiv(program, uniformBufferIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &blockSize);
+    // Size the buffer from the block's reported data size and bind it at binding point 0.
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    glBufferData(GL_UNIFORM_BUFFER, blockSize, nullptr, GL_STATIC_DRAW);
+
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, mUniformBuffer);
+    glUniformBlockBinding(program, uniformBufferIndex, 0);
+    // CPU-side data for the block, laid out as arraySize mat4s (4 vec4s of 4 floats each).
+    const GLuint kVectorPerMat   = 4;
+    const GLuint kFloatPerVector = 4;
+    GLuint kVectorCount          = arraySize * kVectorPerMat;
+    GLuint kFloatCount           = kVectorCount * kFloatPerVector;
+    std::vector<GLfloat> floatData(kFloatCount, 0.0f);
+    const GLuint kPositionCount                    = 12;
+    unsigned int positionToTest[kPositionCount][2] = {{0, 0},  {75, 0},  {98, 13},  {31, 31},
+                                                      {0, 32}, {65, 33}, {23, 54},  {63, 63},
+                                                      {0, 64}, {43, 86}, {53, 100}, {127, 127}};
+    // Pass 1: every vec4 is (0, 0, 1, 1), so every sampled pixel must be blue.
+    for (GLuint i = 0; i < kVectorCount; i++)
+    {
+        floatData[4 * i + 2] = 1.0f;
+        floatData[4 * i + 3] = 1.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount * sizeof(GLfloat), floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::blue);
+    }
+    // Pass 2: every vec4 becomes (0, 1, 0, 1), so every sampled pixel must be green.
+    for (GLuint i = 0; i < kVectorCount; i++)
+    {
+        floatData[4 * i + 1] = 1.0f;
+        floatData[4 * i + 2] = 0.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount * sizeof(GLfloat), floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+    }
+    // Pass 3: the second quarter of the vec4s becomes (1, 0, 0, 1); expected on rows 32..63.
+    for (GLuint i = kVectorCount / 4; i < kVectorCount / 2; i++)
+    {
+        floatData[4 * i]     = 1.0f;
+        floatData[4 * i + 1] = 0.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount * sizeof(GLfloat), floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        if (positionToTest[i][1] > 31 && positionToTest[i][1] < 64)
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::red);
+        }
+        else
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+        }
+    }
+}
+
+// Test a uniform block whose only member is a large array of structs, where the struct itself
+// contains a mat4 member and a float member.
+TEST_P(UniformBufferTest, UniformBlockWithOneLargeMixStructArray)
+{
+    GLint64 maxUniformBlockSize;
+    glGetInteger64v(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUniformBlockSize);
+    std::ostringstream stream;
+    GLuint arraySize;
+    // Ensure that the shader uniform block does not exceed the MAX_UNIFORM_BLOCK_SIZE limit.
+    if (maxUniformBlockSize >= 16384 && maxUniformBlockSize < 32768)
+    {
+        arraySize = 128;
+        stream << "const uint arraySize = 128u;\n"
+                  "const uint divisor1 = 128u;\n"
+                  "const uint divisor2 = 32u;\n";
+    }
+    else if (maxUniformBlockSize >= 32768 && maxUniformBlockSize < 65536)
+    {
+        arraySize = 256;
+        stream << "const uint arraySize = 256u;\n"
+                  "const uint divisor1 = 64u;\n"
+                  "const uint divisor2 = 16u;\n";
+    }
+    else
+    {
+        arraySize = 512;
+        stream << "const uint arraySize = 512u;\n"
+                  "const uint divisor1 = 32u;\n"
+                  "const uint divisor2 = 8u;\n";
+    }
+    // Each pixel (row-major, 128 per row) outputs s[index_x].factor * one column of its mat4.
+    const std::string &kFS =
+        "#version 300 es\n"
+        "precision highp float;\n" +
+        stream.str() +
+        "out vec4 my_FragColor;\n"
+        "struct S { mat4 color; float factor; };\n"
+        "layout(std140) uniform buffer { S s[arraySize]; };\n"
+        "void main()\n"
+        "{\n"
+        "    uvec2 coord = uvec2(floor(gl_FragCoord.xy));\n"
+        "    uint index = coord.x +  coord.y * 128u;\n"
+        "    uint index_x = index / divisor1;\n"
+        "    uint index_y = (index % divisor1) / divisor2;\n"
+        "    my_FragColor = s[index_x].factor * s[index_x].color[index_y];\n"
+        "}\n";
+
+    GLint blockSize;
+    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS.c_str());
+    GLint uniformBufferIndex = glGetUniformBlockIndex(program, "buffer");
+    glGetActiveUniformBlockiv(program, uniformBufferIndex, GL_UNIFORM_BLOCK_DATA_SIZE, &blockSize);
+    // Size the buffer from the block's reported data size and bind it at binding point 0.
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    glBufferData(GL_UNIFORM_BUFFER, blockSize, nullptr, GL_STATIC_DRAW);
+
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, mUniformBuffer);
+    glUniformBlockBinding(program, uniformBufferIndex, 0);
+
+    const GLuint kVectorPerMat   = 4;
+    const GLuint kFloatPerVector = 4;
+    // The member s is an array of S structures; under std140 each element is rounded up to the
+    // base alignment of a vec4, so "factor" occupies a whole vec4 slot after the mat4.
+    GLuint kFloatCount = arraySize * (kVectorPerMat * kFloatPerVector + kFloatPerVector);
+    std::vector<GLfloat> floatData(kFloatCount, 0.0f);
+    const GLuint kPositionCount                    = 12;
+    unsigned int positionToTest[kPositionCount][2] = {{0, 0},  {75, 0},  {98, 13},  {31, 31},
+                                                      {0, 32}, {65, 33}, {23, 54},  {63, 63},
+                                                      {0, 64}, {43, 86}, {53, 100}, {127, 127}};
+    // Pass 1: columns are (0, 0, 0.5, 0.5) and factor is 2.0, so the product is blue.
+    const size_t kStrideofFloatCount = kVectorPerMat * kFloatPerVector + kFloatPerVector;
+    for (GLuint i = 0; i < arraySize; i++)
+    {
+        for (GLuint j = 0; j < kVectorPerMat; j++)
+        {
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j + 2] = 0.5f;
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j + 3] = 0.5f;
+        }
+        floatData[i * kStrideofFloatCount + kVectorPerMat * kFloatPerVector] = 2.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0,
+                    std::min(static_cast<size_t>(blockSize), kFloatCount * sizeof(GLfloat)),
+                    floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::blue);
+    }
+    // Pass 2: columns become (0, 0.5, 0, 0.5); scaled by factor 2.0 the product is green.
+    for (GLuint i = 0; i < arraySize; i++)
+    {
+        for (GLuint j = 0; j < kVectorPerMat; j++)
+        {
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j + 1] = 0.5f;
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j + 2] = 0.0f;
+        }
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0,
+                    std::min(static_cast<size_t>(blockSize), kFloatCount * sizeof(GLfloat)),
+                    floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+    }
+    // Pass 3: the second quarter of the array becomes (0.5, 0, 0, 0.5): red on rows 32..63.
+    for (GLuint i = arraySize / 4; i < arraySize / 2; i++)
+    {
+        for (GLuint j = 0; j < kVectorPerMat; j++)
+        {
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j]     = 0.5f;
+            floatData[i * kStrideofFloatCount + kFloatPerVector * j + 1] = 0.0f;
+        }
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0,
+                    std::min(static_cast<size_t>(blockSize), kFloatCount * sizeof(GLfloat)),
+                    floatData.data());
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        if (positionToTest[i][1] > 31 && positionToTest[i][1] < 64)
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::red);
+        }
+        else
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+        }
+    }
+}
+
+// Test a uniform block with a large struct array member and a uniform block with a small
+// struct array member in the same program, sharing one uniform buffer.
+TEST_P(UniformBufferTest, UniformBlocksInSameProgramShareUniformBuffer)
+{
+    GLint64 maxUniformBlockSize;
+    glGetInteger64v(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUniformBlockSize);
+    std::ostringstream stream;
+    GLuint arraySize1, arraySize2;
+    // Ensure that the shader uniform blocks do not exceed the MAX_UNIFORM_BLOCK_SIZE limit.
+    if (maxUniformBlockSize >= 16384 && maxUniformBlockSize < 32768)
+    {
+        arraySize1 = 128;
+        arraySize2 = 8;
+        stream << "const uint arraySize1 = 128u;\n"
+                  "const uint arraySize2 = 8u;\n"
+                  "const uint divisor1 = 128u;\n"
+                  "const uint divisor2 = 32u;\n"
+                  "const uint divisor3 = 16u;\n";
+    }
+    else if (maxUniformBlockSize >= 32768 && maxUniformBlockSize < 65536)
+    {
+        arraySize1 = 256;
+        arraySize2 = 16;
+        stream << "const uint arraySize1 = 256u;\n"
+                  "const uint arraySize2 = 16u;\n"
+                  "const uint divisor1 = 64u;\n"
+                  "const uint divisor2 = 16u;\n"
+                  "const uint divisor3 = 8u;\n";
+    }
+    else
+    {
+        arraySize1 = 512;
+        arraySize2 = 32;
+        stream << "const uint arraySize1 = 512u;\n"
+                  "const uint arraySize2 = 32u;\n"
+                  "const uint divisor1 = 32u;\n"
+                  "const uint divisor2 = 8u;\n"
+                  "const uint divisor3 = 4u;\n";
+    }
+    // s1 is indexed per pixel (row-major, 128 per row); s2 is indexed by the x coordinate only.
+    const std::string &kFS =
+        "#version 300 es\n"
+        "precision highp float;\n" +
+        stream.str() +
+        "out vec4 my_FragColor;\n"
+        "struct S { mat4 color;};\n"
+        "layout(std140) uniform buffer1 { S s1[arraySize1]; };\n"
+        "layout(std140) uniform buffer2 { S s2[arraySize2]; };\n"
+        "void main()\n"
+        "{\n"
+        "    uvec2 coord = uvec2(floor(gl_FragCoord.xy));\n"
+        "    uint index = coord.x +  coord.y * 128u;\n"
+        "    uint index_x1 = index / divisor1;\n"
+        "    uint index_y1 = (index % divisor1) / divisor2;\n"
+        "    uint index_x2 = coord.x / divisor3;\n"
+        "    uint index_y2 = coord.x % 4u;\n"
+        "    my_FragColor = s1[index_x1].color[index_y1] + s2[index_x2].color[index_y2];\n"
+        "}\n";
+
+    GLint blockSize1, blockSize2;
+    ANGLE_GL_PROGRAM(program, essl3_shaders::vs::Simple(), kFS.c_str());
+    GLint uniformBufferIndex1 = glGetUniformBlockIndex(program, "buffer1");
+    GLint uniformBufferIndex2 = glGetUniformBlockIndex(program, "buffer2");
+    glGetActiveUniformBlockiv(program, uniformBufferIndex1, GL_UNIFORM_BLOCK_DATA_SIZE,
+                              &blockSize1);
+    glGetActiveUniformBlockiv(program, uniformBufferIndex2, GL_UNIFORM_BLOCK_DATA_SIZE,
+                              &blockSize2);
+    // One buffer backs both blocks: s2's range comes first, s1's range starts at blockSize2.
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    glBufferData(GL_UNIFORM_BUFFER, blockSize1 + blockSize2, nullptr, GL_STATIC_DRAW);
+
+    glBindBufferRange(GL_UNIFORM_BUFFER, 0, mUniformBuffer, 0, blockSize2);
+    glUniformBlockBinding(program, uniformBufferIndex2, 0);
+    glBindBufferRange(GL_UNIFORM_BUFFER, 1, mUniformBuffer, blockSize2, blockSize1);
+    glUniformBlockBinding(program, uniformBufferIndex1, 1);
+    // floatData mirrors that order: kFloatCount2 floats for s2, then kFloatCount1 floats for s1.
+    const GLuint kVectorPerMat   = 4;
+    const GLuint kFloatPerVector = 4;
+    GLuint kVectorCount1         = arraySize1 * kVectorPerMat;
+    GLuint kVectorCount2         = arraySize2 * kVectorPerMat;
+    GLuint kFloatCount1          = kVectorCount1 * kFloatPerVector;
+    GLuint kFloatCount2          = kVectorCount2 * kFloatPerVector;
+    GLuint kFloatCount           = kFloatCount1 + kFloatCount2;
+    std::vector<GLfloat> floatData(kFloatCount, 0.0f);
+    const GLuint kPositionCount                    = 12;
+    unsigned int positionToTest[kPositionCount][2] = {{0, 0},  {75, 0},  {98, 13},  {31, 31},
+                                                      {0, 32}, {65, 33}, {23, 54},  {63, 63},
+                                                      {0, 64}, {43, 86}, {53, 100}, {127, 127}};
+    // Pass 1: s1 is (0, 0, 1, 1) everywhere and s2 is all zeros, so the sum is blue.
+    for (GLuint i = kVectorCount2; i < kVectorCount1 + kVectorCount2; i++)
+    {
+        floatData[4 * i + 2] = 1.0f;
+        floatData[4 * i + 3] = 1.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount2 * sizeof(GLfloat), &floatData[0]);
+    glBufferSubData(GL_UNIFORM_BUFFER, blockSize2, kFloatCount1 * sizeof(GLfloat),
+                    &floatData[kFloatCount2]);
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::blue);
+    }
+    // Pass 2: s2 gains green; s1's second quarter (kVectorCount1 floats, re-uploaded) turns red.
+    for (GLuint i = 0; i < kVectorCount2; i++)
+    {
+        floatData[4 * i + 1] = 1.0f;
+    }
+    for (GLuint i = kVectorCount2 + kVectorCount1 / 4; i < kVectorCount2 + kVectorCount1 / 2; i++)
+    {
+        floatData[4 * i]     = 1.0f;
+        floatData[4 * i + 2] = 0.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount2 * sizeof(GLfloat), &floatData[0]);
+    glBufferSubData(GL_UNIFORM_BUFFER, blockSize2 + kVectorCount1 * sizeof(GLfloat),
+                    kVectorCount1 * sizeof(GLfloat), &floatData[kFloatCount2 + kVectorCount1]);
+    drawQuad(program.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        if (positionToTest[i][1] > 31 && positionToTest[i][1] < 64)
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::yellow);
+        }
+        else
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::cyan);
+        }
+    }
+}
+
+// Test a uniform block with a large struct array member and a uniform block with a small
+// struct array member in different programs, sharing one uniform buffer.
+TEST_P(UniformBufferTest, UniformBlocksInDiffProgramShareUniformBuffer)
+{
+    GLint64 maxUniformBlockSize;
+    glGetInteger64v(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUniformBlockSize);
+    std::ostringstream stream1;
+    std::ostringstream stream2;
+    GLuint arraySize1, arraySize2;
+    // Ensure that the shader uniform blocks do not exceed the MAX_UNIFORM_BLOCK_SIZE limit.
+    if (maxUniformBlockSize >= 16384 && maxUniformBlockSize < 32768)
+    {
+        arraySize1 = 128;
+        arraySize2 = 8;
+        stream1 << "const uint arraySize1 = 128u;\n"
+                   "const uint divisor1 = 128u;\n"
+                   "const uint divisor2 = 32u;\n";
+        stream2 << "const uint arraySize2 = 8u;\n"
+                   "const uint divisor3 = 16u;\n";
+    }
+    else if (maxUniformBlockSize >= 32768 && maxUniformBlockSize < 65536)
+    {
+        arraySize1 = 256;
+        arraySize2 = 16;
+        stream1 << "const uint arraySize1 = 256u;\n"
+                   "const uint divisor1 = 64u;\n"
+                   "const uint divisor2 = 16u;\n";
+        stream2 << "const uint arraySize2 = 16u;\n"
+                   "const uint divisor3 = 8u;\n";
+    }
+    else
+    {
+        arraySize1 = 512;
+        arraySize2 = 32;
+        stream1 << "const uint arraySize1 = 512u;\n"
+                   "const uint divisor1 = 32u;\n"
+                   "const uint divisor2 = 8u;\n";
+        stream2 << "const uint arraySize2 = 32u;\n"
+                   "const uint divisor3 = 4u;\n";
+    }
+    // program1's shader indexes its large s[] per pixel (row-major, 128 per row).
+    const std::string &kFS1 =
+        "#version 300 es\n"
+        "precision highp float;\n" +
+        stream1.str() +
+        "out vec4 my_FragColor;\n"
+        "struct S { mat4 color;};\n"
+        "layout(std140) uniform buffer { S s[arraySize1]; };\n"
+        "void main()\n"
+        "{\n"
+        "    uvec2 coord = uvec2(floor(gl_FragCoord.xy));\n"
+        "    uint index = coord.x +  coord.y * 128u;\n"
+        "    uint index_x = index / divisor1;\n"
+        "    uint index_y = (index % divisor1) / divisor2;\n"
+        "    my_FragColor = s[index_x].color[index_y];\n"
+        "}\n";
+    // program2's shader indexes its smaller s[] by the x coordinate only.
+    const std::string &kFS2 =
+        "#version 300 es\n"
+        "precision highp float;\n" +
+        stream2.str() +
+        "out vec4 my_FragColor;\n"
+        "struct S { mat4 color;};\n"
+        "layout(std140) uniform buffer { S s[arraySize2]; };\n"
+        "void main()\n"
+        "{\n"
+        "    uvec2 coord = uvec2(floor(gl_FragCoord.xy));\n"
+        "    uint index_x = coord.x / divisor3;\n"
+        "    uint index_y = coord.x % 4u;\n"
+        "    my_FragColor = s[index_x].color[index_y];\n"
+        "}\n";
+
+    GLint blockSize1, blockSize2;
+    ANGLE_GL_PROGRAM(program1, essl3_shaders::vs::Simple(), kFS1.c_str());
+    ANGLE_GL_PROGRAM(program2, essl3_shaders::vs::Simple(), kFS2.c_str());
+    GLint uniformBufferIndex1 = glGetUniformBlockIndex(program1, "buffer");
+    GLint uniformBufferIndex2 = glGetUniformBlockIndex(program2, "buffer");
+    glGetActiveUniformBlockiv(program1, uniformBufferIndex1, GL_UNIFORM_BLOCK_DATA_SIZE,
+                              &blockSize1);
+    glGetActiveUniformBlockiv(program2, uniformBufferIndex2, GL_UNIFORM_BLOCK_DATA_SIZE,
+                              &blockSize2);
+    // One buffer, sized for the larger block, backs both programs; both ranges start at 0.
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    glBufferData(GL_UNIFORM_BUFFER, std::max(blockSize1, blockSize2), nullptr, GL_STATIC_DRAW);
+
+    glBindBufferRange(GL_UNIFORM_BUFFER, 0, mUniformBuffer, 0, blockSize2);
+    glUniformBlockBinding(program2, uniformBufferIndex2, 0);
+    glBindBufferRange(GL_UNIFORM_BUFFER, 1, mUniformBuffer, 0, blockSize1);
+    glUniformBlockBinding(program1, uniformBufferIndex1, 1);
+
+    const GLuint kVectorPerMat   = 4;
+    const GLuint kFloatPerVector = 4;
+    GLuint kVectorCount1         = arraySize1 * kVectorPerMat;
+    GLuint kVectorCount2         = arraySize2 * kVectorPerMat;
+    GLuint kFloatCount1          = kVectorCount1 * kFloatPerVector;
+    GLuint kFloatCount2          = kVectorCount2 * kFloatPerVector;
+    GLuint kFloatCount           = kFloatCount1;
+    std::vector<GLfloat> floatData(kFloatCount, 0.0f);
+    const GLuint kPositionCount                    = 12;
+    unsigned int positionToTest[kPositionCount][2] = {{0, 0},  {75, 0},  {98, 13},  {31, 31},
+                                                      {0, 32}, {65, 33}, {23, 54},  {63, 63},
+                                                      {0, 64}, {43, 86}, {53, 100}, {127, 127}};
+    // Pass 1: the whole buffer is (0, 0, 1, 1) per vec4, so program1 draws blue everywhere.
+    for (GLuint i = 0; i < kVectorCount1; i++)
+    {
+        floatData[4 * i + 2] = 1.0f;
+        floatData[4 * i + 3] = 1.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount * sizeof(GLfloat), &floatData[0]);
+    drawQuad(program1.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::blue);
+    }
+    // Pass 2: only the leading part that program2 reads is turned green, then program2 draws.
+    for (GLuint i = 0; i < kVectorCount2; i++)
+    {
+        floatData[4 * i + 1] = 1.0f;
+        floatData[4 * i + 2] = 0.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, 0, kFloatCount2 * sizeof(GLfloat), &floatData[0]);
+    drawQuad(program2.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+    }
+    // Pass 3: extend green to the first half of program1's s[]; rows 0..63 are then green.
+    for (GLuint i = kVectorCount2; i < kVectorCount1 / 2; i++)
+    {
+        floatData[4 * i + 1] = 1.0f;
+        floatData[4 * i + 2] = 0.0f;
+    }
+    glBufferSubData(GL_UNIFORM_BUFFER, kFloatCount2 * sizeof(GLfloat),
+                    (kFloatCount1 / 2 - kFloatCount2) * sizeof(GLfloat), &floatData[kFloatCount2]);
+    drawQuad(program1.get(), essl3_shaders::PositionAttrib(), 0.5f);
+    for (GLuint i = 0; i < kPositionCount; i++)
+    {
+        if (positionToTest[i][1] < 64)
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::green);
+        }
+        else
+        {
+            EXPECT_PIXEL_COLOR_EQ(positionToTest[i][0], positionToTest[i][1], GLColor::blue);
+        }
+    }
+}
+
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these
 // tests should be run against.
 ANGLE_INSTANTIATE_TEST_ES3(UniformBufferTest);