Fix writing uniform block maps to HLSL output

HLSL output maps structs in std140 uniform blocks to a different
layout in order to eliminate padding. The padding may have been
inserted to comply with std140 packing rules.

There used to be two issues in writing the maps: Sometimes the same
map could be written multiple times, and the maps were not being
written for uniform blocks with instance names.

Rewrite how the uniform buffer struct maps get generated so that
the code works correctly. Instead of flagging accesses, structs inside
uniform blocks are gathered from uniform block declarations. When
accesses to structs in uniform blocks are written out in OutputHLSL,
it's checked whether a mapped struct needs to be used instead of the
original one.

This code could still be optimized further by limiting mapped structs
generation to those ones that really need to be used. This is left to
be done later.

BUG=angleproject:2084
TEST=angle_end2end_tests

Change-Id: Iee24b3ef15847d2af64554ac74b8e4be5060d18c
Reviewed-on: https://chromium-review.googlesource.com/751506
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Olli Etuaho <oetuaho@nvidia.com>
diff --git a/src/compiler/translator/FlagStd140Structs.cpp b/src/compiler/translator/FlagStd140Structs.cpp
index d16412b..fba837f 100644
--- a/src/compiler/translator/FlagStd140Structs.cpp
+++ b/src/compiler/translator/FlagStd140Structs.cpp
@@ -3,75 +3,73 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
+// FlagStd140Structs.cpp: Find structs in std140 blocks, where the padding added in the translator
+// conflicts with the "natural" unpadded type.
 
 #include "compiler/translator/FlagStd140Structs.h"
 
+#include "compiler/translator/IntermTraverse.h"
+
 namespace sh
 {
 
-bool FlagStd140Structs::visitBinary(Visit visit, TIntermBinary *binaryNode)
+namespace
 {
-    if (binaryNode->getRight()->getBasicType() == EbtStruct)
+
+class FlagStd140StructsTraverser : public TIntermTraverser
+{
+  public:
+    FlagStd140StructsTraverser() : TIntermTraverser(true, false, false) {}
+
+    const std::vector<MappedStruct> getMappedStructs() const { return mMappedStructs; }
+
+  protected:
+    bool visitDeclaration(Visit visit, TIntermDeclaration *node) override;
+
+  private:
+    void mapBlockStructMembers(TIntermSymbol *blockDeclarator, TInterfaceBlock *block);
+
+    std::vector<MappedStruct> mMappedStructs;
+};
+
+void FlagStd140StructsTraverser::mapBlockStructMembers(TIntermSymbol *blockDeclarator,
+                                                       TInterfaceBlock *block)
+{
+    for (auto *field : block->fields())
     {
-        switch (binaryNode->getOp())
+        if (field->type()->getBasicType() == EbtStruct)
         {
-            case EOpIndexDirectInterfaceBlock:
-            case EOpIndexDirectStruct:
-                if (isInStd140InterfaceBlock(binaryNode->getLeft()))
-                {
-                    mFlaggedNodes.push_back(binaryNode);
-                }
-                break;
-
-            default:
-                break;
+            MappedStruct mappedStruct;
+            mappedStruct.blockDeclarator = blockDeclarator;
+            mappedStruct.field           = field;
+            mMappedStructs.push_back(mappedStruct);
         }
-        return false;
-    }
-
-    if (binaryNode->getOp() == EOpIndexDirectStruct)
-    {
-        return false;
-    }
-
-    return visit == PreVisit;
-}
-
-void FlagStd140Structs::visitSymbol(TIntermSymbol *symbol)
-{
-    if (isInStd140InterfaceBlock(symbol) && symbol->getBasicType() == EbtStruct)
-    {
-        mFlaggedNodes.push_back(symbol);
     }
 }
 
-bool FlagStd140Structs::isInStd140InterfaceBlock(TIntermTyped *node) const
+bool FlagStd140StructsTraverser::visitDeclaration(Visit visit, TIntermDeclaration *node)
 {
-    TIntermBinary *binaryNode = node->getAsBinaryNode();
-
-    if (binaryNode)
+    TIntermTyped *declarator = node->getSequence()->back()->getAsTyped();
+    if (declarator->getBasicType() == EbtInterfaceBlock)
     {
-        return isInStd140InterfaceBlock(binaryNode->getLeft());
+        TInterfaceBlock *block = declarator->getType().getInterfaceBlock();
+        if (block->blockStorage() == EbsStd140)
+        {
+            mapBlockStructMembers(declarator->getAsSymbolNode(), block);
+        }
     }
-
-    const TType &type = node->getType();
-
-    // determine if we are in the standard layout
-    const TInterfaceBlock *interfaceBlock = type.getInterfaceBlock();
-    if (interfaceBlock)
-    {
-        return (interfaceBlock->blockStorage() == EbsStd140);
-    }
-
     return false;
 }
 
-std::vector<TIntermTyped *> FlagStd140ValueStructs(TIntermNode *node)
+}  // anonymous namespace
+
+std::vector<MappedStruct> FlagStd140Structs(TIntermNode *node)
 {
-    FlagStd140Structs flaggingTraversal;
+    FlagStd140StructsTraverser flaggingTraversal;
 
     node->traverse(&flaggingTraversal);
 
-    return flaggingTraversal.getFlaggedNodes();
+    return flaggingTraversal.getMappedStructs();
 }
-}
+
+}  // namespace sh
diff --git a/src/compiler/translator/FlagStd140Structs.h b/src/compiler/translator/FlagStd140Structs.h
index ab904c2..f548d8b6 100644
--- a/src/compiler/translator/FlagStd140Structs.h
+++ b/src/compiler/translator/FlagStd140Structs.h
@@ -3,36 +3,28 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //
+// FlagStd140Structs.h: Find structs in std140 blocks, where the padding added in the translator
+// conflicts with the "natural" unpadded type.
 
 #ifndef COMPILER_TRANSLATOR_FLAGSTD140STRUCTS_H_
 #define COMPILER_TRANSLATOR_FLAGSTD140STRUCTS_H_
 
-#include "compiler/translator/IntermTraverse.h"
+#include <vector>
 
 namespace sh
 {
 
-// This class finds references to nested structs of std140 blocks that access
-// the nested struct "by value", where the padding added in the translator
-// conflicts with the "natural" unpadded type.
-class FlagStd140Structs : public TIntermTraverser
+class TField;
+class TIntermNode;
+class TIntermSymbol;
+
+struct MappedStruct
 {
-  public:
-    FlagStd140Structs() : TIntermTraverser(true, false, false) {}
-
-    const std::vector<TIntermTyped *> getFlaggedNodes() const { return mFlaggedNodes; }
-
-  protected:
-    bool visitBinary(Visit visit, TIntermBinary *binaryNode) override;
-    void visitSymbol(TIntermSymbol *symbol) override;
-
-  private:
-    bool isInStd140InterfaceBlock(TIntermTyped *node) const;
-
-    std::vector<TIntermTyped *> mFlaggedNodes;
+    TIntermSymbol *blockDeclarator;
+    TField *field;
 };
 
-std::vector<TIntermTyped *> FlagStd140ValueStructs(TIntermNode *node);
+std::vector<MappedStruct> FlagStd140Structs(TIntermNode *node);
 }
 
 #endif  // COMPILER_TRANSLATOR_FLAGSTD140STRUCTS_H_
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index 13a7016..7e84761 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -15,7 +15,6 @@
 #include "common/utilities.h"
 #include "compiler/translator/BuiltInFunctionEmulator.h"
 #include "compiler/translator/BuiltInFunctionEmulatorHLSL.h"
-#include "compiler/translator/FlagStd140Structs.h"
 #include "compiler/translator/ImageFunctionHLSL.h"
 #include "compiler/translator/InfoSink.h"
 #include "compiler/translator/NodeSearch.h"
@@ -57,6 +56,27 @@
             variable->getQualifier() == EvqConst);
 }
 
+bool IsInStd140InterfaceBlock(TIntermTyped *node)
+{
+    TIntermBinary *binaryNode = node->getAsBinaryNode();
+
+    if (binaryNode)
+    {
+        return IsInStd140InterfaceBlock(binaryNode->getLeft());
+    }
+
+    const TType &type = node->getType();
+
+    // determine if we are in the standard layout
+    const TInterfaceBlock *interfaceBlock = type.getInterfaceBlock();
+    if (interfaceBlock)
+    {
+        return (interfaceBlock->blockStorage() == EbsStd140);
+    }
+
+    return false;
+}
+
 }  // anonymous namespace
 
 void OutputHLSL::writeFloat(TInfoSinkBase &out, float f)
@@ -203,9 +223,6 @@
 
 void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
 {
-    const std::vector<TIntermTyped *> &flaggedStructs = FlagStd140ValueStructs(treeRoot);
-    makeFlaggedStructMaps(flaggedStructs);
-
     BuiltInFunctionEmulator builtInFunctionEmulator;
     InitBuiltInFunctionEmulatorForHLSL(&builtInFunctionEmulator);
     if ((mCompileOptions & SH_EMULATE_ISNAN_FLOAT_FUNCTION) != 0)
@@ -221,6 +238,11 @@
     ASSERT(success == CallDAG::INITDAG_SUCCESS);
     mASTMetadataList = CreateASTMetadataHLSL(treeRoot, mCallDag);
 
+    const std::vector<MappedStruct> std140Structs = FlagStd140Structs(treeRoot);
+    // TODO(oetuaho): The std140Structs could be filtered based on which ones actually get used in
+    // the shader code. When we add shader storage blocks we might also consider an alternative
+    // solution, since the struct mapping won't work very well for shader storage blocks.
+
     // Output the body and footer first to determine what has to go in the header
     mInfoSinkStack.push(&mBody);
     treeRoot->traverse(this);
@@ -230,7 +252,7 @@
     mInfoSinkStack.pop();
 
     mInfoSinkStack.push(&mHeader);
-    header(mHeader, &builtInFunctionEmulator);
+    header(mHeader, std140Structs, &builtInFunctionEmulator);
     mInfoSinkStack.pop();
 
     objSink << mHeader.c_str();
@@ -240,33 +262,6 @@
     builtInFunctionEmulator.cleanup();
 }
 
-void OutputHLSL::makeFlaggedStructMaps(const std::vector<TIntermTyped *> &flaggedStructs)
-{
-    for (unsigned int structIndex = 0; structIndex < flaggedStructs.size(); structIndex++)
-    {
-        TIntermTyped *flaggedNode = flaggedStructs[structIndex];
-
-        TInfoSinkBase structInfoSink;
-        mInfoSinkStack.push(&structInfoSink);
-
-        // This will mark the necessary block elements as referenced
-        flaggedNode->traverse(this);
-
-        TString structName(structInfoSink.c_str());
-        mInfoSinkStack.pop();
-
-        mFlaggedStructOriginalNames[flaggedNode] = structName;
-
-        for (size_t pos = structName.find('.'); pos != std::string::npos;
-             pos        = structName.find('.'))
-        {
-            structName.erase(pos, 1);
-        }
-
-        mFlaggedStructMappedNames[flaggedNode] = "map" + structName;
-    }
-}
-
 const std::map<std::string, unsigned int> &OutputHLSL::getUniformBlockRegisterMap() const
 {
     return mUniformHLSL->getUniformBlockRegisterMap();
@@ -277,7 +272,9 @@
     return mUniformHLSL->getUniformRegisterMap();
 }
 
-TString OutputHLSL::structInitializerString(int indent, const TType &type, const TString &name)
+TString OutputHLSL::structInitializerString(int indent,
+                                            const TType &type,
+                                            const TString &name) const
 {
     TString init;
 
@@ -333,30 +330,77 @@
     return init;
 }
 
-void OutputHLSL::header(TInfoSinkBase &out, const BuiltInFunctionEmulator *builtInFunctionEmulator)
+TString OutputHLSL::generateStructMapping(const std::vector<MappedStruct> &std140Structs) const
+{
+    TString mappedStructs;
+
+    for (auto &mappedStruct : std140Structs)
+    {
+        TInterfaceBlock *interfaceBlock =
+            mappedStruct.blockDeclarator->getType().getInterfaceBlock();
+        const TString &interfaceBlockName = interfaceBlock->name();
+        const TName &instanceName         = mappedStruct.blockDeclarator->getName();
+        if (mReferencedUniformBlocks.count(interfaceBlockName) == 0 &&
+            (instanceName.getString() == "" ||
+             mReferencedUniformBlocks.count(instanceName.getString()) == 0))
+        {
+            continue;
+        }
+
+        unsigned int instanceCount = 1u;
+        bool isInstanceArray       = mappedStruct.blockDeclarator->isArray();
+        if (isInstanceArray)
+        {
+            instanceCount = mappedStruct.blockDeclarator->getOutermostArraySize();
+        }
+
+        for (unsigned int instanceArrayIndex = 0; instanceArrayIndex < instanceCount;
+             ++instanceArrayIndex)
+        {
+            TString originalName;
+            TString mappedName("map");
+
+            if (instanceName.getString() != "")
+            {
+                unsigned int instanceStringArrayIndex = GL_INVALID_INDEX;
+                if (isInstanceArray)
+                    instanceStringArrayIndex = instanceArrayIndex;
+                TString instanceString = mUniformHLSL->uniformBlockInstanceString(
+                    *interfaceBlock, instanceStringArrayIndex);
+                originalName += instanceString;
+                mappedName += instanceString;
+                originalName += ".";
+                mappedName += "_";
+            }
+
+            TString fieldName = Decorate(mappedStruct.field->name());
+            originalName += fieldName;
+            mappedName += fieldName;
+
+            TType *structType = mappedStruct.field->type();
+            mappedStructs +=
+                "static " + Decorate(structType->getStruct()->name()) + " " + mappedName;
+
+            if (structType->isArray())
+            {
+                mappedStructs += ArrayString(*mappedStruct.field->type());
+            }
+
+            mappedStructs += " =\n";
+            mappedStructs += structInitializerString(0, *structType, originalName);
+            mappedStructs += ";\n";
+        }
+    }
+    return mappedStructs;
+}
+
+void OutputHLSL::header(TInfoSinkBase &out,
+                        const std::vector<MappedStruct> &std140Structs,
+                        const BuiltInFunctionEmulator *builtInFunctionEmulator) const
 {
     TString varyings;
     TString attributes;
-    TString flaggedStructs;
-
-    for (std::map<TIntermTyped *, TString>::const_iterator flaggedStructIt =
-             mFlaggedStructMappedNames.begin();
-         flaggedStructIt != mFlaggedStructMappedNames.end(); flaggedStructIt++)
-    {
-        TIntermTyped *structNode    = flaggedStructIt->first;
-        const TString &mappedName   = flaggedStructIt->second;
-        const TStructure &structure = *structNode->getType().getStruct();
-        const TString &originalName = mFlaggedStructOriginalNames[structNode];
-
-        flaggedStructs += "static " + Decorate(structure.name()) + " " + mappedName;
-        if (structNode->isArray())
-        {
-            flaggedStructs += ArrayString(structNode->getType());
-        }
-        flaggedStructs += " =\n";
-        flaggedStructs += structInitializerString(0, structNode->getType(), originalName);
-        flaggedStructs += ";\n";
-    }
+    TString mappedStructs = generateStructMapping(std140Structs);
 
     for (ReferencedSymbols::const_iterator varying = mReferencedVaryings.begin();
          varying != mReferencedVaryings.end(); varying++)
@@ -574,11 +618,11 @@
                    "\n";
         }
 
-        if (!flaggedStructs.empty())
+        if (!mappedStructs.empty())
         {
-            out << "// Std140 Structures accessed by value\n";
+            out << "// Structures from std140 blocks with padding removed\n";
             out << "\n";
-            out << flaggedStructs;
+            out << mappedStructs;
             out << "\n";
         }
 
@@ -687,11 +731,11 @@
                    "\n";
         }
 
-        if (!flaggedStructs.empty())
+        if (!mappedStructs.empty())
         {
-            out << "// Std140 Structures accessed by value\n";
+            out << "// Structures from std140 blocks with padding removed\n";
             out << "\n";
-            out << flaggedStructs;
+            out << mappedStructs;
             out << "\n";
         }
     }
@@ -820,10 +864,9 @@
     TInfoSinkBase &out = getInfoSink();
 
     // Handle accessing std140 structs by value
-    if (mFlaggedStructMappedNames.count(node) > 0)
+    if (IsInStd140InterfaceBlock(node) && node->getBasicType() == EbtStruct)
     {
-        out << mFlaggedStructMappedNames[node];
-        return;
+        out << "map";
     }
 
     TString name = node->getSymbol();
@@ -1057,13 +1100,6 @@
 {
     TInfoSinkBase &out = getInfoSink();
 
-    // Handle accessing std140 structs by value
-    if (mFlaggedStructMappedNames.count(node) > 0)
-    {
-        out << mFlaggedStructMappedNames[node];
-        return false;
-    }
-
     switch (node->getOp())
     {
         case EOpComma:
@@ -1262,17 +1298,33 @@
         }
         break;
         case EOpIndexDirectInterfaceBlock:
+        {
+            bool structInStd140Block =
+                node->getBasicType() == EbtStruct && IsInStd140InterfaceBlock(node->getLeft());
+            if (visit == PreVisit && structInStd140Block)
+            {
+                out << "map";
+            }
             if (visit == InVisit)
             {
                 const TInterfaceBlock *interfaceBlock =
                     node->getLeft()->getType().getInterfaceBlock();
                 const TIntermConstantUnion *index = node->getRight()->getAsConstantUnion();
                 const TField *field               = interfaceBlock->fields()[index->getIConst(0)];
-                out << "." + Decorate(field->name());
+                if (structInStd140Block)
+                {
+                    out << "_";
+                }
+                else
+                {
+                    out << ".";
+                }
+                out << Decorate(field->name());
 
                 return false;
             }
             break;
+        }
         case EOpAdd:
             outputTriplet(out, visit, "(", " + ", ")");
             break;
diff --git a/src/compiler/translator/OutputHLSL.h b/src/compiler/translator/OutputHLSL.h
index 4e8e427..553d4c5 100644
--- a/src/compiler/translator/OutputHLSL.h
+++ b/src/compiler/translator/OutputHLSL.h
@@ -14,6 +14,7 @@
 #include "angle_gl.h"
 #include "compiler/translator/ASTMetadataHLSL.h"
 #include "compiler/translator/Compiler.h"
+#include "compiler/translator/FlagStd140Structs.h"
 #include "compiler/translator/IntermTraverse.h"
 
 class BuiltInFunctionEmulator;
@@ -61,7 +62,9 @@
     static bool canWriteAsHLSLLiteral(TIntermTyped *expression);
 
   protected:
-    void header(TInfoSinkBase &out, const BuiltInFunctionEmulator *builtInFunctionEmulator);
+    void header(TInfoSinkBase &out,
+                const std::vector<MappedStruct> &std140Structs,
+                const BuiltInFunctionEmulator *builtInFunctionEmulator) const;
 
     void writeFloat(TInfoSinkBase &out, float f);
     void writeSingleConstant(TInfoSinkBase &out, const TConstantUnion *const constUnion);
@@ -115,7 +118,6 @@
     void outputAssign(Visit visit, const TType &type, TInfoSinkBase &out);
 
     void writeEmulatedFunctionTriplet(TInfoSinkBase &out, Visit visit, TOperator op);
-    void makeFlaggedStructMaps(const std::vector<TIntermTyped *> &flaggedStructs);
 
     // Returns true if it found a 'same symbol' initializer (initializer that references the
     // variable it's initting)
@@ -204,10 +206,7 @@
 
     TIntermSymbol *mExcessiveLoopIndex;
 
-    TString structInitializerString(int indent, const TType &type, const TString &name);
-
-    std::map<TIntermTyped *, TString> mFlaggedStructMappedNames;
-    std::map<TIntermTyped *, TString> mFlaggedStructOriginalNames;
+    TString structInitializerString(int indent, const TType &type, const TString &name) const;
 
     struct HelperFunction
     {
@@ -245,6 +244,7 @@
     PerformanceDiagnostics *mPerfDiagnostics;
 
   private:
+    TString generateStructMapping(const std::vector<MappedStruct> &std140Structs) const;
     TString samplerNamePrefixFromStruct(TIntermTyped *node);
     bool ancestorEvaluatesToSamplerInStruct();
 };
diff --git a/src/tests/gl_tests/UniformBufferTest.cpp b/src/tests/gl_tests/UniformBufferTest.cpp
index db28f7b..05fa369 100644
--- a/src/tests/gl_tests/UniformBufferTest.cpp
+++ b/src/tests/gl_tests/UniformBufferTest.cpp
@@ -866,31 +866,90 @@
     EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
 }
 
+// Test with a block instance array containing an array of structs.
+TEST_P(UniformBufferTest, BlockArrayContainingArrayOfStructs)
+{
+    const std::string &fragmentShader =
+        R"(#version 300 es
+
+        precision highp float;
+        out vec4 my_FragColor;
+        struct light_t
+        {
+            vec4 intensity;
+        };
+
+        layout(std140) uniform lightData { light_t lights[2]; } buffers[2];
+
+        vec4 processLight(vec4 lighting, light_t light)
+        {
+            return lighting + light.intensity;
+        }
+        void main()
+        {
+            vec4 lighting = vec4(0, 0, 0, 1);
+            lighting = processLight(lighting, buffers[0].lights[0]);
+            lighting = processLight(lighting, buffers[1].lights[1]);
+            my_FragColor = lighting;
+        })";
+
+    ANGLE_GL_PROGRAM(program, mVertexShaderSource, fragmentShader);
+    GLint uniformBufferIndex  = glGetUniformBlockIndex(program, "lightData[0]");
+    GLint uniformBuffer2Index = glGetUniformBlockIndex(program, "lightData[1]");
+
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    const GLsizei kStructCount        = 2;
+    const GLsizei kVectorElementCount = 4;
+    const GLsizei kBytesPerElement    = 4;
+    const GLsizei kDataSize           = kStructCount * kVectorElementCount * kBytesPerElement;
+    std::vector<GLubyte> v(kDataSize, 0);
+    float *vAsFloat = reinterpret_cast<float *>(v.data());
+
+    // In the first struct/vector of the first block
+    vAsFloat[1] = 0.5f;
+
+    glBufferData(GL_UNIFORM_BUFFER, kDataSize, v.data(), GL_STATIC_DRAW);
+
+    GLBuffer uniformBuffer2;
+    glBindBuffer(GL_UNIFORM_BUFFER, uniformBuffer2);
+
+    vAsFloat[1] = 0.0f;
+    // In the second struct/vector of the second block
+    vAsFloat[kVectorElementCount + 1] = 0.5f;
+    glBufferData(GL_UNIFORM_BUFFER, kDataSize, v.data(), GL_STATIC_DRAW);
+
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, mUniformBuffer);
+    glBindBufferBase(GL_UNIFORM_BUFFER, 1, uniformBuffer2);
+    glUniformBlockBinding(program, uniformBufferIndex, 0);
+    glUniformBlockBinding(program, uniformBuffer2Index, 1);
+    drawQuad(program.get(), "position", 0.5f);
+    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
+}
+
 // Test with a block containing an array of structs containing arrays.
 TEST_P(UniformBufferTest, BlockContainingArrayOfStructsContainingArrays)
 {
     const std::string &fragmentShader =
-        "#version 300 es\n"
-        "precision highp float;\n"
-        "out vec4 my_FragColor;\n"
-        "struct light_t {\n"
-        "    vec4 intensity[3];\n"
-        "};\n"
-        "const int maxLights = 2;\n"
-        "layout(std140) uniform lightData { light_t lights[maxLights]; };\n"
-        "vec4 processLight(vec4 lighting, light_t light)\n"
-        "{\n"
-        "    return lighting + light.intensity[1];\n"
-        "}\n"
-        "void main()\n"
-        "{\n"
-        "    vec4 lighting = vec4(0, 0, 0, 1);\n"
-        "    for (int n = 0; n < maxLights; n++)\n"
-        "    {\n"
-        "        lighting = processLight(lighting, lights[n]);\n"
-        "    }\n"
-        "    my_FragColor = lighting;\n"
-        "}\n";
+        R"(#version 300 es
+        precision highp float;
+        out vec4 my_FragColor;
+        struct light_t
+        {
+            vec4 intensity[3];
+        };
+        const int maxLights = 2;
+        layout(std140) uniform lightData { light_t lights[maxLights]; };
+        vec4 processLight(vec4 lighting, light_t light)
+        {
+            return lighting + light.intensity[1];
+        }
+        void main()
+        {
+            vec4 lighting = vec4(0, 0, 0, 1);
+            lighting = processLight(lighting, lights[0]);
+            lighting = processLight(lighting, lights[1]);
+            my_FragColor = lighting;
+        })";
 
     ANGLE_GL_PROGRAM(program, mVertexShaderSource, fragmentShader);
     GLint uniformBufferIndex = glGetUniformBlockIndex(program, "lightData");
@@ -1038,6 +1097,65 @@
     EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
 }
 
+// Test that uniform block instance with nested structs that contain vec3s inside is handled
+// correctly. This is meant to test that HLSL structure padding to implement std140 layout works
+// together with uniform blocks.
+TEST_P(UniformBufferTest, Std140UniformBlockInstanceWithNestedStructsContainingVec3s)
+{
+    // Got incorrect test result on non-NVIDIA Android - the alpha channel was not set correctly
+    // from the second vector, possibly the platform doesn't implement std140 packing right?
+    // http://anglebug.com/2217
+    ANGLE_SKIP_TEST_IF(IsAndroid() && !IsNVIDIA());
+
+    const std::string &fragmentShader =
+        R"(#version 300 es
+
+        precision highp float;
+        out vec4 my_FragColor;
+
+        struct Sinner {
+          vec3 v;
+        };
+
+        struct S {
+            Sinner s1;
+            Sinner s2;
+        };
+
+        layout(std140) uniform structBuffer { S s; } buffer;
+
+        void accessStruct(S s)
+        {
+            my_FragColor = vec4(s.s1.v.xy, s.s2.v.xy);
+        }
+
+        void main()
+        {
+            accessStruct(buffer.s);
+        })";
+
+    ANGLE_GL_PROGRAM(program, mVertexShaderSource, fragmentShader);
+    GLint uniformBufferIndex = glGetUniformBlockIndex(program, "structBuffer");
+
+    glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffer);
+    const GLsizei kVectorsPerBlock         = 2;
+    const GLsizei kElementsPerPaddedVector = 4;
+    const GLsizei kBytesPerElement         = 4;
+    const GLsizei kDataSize = kVectorsPerBlock * kElementsPerPaddedVector * kBytesPerElement;
+    std::vector<GLubyte> v(kDataSize, 0);
+    float *vAsFloat = reinterpret_cast<float *>(v.data());
+
+    // Set second value in each vec3.
+    vAsFloat[1u]      = 1.0f;
+    vAsFloat[4u + 1u] = 1.0f;
+
+    glBufferData(GL_UNIFORM_BUFFER, kDataSize, v.data(), GL_STATIC_DRAW);
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, mUniformBuffer);
+    glUniformBlockBinding(program, uniformBufferIndex, 0);
+    drawQuad(program.get(), "position", 0.5f);
+    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
+}
+
 // Use this to select which configurations (e.g. which renderer, which GLES major version) these tests should be run against.
 ANGLE_INSTANTIATE_TEST(UniformBufferTest,
                        ES3_D3D11(),