Vulkan: Emulate subgroup ops in seamful cubemap emulation

Where subgroup ops are not available, they are emulated as follows:

Code with subgroup ops:

    float lH = subgroupQuadSwapHorizontal(layer);
    float lV = subgroupQuadSwapVertical(layer);
    float lD = subgroupQuadSwapDiagonal(layer);

    bool isHelperH = subgroupQuadSwapHorizontal(gl_HelperInvocation);
    bool isHelperV = subgroupQuadSwapVertical(gl_HelperInvocation);

    if (gl_HelperInvocation)
    {
        layer = !isHelperH ? lH : !isHelperV ? lV : lD;
    }

Emulated code:

    float nonHelperLayer = gl_HelperInvocation ? 0.0 : layer;
    float lH = abs(dFdxFine(nonHelperLayer));
    float lV = abs(dFdyFine(nonHelperLayer));
    float lD = abs(dFdxFine(lV));

    float isHelperDiffH = abs(dFdxFine(float(gl_HelperInvocation)));
    bool isNonHelperH = isHelperDiffH > 0.5;

    float isHelperDiffV = abs(dFdyFine(float(gl_HelperInvocation)));
    bool isNonHelperV = isHelperDiffV > 0.5;

    if (gl_HelperInvocation)
    {
        layer = isNonHelperH ? lH : isNonHelperV ? lV : lD;
    }

Both paths are kept because the emulated code misbehaves on Nvidia
devices.  In effect, this change therefore only enables seamful cube
map emulation on Android, where subgroup operations are not supported.

Bug: angleproject:3243
Bug: angleproject:3732
Change-Id: I9664d9760756758748183eb121c626f176789f3a
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1742222
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
diff --git a/include/GLSLANG/ShaderLang.h b/include/GLSLANG/ShaderLang.h
index c26310a..0134c59 100644
--- a/include/GLSLANG/ShaderLang.h
+++ b/include/GLSLANG/ShaderLang.h
@@ -26,7 +26,7 @@
 
 // Version number for shader translation API.
 // It is incremented every time the API changes.
-#define ANGLE_SH_VERSION 211
+#define ANGLE_SH_VERSION 212
 
 enum ShShaderSpec
 {
@@ -288,9 +288,14 @@
 const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43;
 
 // Emulate seamful cube map sampling for OpenGL ES2.0.  Currently only applies to the Vulkan
-// backend, as subgroup operations are used.  Once that dependency is broken, could be used with
+// backend, as it is done after samplers are moved out of structs.  Can likely be made to work on
 // the other backends as well.
+//
+// There are two variations of this.  One using subgroup operations where available, and another
+// that emulates those operations using dFdxFine and dFdyFine.  The latter is more universally
+// available, but is buggy on Nvidia.
 const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
+const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45;
 
 // Defines alternate strategies for implementing array index clamping.
 enum ShArrayIndexClampingStrategy
diff --git a/src/compiler/translator/TranslatorVulkan.cpp b/src/compiler/translator/TranslatorVulkan.cpp
index 3819e51..fe4560c 100644
--- a/src/compiler/translator/TranslatorVulkan.cpp
+++ b/src/compiler/translator/TranslatorVulkan.cpp
@@ -645,7 +645,7 @@
 
     sink << "#version 450 core\n";
 
-    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
+    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)
     {
         sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
     }
@@ -688,10 +688,12 @@
 
     // Rewrite samplerCubes as sampler2DArrays.  This must be done after rewriting struct samplers
     // as it doesn't expect that.
-    if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
+    if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING |
+                          SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP))
     {
-        RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(),
-                                        getShaderType() == GL_FRAGMENT_SHADER);
+        RewriteCubeMapSamplersAs2DArray(
+            root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER,
+            compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP);
     }
 
     if (defaultUniformCount > 0)
diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
index bc1a2ae..9201202 100644
--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp
@@ -39,9 +39,56 @@
     return neighbor;
 }
 
+// Calculate the difference of a value with another invocation in the quad.  Used to emulate
+// GetValueFromNeighbor where subgroup operations are not present.
+//
+// See comment in declareCoordTranslationFunction.
+TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable,
+                                   TIntermBlock *body,
+                                   TFunction *dFdxyFine,
+                                   TIntermTyped *variable,
+                                   const TType *variableType)
+{
+    TIntermTyped *neighborValue =
+        TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable}));
+    TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr);
+
+    TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
+    body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue));
+
+    return neighbor;
+}
+
+// Used to emulate GetValueFromNeighbor with bool values.
+TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable,
+                                   TIntermBlock *body,
+                                   TFunction *dFdxyFine,
+                                   TIntermTyped *gl_HelperInvocation)
+{
+    const TType *boolType  = StaticType::GetBasic<EbtBool>();
+    const TType *floatType = StaticType::GetBasic<EbtFloat>();
+
+    TIntermTyped *gl_HelperInvocationAsFloat =
+        TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation}));
+    TIntermSymbol *diffWithNeighbor =
+        GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType);
+
+    TIntermTyped *isNeighborNonHelperValue =
+        new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f));
+    TIntermSymbol *isNeighborNonHelper =
+        new TIntermSymbol(CreateTempVariable(symbolTable, boolType));
+    body->appendStatement(
+        CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue));
+
+    return isNeighborNonHelper;
+}
+
 // If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
 // in the quad that is not a helper.  See comment in declareCoordTranslationFunction.
-void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *body, TIntermTyped *l)
+void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable,
+                                     TIntermBlock *body,
+                                     TIntermTyped *l,
+                                     bool useSubgroupOps)
 {
     TVariable *gl_HelperInvocationVar =
         new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
@@ -50,48 +97,97 @@
 
     const TType *boolType  = StaticType::GetBasic<EbtBool>();
     const TType *floatType = StaticType::GetBasic<EbtFloat>();
-    TFunction *quadSwapHorizontalBool =
-        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
-                      SymbolType::AngleInternal, boolType, true);
-    TFunction *quadSwapHorizontalFloat =
-        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
-                      SymbolType::AngleInternal, floatType, true);
-    TFunction *quadSwapVerticalBool =
-        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
-                      SymbolType::AngleInternal, boolType, true);
-    TFunction *quadSwapVerticalFloat =
-        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
-                      SymbolType::AngleInternal, floatType, true);
-    TFunction *quadSwapDiagonalFloat =
-        new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
-                      SymbolType::AngleInternal, floatType, true);
 
-    quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
-    quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
-    quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-    quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
-    quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+    TIntermSymbol *lH;
+    TIntermSymbol *lV;
+    TIntermSymbol *lD;
 
-    // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
-    // outside `if`s so the non-helper thread is not turned inactive.
-    TIntermSymbol *lH =
-        GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
-    TIntermSymbol *lV =
-        GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType);
-    TIntermSymbol *lD =
-        GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType);
+    TIntermTyped *horizontalIsNonHelper;
+    TIntermTyped *verticalIsNonHelper;
 
-    // Get the value of gl_HelperInvocation from the neighbors too.
-    TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
-        symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
-    TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
-        symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
+    if (useSubgroupOps)
+    {
+        TFunction *quadSwapHorizontalBool =
+            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
+                          SymbolType::AngleInternal, boolType, true);
+        TFunction *quadSwapHorizontalFloat =
+            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
+                          SymbolType::AngleInternal, floatType, true);
+        TFunction *quadSwapVerticalBool =
+            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
+                          SymbolType::AngleInternal, boolType, true);
+        TFunction *quadSwapVerticalFloat =
+            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
+                          SymbolType::AngleInternal, floatType, true);
+        TFunction *quadSwapDiagonalFloat =
+            new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
+                          SymbolType::AngleInternal, floatType, true);
 
-    // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not
-    // enough to test if the neighbor is not a helper, we should also check if it's active.
-    TIntermTyped *horizontalIsNonHelper =
-        new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
-    TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
+        quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
+        quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
+        quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+        quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+        quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+
+        // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
+        // outside `if`s so the non-helper thread is not turned inactive.
+        lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
+        lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(),
+                                  floatType);
+        lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(),
+                                  floatType);
+
+        // Get the value of gl_HelperInvocation from the neighbors too.
+        TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
+            symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
+        TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
+            symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
+
+        // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
+        // not enough to test if the neighbor is not a helper, we should also check if it's active.
+        horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
+        verticalIsNonHelper   = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
+    }
+    else
+    {
+        TFunction *dFdxFineBool  = new TFunction(symbolTable, ImmutableString("dFdxFine"),
+                                                SymbolType::AngleInternal, boolType, true);
+        TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"),
+                                                 SymbolType::AngleInternal, floatType, true);
+        TFunction *dFdyFineBool  = new TFunction(symbolTable, ImmutableString("dFdyFine"),
+                                                SymbolType::AngleInternal, boolType, true);
+        TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"),
+                                                 SymbolType::AngleInternal, floatType, true);
+
+        dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
+        dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
+        dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+        dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
+
+        // layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer;
+        TIntermTyped *layerQuadSwapHelperValue =
+            new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l);
+        TIntermSymbol *layerQuadSwapHelper =
+            new TIntermSymbol(CreateTempVariable(symbolTable, floatType));
+        body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(),
+                                                            layerQuadSwapHelperValue));
+
+        // Get the layer from the horizontal, vertical and diagonal neighbor.  These should be done
+        // outside `if`s so the non-helper thread is not turned inactive.
+        lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType);
+        lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(),
+                                 floatType);
+        lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType);
+
+        // Get the value of gl_HelperInvocation from the neighbors too.
+        //
+        // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
+        // not enough to test if the neighbor is not a helper, we should also check if it's active.
+        horizontalIsNonHelper =
+            IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy());
+        verticalIsNonHelper =
+            IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy());
+    }
 
     TIntermTyped *lVD  = new TIntermTernary(verticalIsNonHelper, lV, lD);
     TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
@@ -163,10 +259,13 @@
 class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
 {
   public:
-    RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader)
+    RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable,
+                                             bool isFragmentShader,
+                                             bool useSubgroupOps)
         : TIntermTraverser(true, true, true, symbolTable),
           mCubeXYZToArrayUVL(nullptr),
           mIsFragmentShader(isFragmentShader),
+          mUseSubgroupOps(useSubgroupOps),
           mCoordTranslationFunctionDecl(nullptr)
     {}
 
@@ -543,14 +642,15 @@
         // incorrect and the wrong mip would be selected.
         //
         // We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
-        // operations to retrieve the layer from a non-helper invocation.  As a result, the UVs
-        // calculated for the helper invocations correspond to the same face and end up outside the
-        // [0, 1] range, but result in correct derivatives.  Indeed, sampling from any other kind of
-        // texture using varyings that range from [0, 1] would follow the same behavior (where
-        // helper invocations generate UVs out of range).
+        // (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a
+        // non-helper invocation.  As a result, the UVs calculated for the helper invocations
+        // correspond to the same face and end up outside the [0, 1] range, but result in correct
+        // derivatives.  Indeed, sampling from any other kind of texture using varyings that range
+        // from [0, 1] would follow the same behavior (where helper invocations generate UVs out of
+        // range).
         if (mIsFragmentShader)
         {
-            GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy());
+            GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps);
         }
 
         // layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
@@ -819,6 +919,7 @@
     TFunction *mCubeXYZToArrayUVL;
 
     bool mIsFragmentShader;
+    bool mUseSubgroupOps;
 
     // Stored to be put before the first function after the pass.
     TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
@@ -828,9 +929,11 @@
 
 void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
                                      TSymbolTable *symbolTable,
-                                     bool isFragmentShader)
+                                     bool isFragmentShader,
+                                     bool useSubgroupOps)
 {
-    RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader);
+    RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader,
+                                                       useSubgroupOps);
     root->traverse(&traverser);
     traverser.updateTree();
 
diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
index 0b58b09..028f526 100644
--- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
+++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h
@@ -19,7 +19,8 @@
 
 void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
                                      TSymbolTable *symbolTable,
-                                     bool isFragmentShader);
+                                     bool isFragmentShader,
+                                     bool useSubgroupOps);
 }  // namespace sh
 
 #endif  // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index 919e893..dde0d5d 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -238,6 +238,8 @@
       mClearColorMask(kAllColorChannelsMask),
       mFlipYForCurrentSurface(false),
       mIsAnyHostVisibleBufferWritten(false),
+      mEmulateSeamfulCubeMapSampling(false),
+      mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false),
       mLastCompletedQueueSerial(renderer->nextSerial()),
       mCurrentQueueSerial(renderer->nextSerial()),
       mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
@@ -441,7 +443,8 @@
         ANGLE_TRY(synchronizeCpuGpuTime());
     }
 
-    mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling();
+    mEmulateSeamfulCubeMapSampling =
+        shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps);
 
     return angle::Result::Continue;
 }
@@ -2896,9 +2899,10 @@
     return desc;
 }
 
-bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
+bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
 {
-    if (mState.getClientMajorVersion() != 2)
+    // Only allow seamful cube map sampling in non-WebGL ES2.
+    if (mState.getClientMajorVersion() != 2 || mState.isWebGL())
     {
         return false;
     }
@@ -2908,17 +2912,15 @@
         return false;
     }
 
+    // Use subgroup ops where available.
     constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
         VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
         VK_SUBGROUP_FEATURE_QUAD_BIT;
     const VkSubgroupFeatureFlags deviceSupportedOperations =
         mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations;
-    bool hasSeamfulCubeMapSubgroupOperations =
-        (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
-        kSeamfulCubeMapSubgroupOperations;
+    *useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
+                         kSeamfulCubeMapSubgroupOperations;
 
-    // Only enable seamful cube map emulation if the necessary subgroup operations are supported.
-    // Without them, we cannot remove derivative-related artifacts caused by helper invocations.
-    return hasSeamfulCubeMapSubgroupOperations;
+    return true;
 }
 }  // namespace rx
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h
index a6c4c83..dc607cd 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -313,7 +313,11 @@
 
     void updateScissor(const gl::State &glState);
 
-    bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; }
+    bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
+    {
+        *useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
+        return mEmulateSeamfulCubeMapSampling;
+    }
 
   private:
     // Dirty bits.
@@ -472,7 +476,7 @@
 
     void waitForSwapchainImageIfNecessary();
 
-    bool shouldEmulateSeamfulCubeMapSampling() const;
+    bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const;
 
     vk::PipelineHelper *mCurrentGraphicsPipeline;
     vk::PipelineAndSerial *mCurrentComputePipeline;
@@ -535,8 +539,10 @@
     // at the end of the command buffer to make that write available to the host.
     bool mIsAnyHostVisibleBufferWritten;
 
-    // Whether this context should do seamful cube map sampling emulation.
+    // Whether this context should do seamful cube map sampling emulation, and whether subgroup
+    // operations should be used.
     bool mEmulateSeamfulCubeMapSampling;
+    bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
 
     struct DriverUniformsDescriptorSet
     {
diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
index 14b5de6..ff8d160 100644
--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp
@@ -936,7 +936,7 @@
 angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
                                             const gl::Caps &glCaps,
                                             bool enableLineRasterEmulation,
-                                            bool enableSeamfulCubeMapEmulation,
+                                            bool enableSubgroupOps,
                                             const gl::ShaderMap<std::string> &shaderSources,
                                             gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@@ -956,20 +956,18 @@
                                                kVersionDefine, kLineRasterDefine),
                        VK_ERROR_INVALID_SHADER_NV);
 
-        return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources,
-                                 shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut);
     }
     else
     {
-        return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources,
-                                 shaderCodeOut);
+        return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut);
     }
 }
 
 // static
 angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
                                                 const gl::Caps &glCaps,
-                                                bool enableSeamfulCubeMapEmulation,
+                                                bool enableSubgroupOps,
                                                 const gl::ShaderMap<std::string> &shaderSources,
                                                 gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
 {
@@ -1005,9 +1003,9 @@
         glslang::TShader *shader = shaders[shaderType];
         shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
         shader->setEntryPoint("main");
-        if (enableSeamfulCubeMapEmulation)
+        if (enableSubgroupOps)
         {
-            // Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations.
+            // Enable SPIR-V 1.3 to be able to use subgroup operations.
             shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
         }
 
diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.h b/src/libANGLE/renderer/vulkan/GlslangWrapper.h
index d8afe62..719bde7 100644
--- a/src/libANGLE/renderer/vulkan/GlslangWrapper.h
+++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.h
@@ -29,14 +29,14 @@
     static angle::Result GetShaderCode(vk::Context *context,
                                        const gl::Caps &glCaps,
                                        bool enableLineRasterEmulation,
-                                       bool enableSeamfulCubeMapEmulation,
+                                       bool enableSubgroupOps,
                                        const gl::ShaderMap<std::string> &shaderSources,
                                        gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
 
   private:
     static angle::Result GetShaderCodeImpl(vk::Context *context,
                                            const gl::Caps &glCaps,
-                                           bool enableSeamfulCubeMapEmulation,
+                                           bool enableSubgroupOps,
                                            const gl::ShaderMap<std::string> &shaderSources,
                                            gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
 };
diff --git a/src/libANGLE/renderer/vulkan/ProgramVk.cpp b/src/libANGLE/renderer/vulkan/ProgramVk.cpp
index c1a8474..d9ee7d9 100644
--- a/src/libANGLE/renderer/vulkan/ProgramVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ProgramVk.cpp
@@ -304,10 +304,16 @@
 {
     ASSERT(!valid());
 
+    bool useSubgroupOpsWithSeamfulCubeMapEmulation = false;
+    bool emulateSeamfulCubeMapSampling =
+        contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation);
+    bool useSubgroupOps =
+        emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation;
+
     gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
-    ANGLE_TRY(GlslangWrapper::GetShaderCode(
-        contextVk, contextVk->getCaps(), enableLineRasterEmulation,
-        contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes));
+    ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(),
+                                            enableLineRasterEmulation, useSubgroupOps,
+                                            shaderSources, &shaderCodes));
 
     for (const gl::ShaderType shaderType : gl::AllShaderTypes())
     {
@@ -1437,7 +1443,8 @@
 
     const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
 
-    bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
+    bool useSubgroupOps                = false;
+    bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps);
 
     for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size();
          ++textureIndex)
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index f09b291..0d60b1e 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -1279,6 +1279,7 @@
 
     if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID))
     {
+        // Disabled on AMD/Windows due to buggy behavior.
         mFeatures.disallowSeamfulCubeMapEmulation.enabled = true;
     }
 
diff --git a/src/libANGLE/renderer/vulkan/ShaderVk.cpp b/src/libANGLE/renderer/vulkan/ShaderVk.cpp
index 11b3f88..cf59679 100644
--- a/src/libANGLE/renderer/vulkan/ShaderVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ShaderVk.cpp
@@ -40,9 +40,17 @@
         compileOptions |= SH_CLAMP_POINT_SIZE;
     }
 
-    if (contextVk->emulateSeamfulCubeMapSampling())
+    bool useSubgroupOps = false;
+    if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps))
     {
-        compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
+        if (useSubgroupOps)
+        {
+            compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP;
+        }
+        else
+        {
+            compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
+        }
     }
 
     return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options);
diff --git a/src/tests/deqp_support/deqp_gles2_test_expectations.txt b/src/tests/deqp_support/deqp_gles2_test_expectations.txt
index 2759524..9c37b8b 100644
--- a/src/tests/deqp_support/deqp_gles2_test_expectations.txt
+++ b/src/tests/deqp_support/deqp_gles2_test_expectations.txt
@@ -329,25 +329,6 @@
 3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL
 3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL
 
-// Seamful cubemap sampling failures on Android (due to missing support subgroupQuad* operations).
-3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
-3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
-
 // These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors.
 3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
 3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL