//
// Copyright 2022 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#include "compiler/translator/tree_ops/RewritePixelLocalStorage.h"
#include "common/angleutils.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h"
#include "compiler/translator/tree_util/BuiltIn.h"
#include "compiler/translator/tree_util/FindMain.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
namespace sh
{
namespace
{
constexpr static TBasicType DataTypeOfPLSType(TBasicType plsType)
{
switch (plsType)
{
case EbtPixelLocalANGLE:
return EbtFloat;
case EbtIPixelLocalANGLE:
return EbtInt;
case EbtUPixelLocalANGLE:
return EbtUInt;
default:
UNREACHABLE();
return EbtVoid;
}
}
constexpr static TBasicType DataTypeOfImageType(TBasicType imageType)
{
switch (imageType)
{
case EbtImage2D:
return EbtFloat;
case EbtIImage2D:
return EbtInt;
case EbtUImage2D:
return EbtUInt;
default:
UNREACHABLE();
return EbtVoid;
}
}
// Maps PLS symbols to a backing store.
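// Usage sketch (illustrative; mirrors how the traversers below use this map):
//
//   PLSBackingStoreMap<TVariable *> images;
//   images.insertNew(plsSymbol, image2D);       // once, at the PLS declaration
//   TVariable *img = images.find(plsSymbol);    // at each pixelLocalLoad/StoreANGLE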
template <typename T>
class PLSBackingStoreMap
{
public:
// Associates the given backing store with the plsSymbol's binding point. An entry must not
// already exist in the map for this binding point.
void insertNew(TIntermSymbol *plsSymbol, const T &backingStore)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
int binding = plsSymbol->getType().getLayoutQualifier().binding;
ASSERT(binding >= 0);
auto result = mMap.insert({binding, backingStore});
ASSERT(result.second); // Ensure a backing store didn't already exist for this binding point.
}
// Looks up the backing store for the given plsSymbol's binding point. An entry must already
// exist in the map for this binding point.
const T &find(TIntermSymbol *plsSymbol)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
int binding = plsSymbol->getType().getLayoutQualifier().binding;
ASSERT(binding >= 0);
auto iter = mMap.find(binding);
ASSERT(iter != mMap.end()); // Ensure a backing store already exists for this binding point.
return iter->second;
}
const std::map<int, T> &bindingOrderedMap() const { return mMap; }
private:
// Use std::map so the backing stores are ordered by binding when we iterate.
std::map<int, T> mMap;
};
// Base class for rewriting high-level PLS operations to AST operations specified by
// ShPixelLocalStorageType.
class RewritePLSTraverser : public TIntermTraverser
{
public:
RewritePLSTraverser(TCompiler *compiler,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
int shaderVersion)
: TIntermTraverser(true, false, false, &symbolTable),
mCompiler(compiler),
mCompileOptions(&compileOptions),
mShaderVersion(shaderVersion)
{}
bool visitDeclaration(Visit, TIntermDeclaration *decl) override
{
TIntermTyped *declVariable = (decl->getSequence())->front()->getAsTyped();
ASSERT(declVariable);
if (!IsPixelLocal(declVariable->getBasicType()))
{
return true;
}
// PLS is not allowed in arrays.
ASSERT(!declVariable->isArray());
// This visitDeclaration doesn't get called for function arguments, and opaque types can
// otherwise only be uniforms.
ASSERT(declVariable->getQualifier() == EvqUniform);
TIntermSymbol *plsSymbol = declVariable->getAsSymbolNode();
ASSERT(plsSymbol);
visitPLSDeclaration(plsSymbol);
return false;
}
bool visitAggregate(Visit, TIntermAggregate *aggregate) override
{
if (!BuiltInGroup::IsPixelLocal(aggregate->getOp()))
{
return true;
}
const TIntermSequence &args = *aggregate->getSequence();
ASSERT(args.size() >= 1);
TIntermSymbol *plsSymbol = args[0]->getAsSymbolNode();
// Rewrite pixelLocalLoadANGLE (the subclass determines the replacement, e.g., imageLoad).
if (aggregate->getOp() == EOpPixelLocalLoadANGLE)
{
visitPLSLoad(plsSymbol);
return false; // No need to recurse since this node is being dropped.
}
// Rewrite pixelLocalStoreANGLE (the subclass determines the replacement, e.g., imageStore).
if (aggregate->getOp() == EOpPixelLocalStoreANGLE)
{
// Also hoist the 'value' expression into a temp. In the event of
// "pixelLocalStoreANGLE(..., pixelLocalLoadANGLE(...))", this ensures the load occurs
// _before_ any potential barriers required by the subclass.
//
// NOTE: It is generally unsafe to hoist function arguments due to short circuiting,
// e.g., "if (false && function(...))", but pixelLocalStoreANGLE returns type void, so
// it is safe in this particular case.
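// Illustrative sketch of the hoist (temp name is hypothetical):
//
//   pixelLocalStoreANGLE(pls, pixelLocalLoadANGLE(pls2) + 1.0);
//
// becomes, roughly:
//
//   highp vec4 _value = <rewritten pixelLocalLoadANGLE(pls2)> + 1.0;
//   <any barriers required by the subclass>
//   <rewritten store of _value to pls>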
TType *valueType = new TType(DataTypeOfPLSType(plsSymbol->getBasicType()),
plsSymbol->getPrecision(), EvqTemporary, 4);
TVariable *valueVar = CreateTempVariable(mSymbolTable, valueType);
TIntermDeclaration *valueDecl =
CreateTempInitDeclarationNode(valueVar, args[1]->getAsTyped());
valueDecl->traverse(this); // Rewrite any potential pixelLocalLoadANGLEs in valueDecl.
insertStatementInParentBlock(valueDecl);
visitPLSStore(plsSymbol, valueVar);
return false; // No need to recurse since this node is being dropped.
}
return true;
}
// Called after rewrite. Injects one-time setup code that needs to run before any PLS accesses.
virtual void injectPrePLSCode(TCompiler *,
TSymbolTable &,
const ShCompileOptions &,
TIntermBlock *mainBody,
size_t plsBeginPosition)
{}
// Called after rewrite. Injects one-time finalization code that needs to run after all PLS
// accesses.
virtual void injectPostPLSCode(TCompiler *,
TSymbolTable &,
const ShCompileOptions &,
TIntermBlock *mainBody,
size_t plsEndPosition)
{}
// Called after all other operations have completed.
void injectPixelCoordInitializationCodeIfNeeded(TCompiler *compiler,
TIntermBlock *root,
TIntermBlock *mainBody)
{
if (mGlobalPixelCoord)
{
// Initialize the global pixel coord at the beginning of main():
//
// pixelCoord = ivec2(floor(gl_FragCoord.xy));
//
TIntermTyped *exp;
exp = ReferenceBuiltInVariable(ImmutableString("gl_FragCoord"), *mSymbolTable,
mShaderVersion);
exp = CreateSwizzle(exp, 0, 1);
exp = CreateBuiltInFunctionCallNode("floor", {exp}, *mSymbolTable, mShaderVersion);
exp = TIntermAggregate::CreateConstructor(TType(EbtInt, 2), {exp});
exp = CreateTempAssignmentNode(mGlobalPixelCoord, exp);
mainBody->insertStatement(0, exp);
}
}
protected:
virtual void visitPLSDeclaration(TIntermSymbol *plsSymbol) = 0;
virtual void visitPLSLoad(TIntermSymbol *plsSymbol) = 0;
virtual void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) = 0;
// Inserts a global to hold the pixel coordinate as soon as we see PLS declared. This will be
// initialized at the beginning of main().
void ensureGlobalPixelCoordDeclared()
{
if (!mGlobalPixelCoord)
{
TType *coordType = new TType(EbtInt, EbpHigh, EvqGlobal, 2);
mGlobalPixelCoord = CreateTempVariable(mSymbolTable, coordType);
insertStatementInParentBlock(CreateTempDeclarationNode(mGlobalPixelCoord));
}
}
// anglebug.com/42265993: Storing to integer formats with larger-than-representable values has
// different behavior on the various APIs.
//
// This method clamps sub-32-bit integers to the min/max representable values of their format.
void clampPLSVarIfNeeded(TVariable *plsVar, TLayoutImageInternalFormat plsFormat)
{
switch (plsFormat)
{
case EiifRGBA8I:
{
// Clamp r,g,b,a to their min/max 8-bit values:
//
// plsVar = clamp(plsVar, -128, 127)
//
TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
"clamp",
{new TIntermSymbol(plsVar), CreateIndexNode(-128), CreateIndexNode(127)},
*mSymbolTable, mShaderVersion);
insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
break;
}
case EiifRGBA8UI:
{
// Clamp r,g,b,a to their max 8-bit values:
//
// plsVar = min(plsVar, 255)
//
TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
"min", {new TIntermSymbol(plsVar), CreateUIntNode(255)}, *mSymbolTable,
mShaderVersion);
insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
break;
}
default:
break;
}
}
// Expands an expression to 4 components, filling in the missing components with [0, 0, 0, 1].
static TIntermTyped *Expand(TIntermTyped *expr)
{
const TType &type = expr->getType();
ASSERT(type.getNominalSize() == 1 || type.getNominalSize() == 4);
if (type.getNominalSize() == 1)
{
switch (type.getBasicType())
{
case EbtFloat:
expr = TIntermAggregate::CreateConstructor( // "vec4(r, 0, 0, 1)"
TType(EbtFloat, 4),
{expr, CreateFloatNode(0, EbpLow), CreateFloatNode(0, EbpLow),
CreateFloatNode(1, EbpLow)});
break;
case EbtInt:
expr = TIntermAggregate::CreateConstructor( // "ivec4(r, 0, 0, 1)"
TType(EbtInt, 4),
{expr, CreateIndexNode(0), CreateIndexNode(0), CreateIndexNode(1)});
break;
case EbtUInt:
expr = TIntermAggregate::CreateConstructor( // "uvec4(r, 0, 0, 1)"
TType(EbtUInt, 4),
{expr, CreateUIntNode(0), CreateUIntNode(0), CreateUIntNode(1)});
break;
default:
UNREACHABLE();
break;
}
}
return expr;
}
static TIntermTyped *Expand(TVariable *var) { return Expand(new TIntermSymbol(var)); }
// Returns an expression that swizzles a variable down to 'n' components.
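// e.g., Swizzle(var, 2) on a 4-component variable yields "var.rg"; if the variable already has
// exactly 'n' components, it is returned unswizzled.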
static TIntermTyped *Swizzle(TVariable *var, int n)
{
TIntermTyped *swizzled = new TIntermSymbol(var);
if (var->getType().getNominalSize() != n)
{
ASSERT(var->getType().getNominalSize() > n);
TVector<uint32_t> swizzleOffsets{0, 1, 2, 3};
swizzleOffsets.resize(n);
swizzled = new TIntermSwizzle(swizzled, swizzleOffsets);
}
return swizzled;
}
const TCompiler *const mCompiler;
const ShCompileOptions *const mCompileOptions;
const int mShaderVersion;
// Stores the shader invocation's pixel coordinate as "ivec2(floor(gl_FragCoord.xy))".
TVariable *mGlobalPixelCoord = nullptr;
};
// Rewrites high-level PLS operations to shader image operations.
class RewritePLSToImagesTraverser : public RewritePLSTraverser
{
public:
RewritePLSToImagesTraverser(TCompiler *compiler,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
int shaderVersion)
: RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion)
{}
private:
void visitPLSDeclaration(TIntermSymbol *plsSymbol) override
{
// Replace the PLS declaration with an image2D.
ensureGlobalPixelCoordDeclared();
TVariable *image2D = createPLSImageReplacement(plsSymbol);
mImages.insertNew(plsSymbol, image2D);
queueReplacement(new TIntermDeclaration({new TIntermSymbol(image2D)}),
OriginalNode::IS_DROPPED);
}
// Creates an image2D that replaces a pixel local storage handle.
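// Illustrative sketch (assuming native rgba8 image formats are NOT supported, so r32* packing is
// used; the binding carries over from the PLS declaration):
//
//   layout(binding=1, rgba8i) uniform lowp ipixelLocalANGLE pls;
//
// becomes, roughly:
//
//   layout(binding=1, r32i) uniform highp coherent restrict iimage2D pls;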
TVariable *createPLSImageReplacement(const TIntermSymbol *plsSymbol)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
TType *imageType = new TType(plsSymbol->getType());
TLayoutQualifier imageLayoutQualifier = imageType->getLayoutQualifier();
switch (imageLayoutQualifier.imageInternalFormat)
{
case TLayoutImageInternalFormat::EiifRGBA8:
if (!mCompileOptions->pls.supportsNativeRGBA8ImageFormats)
{
imageLayoutQualifier.imageInternalFormat = EiifR32UI;
imageType->setPrecision(EbpHigh);
imageType->setBasicType(EbtUImage2D);
}
else
{
imageType->setBasicType(EbtImage2D);
}
break;
case TLayoutImageInternalFormat::EiifRGBA8I:
if (!mCompileOptions->pls.supportsNativeRGBA8ImageFormats)
{
imageLayoutQualifier.imageInternalFormat = EiifR32I;
imageType->setPrecision(EbpHigh);
}
imageType->setBasicType(EbtIImage2D);
break;
case TLayoutImageInternalFormat::EiifRGBA8UI:
if (!mCompileOptions->pls.supportsNativeRGBA8ImageFormats)
{
imageLayoutQualifier.imageInternalFormat = EiifR32UI;
imageType->setPrecision(EbpHigh);
}
imageType->setBasicType(EbtUImage2D);
break;
case TLayoutImageInternalFormat::EiifR32F:
imageType->setBasicType(EbtImage2D);
break;
case TLayoutImageInternalFormat::EiifR32I:
imageType->setBasicType(EbtIImage2D);
break;
case TLayoutImageInternalFormat::EiifR32UI:
imageType->setBasicType(EbtUImage2D);
break;
default:
UNREACHABLE();
}
ASSERT(mCompileOptions->pls.fragmentSyncType !=
ShFragmentSynchronizationType::NotSupported ||
mCompileOptions->pls.supportsNoncoherent);
const bool wantsNoncoherent = mCompileOptions->pls.supportsNoncoherent &&
plsSymbol->getType().getLayoutQualifier().noncoherent;
const bool supportsRasterOrderQualifier =
mCompileOptions->pls.fragmentSyncType ==
ShFragmentSynchronizationType::RasterizerOrderViews_D3D ||
mCompileOptions->pls.fragmentSyncType ==
ShFragmentSynchronizationType::RasterOrderGroups_Metal;
// Clear the noncoherent qualifier for the image, in case it got copied over from the PLS
// variable. (noncoherent is not valid for images.)
imageLayoutQualifier.noncoherent = false;
imageLayoutQualifier.rasterOrdered = !wantsNoncoherent && supportsRasterOrderQualifier;
if (!wantsNoncoherent)
{
mAllPLSVarsNoncoherent = false;
}
imageType->setLayoutQualifier(imageLayoutQualifier);
TMemoryQualifier memoryQualifier{};
memoryQualifier.coherent = true;
memoryQualifier.restrictQualifier = true;
memoryQualifier.volatileQualifier = false;
// TODO(anglebug.com/40096838): Maybe we could walk the tree first and see how each PLS plane
// is used. If a plane is never loaded, we could add a writeonly qualifier, for example.
memoryQualifier.readonly = false;
memoryQualifier.writeonly = false;
imageType->setMemoryQualifier(memoryQualifier);
const TVariable &plsVar = plsSymbol->variable();
return new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(),
plsVar.extensions(), imageType);
}
void visitPLSLoad(TIntermSymbol *plsSymbol) override
{
// Replace the pixelLocalLoadANGLE with imageLoad.
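// Illustrative sketch, for an rgba8 plane packed as r32ui ("pixelCoord" stands for the global
// pixel coordinate temporary):
//
//   pixelLocalLoadANGLE(pls)  ->  unpackUnorm4x8(imageLoad(pls, pixelCoord).r)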
TVariable *image2D = mImages.find(plsSymbol);
ASSERT(mGlobalPixelCoord);
TIntermTyped *pls = CreateBuiltInFunctionCallNode(
"imageLoad", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord)},
*mSymbolTable, 310);
pls = unpackImageDataIfNecessary(pls, plsSymbol, image2D);
queueReplacement(pls, OriginalNode::IS_DROPPED);
}
// Unpacks the raw PLS data if the output shader language needs r32* packing.
TIntermTyped *unpackImageDataIfNecessary(TIntermTyped *data,
TIntermSymbol *plsSymbol,
TVariable *image2D)
{
TLayoutImageInternalFormat plsFormat =
plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
TLayoutImageInternalFormat imageFormat =
image2D->getType().getLayoutQualifier().imageInternalFormat;
if (plsFormat == imageFormat)
{
return data; // This PLS storage isn't packed.
}
switch (plsFormat)
{
case EiifRGBA8:
ASSERT(!mCompileOptions->pls.supportsNativeRGBA8ImageFormats);
// Unpack and normalize r,g,b,a from a single 32-bit unsigned int:
//
// unpackUnorm4x8(data.r)
//
data = CreateBuiltInFunctionCallNode("unpackUnorm4x8", {CreateSwizzle(data, 0)},
*mSymbolTable, 310);
break;
case EiifRGBA8I:
case EiifRGBA8UI:
{
ASSERT(!mCompileOptions->pls.supportsNativeRGBA8ImageFormats);
constexpr unsigned shifts[] = {24, 16, 8, 0};
// Unpack r,g,b,a from a single (signed or unsigned) 32-bit int. Shift left,
// then right, to preserve the sign for ints. (highp integers are exactly
// 32-bit, two's complement.)
//
// data.rrrr << uvec4(24, 16, 8, 0) >> 24u
//
data = CreateSwizzle(data, 0, 0, 0, 0);
data = new TIntermBinary(EOpBitShiftLeft, data, CreateUVecNode(shifts, 4, EbpLow));
data = new TIntermBinary(EOpBitShiftRight, data, CreateUIntNode(24));
break;
}
default:
UNREACHABLE();
}
return data;
}
void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override
{
TVariable *image2D = mImages.find(plsSymbol);
TIntermTyped *packedData = clampAndPackPLSDataIfNecessary(value, plsSymbol, image2D);
// Surround the store with memoryBarrierImage calls in order to ensure dependent stores and
// loads in a single shader invocation are coherent. From the ES 3.1 spec:
//
// Using variables declared as "coherent" guarantees only that the results of stores will
// be immediately visible to shader invocations using similarly-declared variables;
// calling MemoryBarrier is required to ensure that the stores are visible to other
// operations.
//
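// The emitted sequence is, roughly:
//
//   memoryBarrierImage();
//   imageStore(pls, pixelCoord, <packed data>);
//   memoryBarrierImage();
//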
insertStatementsInParentBlock(
{CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
310)}, // Before.
{CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
310)}); // After.
// Rewrite the pixelLocalStoreANGLE with imageStore.
ASSERT(mGlobalPixelCoord);
queueReplacement(
CreateBuiltInFunctionCallNode(
"imageStore",
{new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord), packedData},
*mSymbolTable, 310),
OriginalNode::IS_DROPPED);
}
// Clamps and packs the PLS value into raw data if the output shader language needs r32* packing.
TIntermTyped *clampAndPackPLSDataIfNecessary(TVariable *plsVar,
TIntermSymbol *plsSymbol,
TVariable *image2D)
{
TLayoutImageInternalFormat plsFormat =
plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
clampPLSVarIfNeeded(plsVar, plsFormat);
TIntermTyped *result = new TIntermSymbol(plsVar);
TLayoutImageInternalFormat imageFormat =
image2D->getType().getLayoutQualifier().imageInternalFormat;
if (plsFormat == imageFormat)
{
return result; // This PLS storage isn't packed.
}
switch (plsFormat)
{
case EiifRGBA8:
{
ASSERT(!mCompileOptions->pls.supportsNativeRGBA8ImageFormats);
if (mCompileOptions->passHighpToPackUnormSnormBuiltins)
{
// anglebug.com/42265995: packUnorm4x8 doesn't work on Pixel 4 when passed
// a mediump vec4. Use an intermediate highp vec4.
//
// It's safe to inject a variable here because it happens right before
// pixelLocalStoreANGLE, which returns type void. (See visitAggregate.)
TType *highpType = new TType(EbtFloat, EbpHigh, EvqTemporary, 4);
TVariable *workaroundHighpVar = CreateTempVariable(mSymbolTable, highpType);
insertStatementInParentBlock(
CreateTempInitDeclarationNode(workaroundHighpVar, result));
result = new TIntermSymbol(workaroundHighpVar);
}
// Denormalize and pack r,g,b,a into a single 32-bit unsigned int:
//
// packUnorm4x8(workaroundHighpVar)
//
result =
CreateBuiltInFunctionCallNode("packUnorm4x8", {result}, *mSymbolTable, 310);
break;
}
case EiifRGBA8I:
case EiifRGBA8UI:
{
ASSERT(!mCompileOptions->pls.supportsNativeRGBA8ImageFormats);
if (plsFormat == EiifRGBA8I)
{
// Mask off extra sign bits beyond 8.
//
// plsVar &= 0xff
//
insertStatementInParentBlock(new TIntermBinary(
EOpBitwiseAndAssign, new TIntermSymbol(plsVar), CreateIndexNode(0xff)));
}
// Pack r,g,b,a into a single 32-bit (signed or unsigned) int:
//
// r | (g << 8) | (b << 16) | (a << 24)
//
// Note that this calculation needs to be done in highp, which works out because
// |CreateUIntNode| returns a highp constant.
//
auto shiftComponent = [=](int componentIdx) {
return new TIntermBinary(EOpBitShiftLeft,
CreateSwizzle(new TIntermSymbol(plsVar), componentIdx),
CreateUIntNode(componentIdx * 8));
};
result = CreateSwizzle(result, 0);
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(1));
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(2));
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(3));
break;
}
default:
UNREACHABLE();
}
// Convert the packed data to a {u,i}vec4 for imageStore.
TType imageStoreType(DataTypeOfImageType(image2D->getType().getBasicType()), 4);
return TIntermAggregate::CreateConstructor(imageStoreType, {result});
}
void injectPrePLSCode(TCompiler *compiler,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
TIntermBlock *mainBody,
size_t plsBeginPosition) override
{
// When PLS is implemented with images, the early_fragment_tests layout qualifier ensures
// that depth/stencil tests can also block stores to PLS.
compiler->specifyEarlyFragmentTests();
if (!mAllPLSVarsNoncoherent)
{
// Delimit the beginning of a per-pixel critical section, if supported. This makes pixel
// local storage coherent.
//
// Either: GL_NV_fragment_shader_interlock
// GL_INTEL_fragment_shader_ordering
// GL_ARB_fragment_shader_interlock (may compile to
// SPV_EXT_fragment_shader_interlock)
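// e.g., with GL_ARB_fragment_shader_interlock, main() ends up looking roughly like:
//
//   void main()
//   {
//       beginInvocationInterlockARB();
//       ... PLS accesses ...
//       endInvocationInterlockARB();   // injected by injectPostPLSCode()
//   }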
switch (compileOptions.pls.fragmentSyncType)
{
// Raster ordered resources don't need explicit synchronization calls.
case ShFragmentSynchronizationType::RasterizerOrderViews_D3D:
case ShFragmentSynchronizationType::RasterOrderGroups_Metal:
case ShFragmentSynchronizationType::NotSupported:
break;
case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
mainBody->insertStatement(
plsBeginPosition,
CreateBuiltInFunctionCallNode("beginInvocationInterlockNV", {}, symbolTable,
kESSLInternalBackendBuiltIns));
break;
case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
mainBody->insertStatement(
plsBeginPosition,
CreateBuiltInFunctionCallNode("beginFragmentShaderOrderingINTEL", {},
symbolTable, kESSLInternalBackendBuiltIns));
break;
case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
mainBody->insertStatement(
plsBeginPosition,
CreateBuiltInFunctionCallNode("beginInvocationInterlockARB", {},
symbolTable, kESSLInternalBackendBuiltIns));
break;
default:
UNREACHABLE();
}
}
}
void injectPostPLSCode(TCompiler *,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
TIntermBlock *mainBody,
size_t plsEndPosition) override
{
if (!mAllPLSVarsNoncoherent)
{
// Delimit the end of the PLS critical section, if required.
//
// Either: GL_NV_fragment_shader_interlock
// GL_ARB_fragment_shader_interlock (may compile to
// SPV_EXT_fragment_shader_interlock)
switch (compileOptions.pls.fragmentSyncType)
{
// Raster ordered resources don't need explicit synchronization calls.
case ShFragmentSynchronizationType::RasterizerOrderViews_D3D:
case ShFragmentSynchronizationType::RasterOrderGroups_Metal:
// GL_INTEL_fragment_shader_ordering doesn't have an "end()" call.
case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
case ShFragmentSynchronizationType::NotSupported:
break;
case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
mainBody->insertStatement(
plsEndPosition,
CreateBuiltInFunctionCallNode("endInvocationInterlockNV", {}, symbolTable,
kESSLInternalBackendBuiltIns));
break;
case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
mainBody->insertStatement(
plsEndPosition,
CreateBuiltInFunctionCallNode("endInvocationInterlockARB", {}, symbolTable,
kESSLInternalBackendBuiltIns));
break;
default:
UNREACHABLE();
}
}
}
PLSBackingStoreMap<TVariable *> mImages;
bool mAllPLSVarsNoncoherent = true;
};
// Rewrites high-level PLS operations to framebuffer fetch operations.
class RewritePLSToFramebufferFetchTraverser : public RewritePLSTraverser
{
public:
RewritePLSToFramebufferFetchTraverser(TCompiler *compiler,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
int shaderVersion)
: RewritePLSTraverser(compiler, symbolTable, compileOptions, shaderVersion)
{}
void visitPLSDeclaration(TIntermSymbol *plsSymbol) override
{
// Replace the PLS declaration with a framebuffer attachment.
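// Illustrative sketch (the access variable's name is hypothetical; the actual location is
// MaxCombinedDrawBuffersAndPixelLocalStoragePlanes - binding - 1, and noncoherent is only
// added when applicable):
//
//   layout(binding=0, rgba8) uniform lowp pixelLocalANGLE pls;
//
// becomes, roughly:
//
//   layout(location=N, noncoherent) inout lowp vec4 pls;   // fragment attachment
//   lowp vec4 _plsAccess;                                   // temporary access variable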
PLSAttachment attachment(mCompiler, mSymbolTable, *mCompileOptions, plsSymbol->variable());
mPLSAttachments.insertNew(plsSymbol, attachment);
insertStatementInParentBlock(
new TIntermDeclaration({new TIntermSymbol(attachment.fragmentVar)}));
queueReplacement(CreateTempDeclarationNode(attachment.accessVar), OriginalNode::IS_DROPPED);
}
void visitPLSLoad(TIntermSymbol *plsSymbol) override
{
// Read our temporary accessVar.
const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol);
queueReplacement(Expand(attachment.accessVar), OriginalNode::IS_DROPPED);
}
void visitPLSStore(TIntermSymbol *plsSymbol, TVariable *value) override
{
// Set our temporary accessVar.
const PLSAttachment &attachment = mPLSAttachments.find(plsSymbol);
queueReplacement(CreateTempAssignmentNode(attachment.accessVar, attachment.swizzle(value)),
OriginalNode::IS_DROPPED);
}
void injectPrePLSCode(TCompiler *compiler,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
TIntermBlock *mainBody,
size_t plsBeginPosition) override
{
// [OpenGL ES Version 3.0.6, 3.9.2.3 "Shader Output"]: Any colors, or color components,
// associated with a fragment that are not written by the fragment shader are undefined.
//
// [EXT_shader_framebuffer_fetch]: Prior to fragment shading, fragment outputs declared
// inout are populated with the value last written to the framebuffer at the same (x, y,
// sample) position.
//
// It's unclear from the EXT_shader_framebuffer_fetch spec whether inout fragment variables
// become undefined if not explicitly written, but either way, when this compiles to subpass
// loads in Vulkan, we definitely get undefined behavior if PLS variables are not written.
//
// To make sure every PLS variable gets written, we read them all before PLS operations,
// then write them all back out after all PLS is complete.
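// Illustrative sketch of the injected code (names are hypothetical; values are swizzled when the
// PLS format has fewer than 4 components):
//
//   void main()
//   {
//       _plsAccess0 = pls0;   // preload
//       ...original body with PLS ops rewritten...
//       pls0 = _plsAccess0;   // write back out
//   }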
TIntermSequence plsPreloads;
plsPreloads.reserve(mPLSAttachments.bindingOrderedMap().size());
for (const auto &entry : mPLSAttachments.bindingOrderedMap())
{
const PLSAttachment &attachment = entry.second;
plsPreloads.push_back(
CreateTempAssignmentNode(attachment.accessVar, attachment.swizzleFragmentVar()));
}
mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsBeginPosition,
plsPreloads.begin(), plsPreloads.end());
}
void injectPostPLSCode(TCompiler *,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
TIntermBlock *mainBody,
size_t plsEndPosition) override
{
TIntermSequence plsWrites;
plsWrites.reserve(mPLSAttachments.bindingOrderedMap().size());
for (const auto &entry : mPLSAttachments.bindingOrderedMap())
{
const PLSAttachment &attachment = entry.second;
plsWrites.push_back(new TIntermBinary(EOpAssign, attachment.swizzleFragmentVar(),
new TIntermSymbol(attachment.accessVar)));
}
mainBody->getSequence()->insert(mainBody->getSequence()->begin() + plsEndPosition,
plsWrites.begin(), plsWrites.end());
}
private:
struct PLSAttachment
{
PLSAttachment(const TCompiler *compiler,
TSymbolTable *symbolTable,
const ShCompileOptions &compileOptions,
const TVariable &plsVar)
{
const TType &plsType = plsVar.getType();
TType *accessVarType;
switch (plsType.getLayoutQualifier().imageInternalFormat)
{
default:
UNREACHABLE();
[[fallthrough]];
case EiifRGBA8:
accessVarType = new TType(EbtFloat, 4);
break;
case EiifRGBA8I:
accessVarType = new TType(EbtInt, 4);
break;
case EiifRGBA8UI:
accessVarType = new TType(EbtUInt, 4);
break;
case EiifR32F:
accessVarType = new TType(EbtFloat, 1);
break;
case EiifR32I:
accessVarType = new TType(EbtInt, 1);
break;
case EiifR32UI:
accessVarType = new TType(EbtUInt, 1);
break;
}
accessVarType->setPrecision(plsType.getPrecision());
accessVar = CreateTempVariable(symbolTable, accessVarType);
// Qualcomm seems to want fragment outputs to be 4-component vectors, and produces a
// compile error for "inout uint". Our Metal translator likewise saturates color outputs to
// 4 components. Since the spec seems silent on how many components an output must have,
// we always use 4.
TType *fragmentVarType = new TType(accessVarType->getBasicType(), 4);
fragmentVarType->setPrecision(plsType.getPrecision());
fragmentVarType->setQualifier(EvqFragmentInOut);
// PLS attachments are bound in reverse order from the rear.
TLayoutQualifier layoutQualifier = TLayoutQualifier::Create();
layoutQualifier.location =
compiler->getResources().MaxCombinedDrawBuffersAndPixelLocalStoragePlanes -
plsType.getLayoutQualifier().binding - 1;
layoutQualifier.locationsSpecified = 1;
ASSERT(compileOptions.pls.fragmentSyncType !=
ShFragmentSynchronizationType::NotSupported ||
compileOptions.pls.supportsNoncoherent);
if (compileOptions.pls.supportsNoncoherent)
{
// EXT_shader_framebuffer_fetch_non_coherent requires the "noncoherent" qualifier if
// the coherent version of the extension isn't supported. The extension also allows
// us to opt into noncoherent accesses when PLS planes are specified as noncoherent
// by the shader.
//
// (Without EXT_shader_framebuffer_fetch_non_coherent, we just ignore the
// noncoherent qualifier, which is a perfectly valid implementation of the PLS
// spec.)
layoutQualifier.noncoherent = plsType.getLayoutQualifier().noncoherent ||
compileOptions.pls.fragmentSyncType ==
ShFragmentSynchronizationType::NotSupported;
}
fragmentVarType->setLayoutQualifier(layoutQualifier);
fragmentVar = new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(),
plsVar.extensions(), fragmentVarType);
}
// Swizzles a variable down to the same number of components as the PLS internalformat.
TIntermTyped *swizzle(TVariable *var) const
{
return Swizzle(var, accessVar->getType().getNominalSize());
}
TIntermTyped *swizzleFragmentVar() const { return swizzle(fragmentVar); }
TVariable *fragmentVar;
TVariable *accessVar;
};
PLSBackingStoreMap<PLSAttachment> mPLSAttachments;
};
} // anonymous namespace
bool RewritePixelLocalStorage(TCompiler *compiler,
TIntermBlock *root,
TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
int shaderVersion)
{
// If any functions take PLS arguments, monomorphize the functions by removing said parameters
// and making the PLS calls from main() instead, using the global uniform from the call site
// instead of the function argument. This is necessary because function arguments don't carry
// the necessary "binding" or "format" layout qualifiers.
if (!MonomorphizeUnsupportedFunctions(
compiler, root, &symbolTable,
UnsupportedFunctionArgsBitSet{UnsupportedFunctionArgs::PixelLocalStorage}))
{
return false;
}
TIntermBlock *mainBody = FindMainBody(root);
std::unique_ptr<RewritePLSTraverser> traverser;
switch (compileOptions.pls.type)
{
case ShPixelLocalStorageType::ImageLoadStore:
traverser = std::make_unique<RewritePLSToImagesTraverser>(
compiler, symbolTable, compileOptions, shaderVersion);
break;
case ShPixelLocalStorageType::FramebufferFetch:
traverser = std::make_unique<RewritePLSToFramebufferFetchTraverser>(
compiler, symbolTable, compileOptions, shaderVersion);
break;
case ShPixelLocalStorageType::NotSupported:
UNREACHABLE();
return false;
}
// Rewrite PLS operations.
root->traverse(traverser.get());
if (!traverser->updateTree(compiler, root))
{
return false;
}
// Inject the code that needs to run before and after all PLS operations.
// TODO(anglebug.com/40096838): Inject these functions in a tight critical section, instead of
// just locking the entire main() function:
// - Monomorphize all PLS calls into main().
// - Insert begin/end calls around the first/last PLS calls (and outside of flow control).
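// After all injections, main() is structured roughly as:
//
//   void main()
//   {
//       pixelCoord = ivec2(floor(gl_FragCoord.xy));   // only if PLS needs the pixel coord
//       <pre-PLS code: interlock begin / PLS preloads>
//       ...original body with PLS ops rewritten...
//       <post-PLS code: interlock end / PLS write-outs>  // inserted before a trailing return
//   }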
const size_t plsBeginPos = 0;
traverser->injectPrePLSCode(compiler, symbolTable, compileOptions, mainBody, plsBeginPos);
size_t plsEndPos = mainBody->getChildCount();
if (plsEndPos > 0 && mainBody->getChildNode(plsEndPos - 1)->getAsBranchNode() != nullptr)
{
// Make sure not to add code after the terminating return, if any.
--plsEndPos;
}
traverser->injectPostPLSCode(compiler, symbolTable, compileOptions, mainBody, plsEndPos);
// Assign the global pixel coord at the beginning of main(), if used.
traverser->injectPixelCoordInitializationCodeIfNeeded(compiler, root, mainBody);
return compiler->validateAST(root);
}
} // namespace sh