// blob: 34bc2d367013e5a14c4239713db2dbe7d845eb35 [file] [log] [blame]
//
// Copyright 2022 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
#include "compiler/translator/tree_ops/RewritePixelLocalStorage.h"
#include "common/angleutils.h"
#include "compiler/translator/StaticType.h"
#include "compiler/translator/SymbolTable.h"
#include "compiler/translator/tree_ops/MonomorphizeUnsupportedFunctions.h"
#include "compiler/translator/tree_util/BuiltIn.h"
#include "compiler/translator/tree_util/FindMain.h"
#include "compiler/translator/tree_util/IntermNode_util.h"
#include "compiler/translator/tree_util/IntermTraverse.h"
namespace sh
{
namespace
{
// Returns the basic type of the data held by a pixel local storage handle of type 'plsType':
// float, int, or uint for the plain, 'I', and 'U' handle types respectively. Any other type is a
// programming error: it trips UNREACHABLE() and yields EbtVoid.
constexpr static TBasicType DataTypeOfPLSType(TBasicType plsType)
{
    if (plsType == EbtPixelLocalANGLE)
    {
        return EbtFloat;
    }
    if (plsType == EbtIPixelLocalANGLE)
    {
        return EbtInt;
    }
    if (plsType == EbtUPixelLocalANGLE)
    {
        return EbtUInt;
    }
    UNREACHABLE();
    return EbtVoid;
}
// Returns the basic type of the texel data accessed through a 2D image of type 'imageType':
// float, int, or uint for image2D, iimage2D, and uimage2D respectively. Any other type is a
// programming error: it trips UNREACHABLE() and yields EbtVoid.
constexpr static TBasicType DataTypeOfImageType(TBasicType imageType)
{
    if (imageType == EbtImage2D)
    {
        return EbtFloat;
    }
    if (imageType == EbtIImage2D)
    {
        return EbtInt;
    }
    if (imageType == EbtUImage2D)
    {
        return EbtUInt;
    }
    UNREACHABLE();
    return EbtVoid;
}
// Builds the call that opens a per-pixel critical section, which is what makes pixel local
// storage coherent. The builtin is chosen from compileOptions.pls.fragmentSynchronizationType:
//
//   GL_NV_fragment_shader_interlock    -> beginInvocationInterlockNV()
//   GL_INTEL_fragment_shader_ordering  -> beginFragmentShaderOrderingINTEL()
//   GL_ARB_fragment_shader_interlock   -> beginInvocationInterlockARB()
//                                         (also compiles to SPV_EXT_fragment_shader_interlock)
//
// Returns nullptr for every other synchronization type.
static TIntermNode *CreateBuiltInInterlockBeginCall(const ShCompileOptions &compileOptions,
                                                    TSymbolTable &symbolTable)
{
    const char *builtInName = nullptr;
    switch (compileOptions.pls.fragmentSynchronizationType)
    {
        case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
            builtInName = "beginInvocationInterlockNV";
            break;
        case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
            builtInName = "beginFragmentShaderOrderingINTEL";
            break;
        case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
            builtInName = "beginInvocationInterlockARB";
            break;
        default:
            // No begin() builtin is emitted for this synchronization type.
            return nullptr;
    }
    return CreateBuiltInFunctionCallNode(builtInName, {}, symbolTable,
                                         kESSLInternalBackendBuiltIns);
}
// Builds the call that closes a per-pixel critical section, which is what makes pixel local
// storage coherent. The builtin is chosen from compileOptions.pls.fragmentSynchronizationType:
//
//   GL_NV_fragment_shader_interlock   -> endInvocationInterlockNV()
//   GL_ARB_fragment_shader_interlock  -> endInvocationInterlockARB()
//                                        (also compiles to SPV_EXT_fragment_shader_interlock)
//
// Returns nullptr for GL_INTEL_fragment_shader_ordering, which doesn't have an "end()"
// delimiter, and for every other synchronization type.
static TIntermNode *CreateBuiltInInterlockEndCall(const ShCompileOptions &compileOptions,
                                                  TSymbolTable &symbolTable)
{
    const char *builtInName = nullptr;
    switch (compileOptions.pls.fragmentSynchronizationType)
    {
        case ShFragmentSynchronizationType::FragmentShaderInterlock_NV_GL:
            builtInName = "endInvocationInterlockNV";
            break;
        case ShFragmentSynchronizationType::FragmentShaderInterlock_ARB_GL:
            builtInName = "endInvocationInterlockARB";
            break;
        case ShFragmentSynchronizationType::FragmentShaderOrdering_INTEL_GL:
            // GL_INTEL_fragment_shader_ordering doesn't have an "end()" call.
        default:
            break;
    }
    if (builtInName == nullptr)
    {
        return nullptr;
    }
    return CreateBuiltInFunctionCallNode(builtInName, {}, symbolTable,
                                         kESSLInternalBackendBuiltIns);
}
// Rewrites high level PLS operations to shader image operations.
//
// In one traversal this class:
//   - replaces each pixel local storage uniform declaration with an image2D declaration,
//   - rewrites pixelLocalLoadANGLE() to imageLoad(),
//   - rewrites pixelLocalStoreANGLE() to imageStore() surrounded by memoryBarrierImage() calls,
//   - packs/unpacks rgba8* data into/out of r32* images when the compile options ask for it.
//
// It also declares (but does not initialize) a global ivec2 that holds the pixel coordinate used
// by every imageLoad/imageStore. The caller is expected to initialize it at the top of main();
// see globalPixelCoord().
class RewriteToImagesTraverser : public TIntermTraverser
{
public:
// 'compileOptions' is stored by pointer and must outlive the traverser.
// NOTE(review): the three booleans passed to the base class are presumably
// (preVisit, inVisit, postVisit), i.e. pre-order visits only -- confirm against
// TIntermTraverser.
RewriteToImagesTraverser(TSymbolTable &symbolTable,
const ShCompileOptions &compileOptions,
int shaderVersion)
: TIntermTraverser(true, false, false, &symbolTable),
mCompileOptions(&compileOptions),
mShaderVersion(shaderVersion)
{}
// Replaces a PLS uniform declaration with an equivalent image2D declaration and records the
// image under the PLS binding point. Non-PLS declarations are left alone (returns true so
// traversal continues into them).
bool visitDeclaration(Visit, TIntermDeclaration *decl) override
{
// Only the first declarator is inspected to decide whether this is a PLS declaration.
TIntermTyped *declVariable = (decl->getSequence())->front()->getAsTyped();
ASSERT(declVariable);
if (!IsPixelLocal(declVariable->getBasicType()))
{
return true;
}
// PLS is not allowed in arrays.
ASSERT(!declVariable->isArray());
// This visitDeclaration doesn't get called for function arguments, and opaque types can
// otherwise only be uniforms.
ASSERT(declVariable->getQualifier() == EvqUniform);
TIntermSymbol *plsSymbol = declVariable->getAsSymbolNode();
ASSERT(plsSymbol);
// Insert a global to hold the pixel coordinate as soon as we see PLS declared. This will be
// initialized at the beginning of main().
if (!mGlobalPixelCoord)
{
// highp ivec2 global.
TType *coordType = new TType(EbtInt, EbpHigh, EvqGlobal, 2);
mGlobalPixelCoord = CreateTempVariable(mSymbolTable, coordType);
insertStatementInParentBlock(CreateTempDeclarationNode(mGlobalPixelCoord));
}
// Replace the PLS declaration with an image2D.
TVariable *image2D = createPLSImageReplacement(plsSymbol);
insertNewPLSImage(plsSymbol, image2D);
queueReplacement(new TIntermDeclaration({new TIntermSymbol(image2D)}),
OriginalNode::IS_DROPPED);
// No need to recurse since the original declaration is being dropped.
return false;
}
// Rewrites pixelLocalLoadANGLE/pixelLocalStoreANGLE calls into image operations on the image2D
// that backs the PLS handle passed as the first argument. Other aggregates are left alone.
bool visitAggregate(Visit, TIntermAggregate *aggregate) override
{
if (!BuiltInGroup::IsPixelLocal(aggregate->getOp()))
{
return true;
}
const TIntermSequence &args = *aggregate->getSequence();
ASSERT(args.size() >= 1);
// The first argument is the PLS handle; its binding point identifies the backing image.
TIntermSymbol *plsSymbol = args[0]->getAsSymbolNode();
TVariable *image2D = findPLSImage(plsSymbol);
// The pixel coord global is created when the first PLS declaration is visited, so it must
// exist by the time any PLS operation is seen.
ASSERT(mGlobalPixelCoord);
// Rewrite pixelLocalLoadANGLE -> imageLoad.
if (aggregate->getOp() == EOpPixelLocalLoadANGLE)
{
// Replace the pixelLocalLoadANGLE with imageLoad.
TIntermTyped *pls;
pls = CreateBuiltInFunctionCallNode(
"imageLoad", {new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord)},
*mSymbolTable, mShaderVersion);
pls = unpackImageDataIfNecessary(pls, plsSymbol, image2D);
queueReplacement(pls, OriginalNode::IS_DROPPED);
return false; // No need to recurse since this node is being dropped.
}
// Rewrite pixelLocalStoreANGLE -> imageStore.
if (aggregate->getOp() == EOpPixelLocalStoreANGLE)
{
// Surround the store with memoryBarrierImage calls in order to ensure dependent stores
// and loads in a single shader invocation are coherent. From the ES 3.1 spec:
//
// Using variables declared as "coherent" guarantees only that the results of stores
// will be immediately visible to shader invocations using similarly-declared
// variables; calling MemoryBarrier is required to ensure that the stores are visible
// to other operations.
//
// Also hoist the 'value' expression into a temp. In the event of
// "pixelLocalStoreANGLE(..., pixelLocalLoadANGLE(...))", this ensures the load occurs
// _before_ the memoryBarrierImage.
//
// NOTE: It is generally unsafe to hoist function arguments due to short circuiting,
// e.g., "if (false && function(...))", but pixelLocalStoreANGLE returns type void, so
// it is safe in this particular case.
//
// The temp is a 4-component vector of the PLS data type, at the PLS handle's precision.
TType *valueType = new TType(DataTypeOfPLSType(plsSymbol->getBasicType()),
plsSymbol->getPrecision(), EvqTemporary, 4);
TVariable *valueVar = CreateTempVariable(mSymbolTable, valueType);
TIntermDeclaration *valueDecl =
CreateTempInitDeclarationNode(valueVar, args[1]->getAsTyped());
valueDecl->traverse(this); // Rewrite any potential pixelLocalLoadANGLEs in valueDecl.
insertStatementsInParentBlock(
{valueDecl, CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
mShaderVersion)}, // Before.
{CreateBuiltInFunctionCallNode("memoryBarrierImage", {}, *mSymbolTable,
mShaderVersion)}); // After.
// Rewrite the pixelLocalStoreANGLE with imageStore.
//
// clampAndPackPLSDataIfNecessary() may itself insert statements into the parent block
// (clamping, masking, a highp workaround temp). It is called after the
// insertStatementsInParentBlock() above, so keep this ordering when editing.
queueReplacement(CreateBuiltInFunctionCallNode(
"imageStore",
{new TIntermSymbol(image2D), new TIntermSymbol(mGlobalPixelCoord),
clampAndPackPLSDataIfNecessary(valueVar, plsSymbol, image2D)},
*mSymbolTable, mShaderVersion),
OriginalNode::IS_DROPPED);
return false; // No need to recurse since this node is being dropped.
}
return true;
}
// Returns the global pixel coordinate variable, or nullptr if no PLS declaration was visited.
TVariable *globalPixelCoord() const { return mGlobalPixelCoord; }
private:
// Do all PLS formats need to be packed into r32f, r32i, or r32ui image2Ds?
bool needsR32Packing() const
{
return mCompileOptions->pls.type == ShPixelLocalStorageType::ImageStoreR32PackedFormats;
}
// Sets the given image2D as the backing storage for the plsSymbol's binding point. An entry
// must not already exist in the map for this binding point.
void insertNewPLSImage(TIntermSymbol *plsSymbol, TVariable *image2D)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
int binding = plsSymbol->getType().getLayoutQualifier().binding;
ASSERT(binding >= 0);
auto result = mPLSImages.insert({binding, image2D});
ASSERT(result.second); // Ensure an image didn't already exist for this symbol.
}
// Looks up the image2D backing storage for the given plsSymbol's binding point. An entry
// must already exist in the map for this binding point.
TVariable *findPLSImage(TIntermSymbol *plsSymbol)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
int binding = plsSymbol->getType().getLayoutQualifier().binding;
ASSERT(binding >= 0);
auto iter = mPLSImages.find(binding);
ASSERT(iter != mPLSImages.end()); // Ensure PLSImages already exist for this symbol.
return iter->second;
}
// Creates an image2D that replaces a pixel local storage handle.
//
// The image type starts as a copy of the PLS type and then has its basic type, internal format,
// precision, rasterOrdered flag, and memory qualifiers adjusted. The returned variable reuses
// the PLS variable's unique id, name, symbol type, and extensions.
TVariable *createPLSImageReplacement(const TIntermSymbol *plsSymbol)
{
ASSERT(plsSymbol);
ASSERT(IsPixelLocal(plsSymbol->getBasicType()));
TType *imageType = new TType(plsSymbol->getType());
TLayoutQualifier layoutQualifier = imageType->getLayoutQualifier();
// Pick the image basic type from the PLS format. When r32 packing is required, the rgba8*
// formats are stored in a single-channel 32-bit image at highp instead.
switch (layoutQualifier.imageInternalFormat)
{
case TLayoutImageInternalFormat::EiifRGBA8:
if (needsR32Packing())
{
// Packed rgba8 lives in an r32ui uimage2D, so the basic type changes too.
layoutQualifier.imageInternalFormat = EiifR32UI;
imageType->setPrecision(EbpHigh);
imageType->setBasicType(EbtUImage2D);
}
else
{
imageType->setBasicType(EbtImage2D);
}
break;
case TLayoutImageInternalFormat::EiifRGBA8I:
if (needsR32Packing())
{
layoutQualifier.imageInternalFormat = EiifR32I;
imageType->setPrecision(EbpHigh);
}
imageType->setBasicType(EbtIImage2D);
break;
case TLayoutImageInternalFormat::EiifRGBA8UI:
if (needsR32Packing())
{
layoutQualifier.imageInternalFormat = EiifR32UI;
imageType->setPrecision(EbpHigh);
}
imageType->setBasicType(EbtUImage2D);
break;
case TLayoutImageInternalFormat::EiifR32F:
imageType->setBasicType(EbtImage2D);
break;
case TLayoutImageInternalFormat::EiifR32UI:
imageType->setBasicType(EbtUImage2D);
break;
default:
UNREACHABLE();
}
// Only the D3D rasterizer-order-views synchronization type marks the image rasterOrdered.
layoutQualifier.rasterOrdered = mCompileOptions->pls.fragmentSynchronizationType ==
ShFragmentSynchronizationType::RasterizerOrderViews_D3D;
imageType->setLayoutQualifier(layoutQualifier);
// The image is always declared "coherent restrict", readable and writable.
TMemoryQualifier memoryQualifier{};
memoryQualifier.coherent = true;
memoryQualifier.restrictQualifier = true;
memoryQualifier.volatileQualifier = false;
// TODO(anglebug.com/7279): Maybe we could walk the tree first and see which PLS is used
// how. If the PLS is never loaded, we could add a writeonly qualifier, for example.
memoryQualifier.readonly = false;
memoryQualifier.writeonly = false;
imageType->setMemoryQualifier(memoryQualifier);
const TVariable &plsVar = plsSymbol->variable();
return new TVariable(plsVar.uniqueId(), plsVar.name(), plsVar.symbolType(),
plsVar.extensions(), imageType);
}
// Unpacks the raw PLS data if the output shader language needs r32* packing.
//
// 'data' is the result of the imageLoad. It is returned unchanged when the PLS format and the
// backing image format are the same; otherwise an expression that unpacks it is returned.
TIntermTyped *unpackImageDataIfNecessary(TIntermTyped *data,
TIntermSymbol *plsSymbol,
TVariable *image2D)
{
TLayoutImageInternalFormat plsFormat =
plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
TLayoutImageInternalFormat imageFormat =
image2D->getType().getLayoutQualifier().imageInternalFormat;
if (plsFormat == imageFormat)
{
return data; // This PLS storage isn't packed.
}
// The formats only differ when createPLSImageReplacement() packed the image.
ASSERT(needsR32Packing());
switch (plsFormat)
{
case EiifRGBA8:
// Unpack and normalize r,g,b,a from a single 32-bit unsigned int:
//
// unpackUnorm4x8(data.r)
//
data = CreateBuiltInFunctionCallNode("unpackUnorm4x8", {CreateSwizzle(data, 0)},
*mSymbolTable, mShaderVersion);
break;
case EiifRGBA8I:
case EiifRGBA8UI:
{
constexpr unsigned shifts[] = {24, 16, 8, 0};
// Unpack r,g,b,a from a single (signed or unsigned) 32-bit int. Shift left,
// then right, to preserve the sign for ints. (highp integers are exactly
// 32-bit, two's complement.)
//
// data.rrrr << uvec4(24, 16, 8, 0) >> 24u
//
data = CreateSwizzle(data, 0, 0, 0, 0);
data = new TIntermBinary(EOpBitShiftLeft, data, CreateUVecNode(shifts, 4, EbpHigh));
data = new TIntermBinary(EOpBitShiftRight, data, CreateUIntNode(24));
break;
}
default:
UNREACHABLE();
}
return data;
}
// Packs the PLS to raw data if the output shader language needs r32* packing.
//
// 'plsVar' is the temp holding the value being stored. For integer rgba8 formats a clamp of
// plsVar is always inserted into the parent block, packed or not. Returns the expression to
// pass to imageStore: plsVar itself when the PLS format matches the backing image format,
// otherwise a 4-component constructor wrapping the packed 32-bit value.
//
// This method inserts statements into the parent block, so it must only be called while
// visiting a pixelLocalStoreANGLE. (See visitAggregate.)
TIntermTyped *clampAndPackPLSDataIfNecessary(TVariable *plsVar,
TIntermSymbol *plsSymbol,
TVariable *image2D)
{
TLayoutImageInternalFormat plsFormat =
plsSymbol->getType().getLayoutQualifier().imageInternalFormat;
// anglebug.com/7524: Storing to integer formats with values larger than can be represented
// is specified differently on different APIs. Clamp integer formats here to make it uniform
// and more GL-like.
switch (plsFormat)
{
case EiifRGBA8I:
{
// Clamp r,g,b,a to their min/max 8-bit values:
//
// plsVar = clamp(plsVar, -128, 127)
//
// (The "& 0xff" mask is applied further down, and only when packing.)
TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
"clamp",
{new TIntermSymbol(plsVar), CreateIndexNode(-128), CreateIndexNode(127)},
*mSymbolTable, mShaderVersion);
insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
break;
}
case EiifRGBA8UI:
{
// Clamp r,g,b,a to their max 8-bit values:
//
// plsVar = min(plsVar, 255)
//
TIntermTyped *newPLSValue = CreateBuiltInFunctionCallNode(
"min", {new TIntermSymbol(plsVar), CreateUIntNode(255)}, *mSymbolTable,
mShaderVersion);
insertStatementInParentBlock(CreateTempAssignmentNode(plsVar, newPLSValue));
break;
}
default:
break;
}
TIntermTyped *result = new TIntermSymbol(plsVar);
TLayoutImageInternalFormat imageFormat =
image2D->getType().getLayoutQualifier().imageInternalFormat;
if (plsFormat == imageFormat)
{
return result; // This PLS storage isn't packed.
}
// The formats only differ when createPLSImageReplacement() packed the image.
ASSERT(needsR32Packing());
switch (plsFormat)
{
case EiifRGBA8:
{
if (mCompileOptions->passHighpToPackUnormSnormBuiltins)
{
// anglebug.com/7527: packUnorm4x8 doesn't work on Pixel 4 when passed
// a mediump vec4. Use an intermediate highp vec4.
// NOTE(review): this comment previously named unpackUnorm4x8; the call guarded
// here is packUnorm4x8 -- confirm against the bug.
//
// It's safe to inject a variable here because it happens right before
// pixelLocalStoreANGLE, which returns type void. (See visitAggregate.)
TType *highpType = new TType(EbtFloat, EbpHigh, EvqTemporary, 4);
TVariable *workaroundHighpVar = CreateTempVariable(mSymbolTable, highpType);
insertStatementInParentBlock(
CreateTempInitDeclarationNode(workaroundHighpVar, result));
result = new TIntermSymbol(workaroundHighpVar);
}
// Denormalize and pack r,g,b,a into a single 32-bit unsigned int:
//
// packUnorm4x8(workaroundHighpVar)
//
// (Without the workaround, the argument is plsVar itself.)
result = CreateBuiltInFunctionCallNode("packUnorm4x8", {result}, *mSymbolTable,
mShaderVersion);
break;
}
case EiifRGBA8I:
case EiifRGBA8UI:
{
if (plsFormat == EiifRGBA8I)
{
// Mask off extra sign bits beyond 8.
//
// plsVar &= 0xff
//
insertStatementInParentBlock(new TIntermBinary(
EOpBitwiseAndAssign, new TIntermSymbol(plsVar), CreateIndexNode(0xff)));
}
// Pack r,g,b,a into a single 32-bit (signed or unsigned) int:
//
// r | (g << 8) | (b << 16) | (a << 24)
//
// Builds "plsVar[componentIdx] << (componentIdx * 8)".
auto shiftComponent = [=](int componentIdx) {
return new TIntermBinary(EOpBitShiftLeft,
CreateSwizzle(new TIntermSymbol(plsVar), componentIdx),
CreateUIntNode(componentIdx * 8));
};
result = CreateSwizzle(result, 0);
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(1));
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(2));
result = new TIntermBinary(EOpBitwiseOr, result, shiftComponent(3));
break;
}
default:
UNREACHABLE();
}
// Convert the packed data to a {u,i}vec4 for imageStore.
TType imageStoreType(DataTypeOfImageType(image2D->getType().getBasicType()), 4);
return TIntermAggregate::CreateConstructor(imageStoreType, {result});
}
// Compile options supplied at construction; not owned.
const ShCompileOptions *const mCompileOptions;
// Shader version passed to every built-in function lookup.
const int mShaderVersion;
// Stores the shader invocation's pixel coordinate as "ivec2(floor(gl_FragCoord.xy))".
// Created when the first PLS declaration is visited; nullptr until then.
TVariable *mGlobalPixelCoord = nullptr;
// Maps PLS binding points to the image2D variables that back them.
angle::HashMap<int, TVariable *> mPLSImages;
};
} // anonymous namespace
// Rewrites all pixel local storage usage in the shader rooted at 'root' into shader image
// operations.
//
// Steps, in order:
//   1. Monomorphize any functions that take PLS arguments.
//   2. Wrap the body of main() in fragment synchronization begin/end calls, when the configured
//      synchronization type provides them.
//   3. Rewrite PLS declarations, loads, and stores to image2D declarations, imageLoad, and
//      imageStore.
//   4. Initialize the global pixel coordinate at the very top of main().
//
// Returns false if monomorphization or the tree update fails; otherwise returns the result of
// validating the rewritten AST.
bool RewritePixelLocalStorageToImages(TCompiler *compiler,
                                      TIntermBlock *root,
                                      TSymbolTable &symbolTable,
                                      const ShCompileOptions &compileOptions,
                                      int shaderVersion)
{
    // Function arguments don't carry the "binding" or "format" layout qualifiers the rewrite
    // needs. So if any functions take PLS arguments, monomorphize them: remove those parameters
    // and make the PLS calls from main() instead, using the global uniform from the call site in
    // place of the function argument.
    const bool monomorphized = MonomorphizeUnsupportedFunctions(
        compiler, root, &symbolTable, compileOptions,
        UnsupportedFunctionArgsBitSet{UnsupportedFunctionArgs::PixelLocalStorage});
    if (!monomorphized)
    {
        return false;
    }

    TIntermBlock *mainBody = FindMainBody(root);

    // Make pixel local storage coherent by bracketing the critical section of PLS operations
    // with fragment synchronization calls, if they are supported.
    if (TIntermNode *beginCall = CreateBuiltInInterlockBeginCall(compileOptions, symbolTable))
    {
        // TODO(anglebug.com/7279): Inject these functions in a tight critical section, instead of
        // just locking the entire main() function:
        //   - Monomorphize all PLS calls into main().
        //   - Insert begin/end calls around the first/last PLS calls (and outside of flow
        //     control).
        mainBody->insertStatement(0, beginCall);

        // Not all fragment synchronization extensions have an end() call.
        if (TIntermNode *endCall = CreateBuiltInInterlockEndCall(compileOptions, symbolTable))
        {
            mainBody->appendStatement(endCall);
        }
    }

    // Rewrite PLS operations to image operations.
    RewriteToImagesTraverser plsRewriter(symbolTable, compileOptions, shaderVersion);
    root->traverse(&plsRewriter);
    if (!plsRewriter.updateTree(compiler, root))
    {
        return false;
    }

    // The traverser only creates the pixel coord global once it has seen a PLS declaration. When
    // it exists, initialize it as the first statement of main():
    //
    //     pixelCoord = ivec2(floor(gl_FragCoord.xy));
    //
    if (plsRewriter.globalPixelCoord())
    {
        TIntermTyped *fragCoord =
            ReferenceBuiltInVariable(ImmutableString("gl_FragCoord"), symbolTable, shaderVersion);
        TIntermTyped *fragCoordXY = CreateSwizzle(fragCoord, 0, 1);
        TIntermTyped *flooredXY =
            CreateBuiltInFunctionCallNode("floor", {fragCoordXY}, symbolTable, shaderVersion);
        TIntermTyped *pixelCoordValue =
            TIntermAggregate::CreateConstructor(TType(EbtInt, 2), {flooredXY});
        TIntermTyped *pixelCoordInit =
            CreateTempAssignmentNode(plsRewriter.globalPixelCoord(), pixelCoordValue);
        mainBody->insertStatement(0, pixelCoordInit);
    }

    return compiler->validateAST(root);
}
} // namespace sh