| /////////////////////////////////////////////////////////////////////////////// |
| // // |
| // DxilDebugInstrumentation.cpp // |
| // Copyright (C) Microsoft Corporation. All rights reserved. // |
| // This file is distributed under the University of Illinois Open Source // |
| // License. See LICENSE.TXT for details. // |
| // // |
| // Adds instrumentation that enables shader debugging in PIX // |
| // // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include <optional> |
| #include <vector> |
| |
| #include "dxc/DXIL/DxilFunctionProps.h" |
| #include "dxc/DXIL/DxilModule.h" |
| #include "dxc/DXIL/DxilOperations.h" |
| #include "dxc/DXIL/DxilUtil.h" |
| #include "dxc/DxilPIXPasses/DxilPIXPasses.h" |
| #include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" |
| #include "dxc/HLSL/DxilGenerationPass.h" |
| |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/Module.h" |
| |
| #include "PixPassHelpers.h" |
| |
| using namespace llvm; |
| using namespace hlsl; |
| |
| // Overview of instrumentation: |
| // |
| // In summary, instructions are added that cause a "trace" of the execution of |
| // the shader to be written out to a UAV. This trace is then used by a debugger |
| // application to provide a postmortem debugging experience that reconstructs |
| // the execution history of the shader. The caller specifies the power-of-two |
| // size of the UAV. |
| // |
| // The instrumentation is added per basic block, and each block will then write |
| // a contiguous sequence of values into the UAV. |
| // |
| // The trace is only required for particular shader instances of interest, and |
| // a branchless mechanism is used to write the trace either to an incrementing |
| // location within the UAV, or to a "dumping ground" area in the top half of the |
| // UAV if the instance is not of interest. |
| // |
| // In addition, each half of the UAV is further subdivided: the first quarter is |
| // the area in which blocks are permitted to start writing their sequence, and |
| // that sequence is constrained to be no longer than the size of the second |
| // quarter. This allows us to limit writes to the appropriate half of the UAV |
| // via a single AND at the beginning of the basic block. An additional OR |
| // provides the offset, either 0 for threads-of-interest, or UAVSize/2 for |
| // not-of-interest. |
| // |
| // Threads determine where to start writing their data by incrementing a DWORD |
| // that lives at the very top of that thread's half of the UAV. This is done |
| // because several threads may satisfy the selection criteria (e.g. a pixel |
| // shader may be invoked several times for a given pixel coordinate if the model |
| // has overlapping triangles). |
| // |
| // A picture of the UAV layout: |
| // <--------------power-of-two-size-of-UAV----------------> |
| // [1 ][2 ][3 ][4 ] |
| // <------A-----> ^ ^ |
| // B C |
| // <------D------> |
| // |
| // A: the size of the AND for interesting writes. Their payloads extend |
| // beyond this into area 2, but those payloads are limited to be small |
| // enough (1/4 UAV size -1) that they don't overwrite B. |
| // B: The interesting thread's counter. |
| // C: The uninteresting thread's counter. |
| // D: Size of the AND for uninteresting threads (same value as A) |
| // |
| // The following modifications are made by this pass: |
| // |
| // First, instructions are added to the top of the entry point function that |
| // implement the following: |
| // - Examine the input variables that define the instance of the shader that is |
| // running. This will be SV_Position for pixel shaders, SV_VertexID and |
| // SV_InstanceID for vertex shaders, thread id for compute shaders etc. If |
| // these system values are not already present in the shader's input |
| // signature, they are added to it where appropriate. |
| // - Compare the above variables with the instance of interest defined by the |
| // invoker of this pass. If equal, create an OR value of zero that will |
| // not affect the block's starting write offset. If not equal, the OR will |
| // move the writes into the second half of the UAV. |
| // - Calculate an "instance identifier". Even with the above instance |
| // identification, several invocations may end up matching the selection |
| // criteria. More on this below. |
| // |
| // As mentioned, a counter/offset is maintained at the top of the thread's |
| // half of the UAV. The very first value of this counter that |
| // is encountered by each invocation is used as the "instance identifier" |
| // mentioned above. That instance identifier is written out with each packet, |
| // since many threads executing in parallel will emit interleaved packets, |
| // and the debugger application uses the identifiers to gather packets from each |
| // separate invocation together. |
| // |
| // In addition to the above, this pass creates a text precis of the structure |
| // being written out for each basic block. This precis is passed back to the |
| // caller, and can be used to parse the UAV output later. The precis will |
| // contain notes about void-type instructions, which won't write anything to the |
| // UAV, allowing the caller to reconstruct those instructions. |
| // Some care has to be taken about whether to emit UAV writes after the |
| // corresponding instruction or before. Terminators must emit their UAV data |
| // before the terminator itself, of course. Phi instructions get special |
| // treatment also: their instrumentation has to come after (since phis must be |
| // the first instructions in the block), but also the instrumentation must |
| // execute in the same order as the precis specifies, or the caller will mix |
| // up the phi values. We achieve this by saying that phi instrumentation must |
| // come before the first non-phi instruction in the block. |
| // Some blocks will have all-void instructions, so that no debugging |
| // data is emitted at all. These blocks still produce a precis, and still |
| // need to be noticed during execution, so an empty block header is emitted |
| // into the UAV. |
| // |
| // Error conditions: |
| // Overflow of the debug output from the interesting threads will start to |
| // overwrite their own area of the UAV: the AND limits those writes to the |
| // lower half of the UAV, which also avoids overwriting their counter value. |
| // The caller must check the counter value after the debugging run is |
| // complete to see if this happened, and if so, increase the UAV size and |
| // try again. |
| // Uninteresting threads use an AND value that limits their writes to the |
| // upper half of the UAV and can be entirely ignored by the caller. |
| // Since a sufficiently-large block is guaranteed to overflow the UAV, |
| // the precis-creation can exit early and report this "static" overflow |
| // condition to the caller. |
| // In all overflow cases, the caller is expected to try to instrument again, |
| // with a larger UAV. |
| |
| // These definitions echo those in the debugger application's |
| // debugshaderrecord.h file |
// Tags identifying the kind of each debug record. Every value fits in the
// 8-bit "Type" bit-field declared by the record structs in this file.
// The numeric values must stay in step with the debugger application.
enum DebugShaderModifierRecordType {
  // Legacy record types. None of these is produced any longer; they are kept
  // so that this file stays in sync with the debugger source. (As of this
  // writing, the debugger still supports older versions of this pass which
  // produced finer-grained debug packets.)
  DebugShaderModifierRecordTypeInvocationStartMarker = 0,
  DebugShaderModifierRecordTypeStep = 1,
  DebugShaderModifierRecordTypeEvent = 2,
  DebugShaderModifierRecordTypeInputRegister = 3,
  DebugShaderModifierRecordTypeReadRegister = 4,
  DebugShaderModifierRecordTypeWrittenRegister = 5,
  DebugShaderModifierRecordTypeRegisterRelativeIndex0 = 6,
  DebugShaderModifierRecordTypeRegisterRelativeIndex1 = 7,
  DebugShaderModifierRecordTypeRegisterRelativeIndex2 = 8,

  // Record types listed below the legacy group.
  DebugShaderModifierRecordTypeDXILStepBlock = 249,
  DebugShaderModifierRecordTypeDXILStepRet = 250,
  DebugShaderModifierRecordTypeDXILStepVoid = 251,
  DebugShaderModifierRecordTypeDXILStepFloat = 252,
  DebugShaderModifierRecordTypeDXILStepUint32 = 253,
  DebugShaderModifierRecordTypeDXILStepUint64 = 254,
  DebugShaderModifierRecordTypeDXILStepDouble = 255,
};
| |
| // These structs echo those in the debugger application's debugshaderrecord.h |
| // file, but are recapitulated here because the originals use unnamed unions |
| // which are disallowed by DXCompiler's build. |
| // |
#pragma pack(push, 4)
// Common two-DWORD prefix of a debug record: a packed header word followed by
// a UID.
struct DebugShaderModifierRecordHeader {
  union {
    struct {
      uint32_t SizeDwords : 4;
      uint32_t Flags : 4;
      uint32_t Type : 8;
      uint32_t HeaderPayload : 16;
    } Details;
    uint32_t u32Header; // The same 32 bits viewed as one DWORD.
  } Header;
  uint32_t UID;
};

// Fixed part of a per-instruction "step" record. Same shape as the common
// header (with the top 16 bits named Opcode) plus an instruction offset.
struct DebugShaderModifierRecordDXILStepBase {
  union {
    struct {
      uint32_t SizeDwords : 4;
      uint32_t Flags : 4;
      uint32_t Type : 8;
      uint32_t Opcode : 16;
    } Details;
    uint32_t u32Header; // The same 32 bits viewed as one DWORD.
  } Header;
  uint32_t UID;
  uint32_t InstructionOffset;
};

// Per-basic-block record: the top 16 header bits carry the instruction count,
// followed by the UID and the ordinal of the block's first instruction.
struct DebugShaderModifierRecordDXILBlock {
  union {
    struct {
      uint32_t NotUsed0 : 4;
      uint32_t NotUsed1 : 4;
      uint32_t Type : 8;
      uint32_t CountOfInstructions : 16;
    } Details;
    uint32_t u32Header; // The same 32 bits viewed as one DWORD.
  } Header;
  uint32_t UID;
  uint32_t FirstInstructionOrdinal;
};

// Step record that also carries a value of type ReturnType and a packed
// (base, index) value ordinal.
template <typename ReturnType>
struct DebugShaderModifierRecordDXILStep
    : public DebugShaderModifierRecordDXILStepBase {
  ReturnType ReturnValue;
  union {
    struct {
      uint32_t ValueOrdinalBase : 16;
      uint32_t ValueOrdinalIndex : 16;
    } Details;
    uint32_t u32ValueOrdinal; // The same 32 bits viewed as one DWORD.
  } ValueOrdinal;
};

// Step record with no value: only the fixed base part.
template <>
struct DebugShaderModifierRecordDXILStep<void>
    : public DebugShaderModifierRecordDXILStepBase {};
#pragma pack(pop)

// These layouts must echo the debugger application's definitions, so pin the
// packed sizes at compile time; an accidental layout change then fails the
// build instead of silently corrupting the trace.
static_assert(sizeof(DebugShaderModifierRecordHeader) == 2 * sizeof(uint32_t),
              "record header must be exactly two DWORDs");
static_assert(sizeof(DebugShaderModifierRecordDXILStepBase) ==
                  3 * sizeof(uint32_t),
              "step base record must be exactly three DWORDs");
static_assert(sizeof(DebugShaderModifierRecordDXILBlock) ==
                  3 * sizeof(uint32_t),
              "block record must be exactly three DWORDs");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<void>) ==
                  sizeof(DebugShaderModifierRecordDXILStepBase),
              "void step record must add nothing to the base record");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<uint32_t>) ==
                  5 * sizeof(uint32_t),
              "32-bit step record must be exactly five DWORDs");
static_assert(sizeof(DebugShaderModifierRecordDXILStep<uint64_t>) ==
                  6 * sizeof(uint32_t),
              "64-bit step record must be exactly six DWORDs");
| |
| uint32_t |
| DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) { |
| return ((recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader)) / |
| sizeof(uint32_t)); |
| } |
| |
| struct InstructionAndType { |
| Instruction *Inst; |
| std::uint32_t InstructionOrdinal; |
| DebugShaderModifierRecordType Type; |
| std::uint32_t RegisterNumber; |
| std::uint32_t AllocaBase; |
| Value *AllocaWriteIndex = nullptr; |
| std::optional<uint64_t> ConstantAllocaStoreValue; |
| }; |
| |
| class DxilDebugInstrumentation : public ModulePass { |
| |
| private: |
| union ParametersAllTogether { |
| unsigned Parameters[3]; |
| struct PixelShaderParameters { |
| unsigned X; |
| unsigned Y; |
| } PixelShader; |
| struct VertexShaderParameters { |
| unsigned VertexId; |
| unsigned InstanceId; |
| } VertexShader; |
| struct ComputeShaderParameters { |
| unsigned ThreadIdX; |
| unsigned ThreadIdY; |
| unsigned ThreadIdZ; |
| } ComputeShader; |
| struct GeometryShaderParameters { |
| unsigned PrimitiveId; |
| unsigned InstanceId; |
| } GeometryShader; |
| struct HullShaderParameters { |
| unsigned PrimitiveId; |
| unsigned ControlPointId; |
| } HullShader; |
| struct DomainShaderParameters { |
| unsigned PrimitiveId; |
| } DomainShader; |
| } m_Parameters = {{0, 0, 0}}; |
| |
| union SystemValueIndices { |
| struct PixelShaderParameters { |
| unsigned Position; |
| } PixelShader; |
| struct VertexShaderParameters { |
| unsigned VertexId; |
| unsigned InstanceId; |
| } VertexShader; |
| }; |
| unsigned m_FirstInstruction = 0; |
| unsigned m_LastInstruction = static_cast<unsigned>(-1); |
| |
| uint64_t m_UAVSize = 1024 * 1024; |
| struct PerFunctionValues { |
| CallInst *UAVHandle = nullptr; |
| Instruction *CounterOffset = nullptr; |
| Value *InvocationId = nullptr; |
| // Together these two values allow branchless writing to the UAV. An |
| // invocation of the shader is either of interest or not (e.g. it writes to |
| // the pixel the user selected for debugging or it doesn't). If not of |
| // interest, debugging output will still occur, but it will be relegated to |
| // the top half of the UAV. Invocations of interest, by contrast, |
| // will be written to the UAV at sequentially increasing offsets. |
| Value *OffsetMask = nullptr; |
| Instruction *OffsetOr = nullptr; |
| Value *SelectionCriterion = nullptr; |
| Value *CurrentIndex = nullptr; |
| std::vector<BasicBlock *> AddedBlocksToIgnoreForInstrumentation; |
| }; |
| std::map<llvm::Function *, PerFunctionValues> m_FunctionToValues; |
| |
| struct BuilderContext { |
| Module &M; |
| DxilModule &DM; |
| LLVMContext &Ctx; |
| OP *HlslOP; |
| IRBuilder<> &Builder; |
| }; |
| |
| uint32_t m_RemainingReservedSpaceInBytes = 0; |
| |
| public: |
| static char ID; // Pass identification, replacement for typeid |
| explicit DxilDebugInstrumentation() : ModulePass(ID) {} |
| StringRef getPassName() const override { |
| return "Add PIX debug instrumentation"; |
| } |
| void applyOptions(PassOptions O) override; |
| bool runOnModule(Module &M) override; |
| |
| bool RunOnFunction(Module &M, DxilModule &DM, llvm::Function *function); |
| |
| private: |
| SystemValueIndices addRequiredSystemValues(BuilderContext &BC, |
| DXIL::ShaderKind shaderKind); |
| void addInvocationSelectionProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices, |
| DXIL::ShaderKind shaderKind); |
| Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices); |
| Value *addGeometryShaderProlog(BuilderContext &BC); |
| Value *addDispatchedShaderProlog(BuilderContext &BC); |
| Value *addRaygenShaderProlog(BuilderContext &BC); |
| Value *addVertexShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices); |
| Value *addHullhaderProlog(BuilderContext &BC); |
| Value *addComparePrimitiveIdProlog(BuilderContext &BC, unsigned SVIndices); |
| uint32_t addDebugEntryValue(BuilderContext &BC, Value *TheValue); |
| void addInvocationStartMarker(BuilderContext &BC); |
| void determineLimitANDAndInitializeCounter(BuilderContext &BC); |
| void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords); |
| std::optional<InstructionAndType> addStoreStepDebugEntry(BuilderContext *BC, |
| StoreInst *Inst); |
| std::optional<InstructionAndType> |
| addStepDebugEntry(BuilderContext *BC, Instruction *Inst, |
| llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles); |
| std::optional<DebugShaderModifierRecordType> |
| addStepDebugEntryValue(BuilderContext *BC, std::uint32_t InstNum, Value *V, |
| std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex); |
| uint32_t UAVDumpingGroundOffset(); |
| template <typename ReturnType> |
| void addStepEntryForType(DebugShaderModifierRecordType RecordType, |
| BuilderContext &BC, std::uint32_t InstNum, Value *V, |
| std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex); |
| struct InstructionToInstrument { |
| Value *ValueToWriteToDebugMemory; |
| DebugShaderModifierRecordType ValueType; |
| Instruction *InstructionAfterWhichToAddInstrumentation; |
| Instruction *InstructionBeforeWhichToAddInstrumentation; |
| }; |
| struct BlockInstrumentationData { |
| uint32_t FirstInstructionOrdinalInBlock; |
| std::vector<InstructionToInstrument> Instructions; |
| }; |
| BlockInstrumentationData FindInstrumentableInstructionsInBlock( |
| BasicBlock &BB, OP *HlslOP, |
| llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles); |
| uint32_t |
| CountBlockPayloadBytes(std::vector<InstructionToInstrument> const &IsAndTs); |
| }; |
| |
| void DxilDebugInstrumentation::applyOptions(PassOptions O) { |
| GetPassOptionUnsigned(O, "FirstInstruction", &m_FirstInstruction, 0); |
| GetPassOptionUnsigned(O, "LastInstruction", &m_LastInstruction, |
| static_cast<unsigned>(-1)); |
| GetPassOptionUnsigned(O, "parameter0", &m_Parameters.Parameters[0], 0); |
| GetPassOptionUnsigned(O, "parameter1", &m_Parameters.Parameters[1], 0); |
| GetPassOptionUnsigned(O, "parameter2", &m_Parameters.Parameters[2], 0); |
| GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024); |
| } |
| |
| uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() { |
| return static_cast<uint32_t>(m_UAVSize / 2); |
| } |
| |
| static unsigned FindOrAddInputSignatureElement( |
| hlsl::DxilSignature &InputSignature, const char *name, |
| DXIL::SigPointKind sigPointKind, hlsl::DXIL::SemanticKind semanticKind) { |
| |
| auto &InputElements = InputSignature.GetElements(); |
| |
| auto ExistingElement = |
| std::find_if(InputElements.begin(), InputElements.end(), |
| [&](const std::unique_ptr<DxilSignatureElement> &Element) { |
| return Element->GetSemantic()->GetKind() == semanticKind; |
| }); |
| |
| if (ExistingElement == InputElements.end()) { |
| auto AddedElement = llvm::make_unique<DxilSignatureElement>(sigPointKind); |
| AddedElement->Initialize(name, hlsl::CompType::getF32(), |
| hlsl::DXIL::InterpolationMode::Undefined, 1, 1); |
| AddedElement->AppendSemanticIndex(0); |
| AddedElement->SetSigPointKind(sigPointKind); |
| AddedElement->SetKind(semanticKind); |
| |
| auto index = InputSignature.AppendElement(std::move(AddedElement)); |
| return InputElements[index]->GetID(); |
| } else { |
| return ExistingElement->get()->GetID(); |
| } |
| } |
| |
| DxilDebugInstrumentation::SystemValueIndices |
| DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC, |
| DXIL::ShaderKind shaderKind) { |
| SystemValueIndices SVIndices{}; |
| |
| switch (shaderKind) { |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| case DXIL::ShaderKind::Compute: |
| case DXIL::ShaderKind::RayGeneration: |
| case DXIL::ShaderKind::Intersection: |
| case DXIL::ShaderKind::AnyHit: |
| case DXIL::ShaderKind::ClosestHit: |
| case DXIL::ShaderKind::Miss: |
| // Dispatch* thread Id is not in the input signature |
| break; |
| case DXIL::ShaderKind::Vertex: { |
| hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature(); |
| SVIndices.VertexShader.VertexId = FindOrAddInputSignatureElement( |
| InputSignature, "VertexId", DXIL::SigPointKind::VSIn, |
| hlsl::DXIL::SemanticKind::VertexID); |
| SVIndices.VertexShader.InstanceId = FindOrAddInputSignatureElement( |
| InputSignature, "InstanceId", DXIL::SigPointKind::VSIn, |
| hlsl::DXIL::SemanticKind::InstanceID); |
| } break; |
| case DXIL::ShaderKind::Geometry: |
| case DXIL::ShaderKind::Hull: |
| case DXIL::ShaderKind::Domain: |
| // GS, HS, DS Primitive id, HS control point id, and GS Instance id are not |
| // in the input signature |
| break; |
| case DXIL::ShaderKind::Pixel: { |
| hlsl::DxilSignature &InputSignature = BC.DM.GetInputSignature(); |
| auto &InputElements = InputSignature.GetElements(); |
| |
| auto Existing_SV_Position = |
| std::find_if(InputElements.begin(), InputElements.end(), |
| [](const std::unique_ptr<DxilSignatureElement> &Element) { |
| return Element->GetSemantic()->GetKind() == |
| hlsl::DXIL::SemanticKind::Position; |
| }); |
| |
| // SV_Position, if present, has to have full mask, so we needn't worry |
| // about the shader having selected components that don't include x or y. |
| // If not present, we add it. |
| if (Existing_SV_Position == InputElements.end()) { |
| auto Added_SV_Position = |
| llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn); |
| Added_SV_Position->Initialize("Position", hlsl::CompType::getF32(), |
| hlsl::DXIL::InterpolationMode::Linear, 1, |
| 4); |
| Added_SV_Position->AppendSemanticIndex(0); |
| Added_SV_Position->SetSigPointKind(DXIL::SigPointKind::PSIn); |
| Added_SV_Position->SetKind(hlsl::DXIL::SemanticKind::Position); |
| |
| auto index = InputSignature.AppendElement(std::move(Added_SV_Position)); |
| SVIndices.PixelShader.Position = InputElements[index]->GetID(); |
| } else { |
| SVIndices.PixelShader.Position = Existing_SV_Position->get()->GetID(); |
| } |
| } break; |
| default: |
| assert(false); // guaranteed by runOnModule |
| } |
| |
| return SVIndices; |
| } |
| |
| Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *One32Arg = BC.HlslOP->GetU32Const(1); |
| Constant *Two32Arg = BC.HlslOP->GetU32Const(2); |
| |
| auto ThreadIdFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx)); |
| Constant *Opcode = BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::ThreadId); |
| auto ThreadIdX = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Zero32Arg}, "ThreadIdX"); |
| auto ThreadIdY = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, One32Arg}, "ThreadIdY"); |
| auto ThreadIdZ = |
| BC.Builder.CreateCall(ThreadIdFunc, {Opcode, Two32Arg}, "ThreadIdZ"); |
| |
| // Compare to expected thread ID |
| auto CompareToX = BC.Builder.CreateICmpEQ( |
| ThreadIdX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX), |
| "CompareToThreadIdX"); |
| auto CompareToY = BC.Builder.CreateICmpEQ( |
| ThreadIdY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY), |
| "CompareToThreadIdY"); |
| auto CompareToZ = BC.Builder.CreateICmpEQ( |
| ThreadIdZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ), |
| "CompareToThreadIdZ"); |
| |
| auto CompareXAndY = |
| BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY"); |
| |
| auto CompareAll = |
| BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll"); |
| |
| return CompareAll; |
| } |
| |
| Value *DxilDebugInstrumentation::addRaygenShaderProlog(BuilderContext &BC) { |
| auto DispatchRaysIndexOpFunc = BC.HlslOP->GetOpFunc( |
| DXIL::OpCode::DispatchRaysIndex, Type::getInt32Ty(BC.Ctx)); |
| Constant *DispatchRaysIndexOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::DispatchRaysIndex); |
| auto RayX = BC.Builder.CreateCall( |
| DispatchRaysIndexOpFunc, |
| {DispatchRaysIndexOpcode, BC.HlslOP->GetI8Const(0)}, "RayX"); |
| auto RayY = BC.Builder.CreateCall( |
| DispatchRaysIndexOpFunc, |
| {DispatchRaysIndexOpcode, BC.HlslOP->GetI8Const(1)}, "RayY"); |
| auto RayZ = BC.Builder.CreateCall( |
| DispatchRaysIndexOpFunc, |
| {DispatchRaysIndexOpcode, BC.HlslOP->GetI8Const(2)}, "RayZ"); |
| |
| auto CompareToX = BC.Builder.CreateICmpEQ( |
| RayX, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdX), |
| "CompareToThreadIdX"); |
| |
| auto CompareToY = BC.Builder.CreateICmpEQ( |
| RayY, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdY), |
| "CompareToThreadIdY"); |
| |
| auto CompareToZ = BC.Builder.CreateICmpEQ( |
| RayZ, BC.HlslOP->GetU32Const(m_Parameters.ComputeShader.ThreadIdZ), |
| "CompareToThreadIdZ"); |
| |
| auto CompareXAndY = |
| BC.Builder.CreateAnd(CompareToX, CompareToY, "CompareXAndY"); |
| |
| auto CompareAll = |
| BC.Builder.CreateAnd(CompareXAndY, CompareToZ, "CompareAll"); |
| return CompareAll; |
| } |
| |
| Value * |
| DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *Zero8Arg = BC.HlslOP->GetI8Const(0); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| auto LoadInputOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getInt32Ty(BC.Ctx)); |
| Constant *LoadInputOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput); |
| Constant *SV_Vert_ID = |
| BC.HlslOP->GetU32Const(SVIndices.VertexShader.VertexId); |
| auto VertId = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Vert_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "VertId"); |
| |
| Constant *SV_Instance_ID = |
| BC.HlslOP->GetU32Const(SVIndices.VertexShader.InstanceId); |
| auto InstanceId = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Instance_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "InstanceId"); |
| |
| // Compare to expected vertex ID and instance ID |
| auto CompareToVert = BC.Builder.CreateICmpEQ( |
| VertId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId), |
| "CompareToVertId"); |
| auto CompareToInstance = BC.Builder.CreateICmpEQ( |
| InstanceId, BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId), |
| "CompareToInstanceId"); |
| auto CompareBoth = |
| BC.Builder.CreateAnd(CompareToVert, CompareToInstance, "CompareBoth"); |
| |
| return CompareBoth; |
| } |
| |
| Value *DxilDebugInstrumentation::addHullhaderProlog(BuilderContext &BC) { |
| auto LoadControlPointFunction = BC.HlslOP->GetOpFunc( |
| DXIL::OpCode::OutputControlPointID, Type::getInt32Ty(BC.Ctx)); |
| Constant *LoadControlPointOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::OutputControlPointID); |
| auto ControlPointId = BC.Builder.CreateCall( |
| LoadControlPointFunction, {LoadControlPointOpcode}, "ControlPointId"); |
| |
| auto *CompareToPrimId = |
| addComparePrimitiveIdProlog(BC, m_Parameters.HullShader.PrimitiveId); |
| |
| auto CompareToControlPoint = BC.Builder.CreateICmpEQ( |
| ControlPointId, |
| BC.HlslOP->GetU32Const(m_Parameters.HullShader.ControlPointId), |
| "CompareToControlPointId"); |
| |
| auto CompareBoth = BC.Builder.CreateAnd(CompareToControlPoint, |
| CompareToPrimId, "CompareBoth"); |
| |
| return CompareBoth; |
| } |
| |
| Value *DxilDebugInstrumentation::addComparePrimitiveIdProlog(BuilderContext &BC, |
| unsigned primId) { |
| auto PrimitiveIdFunction = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx)); |
| Constant *PrimitiveIdOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::PrimitiveID); |
| auto PrimId = |
| BC.Builder.CreateCall(PrimitiveIdFunction, {PrimitiveIdOpcode}, "PrimId"); |
| |
| return BC.Builder.CreateICmpEQ(PrimId, BC.HlslOP->GetU32Const(primId), |
| "CompareToPrimId"); |
| } |
| |
| Value *DxilDebugInstrumentation::addGeometryShaderProlog(BuilderContext &BC) { |
| auto CompareToPrim = |
| addComparePrimitiveIdProlog(BC, m_Parameters.GeometryShader.PrimitiveId); |
| |
| if (BC.DM.GetGSInstanceCount() <= 1) { |
| return CompareToPrim; |
| } |
| |
| auto GSInstanceIdOpFunc = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID, |
| Type::getInt32Ty(BC.Ctx)); |
| Constant *GSInstanceIdOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::GSInstanceID); |
| auto GSInstanceId = BC.Builder.CreateCall( |
| GSInstanceIdOpFunc, {GSInstanceIdOpcode}, "GSInstanceId"); |
| |
| // Compare to expected vertex ID and instance ID |
| auto CompareToInstance = BC.Builder.CreateICmpEQ( |
| GSInstanceId, |
| BC.HlslOP->GetU32Const(m_Parameters.GeometryShader.InstanceId), |
| "CompareToInstanceId"); |
| auto CompareBoth = |
| BC.Builder.CreateAnd(CompareToPrim, CompareToInstance, "CompareBoth"); |
| |
| return CompareBoth; |
| } |
| |
| Value * |
| DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC, |
| SystemValueIndices SVIndices) { |
| Constant *Zero32Arg = BC.HlslOP->GetU32Const(0); |
| Constant *Zero8Arg = BC.HlslOP->GetI8Const(0); |
| Constant *One8Arg = BC.HlslOP->GetI8Const(1); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| // Convert SV_POSITION to UINT |
| Value *XAsInt; |
| Value *YAsInt; |
| { |
| auto LoadInputOpFunc = |
| BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx)); |
| Constant *LoadInputOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput); |
| Constant *SV_Pos_ID = |
| BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position); |
| auto XPos = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, |
| Zero8Arg /*column*/, UndefArg}, |
| "XPos"); |
| auto YPos = |
| BC.Builder.CreateCall(LoadInputOpFunc, |
| {LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, |
| One8Arg /*column*/, UndefArg}, |
| "YPos"); |
| |
| XAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, |
| Type::getInt32Ty(BC.Ctx), "XIndex"); |
| YAsInt = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, |
| Type::getInt32Ty(BC.Ctx), "YIndex"); |
| } |
| |
| // Compare to expected pixel position and primitive ID |
| auto CompareToX = BC.Builder.CreateICmpEQ( |
| XAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX"); |
| auto CompareToY = BC.Builder.CreateICmpEQ( |
| YAsInt, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY"); |
| auto ComparePos = BC.Builder.CreateAnd(CompareToX, CompareToY, "ComparePos"); |
| |
| return ComparePos; |
| } |
| |
| void DxilDebugInstrumentation::addInvocationSelectionProlog( |
| BuilderContext &BC, SystemValueIndices SVIndices, |
| DXIL::ShaderKind shaderKind) { |
| Value *ParameterTestResult = nullptr; |
| switch (shaderKind) { |
| case DXIL::ShaderKind::RayGeneration: |
| case DXIL::ShaderKind::ClosestHit: |
| case DXIL::ShaderKind::Intersection: |
| case DXIL::ShaderKind::AnyHit: |
| case DXIL::ShaderKind::Miss: |
| ParameterTestResult = addRaygenShaderProlog(BC); |
| break; |
| case DXIL::ShaderKind::Compute: |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| ParameterTestResult = addDispatchedShaderProlog(BC); |
| break; |
| case DXIL::ShaderKind::Geometry: |
| ParameterTestResult = addGeometryShaderProlog(BC); |
| break; |
| case DXIL::ShaderKind::Vertex: |
| ParameterTestResult = addVertexShaderProlog(BC, SVIndices); |
| break; |
| case DXIL::ShaderKind::Hull: |
| ParameterTestResult = addHullhaderProlog(BC); |
| break; |
| case DXIL::ShaderKind::Domain: |
| ParameterTestResult = |
| addComparePrimitiveIdProlog(BC, m_Parameters.DomainShader.PrimitiveId); |
| break; |
| case DXIL::ShaderKind::Pixel: |
| ParameterTestResult = addPixelShaderProlog(BC, SVIndices); |
| break; |
| default: |
| assert(false); // guaranteed by runOnModule |
| } |
| |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| values.SelectionCriterion = ParameterTestResult; |
| } |
| |
| void DxilDebugInstrumentation::determineLimitANDAndInitializeCounter( |
| BuilderContext &BC) { |
| |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| |
| // Split the block at the current insertion point. Insert a conditional |
| // branch that will invoke one of two new blocks depending on if this |
| // is a thread-of-interest. The two different classes of thread will |
| // then be given different limiting AND values within these new |
| // blocks. |
| |
| BasicBlock *RestOfMainBlock = BC.Builder.GetInsertBlock()->splitBasicBlock( |
| *BC.Builder.GetInsertPoint()); |
| |
| // Up to this split point is a new block that we don't need to instrument: |
| values.AddedBlocksToIgnoreForInstrumentation.push_back( |
| BC.Builder.GetInsertBlock()); |
| |
| auto *InterestingInvocationBlock = BasicBlock::Create( |
| BC.Ctx, "PIXInterestingBlock", BC.Builder.GetInsertBlock()->getParent(), |
| RestOfMainBlock); |
| values.AddedBlocksToIgnoreForInstrumentation.push_back( |
| InterestingInvocationBlock); |
| IRBuilder<> BuilderForInteresting(InterestingInvocationBlock); |
| BuilderForInteresting.CreateBr(RestOfMainBlock); |
| |
| auto *NonInterestingInvocationBlock = BasicBlock::Create( |
| BC.Ctx, "PIXNonInterestingBlock", |
| BC.Builder.GetInsertBlock()->getParent(), RestOfMainBlock); |
| values.AddedBlocksToIgnoreForInstrumentation.push_back( |
| NonInterestingInvocationBlock); |
| |
| IRBuilder<> BuilderForNonInteresting(NonInterestingInvocationBlock); |
| BuilderForNonInteresting.CreateBr(RestOfMainBlock); |
| |
| // Connect these new blocks as necessary: |
| BC.Builder.SetInsertPoint(BC.Builder.GetInsertBlock()->getTerminator()); |
| BC.Builder.CreateCondBr(values.SelectionCriterion, InterestingInvocationBlock, |
| NonInterestingInvocationBlock); |
| BC.Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); |
| |
| values.OffsetMask = BC.HlslOP->GetU32Const(m_UAVSize / 4 - 1); |
| |
| // Now add a phi that selects between two constant OR values based on |
| // which branch the thread followed above (interesting or not). |
| // The OR will either place the output in the lower half or the upper |
| // half of the UAV. |
| BC.Builder.SetInsertPoint(RestOfMainBlock->getFirstInsertionPt()); |
| auto *PHIForOr = |
| BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXOffsetOr"); |
| PHIForOr->addIncoming(BC.HlslOP->GetU32Const(0), InterestingInvocationBlock); |
| PHIForOr->addIncoming(BC.HlslOP->GetU32Const(m_UAVSize / 2), |
| NonInterestingInvocationBlock); |
| values.OffsetOr = PHIForOr; |
| |
| auto *PHIForCounterOffset = |
| BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXCounterLocation"); |
| const uint32_t InterestingCounterOffset = |
| static_cast<uint32_t>(m_UAVSize / 2 - 1); |
| PHIForCounterOffset->addIncoming( |
| BC.HlslOP->GetU32Const(InterestingCounterOffset), |
| InterestingInvocationBlock); |
| const uint32_t UninterestingCounterOffsetValue = |
| static_cast<uint32_t>(m_UAVSize - 1); |
| PHIForCounterOffset->addIncoming( |
| BC.HlslOP->GetU32Const(UninterestingCounterOffsetValue), |
| NonInterestingInvocationBlock); |
| values.CounterOffset = PHIForCounterOffset; |
| |
| // These are reported to the caller so there are fewer assumptions made by the |
| // caller about these internal details: |
| *OSOverride << "InterestingCounterOffset:" |
| << std::to_string(InterestingCounterOffset) << "\n"; |
| *OSOverride << "OverflowThreshold:" << std::to_string(m_UAVSize / 4 - 1) |
| << "\n"; |
| } |
| |
| void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, |
| uint32_t SpaceInBytes) { |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| assert(values.CurrentIndex == nullptr); |
| assert(m_RemainingReservedSpaceInBytes == 0); |
| |
| m_RemainingReservedSpaceInBytes = SpaceInBytes; |
| |
| // Insert the UAV increment instruction: |
| Function *AtomicOpFunc = |
| BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx)); |
| Constant *AtomicBinOpcode = |
| BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp); |
| Constant *AtomicAdd = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add); |
| UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| |
| Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes); |
| auto PreviousValue = BC.Builder.CreateCall( |
| AtomicOpFunc, |
| { |
| AtomicBinOpcode, // i32, ; opcode |
| values.UAVHandle, // %dx.types.Handle, ; resource handle |
| AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, |
| // XOR, IMIN, IMAX, UMIN, UMAX |
| values.CounterOffset, // i32, ; coordinate c0: index in bytes |
| UndefArg, // i32, ; coordinate c1 (unused) |
| UndefArg, // i32, ; coordinate c2 (unused) |
| Increment, // i32); increment value |
| }, |
| "UAVIncResult"); |
| |
| if (values.InvocationId == nullptr) { |
| values.InvocationId = PreviousValue; |
| } |
| |
| auto *Masked = BC.Builder.CreateAnd(PreviousValue, values.OffsetMask, |
| "MaskedForUAVLimit"); |
| values.CurrentIndex = |
| BC.Builder.CreateOr(Masked, values.OffsetOr, "ORedForUAVStart"); |
| } |
| |
| uint32_t DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, |
| Value *TheValue) { |
| assert(m_RemainingReservedSpaceInBytes > 0); |
| |
| uint32_t BytesToBeEmitted = 0; |
| |
| auto TheValueTypeID = TheValue->getType()->getTypeID(); |
| if (TheValueTypeID == Type::TypeID::DoubleTyID) { |
| Function *SplitDouble = |
| BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, TheValue->getType()); |
| Constant *SplitDoubleOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble); |
| auto SplitDoubleIntruction = BC.Builder.CreateCall( |
| SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble"); |
| auto LowBits = |
| BC.Builder.CreateExtractValue(SplitDoubleIntruction, 0, "LowBits"); |
| auto HighBits = |
| BC.Builder.CreateExtractValue(SplitDoubleIntruction, 1, "HighBits"); |
| // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding |
| addDebugEntryValue(BC, LowBits); |
| addDebugEntryValue(BC, HighBits); |
| BytesToBeEmitted += 8; |
| } else if (TheValueTypeID == Type::TypeID::IntegerTyID && |
| TheValue->getType()->getIntegerBitWidth() == 64) { |
| auto LowBits = |
| BC.Builder.CreateTrunc(TheValue, Type::getInt32Ty(BC.Ctx), "LowBits"); |
| auto ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits"); |
| auto HighBits = BC.Builder.CreateTrunc( |
| ShiftedBits, Type::getInt32Ty(BC.Ctx), "HighBits"); |
| // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding |
| addDebugEntryValue(BC, LowBits); |
| addDebugEntryValue(BC, HighBits); |
| BytesToBeEmitted += 8; |
| } else if (TheValueTypeID == Type::TypeID::IntegerTyID && |
| (TheValue->getType()->getIntegerBitWidth() == 16 || |
| TheValue->getType()->getIntegerBitWidth() == 1)) { |
| auto As32 = |
| BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32"); |
| BytesToBeEmitted += addDebugEntryValue(BC, As32); |
| } else if (TheValueTypeID == Type::TypeID::HalfTyID) { |
| auto AsFloat = |
| BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat"); |
| BytesToBeEmitted += addDebugEntryValue(BC, AsFloat); |
| } else { |
| Function *StoreValue = |
| BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, |
| TheValue->getType()); // Type::getInt32Ty(BC.Ctx)); |
| Constant *StoreValueOpcode = |
| BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore); |
| UndefValue *Undef32Arg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| UndefValue *UndefArg = nullptr; |
| if (TheValueTypeID == Type::TypeID::IntegerTyID) { |
| UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); |
| } else if (TheValueTypeID == Type::TypeID::FloatTyID) { |
| UndefArg = UndefValue::get(Type::getFloatTy(BC.Ctx)); |
| } else { |
| // The above are the only two valid types for a UAV store |
| assert(false); |
| } |
| BytesToBeEmitted += 4; |
| Constant *WriteMask_X = BC.HlslOP->GetI8Const(1); |
| |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| |
| (void)BC.Builder.CreateCall( |
| StoreValue, {StoreValueOpcode, // i32 opcode |
| values.UAVHandle, // %dx.types.Handle, ; resource handle |
| values.CurrentIndex, // i32 c0: index in bytes into UAV |
| Undef32Arg, // i32 c1: unused |
| TheValue, |
| UndefArg, // unused values |
| UndefArg, // unused values |
| UndefArg, // unused values |
| WriteMask_X}); |
| |
| assert(m_RemainingReservedSpaceInBytes >= 4); // check for underflow |
| m_RemainingReservedSpaceInBytes -= 4; |
| |
| if (m_RemainingReservedSpaceInBytes != 0) { |
| values.CurrentIndex = |
| BC.Builder.CreateAdd(values.CurrentIndex, BC.HlslOP->GetU32Const(4)); |
| } else { |
| values.CurrentIndex = nullptr; |
| } |
| } |
| |
| return BytesToBeEmitted; |
| } |
| |
| void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) { |
| DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0}; |
| reserveDebugEntrySpace(BC, sizeof(marker)); |
| |
| marker.Header.Details.SizeDwords = |
| DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker)); |
| marker.Header.Details.Flags = 0; |
| marker.Header.Details.Type = |
| DebugShaderModifierRecordTypeInvocationStartMarker; |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(marker.Header.u32Header)); |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| addDebugEntryValue(BC, values.InvocationId); |
| } |
| |
| template <typename ReturnType> |
| void DxilDebugInstrumentation::addStepEntryForType( |
| DebugShaderModifierRecordType RecordType, BuilderContext &BC, |
| std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex) { |
| DebugShaderModifierRecordDXILStep<ReturnType> step = {}; |
| reserveDebugEntrySpace(BC, sizeof(step)); |
| |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| |
| step.Header.Details.SizeDwords = |
| DebugShaderModifierRecordPayloadSizeDwords(sizeof(step)); |
| step.Header.Details.Type = static_cast<uint8_t>(RecordType); |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header)); |
| addDebugEntryValue(BC, values.InvocationId); |
| addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum)); |
| if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid && |
| RecordType != DebugShaderModifierRecordTypeDXILStepRet) { |
| addDebugEntryValue(BC, V); |
| IRBuilder<> &B = BC.Builder; |
| |
| Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16); |
| Value *VOI = B.CreateAnd(ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF), |
| "ValueOrdinalIndex"); |
| Value *EncodedValueOrdinalAndIndex = |
| BC.Builder.CreateOr(VO, VOI, "ValueOrdinal"); |
| addDebugEntryValue(BC, EncodedValueOrdinalAndIndex); |
| } |
| } |
| |
| std::optional<InstructionAndType> |
| DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext *BC, |
| StoreInst *Inst) { |
| std::uint32_t ValueOrdinalBase; |
| std::uint32_t UnusedValueOrdinalSize; |
| llvm::Value *ValueOrdinalIndex; |
| if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase, |
| &UnusedValueOrdinalSize, |
| &ValueOrdinalIndex)) { |
| return std::nullopt; |
| } |
| |
| std::uint32_t InstNum; |
| if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { |
| return std::nullopt; |
| } |
| |
| auto Type = addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), |
| ValueOrdinalBase, ValueOrdinalIndex); |
| if (Type) { |
| if (Instruction *ValueAsInst = |
| dyn_cast<Instruction>(Inst->getValueOperand())) { |
| uint32_t RegNum = 0; |
| if (pix_dxil::PixDxilReg::FromInst(ValueAsInst, &RegNum)) { |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = *Type; |
| ret.RegisterNumber = RegNum; |
| ret.AllocaBase = ValueOrdinalBase; |
| ret.AllocaWriteIndex = ValueOrdinalIndex; |
| return ret; |
| } |
| } else if (Constant *ValueAsConst = |
| dyn_cast<Constant>(Inst->getValueOperand())) { |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = *Type; |
| ret.AllocaBase = ValueOrdinalBase; |
| ret.AllocaWriteIndex = ValueOrdinalIndex; |
| |
| switch (ValueAsConst->getType()->getTypeID()) { |
| case Type::HalfTyID: |
| case Type::FloatTyID: |
| case Type::DoubleTyID: |
| ret.ConstantAllocaStoreValue = dyn_cast<ConstantFP>(ValueAsConst) |
| ->getValueAPF() |
| .bitcastToAPInt() |
| .getLimitedValue(); |
| break; |
| case Type::IntegerTyID: |
| ret.ConstantAllocaStoreValue = |
| dyn_cast<ConstantInt>(ValueAsConst)->getLimitedValue(); |
| break; |
| default: |
| return std::nullopt; |
| } |
| return ret; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<InstructionAndType> DxilDebugInstrumentation::addStepDebugEntry( |
| BuilderContext *BC, Instruction *Inst, |
| llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles) { |
| |
| std::uint32_t InstNum; |
| if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { |
| return std::nullopt; |
| } |
| |
| if (RayQueryHandles.count(Inst) != 0) { |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = DebugShaderModifierRecordTypeDXILStepVoid; |
| return ret; |
| } |
| |
| if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst)) { |
| return addStoreStepDebugEntry(BC, St); |
| } |
| |
| if (auto *Ld = llvm::dyn_cast<llvm::LoadInst>(Inst)) { |
| if (llvm::isa<ConstantExpr>(Ld->getPointerOperand())) { |
| auto *constant = llvm::cast<ConstantExpr>(Ld->getPointerOperand()); |
| if (constant->getOpcode() == Instruction::GetElementPtr) { |
| PIXPassHelpers::ScopedInstruction asInstr(constant->getAsInstruction()); |
| auto *GEP = llvm::cast<GetElementPtrInst>(asInstr.Get()); |
| if (GEP->getPointerOperand()->getName().equals("dx.nothing.a")) { |
| // These debug-only loads are interesting as instructions to |
| // step though where otherwise no step might exist for the |
| // given HLSL lines, so we include them in the instrumentation: |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = DebugShaderModifierRecordTypeDXILStepVoid; |
| return ret; |
| } |
| } |
| } |
| } |
| |
| std::uint32_t RegNum; |
| if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) { |
| if (Inst->getOpcode() == Instruction::Ret) { |
| if (BC != nullptr) |
| addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepRet, *BC, |
| InstNum, nullptr, 0, 0); |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = DebugShaderModifierRecordTypeDXILStepRet; |
| return ret; |
| } else if (Inst->isTerminator()) { |
| if (BC != nullptr) |
| addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, |
| *BC, InstNum, nullptr, 0, 0); |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = DebugShaderModifierRecordTypeDXILStepVoid; |
| return ret; |
| } |
| return std::nullopt; |
| } |
| auto Type = addStepDebugEntryValue(BC, InstNum, Inst, RegNum, |
| BC ? BC->Builder.getInt32(0) : nullptr); |
| if (Type) { |
| InstructionAndType ret{}; |
| ret.Inst = Inst; |
| ret.InstructionOrdinal = InstNum; |
| ret.Type = *Type; |
| ret.RegisterNumber = RegNum; |
| return ret; |
| } |
| return std::nullopt; |
| } |
| |
| std::optional<DebugShaderModifierRecordType> |
| DxilDebugInstrumentation::addStepDebugEntryValue(BuilderContext *BC, |
| std::uint32_t InstNum, |
| Value *V, |
| std::uint32_t ValueOrdinal, |
| Value *ValueOrdinalIndex) { |
| const Type::TypeID ID = V->getType()->getTypeID(); |
| |
| switch (ID) { |
| case Type::TypeID::StructTyID: |
| case Type::TypeID::VoidTyID: |
| if (BC != nullptr) |
| addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, *BC, |
| InstNum, V, ValueOrdinal, ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepVoid; |
| case Type::TypeID::FloatTyID: |
| if (BC != nullptr) |
| addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, |
| *BC, InstNum, V, ValueOrdinal, |
| ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepFloat; |
| case Type::TypeID::IntegerTyID: |
| if (V->getType()->getIntegerBitWidth() == 64) { |
| if (BC != nullptr) |
| addStepEntryForType<uint64_t>( |
| DebugShaderModifierRecordTypeDXILStepUint64, *BC, InstNum, V, |
| ValueOrdinal, ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepUint64; |
| } else { |
| if (BC != nullptr) |
| addStepEntryForType<uint32_t>( |
| DebugShaderModifierRecordTypeDXILStepUint32, *BC, InstNum, V, |
| ValueOrdinal, ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepUint32; |
| } |
| case Type::TypeID::DoubleTyID: |
| if (BC != nullptr) |
| addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble, |
| *BC, InstNum, V, ValueOrdinal, |
| ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepDouble; |
| case Type::TypeID::HalfTyID: |
| if (BC != nullptr) |
| addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat, |
| *BC, InstNum, V, ValueOrdinal, |
| ValueOrdinalIndex); |
| return DebugShaderModifierRecordTypeDXILStepFloat; |
| case Type::TypeID::PointerTyID: |
| // Skip pointer calculation instructions. They aren't particularly |
| // meaningful to the user (being a mere implementation detail for lookup |
| // tables, etc.), and their type is problematic from a UI point of view. |
| // The subsequent instructions that dereference the pointer will be |
| // properly instrumented and show the (meaningful) retrieved value. |
| break; |
| case Type::TypeID::VectorTyID: |
| // Shows up in "insertelement" in raygen shader? |
| break; |
| case Type::TypeID::FP128TyID: |
| case Type::TypeID::LabelTyID: |
| case Type::TypeID::MetadataTyID: |
| case Type::TypeID::FunctionTyID: |
| case Type::TypeID::ArrayTyID: |
| case Type::TypeID::X86_FP80TyID: |
| case Type::TypeID::X86_MMXTyID: |
| case Type::TypeID::PPC_FP128TyID: |
| assert(false); |
| } |
| return std::nullopt; |
| } |
| |
| bool DxilDebugInstrumentation::runOnModule(Module &M) { |
| DxilModule &DM = M.GetOrCreateDxilModule(); |
| |
| // There is no point running this pass if it can't return its report: |
| if (OSOverride == nullptr) |
| return false; |
| |
| auto ShaderModel = DM.GetShaderModel(); |
| auto shaderKind = ShaderModel->GetKind(); |
| |
| bool modified = false; |
| if (shaderKind == DXIL::ShaderKind::Library) { |
| auto instrumentableFunctions = |
| PIXPassHelpers::GetAllInstrumentableFunctions(DM); |
| for (auto *F : instrumentableFunctions) { |
| if (RunOnFunction(M, DM, F)) { |
| modified = true; |
| } |
| } |
| } else { |
| llvm::Function *entryFunction = PIXPassHelpers::GetEntryFunction(DM); |
| modified = RunOnFunction(M, DM, entryFunction); |
| } |
| return modified; |
| } |
| |
| struct RecordTypeDatum { |
| DebugShaderModifierRecordType Type; |
| uint32_t PayloadSize; |
| const char *AsString; |
| }; |
| |
| static const RecordTypeDatum RecordTypeData[] = { |
| {DebugShaderModifierRecordTypeDXILStepRet, 0, "r"}, |
| {DebugShaderModifierRecordTypeDXILStepVoid, 0, "v"}, |
| {DebugShaderModifierRecordTypeDXILStepFloat, 4, "f"}, |
| {DebugShaderModifierRecordTypeDXILStepUint32, 4, "3"}, |
| {DebugShaderModifierRecordTypeDXILStepUint64, 8, "6"}, |
| {DebugShaderModifierRecordTypeDXILStepDouble, 8, "d"}}; |
| |
| std::optional<RecordTypeDatum const *> |
| FindDatum(DebugShaderModifierRecordType RecordType) { |
| for (auto const &datum : RecordTypeData) { |
| if (datum.Type == RecordType) { |
| return &datum; |
| } |
| } |
| return std::nullopt; |
| } |
| |
| uint32_t DxilDebugInstrumentation::CountBlockPayloadBytes( |
| std::vector<InstructionToInstrument> const &IsAndTs) { |
| uint32_t count = 0; |
| for (auto const &IandT : IsAndTs) { |
| auto datum = FindDatum(IandT.ValueType); |
| if (datum) |
| count += (*datum)->PayloadSize; |
| } |
| return count; |
| } |
| |
| const char *TypeString(InstructionAndType const &IandT) { |
| auto datum = FindDatum(IandT.Type); |
| if (datum) |
| return (*datum)->AsString; |
| assert(false); |
| return "v"; |
| } |
| |
| Instruction *FindFirstNonPhiInstruction(Instruction *I) { |
| while (llvm::isa<llvm::PHINode>(I)) |
| I = I->getNextNode(); |
| return I; |
| } |
| |
| // This function reports a textual representation of the format |
| // of the debug data that will be output by the instructions |
| // added by this pass. |
| // The string has one or more lines of the exemplary form |
| // Block#3:5,f,22,a;7,f,22,s,20;9,f,22,s,20;10,f,23,a;12,f,23,s,21; |
| // The integer after the Block# is the first instruction number in the |
| // block. |
| // Instructions are delimited by ; The fields within the instruction |
| // (delimited by ,) are, in order: |
| // -instruction ordinal |
| // -data type (r=ret, v=void, f=float, 3=int32, 6=int64, d=double) |
| // -scalar register number |
| // -alloca/scalar indicator: |
| // r == ret instruction |
| // a == scalar is being created and assigned a value, and that |
| // value is in the debug output. |
| // s == Existing scalar is being assigned via static alloca index. |
| // Index is appended to this instruction record. No |
| // corresponding data in the debug output. |
| // d == A dynamic index added to the static base index. Base index |
| // is appended to this record. The corresponding debug entry is |
| // the dynamic index into that alloca. |
| // v == A void terminator or other void-valued instruction. No |
| // corresponding data in the debug output. |
| // If indicator is "a", a string of the form [base+index] for the alloca |
| // store location. |
| // If indicator is "d", a single integer denoting the base for the alloca |
| // store. |
| DxilDebugInstrumentation::BlockInstrumentationData |
| DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock( |
| BasicBlock &BB, OP *HlslOP, |
| llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles) { |
| BlockInstrumentationData ret{}; |
| auto &Is = BB.getInstList(); |
| *OSOverride << "Block#"; |
| bool FoundFirstInstruction = false; |
| for (auto &Inst : Is) { |
| if (!FoundFirstInstruction) { |
| std::uint32_t InstNum; |
| if (pix_dxil::PixDxilInstNum::FromInst(&Inst, &InstNum)) { |
| *OSOverride << std::to_string(InstNum) << ":"; |
| ret.FirstInstructionOrdinalInBlock = InstNum; |
| FoundFirstInstruction = true; |
| } |
| } |
| auto IandT = addStepDebugEntry(nullptr, &Inst, RayQueryHandles); |
| if (IandT) { |
| InstructionToInstrument DebugOutputForThisInstruction{}; |
| DebugOutputForThisInstruction.ValueType = IandT->Type; |
| auto *InsertionPoint = FindFirstNonPhiInstruction(&Inst); |
| if (InsertionPoint->isTerminator() || llvm::isa<llvm::PHINode>(Inst)) |
| DebugOutputForThisInstruction |
| .InstructionBeforeWhichToAddInstrumentation = InsertionPoint; |
| else |
| DebugOutputForThisInstruction |
| .InstructionAfterWhichToAddInstrumentation = InsertionPoint; |
| |
| const char *IndexingToken = nullptr; |
| std::optional<std::string> RegisterOrStaticIndex; |
| if (IandT->Type == DebugShaderModifierRecordTypeDXILStepRet) { |
| IndexingToken = "r"; |
| } else if (IandT->Type == DebugShaderModifierRecordTypeDXILStepVoid) { |
| IndexingToken = "v"; // void instruction, no debug output required |
| } else if (IandT->AllocaWriteIndex != nullptr) { |
| if (ConstantInt *IndexAsConstant = |
| dyn_cast<ConstantInt>(IandT->AllocaWriteIndex)) { |
| RegisterOrStaticIndex = |
| std::to_string(IandT->AllocaBase) + "+" + |
| std::to_string(IndexAsConstant->getLimitedValue()); |
| IndexingToken = "s"; // static indexing, no debug output required |
| } else { |
| IndexingToken = "d"; // dynamic indexing |
| RegisterOrStaticIndex = std::to_string(IandT->AllocaBase); |
| DebugOutputForThisInstruction.ValueToWriteToDebugMemory = |
| IandT->AllocaWriteIndex; |
| } |
| } else { |
| IndexingToken = "a"; // meaning an SSA assignment |
| // todo: Can SSA Values be assigned a literal constant? |
| DebugOutputForThisInstruction.ValueToWriteToDebugMemory = IandT->Inst; |
| } |
| |
| *OSOverride << std::to_string(IandT->InstructionOrdinal) << "," |
| << TypeString(*IandT) << "," |
| << std::to_string(IandT->RegisterNumber) << "," |
| << IndexingToken; |
| if (RegisterOrStaticIndex) { |
| *OSOverride << "," << *RegisterOrStaticIndex; |
| } |
| if (IandT->ConstantAllocaStoreValue) { |
| *OSOverride << "," << std::to_string(*IandT->ConstantAllocaStoreValue); |
| } |
| *OSOverride << ";"; |
| if (DebugOutputForThisInstruction.ValueToWriteToDebugMemory) |
| ret.Instructions.push_back(std::move(DebugOutputForThisInstruction)); |
| } |
| } |
| *OSOverride << "\n"; |
| return ret; |
| } |
| |
| bool DxilDebugInstrumentation::RunOnFunction(Module &M, DxilModule &DM, |
| llvm::Function *function) { |
| DXIL::ShaderKind shaderKind = |
| PIXPassHelpers::GetFunctionShaderKind(DM, function); |
| |
| switch (shaderKind) { |
| case DXIL::ShaderKind::Amplification: |
| case DXIL::ShaderKind::Mesh: |
| case DXIL::ShaderKind::Vertex: |
| case DXIL::ShaderKind::Geometry: |
| case DXIL::ShaderKind::Pixel: |
| case DXIL::ShaderKind::Compute: |
| case DXIL::ShaderKind::RayGeneration: |
| case DXIL::ShaderKind::Hull: |
| case DXIL::ShaderKind::Domain: |
| case DXIL::ShaderKind::Intersection: |
| case DXIL::ShaderKind::AnyHit: |
| case DXIL::ShaderKind::ClosestHit: |
| case DXIL::ShaderKind::Miss: |
| break; |
| default: |
| return false; |
| } |
| llvm::SmallPtrSet<Value *, 16> RayQueryHandles; |
| PIXPassHelpers::FindRayQueryHandlesForFunction(function, RayQueryHandles); |
| |
| Instruction *firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt(function); |
| IRBuilder<> Builder(firstInsertionPt); |
| |
| LLVMContext &Ctx = M.getContext(); |
| OP *HlslOP = DM.GetOP(); |
| |
| BuilderContext BC{M, DM, Ctx, HlslOP, Builder}; |
| |
| auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; |
| |
| // PIX binds two UAVs when running this instrumentation: one for raygen |
| // shaders and another for the hitgroups and miss shaders. Since PIX invokes |
| // this pass at the library level, which may contain examples of both types, |
| // PIX can't really specify which UAV index to use per-shader. This pass |
| // therefore just has to know this: |
| constexpr unsigned int RayGenUAVRegister = 0; |
| constexpr unsigned int HitGroupAndMissUAVRegister = 1; |
| unsigned int UAVRegisterId = RayGenUAVRegister; |
| switch (shaderKind) { |
| case DXIL::ShaderKind::ClosestHit: |
| case DXIL::ShaderKind::Intersection: |
| case DXIL::ShaderKind::AnyHit: |
| case DXIL::ShaderKind::Miss: |
| UAVRegisterId = HitGroupAndMissUAVRegister; |
| break; |
| } |
| |
| values.UAVHandle = PIXPassHelpers::CreateUAV(DM, Builder, UAVRegisterId, |
| "PIX_DebugUAV_Handle"); |
| |
| auto SystemValues = addRequiredSystemValues(BC, shaderKind); |
| addInvocationSelectionProlog(BC, SystemValues, shaderKind); |
| determineLimitANDAndInitializeCounter(BC); |
| addInvocationStartMarker(BC); |
| |
| // Instrument original instructions: |
| for (auto &BB : function->getBasicBlockList()) { |
| if (std::find(values.AddedBlocksToIgnoreForInstrumentation.begin(), |
| values.AddedBlocksToIgnoreForInstrumentation.end(), |
| &BB) == values.AddedBlocksToIgnoreForInstrumentation.end()) { |
| auto BlockInstrumentation = |
| FindInstrumentableInstructionsInBlock(BB, BC.HlslOP, RayQueryHandles); |
| if (BlockInstrumentation.FirstInstructionOrdinalInBlock < |
| m_FirstInstruction || |
| BlockInstrumentation.FirstInstructionOrdinalInBlock >= |
| m_LastInstruction) |
| continue; |
| uint32_t BlockPayloadBytes = |
| CountBlockPayloadBytes(BlockInstrumentation.Instructions); |
| // If the block has no instructions which require debug output, |
| // we will still write an empty block header at the end of that |
| // block (i.e. before the terminator) so that the instrumentation |
| // at least indicates that flow control went through the block. |
| Instruction *BlockInstrumentationStart = (BB).getTerminator(); |
| if (!BlockInstrumentation.Instructions.empty()) { |
| auto const &First = BlockInstrumentation.Instructions[0]; |
| if (First.InstructionAfterWhichToAddInstrumentation != nullptr) |
| BlockInstrumentationStart = |
| First.InstructionAfterWhichToAddInstrumentation; |
| else if (First.InstructionBeforeWhichToAddInstrumentation != nullptr) |
| BlockInstrumentationStart = |
| First.InstructionBeforeWhichToAddInstrumentation; |
| else { |
| assert(false); |
| continue; |
| } |
| } |
| IRBuilder<> Builder(BlockInstrumentationStart); |
| BuilderContext BCForBlock{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; |
| |
| DebugShaderModifierRecordDXILBlock step = {}; |
| auto FullRecordSize = |
| static_cast<uint32_t>(sizeof(step) + BlockPayloadBytes); |
| if (FullRecordSize >= (m_UAVSize / 4) - 1) { |
| *OSOverride << "StaticOverflow:" << std::to_string(FullRecordSize) |
| << "\n"; |
| break; |
| } |
| reserveDebugEntrySpace(BCForBlock, FullRecordSize); |
| step.Header.Details.CountOfInstructions = |
| static_cast<uint16_t>(BlockInstrumentation.Instructions.size()); |
| step.Header.Details.Type = |
| static_cast<uint8_t>(DebugShaderModifierRecordTypeDXILStepBlock); |
| addDebugEntryValue(BCForBlock, |
| BCForBlock.HlslOP->GetU32Const(step.Header.u32Header)); |
| addDebugEntryValue(BCForBlock, values.InvocationId); |
| addDebugEntryValue( |
| BCForBlock, BCForBlock.HlslOP->GetU32Const( |
| BlockInstrumentation.FirstInstructionOrdinalInBlock)); |
| for (auto &Inst : BlockInstrumentation.Instructions) { |
| Instruction *BuilderInstruction; |
| if (Inst.InstructionAfterWhichToAddInstrumentation != nullptr) |
| BuilderInstruction = |
| Inst.InstructionAfterWhichToAddInstrumentation->getNextNode(); |
| else if (Inst.InstructionBeforeWhichToAddInstrumentation != nullptr) |
| BuilderInstruction = Inst.InstructionBeforeWhichToAddInstrumentation; |
| else { |
| assert(false); |
| continue; |
| } |
| IRBuilder<> Builder(BuilderInstruction); |
| BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; |
| addDebugEntryValue(BC2, Inst.ValueToWriteToDebugMemory); |
| } |
| } |
| } |
| |
| DM.ReEmitDxilResources(); |
| |
| return true; |
| } |
| |
| char DxilDebugInstrumentation::ID = 0; |
| |
| ModulePass *llvm::createDxilDebugInstrumentationPass() { |
| return new DxilDebugInstrumentation(); |
| } |
| |
| INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation", |
| "HLSL DXIL debug instrumentation for PIX", false, false) |