// blob: 66e16c241513483c5bf634416b613a3f4c0fe8a3 [file] [log] [blame] [edit]
///////////////////////////////////////////////////////////////////////////////
// //
// DxilDebugInstrumentation.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Adds instrumentation that enables shader debugging in PIX //
// //
///////////////////////////////////////////////////////////////////////////////
#include <optional>
#include <vector>
#include "dxc/DXIL/DxilFunctionProps.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h"
#include "dxc/HLSL/DxilGenerationPass.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "PixPassHelpers.h"
using namespace llvm;
using namespace hlsl;
// Overview of instrumentation:
//
// In summary, instructions are added that cause a "trace" of the execution of
// the shader to be written out to a UAV. This trace is then used by a debugger
// application to provide a postmortem debugging experience that reconstructs
// the execution history of the shader. The caller specifies the power-of-two
// size of the UAV.
//
// The instrumentation is added per basic block, and each block will then write
// a contiguous sequence of values into the UAV.
//
// The trace is only required for particular shader instances of interest, and
// a branchless mechanism is used to write the trace either to an incrementing
// location within the UAV, or to a "dumping ground" area in the top half of the
// UAV if the instance is not of interest.
//
// In addition, each half of the UAV is further subdivided: the first quarter is
// the area in which blocks are permitted to start writing their sequence, and
// that sequence is constrained to be no longer than the size of the second
// quarter. This allows us to limit writes to the appropriate half of the UAV
// via a single AND at the beginning of the basic block. An additional OR
// provides the offset, either 0 for threads-of-interest, or UAVSize/2 for
// not-of-interest.
//
// Threads determine where to start writing their data by incrementing a DWORD
// that lives at the very top of that thread's half of the UAV. This is done
// because several threads may satisfy the selection criteria (e.g. a pixel
// shader may be invoked several times for a given pixel coordinate if the model
// has overlapping triangles).
//
// A picture of the UAV layout:
// <--------------power-of-two-size-of-UAV---------------->
// [1           ][2           ][3           ][4           ]
// <------A----->             ^                           ^
//                            B                           C
//                             <------D------>
//
// A: the size of the AND for interesting writes. Their payloads extend
// beyond this into area 2, but those payloads are limited to be small
// enough (1/4 UAV size -1) that they don't overwrite B.
// B: The interesting thread's counter.
// C: The uninteresting thread's counter.
// D: Size of the AND for uninteresting threads (same value as A)
//
// The following modifications are made by this pass:
//
// First, instructions are added to the top of the entry point function that
// implement the following:
// - Examine the input variables that define the instance of the shader that is
// running. This will be SV_Position for pixel shaders,
// SV_VertexID+SV_InstanceID for vertex shaders, thread id for compute
// shaders etc. If these system values need to be added to the shader, then
// they are also added to the input signature, if appropriate.
// - Compare the above variables with the instance of interest defined by the
// invoker of this pass. If equal, create an OR value of zero that will
// not affect the block's starting write offset. If not equal, the OR will
// move the writes into the second half of the UAV.
// - Calculate an "instance identifier". Even with the above instance
// identification, several invocations may end up matching the selection
// criteria. More on this below.
//
// As mentioned, a counter/offset is maintained at the top of the thread's
// half of the UAV. The very first value of this counter that
// is encountered by each invocation is used as the "instance identifier"
// mentioned above. That instance identifier is written out with each packet,
// since many threads executing in parallel will emit interleaved packets,
// and the debugger application uses the identifiers to gather packets from each
// separate invocation together.
//
// In addition to the above, this pass creates a text precis of the structure
// being written out for each basic block. This precis is passed back to the
// caller, and can be used to parse the UAV output later. The precis will
// contain notes about void-type instructions, which won't write anything to the
// UAV, allowing the caller to reconstruct those instructions.
// Some care has to be taken about whether to emit UAV writes after the
// corresponding instruction or before. Terminators must emit their UAV data
// before the terminator itself, of course. Phi instructions get special
// treatment also: their instrumentation has to come after (since phis must be
// the first instructions in the block), but also the instrumentation must
// execute in the same order as the precis specifies, or the caller will mix
// up the phi values. We achieve this by saying that phi instrumentation must
// come before the first non-phi instruction in the block.
// Some blocks will have all-void instructions, so that no debugging
// data is emitted at all. These blocks still produce a precis, and still
// need to be noticed during execution, so an empty block header is emitted
// into the UAV.
//
// Error conditions:
// Overflow of the debug output from the interesting threads will start to
// overwrite their own area of the UAV (after the AND limits those writes
// to the lower half of the UAV (thus, by the way, avoiding overwriting
// their counter value)). The caller must check the counter value after
// the debugging run is complete to see if this happened, and if so, increase
// the UAV size and try again.
// Uninteresting threads use an AND value that limits their writes to the
// upper half of the UAV and can be entirely ignored by the caller.
// Since a sufficiently-large block is guaranteed to overflow the UAV,
// the precis-creation can exit early and report this "static" overflow
// condition to the caller.
// In all overflow cases, the caller is expected to try to instrument again,
// with a larger UAV.
// These definitions echo those in the debugger application's
// debugshaderrecord.h file
// Type tags for the debug records this pass describes. The DXILStep* values
// are explicitly numbered 249..255; the earlier enumerators take the implicit
// values 0..8. NOTE(review): the numbering is presumably part of the format
// shared with the debugger application, so do not renumber without checking
// the debugger's copy of this enum.
enum DebugShaderModifierRecordType {
// Legacy record kinds (implicit values 0..8).
DebugShaderModifierRecordTypeInvocationStartMarker,
DebugShaderModifierRecordTypeStep,
DebugShaderModifierRecordTypeEvent,
DebugShaderModifierRecordTypeInputRegister,
DebugShaderModifierRecordTypeReadRegister,
DebugShaderModifierRecordTypeWrittenRegister,
DebugShaderModifierRecordTypeRegisterRelativeIndex0,
DebugShaderModifierRecordTypeRegisterRelativeIndex1,
DebugShaderModifierRecordTypeRegisterRelativeIndex2,
// Note that everything above this line is no longer used, but is kept
// here in order to keep this file more in-sync with the debugger source.
// (As of this writing, the debugger still supports older versions of this
// pass which produced finer-grained debug packets.)
// Current record kinds: a per-block header, followed by one kind per value
// category (ret, void, float, 32-bit uint, 64-bit uint, double).
DebugShaderModifierRecordTypeDXILStepBlock = 249,
DebugShaderModifierRecordTypeDXILStepRet = 250,
DebugShaderModifierRecordTypeDXILStepVoid = 251,
DebugShaderModifierRecordTypeDXILStepFloat = 252,
DebugShaderModifierRecordTypeDXILStepUint32 = 253,
DebugShaderModifierRecordTypeDXILStepUint64 = 254,
DebugShaderModifierRecordTypeDXILStepDouble = 255,
};
// These structs echo those in the debugger application's debugshaderrecord.h
// file, but are recapitulated here because the originals use unnamed unions
// which are disallowed by DXCompiler's build.
//
#pragma pack(push, 4)
// Header shared by debug records: one 32-bit header word followed by a 32-bit
// UID. The header word can be accessed either as bit-fields (Details) or as a
// single value (u32Header); the bit-fields add up to 32 bits.
// DebugShaderModifierRecordPayloadSizeDwords() measures payloads relative to
// sizeof(this struct).
// NOTE(review): UID is presumably the per-invocation identifier described in
// the file overview - confirm against the debugger's debugshaderrecord.h.
struct DebugShaderModifierRecordHeader {
union {
struct {
// 4 + 4 + 8 + 16 bits.
uint32_t SizeDwords : 4;
uint32_t Flags : 4;
uint32_t Type : 8;
uint32_t HeaderPayload : 16;
} Details;
uint32_t u32Header;
} Header;
uint32_t UID;
};
// Common leading fields of a per-instruction "step" record: a 32-bit header
// word (same bit-field split as DebugShaderModifierRecordHeader, with the top
// 16 bits named Opcode), a UID and an InstructionOffset.
// DebugShaderModifierRecordDXILStep<T> derives from this and appends the
// value-specific fields.
struct DebugShaderModifierRecordDXILStepBase {
union {
struct {
// 4 + 4 + 8 + 16 bits.
uint32_t SizeDwords : 4;
uint32_t Flags : 4;
uint32_t Type : 8;
uint32_t Opcode : 16;
} Details;
uint32_t u32Header;
} Header;
uint32_t UID;
uint32_t InstructionOffset;
};
// Per-basic-block record: a 32-bit header word whose low 8 bits are unused,
// followed by an 8-bit Type and a 16-bit CountOfInstructions, then a UID and
// the ordinal of the first instruction in the block.
// NOTE(review): Type is presumably DebugShaderModifierRecordTypeDXILStepBlock
// for these records - the code that fills it in is outside this view.
struct DebugShaderModifierRecordDXILBlock {
union {
struct {
// 4 + 4 + 8 + 16 bits; the two NotUsed fields keep Type at the same bit
// position as in the other record headers.
uint32_t NotUsed0 : 4;
uint32_t NotUsed1 : 4;
uint32_t Type : 8;
uint32_t CountOfInstructions : 16;
} Details;
uint32_t u32Header;
} Header;
uint32_t UID;
uint32_t FirstInstructionOrdinal;
};
// Step record carrying a value of type ReturnType: the base step fields,
// followed by the value itself and a 32-bit "value ordinal" that can be read
// either as two 16-bit halves (base and index) or as one 32-bit word.
// The void specialization carries no value fields at all.
template <typename ReturnType>
struct DebugShaderModifierRecordDXILStep
: public DebugShaderModifierRecordDXILStepBase {
ReturnType ReturnValue;
union {
struct {
// 16 + 16 bits.
uint32_t ValueOrdinalBase : 16;
uint32_t ValueOrdinalIndex : 16;
} Details;
uint32_t u32ValueOrdinal;
} ValueOrdinal;
};
// Specialization for instructions that produce no value: the record consists
// of the base step fields only (no ReturnValue, no ValueOrdinal).
template <>
struct DebugShaderModifierRecordDXILStep<void>
: public DebugShaderModifierRecordDXILStepBase {};
#pragma pack(pop)
// Returns how many 32-bit words of a record lie beyond the common
// DebugShaderModifierRecordHeader.
//
// recordTotalSizeBytes: total size of the record in bytes, header included.
// The subtraction is unsigned, so the caller must pass a size that is at
// least sizeof(DebugShaderModifierRecordHeader).
uint32_t
DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) {
  const size_t PayloadBytes =
      recordTotalSizeBytes - sizeof(DebugShaderModifierRecordHeader);
  return PayloadBytes / sizeof(uint32_t);
}
// Result type of addStoreStepDebugEntry / addStepDebugEntry (returned wrapped
// in std::optional): the instruction that was examined plus the data needed
// to describe its debug record.
// NOTE(review): the per-field notes below are inferred from the field names
// and types only; the code that fills them in is outside this view.
struct InstructionAndType {
// The instruction this entry describes.
Instruction *Inst;
// Ordinal of the instruction (presumably its position in the function).
std::uint32_t InstructionOrdinal;
// Record type tag for the value the instruction produces.
DebugShaderModifierRecordType Type;
// Presumably the PIX virtual register number assigned to the value.
std::uint32_t RegisterNumber;
// Presumably the base register of the alloca being written, for stores.
std::uint32_t AllocaBase;
// Presumably the (possibly dynamic) index written within that alloca;
// defaults to nullptr.
Value *AllocaWriteIndex = nullptr;
// Set only when a store writes a compile-time constant (presumably).
std::optional<uint64_t> ConstantAllocaStoreValue;
};
// Module pass that adds PIX shader-debugging instrumentation: it selects the
// shader invocation of interest and writes a trace of executed instructions
// into a UAV (see the overview comment at the top of this file).
//
// Pass options (read by applyOptions): "FirstInstruction", "LastInstruction",
// "parameter0".."parameter2" (the invocation-selection values) and "UAVSize".
class DxilDebugInstrumentation : public ModulePass {
private:
  // The three caller-supplied selection parameters, viewed under the names
  // appropriate to each shader kind. Parameters[] aliases all of the views and
  // is what applyOptions fills in.
  union ParametersAllTogether {
    unsigned Parameters[3];
    struct PixelShaderParameters {
      unsigned X;
      unsigned Y;
    } PixelShader;
    struct VertexShaderParameters {
      unsigned VertexId;
      unsigned InstanceId;
    } VertexShader;
    struct ComputeShaderParameters {
      unsigned ThreadIdX;
      unsigned ThreadIdY;
      unsigned ThreadIdZ;
    } ComputeShader;
    struct GeometryShaderParameters {
      unsigned PrimitiveId;
      unsigned InstanceId;
    } GeometryShader;
    struct HullShaderParameters {
      unsigned PrimitiveId;
      unsigned ControlPointId;
    } HullShader;
    struct DomainShaderParameters {
      unsigned PrimitiveId;
    } DomainShader;
  } m_Parameters = {{0, 0, 0}};

  // Input-signature element IDs of the system values that the selection
  // prolog loads. Only pixel and vertex shaders read theirs from the input
  // signature (see addRequiredSystemValues).
  union SystemValueIndices {
    struct PixelShaderParameters {
      unsigned Position;
    } PixelShader;
    struct VertexShaderParameters {
      unsigned VertexId;
      unsigned InstanceId;
    } VertexShader;
  };

  // Values of the "FirstInstruction" / "LastInstruction" pass options.
  unsigned m_FirstInstruction = 0;
  unsigned m_LastInstruction = static_cast<unsigned>(-1);

  // Value of the "UAVSize" pass option; defaults to 1 MiB.
  uint64_t m_UAVSize = 1024 * 1024;

  // Instrumentation state kept separately for each instrumented function.
  struct PerFunctionValues {
    CallInst *UAVHandle = nullptr;
    Instruction *CounterOffset = nullptr;
    Value *InvocationId = nullptr;
    // Together these two values allow branchless writing to the UAV. An
    // invocation of the shader is either of interest or not (e.g. it writes to
    // the pixel the user selected for debugging or it doesn't). If not of
    // interest, debugging output will still occur, but it will be relegated to
    // the top half of the UAV. Invocations of interest, by contrast,
    // will be written to the UAV at sequentially increasing offsets.
    Value *OffsetMask = nullptr;
    Instruction *OffsetOr = nullptr;
    // i1 result of the invocation-selection prolog.
    Value *SelectionCriterion = nullptr;
    // UAV offset produced by the most recent reserveDebugEntrySpace call.
    Value *CurrentIndex = nullptr;
    // Blocks created by this pass itself, which must not be instrumented.
    std::vector<BasicBlock *> AddedBlocksToIgnoreForInstrumentation;
  };

  std::map<llvm::Function *, PerFunctionValues> m_FunctionToValues;

  // Bundle of the objects needed by every IR-emitting helper.
  struct BuilderContext {
    Module &M;
    DxilModule &DM;
    LLVMContext &Ctx;
    OP *HlslOP;
    IRBuilder<> &Builder;
  };

  // Bytes reserved by reserveDebugEntrySpace that have not been written yet.
  uint32_t m_RemainingReservedSpaceInBytes = 0;

public:
  static char ID; // Pass identification, replacement for typeid
  explicit DxilDebugInstrumentation() : ModulePass(ID) {}
  StringRef getPassName() const override {
    return "Add PIX debug instrumentation";
  }
  void applyOptions(PassOptions O) override;
  bool runOnModule(Module &M) override;
  bool RunOnFunction(Module &M, DxilModule &DM, llvm::Function *function);

private:
  // Makes sure the system values needed to identify an invocation exist in the
  // input signature and returns their element IDs.
  SystemValueIndices addRequiredSystemValues(BuilderContext &BC,
                                             DXIL::ShaderKind shaderKind);
  // Emits the per-shader-kind selection test and records its result as the
  // current function's SelectionCriterion.
  void addInvocationSelectionProlog(BuilderContext &BC,
                                    SystemValueIndices SVIndices,
                                    DXIL::ShaderKind shaderKind);
  // Per-shader-kind selection tests; each returns an i1 that is true for the
  // invocation of interest.
  Value *addPixelShaderProlog(BuilderContext &BC, SystemValueIndices SVIndices);
  Value *addGeometryShaderProlog(BuilderContext &BC);
  Value *addDispatchedShaderProlog(BuilderContext &BC);
  Value *addRaygenShaderProlog(BuilderContext &BC);
  Value *addVertexShaderProlog(BuilderContext &BC,
                               SystemValueIndices SVIndices);
  Value *addHullhaderProlog(BuilderContext &BC);
  // Emits a comparison of the shader's primitive id against primId.
  Value *addComparePrimitiveIdProlog(BuilderContext &BC, unsigned primId);
  uint32_t addDebugEntryValue(BuilderContext &BC, Value *TheValue);
  void addInvocationStartMarker(BuilderContext &BC);
  void determineLimitANDAndInitializeCounter(BuilderContext &BC);
  // Atomically advances the UAV write counter by SpaceInBytes (a byte count)
  // and computes the offset at which the reserved region starts.
  void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
  std::optional<InstructionAndType> addStoreStepDebugEntry(BuilderContext *BC,
                                                           StoreInst *Inst);
  std::optional<InstructionAndType>
  addStepDebugEntry(BuilderContext *BC, Instruction *Inst,
                    llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles);
  std::optional<DebugShaderModifierRecordType>
  addStepDebugEntryValue(BuilderContext *BC, std::uint32_t InstNum, Value *V,
                         std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex);
  // Offset of the "dumping ground" half of the UAV (m_UAVSize / 2).
  uint32_t UAVDumpingGroundOffset();
  template <typename ReturnType>
  void addStepEntryForType(DebugShaderModifierRecordType RecordType,
                           BuilderContext &BC, std::uint32_t InstNum, Value *V,
                           std::uint32_t ValueOrdinal,
                           Value *ValueOrdinalIndex);

  // One value to be written to the debug UAV, and where its instrumentation
  // goes relative to the surrounding instructions.
  struct InstructionToInstrument {
    Value *ValueToWriteToDebugMemory;
    DebugShaderModifierRecordType ValueType;
    Instruction *InstructionAfterWhichToAddInstrumentation;
    Instruction *InstructionBeforeWhichToAddInstrumentation;
  };
  // Everything found to instrument in a single basic block.
  struct BlockInstrumentationData {
    uint32_t FirstInstructionOrdinalInBlock;
    std::vector<InstructionToInstrument> Instructions;
  };
  BlockInstrumentationData FindInstrumentableInstructionsInBlock(
      BasicBlock &BB, OP *HlslOP,
      llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles);
  uint32_t
  CountBlockPayloadBytes(std::vector<InstructionToInstrument> const &IsAndTs);
};
// Reads this pass's options into the corresponding members, using the same
// defaults as the in-class initializers: the instruction range, the three
// invocation-selection parameters and the UAV size.
void DxilDebugInstrumentation::applyOptions(PassOptions O) {
  GetPassOptionUnsigned(O, "FirstInstruction", &m_FirstInstruction, 0);
  GetPassOptionUnsigned(O, "LastInstruction", &m_LastInstruction,
                        static_cast<unsigned>(-1));

  // "parameterN" fills slot N of the shared parameter array.
  static const char *const ParameterOptionNames[] = {"parameter0", "parameter1",
                                                     "parameter2"};
  unsigned *Slot = m_Parameters.Parameters;
  for (const char *OptionName : ParameterOptionNames) {
    GetPassOptionUnsigned(O, OptionName, Slot, 0);
    ++Slot;
  }

  GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
}
// Returns the offset at which the upper half of the UAV begins, i.e. half of
// the configured UAV size truncated to 32 bits.
uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() {
  const uint64_t HalfOfUAV = m_UAVSize / 2;
  return static_cast<uint32_t>(HalfOfUAV);
}
// Returns the ID of the input-signature element whose semantic kind is
// semanticKind. If no such element exists, a new single-row, single-column
// F32 element named `name` is appended at semantic index 0 and its ID is
// returned instead.
static unsigned FindOrAddInputSignatureElement(
    hlsl::DxilSignature &InputSignature, const char *name,
    DXIL::SigPointKind sigPointKind, hlsl::DXIL::SemanticKind semanticKind) {
  auto &Elements = InputSignature.GetElements();

  const auto HasRequestedSemantic =
      [&](const std::unique_ptr<DxilSignatureElement> &Candidate) {
        return Candidate->GetSemantic()->GetKind() == semanticKind;
      };
  auto Found =
      std::find_if(Elements.begin(), Elements.end(), HasRequestedSemantic);
  if (Found != Elements.end()) {
    return (*Found)->GetID();
  }

  // Not present yet: create the element and append it to the signature.
  auto NewElement = llvm::make_unique<DxilSignatureElement>(sigPointKind);
  NewElement->Initialize(name, hlsl::CompType::getF32(),
                         hlsl::DXIL::InterpolationMode::Undefined, 1, 1);
  NewElement->AppendSemanticIndex(0);
  NewElement->SetSigPointKind(sigPointKind);
  NewElement->SetKind(semanticKind);
  auto NewIndex = InputSignature.AppendElement(std::move(NewElement));
  return Elements[NewIndex]->GetID();
}
// Ensures that the system values used to identify a shader invocation are
// present in the input signature, adding them when missing, and returns their
// signature element IDs.
//
// Only vertex shaders (VertexID, InstanceID) and pixel shaders (Position)
// take their identifying values from the input signature; for every other
// supported kind the returned indices stay zero-initialized.
DxilDebugInstrumentation::SystemValueIndices
DxilDebugInstrumentation::addRequiredSystemValues(BuilderContext &BC,
                                                  DXIL::ShaderKind shaderKind) {
  SystemValueIndices Indices{};

  switch (shaderKind) {
  case DXIL::ShaderKind::Vertex: {
    hlsl::DxilSignature &Inputs = BC.DM.GetInputSignature();
    Indices.VertexShader.VertexId = FindOrAddInputSignatureElement(
        Inputs, "VertexId", DXIL::SigPointKind::VSIn,
        hlsl::DXIL::SemanticKind::VertexID);
    Indices.VertexShader.InstanceId = FindOrAddInputSignatureElement(
        Inputs, "InstanceId", DXIL::SigPointKind::VSIn,
        hlsl::DXIL::SemanticKind::InstanceID);
    break;
  }

  case DXIL::ShaderKind::Pixel: {
    hlsl::DxilSignature &Inputs = BC.DM.GetInputSignature();
    auto &Elements = Inputs.GetElements();
    const auto IsPosition =
        [](const std::unique_ptr<DxilSignatureElement> &Candidate) {
          return Candidate->GetSemantic()->GetKind() ==
                 hlsl::DXIL::SemanticKind::Position;
        };
    auto PositionIt =
        std::find_if(Elements.begin(), Elements.end(), IsPosition);
    // An existing SV_Position always has a full mask, so x and y are
    // guaranteed to be available. When it is absent we add it ourselves.
    if (PositionIt != Elements.end()) {
      Indices.PixelShader.Position = (*PositionIt)->GetID();
    } else {
      auto NewPosition =
          llvm::make_unique<DxilSignatureElement>(DXIL::SigPointKind::PSIn);
      NewPosition->Initialize("Position", hlsl::CompType::getF32(),
                              hlsl::DXIL::InterpolationMode::Linear, 1, 4);
      NewPosition->AppendSemanticIndex(0);
      NewPosition->SetSigPointKind(DXIL::SigPointKind::PSIn);
      NewPosition->SetKind(hlsl::DXIL::SemanticKind::Position);
      auto NewIndex = Inputs.AppendElement(std::move(NewPosition));
      Indices.PixelShader.Position = Elements[NewIndex]->GetID();
    }
    break;
  }

  // Dispatch/thread ids are not part of the input signature.
  case DXIL::ShaderKind::Amplification:
  case DXIL::ShaderKind::Mesh:
  case DXIL::ShaderKind::Compute:
  case DXIL::ShaderKind::RayGeneration:
  case DXIL::ShaderKind::Intersection:
  case DXIL::ShaderKind::AnyHit:
  case DXIL::ShaderKind::ClosestHit:
  case DXIL::ShaderKind::Miss:
    break;

  // Neither the GS/HS/DS primitive id, nor the HS control point id, nor the
  // GS instance id is part of the input signature.
  case DXIL::ShaderKind::Geometry:
  case DXIL::ShaderKind::Hull:
  case DXIL::ShaderKind::Domain:
    break;

  default:
    assert(false); // guaranteed by runOnModule
  }

  return Indices;
}
// Emits the selection test for shaders identified by a thread id: loads the
// three ThreadId components and compares each with the caller-supplied
// ComputeShader parameters.
//
// Returns an i1 value that is true only when all three components match.
Value *DxilDebugInstrumentation::addDispatchedShaderProlog(BuilderContext &BC) {
  auto *ThreadIdOp =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::ThreadId, Type::getInt32Ty(BC.Ctx));
  Constant *ThreadIdOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::ThreadId));

  // Emits a ThreadId call for one component (0 = x, 1 = y, 2 = z).
  const auto ReadComponent = [&](unsigned Component,
                                 const char *Name) -> Value * {
    return BC.Builder.CreateCall(
        ThreadIdOp, {ThreadIdOpcode, BC.HlslOP->GetU32Const(Component)}, Name);
  };
  // Emits an equality test of a loaded component against its target value.
  const auto MatchesTarget = [&](Value *Actual, unsigned Wanted,
                                 const char *Name) -> Value * {
    return BC.Builder.CreateICmpEQ(Actual, BC.HlslOP->GetU32Const(Wanted),
                                   Name);
  };

  // All three loads are emitted before any of the comparisons.
  Value *IdX = ReadComponent(0, "ThreadIdX");
  Value *IdY = ReadComponent(1, "ThreadIdY");
  Value *IdZ = ReadComponent(2, "ThreadIdZ");

  const auto &Target = m_Parameters.ComputeShader;
  Value *XMatches = MatchesTarget(IdX, Target.ThreadIdX, "CompareToThreadIdX");
  Value *YMatches = MatchesTarget(IdY, Target.ThreadIdY, "CompareToThreadIdY");
  Value *ZMatches = MatchesTarget(IdZ, Target.ThreadIdZ, "CompareToThreadIdZ");

  Value *XYMatch = BC.Builder.CreateAnd(XMatches, YMatches, "CompareXAndY");
  return BC.Builder.CreateAnd(XYMatch, ZMatches, "CompareAll");
}
// Emits the selection test for ray-tracing shaders: loads the three
// DispatchRaysIndex components and compares each with the caller-supplied
// parameters (which are stored in the ComputeShader view of m_Parameters).
//
// Returns an i1 value that is true only when all three components match.
Value *DxilDebugInstrumentation::addRaygenShaderProlog(BuilderContext &BC) {
  auto *RaysIndexOp = BC.HlslOP->GetOpFunc(DXIL::OpCode::DispatchRaysIndex,
                                           Type::getInt32Ty(BC.Ctx));
  Constant *RaysIndexOpcode = BC.HlslOP->GetU32Const(
      static_cast<unsigned>(DXIL::OpCode::DispatchRaysIndex));

  // Emits a DispatchRaysIndex call for one component; the component selector
  // is an i8 constant for this opcode.
  const auto ReadComponent = [&](char Component, const char *Name) -> Value * {
    return BC.Builder.CreateCall(
        RaysIndexOp, {RaysIndexOpcode, BC.HlslOP->GetI8Const(Component)}, Name);
  };
  // Emits an equality test of a loaded component against its target value.
  const auto MatchesTarget = [&](Value *Actual, unsigned Wanted,
                                 const char *Name) -> Value * {
    return BC.Builder.CreateICmpEQ(Actual, BC.HlslOP->GetU32Const(Wanted),
                                   Name);
  };

  // All three loads are emitted before any of the comparisons.
  Value *IndexX = ReadComponent(0, "RayX");
  Value *IndexY = ReadComponent(1, "RayY");
  Value *IndexZ = ReadComponent(2, "RayZ");

  const auto &Target = m_Parameters.ComputeShader;
  Value *XMatches =
      MatchesTarget(IndexX, Target.ThreadIdX, "CompareToThreadIdX");
  Value *YMatches =
      MatchesTarget(IndexY, Target.ThreadIdY, "CompareToThreadIdY");
  Value *ZMatches =
      MatchesTarget(IndexZ, Target.ThreadIdZ, "CompareToThreadIdZ");

  Value *XYMatch = BC.Builder.CreateAnd(XMatches, YMatches, "CompareXAndY");
  return BC.Builder.CreateAnd(XYMatch, ZMatches, "CompareAll");
}
// Emits the selection test for vertex shaders: loads the vertex id and the
// instance id from the input signature elements named by SVIndices and
// compares them with the caller-supplied VertexShader parameters.
//
// Returns an i1 value that is true only when both ids match.
Value *
DxilDebugInstrumentation::addVertexShaderProlog(BuilderContext &BC,
                                                SystemValueIndices SVIndices) {
  Type *Int32Ty = Type::getInt32Ty(BC.Ctx);
  Constant *Row0 = BC.HlslOP->GetU32Const(0);
  Constant *Column0 = BC.HlslOP->GetI8Const(0);
  UndefValue *NoVertexIndex = UndefValue::get(Int32Ty);

  auto *LoadInputOp = BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Int32Ty);
  Constant *LoadInputOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::LoadInput));

  // Emits a LoadInput of row 0, column 0 of the given signature element.
  const auto LoadSystemValue = [&](unsigned SignatureId,
                                   const char *Name) -> Value * {
    Constant *ElementId = BC.HlslOP->GetU32Const(SignatureId);
    return BC.Builder.CreateCall(
        LoadInputOp,
        {LoadInputOpcode, ElementId, Row0, Column0, NoVertexIndex}, Name);
  };

  Value *LoadedVertexId =
      LoadSystemValue(SVIndices.VertexShader.VertexId, "VertId");
  Value *LoadedInstanceId =
      LoadSystemValue(SVIndices.VertexShader.InstanceId, "InstanceId");

  // Test both ids against the invocation the caller asked for.
  Value *VertexMatches = BC.Builder.CreateICmpEQ(
      LoadedVertexId,
      BC.HlslOP->GetU32Const(m_Parameters.VertexShader.VertexId),
      "CompareToVertId");
  Value *InstanceMatches = BC.Builder.CreateICmpEQ(
      LoadedInstanceId,
      BC.HlslOP->GetU32Const(m_Parameters.VertexShader.InstanceId),
      "CompareToInstanceId");
  return BC.Builder.CreateAnd(VertexMatches, InstanceMatches, "CompareBoth");
}
// Emits the selection test for hull shaders: the output control point id and
// the primitive id must both equal the caller-supplied HullShader parameters.
//
// Returns an i1 value that is true only when both match.
Value *DxilDebugInstrumentation::addHullhaderProlog(BuilderContext &BC) {
  const auto &Target = m_Parameters.HullShader;

  // The control point id is loaded first...
  auto *ControlPointIdOp = BC.HlslOP->GetOpFunc(
      DXIL::OpCode::OutputControlPointID, Type::getInt32Ty(BC.Ctx));
  Constant *ControlPointIdOpcode = BC.HlslOP->GetU32Const(
      static_cast<unsigned>(DXIL::OpCode::OutputControlPointID));
  Value *LoadedControlPointId = BC.Builder.CreateCall(
      ControlPointIdOp, {ControlPointIdOpcode}, "ControlPointId");

  // ...then the primitive id is loaded and tested...
  Value *PrimitiveMatches = addComparePrimitiveIdProlog(BC, Target.PrimitiveId);

  // ...and finally the control point id is tested and the results combined.
  Value *ControlPointMatches = BC.Builder.CreateICmpEQ(
      LoadedControlPointId, BC.HlslOP->GetU32Const(Target.ControlPointId),
      "CompareToControlPointId");
  return BC.Builder.CreateAnd(ControlPointMatches, PrimitiveMatches,
                              "CompareBoth");
}
// Emits a PrimitiveID load followed by an equality test against primId.
//
// Returns the i1 result of that comparison.
Value *DxilDebugInstrumentation::addComparePrimitiveIdProlog(BuilderContext &BC,
                                                             unsigned primId) {
  auto *PrimitiveIdOp =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::PrimitiveID, Type::getInt32Ty(BC.Ctx));
  Constant *PrimitiveIdOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::PrimitiveID));
  Value *LoadedPrimitiveId =
      BC.Builder.CreateCall(PrimitiveIdOp, {PrimitiveIdOpcode}, "PrimId");

  Constant *WantedPrimitiveId = BC.HlslOP->GetU32Const(primId);
  return BC.Builder.CreateICmpEQ(LoadedPrimitiveId, WantedPrimitiveId,
                                 "CompareToPrimId");
}
// Emits the selection test for geometry shaders. The primitive id is always
// compared; the GS instance id is compared as well only when the module
// declares more than one GS instance.
//
// Returns an i1 value that is true for the invocation of interest.
Value *DxilDebugInstrumentation::addGeometryShaderProlog(BuilderContext &BC) {
  const auto &Target = m_Parameters.GeometryShader;
  Value *PrimitiveMatches = addComparePrimitiveIdProlog(BC, Target.PrimitiveId);

  const bool IsInstanced = BC.DM.GetGSInstanceCount() > 1;
  if (!IsInstanced) {
    return PrimitiveMatches;
  }

  auto *InstanceIdOp = BC.HlslOP->GetOpFunc(DXIL::OpCode::GSInstanceID,
                                            Type::getInt32Ty(BC.Ctx));
  Constant *InstanceIdOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::GSInstanceID));
  Value *LoadedInstanceId =
      BC.Builder.CreateCall(InstanceIdOp, {InstanceIdOpcode}, "GSInstanceId");

  // Compare to the expected GS instance id and combine with the primitive
  // id test.
  Value *InstanceMatches = BC.Builder.CreateICmpEQ(
      LoadedInstanceId, BC.HlslOP->GetU32Const(Target.InstanceId),
      "CompareToInstanceId");
  return BC.Builder.CreateAnd(PrimitiveMatches, InstanceMatches, "CompareBoth");
}
// Emits the selection test for pixel shaders: loads the x and y components of
// the position input named by SVIndices, converts them to unsigned integers
// and compares them with the caller-supplied PixelShader parameters.
//
// Returns an i1 value that is true only when both coordinates match.
Value *
DxilDebugInstrumentation::addPixelShaderProlog(BuilderContext &BC,
                                               SystemValueIndices SVIndices) {
  Type *Int32Ty = Type::getInt32Ty(BC.Ctx);
  Constant *Row0 = BC.HlslOP->GetU32Const(0);
  Constant *ColumnX = BC.HlslOP->GetI8Const(0);
  Constant *ColumnY = BC.HlslOP->GetI8Const(1);
  UndefValue *NoVertexIndex = UndefValue::get(Int32Ty);

  auto *LoadInputOp =
      BC.HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(BC.Ctx));
  Constant *LoadInputOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::LoadInput));
  Constant *PositionId = BC.HlslOP->GetU32Const(SVIndices.PixelShader.Position);

  // Load both float coordinates first, then convert each one to an unsigned
  // integer pixel index.
  Value *XAsFloat = BC.Builder.CreateCall(
      LoadInputOp, {LoadInputOpcode, PositionId, Row0, ColumnX, NoVertexIndex},
      "XPos");
  Value *YAsFloat = BC.Builder.CreateCall(
      LoadInputOp, {LoadInputOpcode, PositionId, Row0, ColumnY, NoVertexIndex},
      "YPos");
  Value *PixelX = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, XAsFloat,
                                        Int32Ty, "XIndex");
  Value *PixelY = BC.Builder.CreateCast(Instruction::CastOps::FPToUI, YAsFloat,
                                        Int32Ty, "YIndex");

  // Compare to the expected pixel position.
  Value *XMatches = BC.Builder.CreateICmpEQ(
      PixelX, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.X), "CompareToX");
  Value *YMatches = BC.Builder.CreateICmpEQ(
      PixelY, BC.HlslOP->GetU32Const(m_Parameters.PixelShader.Y), "CompareToY");
  return BC.Builder.CreateAnd(XMatches, YMatches, "ComparePos");
}
// Emits, at the builder's current insertion point, the test that decides
// whether the running invocation is the one selected for debugging, choosing
// the test that fits shaderKind. The resulting value is stored as the
// SelectionCriterion of the function being instrumented.
void DxilDebugInstrumentation::addInvocationSelectionProlog(
    BuilderContext &BC, SystemValueIndices SVIndices,
    DXIL::ShaderKind shaderKind) {
  Value *Criterion = nullptr;

  switch (shaderKind) {
  case DXIL::ShaderKind::Pixel:
    Criterion = addPixelShaderProlog(BC, SVIndices);
    break;
  case DXIL::ShaderKind::Vertex:
    Criterion = addVertexShaderProlog(BC, SVIndices);
    break;
  case DXIL::ShaderKind::Geometry:
    Criterion = addGeometryShaderProlog(BC);
    break;
  case DXIL::ShaderKind::Hull:
    Criterion = addHullhaderProlog(BC);
    break;
  case DXIL::ShaderKind::Domain:
    // Domain shaders are selected by primitive id alone.
    Criterion =
        addComparePrimitiveIdProlog(BC, m_Parameters.DomainShader.PrimitiveId);
    break;
  case DXIL::ShaderKind::Compute:
  case DXIL::ShaderKind::Amplification:
  case DXIL::ShaderKind::Mesh:
    Criterion = addDispatchedShaderProlog(BC);
    break;
  case DXIL::ShaderKind::RayGeneration:
  case DXIL::ShaderKind::ClosestHit:
  case DXIL::ShaderKind::Intersection:
  case DXIL::ShaderKind::AnyHit:
  case DXIL::ShaderKind::Miss:
    Criterion = addRaygenShaderProlog(BC);
    break;
  default:
    assert(false); // guaranteed by runOnModule
  }

  llvm::Function *CurrentFunction = BC.Builder.GetInsertBlock()->getParent();
  m_FunctionToValues[CurrentFunction].SelectionCriterion = Criterion;
}
// Splits the current block at the builder's insertion point and routes
// execution through one of two new, empty blocks depending on the function's
// SelectionCriterion. Two phis at the top of the continuation block then pick,
// per invocation class, the OR value that places writes in the lower or upper
// half of the UAV (stored as OffsetOr) and the location of that half's counter
// (stored as CounterOffset). OffsetMask is set to UAVSize/4 - 1 for both
// classes.
//
// The three blocks this function creates or truncates are recorded in
// AddedBlocksToIgnoreForInstrumentation. The interesting-counter offset and
// the overflow threshold are also written to OSOverride for the caller.
//
// On return the builder is positioned in the continuation block, after the
// two phis.
//
// NOTE(review): m_UAVSize is 64 bits wide and is passed to GetU32Const
// without an explicit cast in three places below - presumably the option is
// range-checked elsewhere; confirm.
// NOTE(review): OSOverride is dereferenced without a null check - confirm it
// is always set when this pass runs.
void DxilDebugInstrumentation::determineLimitANDAndInitializeCounter(
BuilderContext &BC) {
auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()];
// Split the block at the current insertion point. Insert a conditional
// branch that will invoke one of two new blocks depending on if this
// is a thread-of-interest. The two different classes of thread will
// then be given different limiting AND values within these new
// blocks.
BasicBlock *RestOfMainBlock = BC.Builder.GetInsertBlock()->splitBasicBlock(
*BC.Builder.GetInsertPoint());
// Up to this split point is a new block that we don't need to instrument:
values.AddedBlocksToIgnoreForInstrumentation.push_back(
BC.Builder.GetInsertBlock());
// Both new blocks are inserted before RestOfMainBlock and contain nothing
// but an unconditional branch to it; they exist so the phis below can tell
// the two paths apart.
auto *InterestingInvocationBlock = BasicBlock::Create(
BC.Ctx, "PIXInterestingBlock", BC.Builder.GetInsertBlock()->getParent(),
RestOfMainBlock);
values.AddedBlocksToIgnoreForInstrumentation.push_back(
InterestingInvocationBlock);
IRBuilder<> BuilderForInteresting(InterestingInvocationBlock);
BuilderForInteresting.CreateBr(RestOfMainBlock);
auto *NonInterestingInvocationBlock = BasicBlock::Create(
BC.Ctx, "PIXNonInterestingBlock",
BC.Builder.GetInsertBlock()->getParent(), RestOfMainBlock);
values.AddedBlocksToIgnoreForInstrumentation.push_back(
NonInterestingInvocationBlock);
IRBuilder<> BuilderForNonInteresting(NonInterestingInvocationBlock);
BuilderForNonInteresting.CreateBr(RestOfMainBlock);
// Connect these new blocks as necessary:
// The conditional branch is inserted in front of the terminator that the
// split left behind, and that old terminator is then erased.
BC.Builder.SetInsertPoint(BC.Builder.GetInsertBlock()->getTerminator());
BC.Builder.CreateCondBr(values.SelectionCriterion, InterestingInvocationBlock,
NonInterestingInvocationBlock);
BC.Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
// The same mask is used for both classes of invocation: it confines the
// start of each write to the first quarter of the chosen half.
values.OffsetMask = BC.HlslOP->GetU32Const(m_UAVSize / 4 - 1);
// Now add a phi that selects between two constant OR values based on
// which branch the thread followed above (interesting or not).
// The OR will either place the output in the lower half or the upper
// half of the UAV.
BC.Builder.SetInsertPoint(RestOfMainBlock->getFirstInsertionPt());
auto *PHIForOr =
BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXOffsetOr");
PHIForOr->addIncoming(BC.HlslOP->GetU32Const(0), InterestingInvocationBlock);
PHIForOr->addIncoming(BC.HlslOP->GetU32Const(m_UAVSize / 2),
NonInterestingInvocationBlock);
values.OffsetOr = PHIForOr;
// A second phi selects the location of the counter: UAVSize/2 - 1 for
// interesting invocations, UAVSize - 1 for the others.
auto *PHIForCounterOffset =
BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXCounterLocation");
const uint32_t InterestingCounterOffset =
static_cast<uint32_t>(m_UAVSize / 2 - 1);
PHIForCounterOffset->addIncoming(
BC.HlslOP->GetU32Const(InterestingCounterOffset),
InterestingInvocationBlock);
const uint32_t UninterestingCounterOffsetValue =
static_cast<uint32_t>(m_UAVSize - 1);
PHIForCounterOffset->addIncoming(
BC.HlslOP->GetU32Const(UninterestingCounterOffsetValue),
NonInterestingInvocationBlock);
values.CounterOffset = PHIForCounterOffset;
// These are reported to the caller so there are fewer assumptions made by the
// caller about these internal details:
*OSOverride << "InterestingCounterOffset:"
<< std::to_string(InterestingCounterOffset) << "\n";
*OSOverride << "OverflowThreshold:" << std::to_string(m_UAVSize / 4 - 1)
<< "\n";
}
// Reserves SpaceInBytes bytes of debug output for the current function by
// emitting an atomic add on the UAV counter, and derives the offset at which
// the reserved region starts.
//
// The counter value returned by the atomic is masked with OffsetMask and then
// ORed with OffsetOr; the result is stored as the function's CurrentIndex.
// The first counter value ever obtained for a function is also kept as its
// InvocationId.
//
// Preconditions (asserted): no reservation is currently open, i.e.
// CurrentIndex is null and no reserved bytes remain unwritten.
void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC,
                                                      uint32_t SpaceInBytes) {
  auto &FnValues = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()];
  assert(FnValues.CurrentIndex == nullptr);
  assert(m_RemainingReservedSpaceInBytes == 0);
  m_RemainingReservedSpaceInBytes = SpaceInBytes;

  // Emit the atomic increment of the UAV counter.
  Type *Int32Ty = Type::getInt32Ty(BC.Ctx);
  auto *AtomicBinOp = BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Int32Ty);
  Constant *AtomicBinOpOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::AtomicBinOp));
  Constant *AddOperation =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::AtomicBinOpCode::Add));
  UndefValue *UnusedCoordinate = UndefValue::get(Int32Ty);
  Constant *BytesToReserve = BC.HlslOP->GetU32Const(SpaceInBytes);

  auto *CounterBeforeAdd = BC.Builder.CreateCall(
      AtomicBinOp,
      {
          AtomicBinOpOpcode,      // opcode
          FnValues.UAVHandle,     // resource handle
          AddOperation,           // which binary operation: add
          FnValues.CounterOffset, // coordinate c0: index in bytes
          UnusedCoordinate,       // coordinate c1 (unused)
          UnusedCoordinate,       // coordinate c2 (unused)
          BytesToReserve,         // increment value
      },
      "UAVIncResult");

  // Only the very first reservation defines the invocation id.
  if (FnValues.InvocationId == nullptr) {
    FnValues.InvocationId = CounterBeforeAdd;
  }

  // Limit the start offset, then move it into the appropriate UAV half.
  auto *LimitedOffset = BC.Builder.CreateAnd(
      CounterBeforeAdd, FnValues.OffsetMask, "MaskedForUAVLimit");
  FnValues.CurrentIndex =
      BC.Builder.CreateOr(LimitedOffset, FnValues.OffsetOr, "ORedForUAVStart");
}
// Emits the instructions that store TheValue into the debug UAV at the
// current reserved index and returns the number of bytes written.
//
// Only 32-bit integers and floats are stored directly (one BufferStore each,
// 4 bytes). Other types are first reduced to those:
//   - double : SplitDouble into low/high dwords, stored low then high (8 bytes)
//   - i64    : truncated low dword, then the high dword (8 bytes)
//   - i16/i1 : zero-extended to i32 (4 bytes)
//   - half   : FP-cast to float (4 bytes)
//
// Each direct store consumes 4 bytes of the space set aside by
// reserveDebugEntrySpace; when the reservation is used up, CurrentIndex is
// reset to nullptr so that the next reservation can start.
uint32_t DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC,
                                                      Value *TheValue) {
  assert(m_RemainingReservedSpaceInBytes > 0);

  Type *ValueTy = TheValue->getType();
  Type *I32Ty = Type::getInt32Ty(BC.Ctx);

  if (ValueTy->isDoubleTy()) {
    Function *SplitDouble =
        BC.HlslOP->GetOpFunc(OP::OpCode::SplitDouble, ValueTy);
    Constant *SplitDoubleOpcode =
        BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::SplitDouble);
    auto *Halves = BC.Builder.CreateCall(
        SplitDouble, {SplitDoubleOpcode, TheValue}, "SplitDouble");
    auto *LowBits = BC.Builder.CreateExtractValue(Halves, 0, "LowBits");
    auto *HighBits = BC.Builder.CreateExtractValue(Halves, 1, "HighBits");
    addDebugEntryValue(BC, LowBits);
    addDebugEntryValue(BC, HighBits);
    return 8;
  }

  if (ValueTy->isIntegerTy(64)) {
    auto *LowBits = BC.Builder.CreateTrunc(TheValue, I32Ty, "LowBits");
    auto *ShiftedBits = BC.Builder.CreateLShr(TheValue, 32, "ShiftedBits");
    auto *HighBits = BC.Builder.CreateTrunc(ShiftedBits, I32Ty, "HighBits");
    addDebugEntryValue(BC, LowBits);
    addDebugEntryValue(BC, HighBits);
    return 8;
  }

  if (ValueTy->isIntegerTy(16) || ValueTy->isIntegerTy(1)) {
    auto *As32 = BC.Builder.CreateZExt(TheValue, I32Ty, "As32");
    return addDebugEntryValue(BC, As32);
  }

  if (ValueTy->isHalfTy()) {
    auto *AsFloat =
        BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat");
    return addDebugEntryValue(BC, AsFloat);
  }

  // Direct store of a single dword.
  Function *StoreValue =
      BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, ValueTy);
  Constant *StoreValueOpcode =
      BC.HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferStore);
  UndefValue *UnusedCoordinate = UndefValue::get(I32Ty);

  // The unused value slots must carry the same overload type as the value.
  UndefValue *UnusedValue = nullptr;
  if (ValueTy->isIntegerTy()) {
    UnusedValue = UndefValue::get(I32Ty);
  } else if (ValueTy->isFloatTy()) {
    UnusedValue = UndefValue::get(Type::getFloatTy(BC.Ctx));
  } else {
    // The above are the only two valid types for a UAV store
    assert(false);
  }

  Constant *WriteMask_X = BC.HlslOP->GetI8Const(1);
  auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()];
  (void)BC.Builder.CreateCall(
      StoreValue, {StoreValueOpcode,    // i32 opcode
                   values.UAVHandle,    // %dx.types.Handle: resource handle
                   values.CurrentIndex, // i32 c0: index in bytes into UAV
                   UnusedCoordinate,    // i32 c1: unused
                   TheValue,            // the dword being recorded
                   UnusedValue,         // unused value
                   UnusedValue,         // unused value
                   UnusedValue,         // unused value
                   WriteMask_X});

  assert(m_RemainingReservedSpaceInBytes >= 4); // check for underflow
  m_RemainingReservedSpaceInBytes -= 4;

  // Advance to the next dword, or close the reservation once it is used up.
  if (m_RemainingReservedSpaceInBytes == 0) {
    values.CurrentIndex = nullptr;
  } else {
    values.CurrentIndex =
        BC.Builder.CreateAdd(values.CurrentIndex, BC.HlslOP->GetU32Const(4));
  }
  return 4;
}
// Writes the "invocation start" record at the builder's current position:
// a header dword of type DebugShaderModifierRecordTypeInvocationStartMarker
// followed by the function's invocation id.
void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) {
  // Fill in the record header up front; it does not depend on emitted IR.
  DebugShaderModifierRecordHeader marker{{{0, 0, 0, 0}}, 0};
  marker.Header.Details.SizeDwords =
      DebugShaderModifierRecordPayloadSizeDwords(sizeof(marker));
  marker.Header.Details.Flags = 0;
  marker.Header.Details.Type =
      DebugShaderModifierRecordTypeInvocationStartMarker;

  reserveDebugEntrySpace(BC, sizeof(marker));

  Constant *HeaderDword = BC.HlslOP->GetU32Const(marker.Header.u32Header);
  addDebugEntryValue(BC, HeaderDword);

  // InvocationId is read only after the reservation above, which is what
  // assigns it on first use.
  Function *CurrentFunction = BC.Builder.GetInsertBlock()->getParent();
  auto &values = m_FunctionToValues[CurrentFunction];
  addDebugEntryValue(BC, values.InvocationId);
}
// Writes a single step record of the given RecordType.
//
// Every record starts with the header dword, the invocation id and the
// instruction number. Records other than "void" and "ret" additionally carry
// the value V followed by one dword that packs ValueOrdinal into the upper 16
// bits and the low 16 bits of ValueOrdinalIndex into the lower 16 bits.
//
// ReturnType selects the DebugShaderModifierRecordDXILStep specialisation
// whose size is reserved for the record.
template <typename ReturnType>
void DxilDebugInstrumentation::addStepEntryForType(
    DebugShaderModifierRecordType RecordType, BuilderContext &BC,
    std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal,
    Value *ValueOrdinalIndex) {
  DebugShaderModifierRecordDXILStep<ReturnType> step = {};
  step.Header.Details.SizeDwords =
      DebugShaderModifierRecordPayloadSizeDwords(sizeof(step));
  step.Header.Details.Type = static_cast<uint8_t>(RecordType);

  reserveDebugEntrySpace(BC, sizeof(step));
  auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()];

  // Fields common to every step record.
  addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header));
  addDebugEntryValue(BC, values.InvocationId);
  addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum));

  const bool HasNoValue =
      RecordType == DebugShaderModifierRecordTypeDXILStepVoid ||
      RecordType == DebugShaderModifierRecordTypeDXILStepRet;
  if (HasNoValue)
    return;

  // The value itself, then the packed (ordinal << 16) | (index & 0xFFFF).
  addDebugEntryValue(BC, V);
  Value *OrdinalInHighHalf = BC.HlslOP->GetU32Const(ValueOrdinal << 16);
  Value *IndexInLowHalf = BC.Builder.CreateAnd(
      ValueOrdinalIndex, BC.HlslOP->GetU32Const(0xFFFF), "ValueOrdinalIndex");
  Value *EncodedValueOrdinalAndIndex =
      BC.Builder.CreateOr(OrdinalInHighHalf, IndexInLowHalf, "ValueOrdinal");
  addDebugEntryValue(BC, EncodedValueOrdinalAndIndex);
}
// Describes (and, when BC is non-null, instruments) a store that PIX has
// annotated as a write to a virtual-register alloca.
//
// Returns std::nullopt when:
//   - the store carries no PixAllocaRegWrite or PixDxilInstNum annotation,
//   - the stored value's type is not one addStepDebugEntryValue can record,
//   - the stored value is an instruction without a PixDxilReg annotation,
//   - the stored value is a constant whose payload cannot be read as a
//     floating-point or integer literal (e.g. undef or a constant expression),
//   - the stored value is neither an instruction nor a constant.
//
// Otherwise the result identifies the store, its instruction ordinal, record
// type and alloca base/index. For instruction operands it also carries the
// operand's register number; for literal constants it carries the constant's
// bit pattern in ConstantAllocaStoreValue.
std::optional<InstructionAndType>
DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext *BC,
                                                 StoreInst *Inst) {
  std::uint32_t ValueOrdinalBase = 0;
  std::uint32_t UnusedValueOrdinalSize = 0;
  llvm::Value *ValueOrdinalIndex = nullptr;
  if (!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase,
                                             &UnusedValueOrdinalSize,
                                             &ValueOrdinalIndex)) {
    return std::nullopt;
  }
  std::uint32_t InstNum = 0;
  if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) {
    return std::nullopt;
  }

  Value *StoredValue = Inst->getValueOperand();
  // Named RecordType (not Type) so that llvm::Type is not shadowed below.
  auto RecordType = addStepDebugEntryValue(BC, InstNum, StoredValue,
                                           ValueOrdinalBase, ValueOrdinalIndex);
  if (!RecordType)
    return std::nullopt;

  if (Instruction *ValueAsInst = dyn_cast<Instruction>(StoredValue)) {
    uint32_t RegNum = 0;
    if (!pix_dxil::PixDxilReg::FromInst(ValueAsInst, &RegNum))
      return std::nullopt;
    InstructionAndType ret{};
    ret.Inst = Inst;
    ret.InstructionOrdinal = InstNum;
    ret.Type = *RecordType;
    ret.RegisterNumber = RegNum;
    ret.AllocaBase = ValueOrdinalBase;
    ret.AllocaWriteIndex = ValueOrdinalIndex;
    return ret;
  }

  if (Constant *ValueAsConst = dyn_cast<Constant>(StoredValue)) {
    InstructionAndType ret{};
    ret.Inst = Inst;
    ret.InstructionOrdinal = InstNum;
    ret.Type = *RecordType;
    ret.AllocaBase = ValueOrdinalBase;
    ret.AllocaWriteIndex = ValueOrdinalIndex;
    switch (ValueAsConst->getType()->getTypeID()) {
    case Type::HalfTyID:
    case Type::FloatTyID:
    case Type::DoubleTyID: {
      // A constant of floating-point type is not necessarily a ConstantFP
      // (it may be undef or a constant expression), so the cast result must
      // be checked before it is dereferenced.
      auto *AsFP = dyn_cast<ConstantFP>(ValueAsConst);
      if (AsFP == nullptr)
        return std::nullopt;
      ret.ConstantAllocaStoreValue =
          AsFP->getValueAPF().bitcastToAPInt().getLimitedValue();
      break;
    }
    case Type::IntegerTyID: {
      // Same reasoning as above for integer-typed constants.
      auto *AsInt = dyn_cast<ConstantInt>(ValueAsConst);
      if (AsInt == nullptr)
        return std::nullopt;
      ret.ConstantAllocaStoreValue = AsInt->getLimitedValue();
      break;
    }
    default:
      return std::nullopt;
    }
    return ret;
  }

  return std::nullopt;
}
// Classifies Inst for debug tracing and, when BC is non-null, emits the
// corresponding step record.
//
// Returns std::nullopt for instructions that produce no step (no instruction
// number, or no register and not a terminator, or a value type that cannot be
// recorded). Otherwise returns the instruction, its ordinal, its record type
// and, for register-producing instructions, the register number.
std::optional<InstructionAndType> DxilDebugInstrumentation::addStepDebugEntry(
    BuilderContext *BC, Instruction *Inst,
    llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles) {
  std::uint32_t InstNum;
  if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum))
    return std::nullopt;

  // Builds the part of the result shared by every outcome below.
  auto MakeEntry = [Inst, InstNum](DebugShaderModifierRecordType RecordType) {
    InstructionAndType Entry{};
    Entry.Inst = Inst;
    Entry.InstructionOrdinal = InstNum;
    Entry.Type = RecordType;
    return Entry;
  };

  // Ray query handles are reported as void steps.
  if (RayQueryHandles.count(Inst) != 0)
    return MakeEntry(DebugShaderModifierRecordTypeDXILStepVoid);

  if (auto *St = llvm::dyn_cast<llvm::StoreInst>(Inst))
    return addStoreStepDebugEntry(BC, St);

  if (auto *Ld = llvm::dyn_cast<llvm::LoadInst>(Inst)) {
    auto *PointerExpr =
        llvm::dyn_cast<ConstantExpr>(Ld->getPointerOperand());
    if (PointerExpr != nullptr &&
        PointerExpr->getOpcode() == Instruction::GetElementPtr) {
      PIXPassHelpers::ScopedInstruction asInstr(
          PointerExpr->getAsInstruction());
      auto *GEP = llvm::cast<GetElementPtrInst>(asInstr.Get());
      if (GEP->getPointerOperand()->getName().equals("dx.nothing.a")) {
        // These debug-only loads are interesting as instructions to
        // step through where otherwise no step might exist for the
        // given HLSL lines, so we include them in the instrumentation:
        return MakeEntry(DebugShaderModifierRecordTypeDXILStepVoid);
      }
    }
  }

  std::uint32_t RegNum;
  if (pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) {
    // Register-producing instruction: record its value if the type allows.
    auto ValueType = addStepDebugEntryValue(
        BC, InstNum, Inst, RegNum, BC ? BC->Builder.getInt32(0) : nullptr);
    if (!ValueType)
      return std::nullopt;
    InstructionAndType Entry = MakeEntry(*ValueType);
    Entry.RegisterNumber = RegNum;
    return Entry;
  }

  // No register: only returns and other terminators still yield a step.
  if (Inst->getOpcode() == Instruction::Ret) {
    if (BC != nullptr)
      addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepRet, *BC,
                                InstNum, nullptr, 0, nullptr);
    return MakeEntry(DebugShaderModifierRecordTypeDXILStepRet);
  }
  if (Inst->isTerminator()) {
    if (BC != nullptr)
      addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, *BC,
                                InstNum, nullptr, 0, nullptr);
    return MakeEntry(DebugShaderModifierRecordTypeDXILStepVoid);
  }
  return std::nullopt;
}
// Maps the type of V to the step record type used to report it and, when BC
// is non-null, emits that record via addStepEntryForType.
//
//   struct, void  -> DXILStepVoid
//   float, half   -> DXILStepFloat
//   i64           -> DXILStepUint64
//   other integer -> DXILStepUint32
//   double        -> DXILStepDouble
//
// Pointer and vector values are deliberately not recorded and yield
// std::nullopt. The remaining type IDs are not expected here (asserted) and
// also yield std::nullopt.
std::optional<DebugShaderModifierRecordType>
DxilDebugInstrumentation::addStepDebugEntryValue(BuilderContext *BC,
                                                 std::uint32_t InstNum,
                                                 Value *V,
                                                 std::uint32_t ValueOrdinal,
                                                 Value *ValueOrdinalIndex) {
  Type *ValueTy = V->getType();
  switch (ValueTy->getTypeID()) {
  case Type::TypeID::StructTyID:
  case Type::TypeID::VoidTyID:
    if (BC != nullptr)
      addStepEntryForType<void>(DebugShaderModifierRecordTypeDXILStepVoid, *BC,
                                InstNum, V, ValueOrdinal, ValueOrdinalIndex);
    return DebugShaderModifierRecordTypeDXILStepVoid;

  // Halfs are reported through the same record as floats.
  case Type::TypeID::FloatTyID:
  case Type::TypeID::HalfTyID:
    if (BC != nullptr)
      addStepEntryForType<float>(DebugShaderModifierRecordTypeDXILStepFloat,
                                 *BC, InstNum, V, ValueOrdinal,
                                 ValueOrdinalIndex);
    return DebugShaderModifierRecordTypeDXILStepFloat;

  case Type::TypeID::IntegerTyID: {
    const bool Is64Bit = ValueTy->getIntegerBitWidth() == 64;
    if (BC != nullptr) {
      if (Is64Bit)
        addStepEntryForType<uint64_t>(
            DebugShaderModifierRecordTypeDXILStepUint64, *BC, InstNum, V,
            ValueOrdinal, ValueOrdinalIndex);
      else
        addStepEntryForType<uint32_t>(
            DebugShaderModifierRecordTypeDXILStepUint32, *BC, InstNum, V,
            ValueOrdinal, ValueOrdinalIndex);
    }
    return Is64Bit ? DebugShaderModifierRecordTypeDXILStepUint64
                   : DebugShaderModifierRecordTypeDXILStepUint32;
  }

  case Type::TypeID::DoubleTyID:
    if (BC != nullptr)
      addStepEntryForType<double>(DebugShaderModifierRecordTypeDXILStepDouble,
                                  *BC, InstNum, V, ValueOrdinal,
                                  ValueOrdinalIndex);
    return DebugShaderModifierRecordTypeDXILStepDouble;

  case Type::TypeID::PointerTyID:
    // Skip pointer calculation instructions. They aren't particularly
    // meaningful to the user (being a mere implementation detail for lookup
    // tables, etc.), and their type is problematic from a UI point of view.
    // The subsequent instructions that dereference the pointer will be
    // properly instrumented and show the (meaningful) retrieved value.
    return std::nullopt;

  case Type::TypeID::VectorTyID:
    // Shows up in "insertelement" in raygen shader?
    return std::nullopt;

  case Type::TypeID::FP128TyID:
  case Type::TypeID::LabelTyID:
  case Type::TypeID::MetadataTyID:
  case Type::TypeID::FunctionTyID:
  case Type::TypeID::ArrayTyID:
  case Type::TypeID::X86_FP80TyID:
  case Type::TypeID::X86_MMXTyID:
  case Type::TypeID::PPC_FP128TyID:
    assert(false);
    return std::nullopt;
  }
  return std::nullopt;
}
// Pass entry point. For library shaders every instrumentable function is
// processed; for any other shader kind only the entry function is. Returns
// true if at least one function was instrumented.
bool DxilDebugInstrumentation::runOnModule(Module &M) {
  DxilModule &DM = M.GetOrCreateDxilModule();

  // There is no point running this pass if it can't return its report:
  if (OSOverride == nullptr)
    return false;

  const bool IsLibrary =
      DM.GetShaderModel()->GetKind() == DXIL::ShaderKind::Library;
  if (!IsLibrary)
    return RunOnFunction(M, DM, PIXPassHelpers::GetEntryFunction(DM));

  bool AnyFunctionModified = false;
  auto LibraryFunctions = PIXPassHelpers::GetAllInstrumentableFunctions(DM);
  for (auto *F : LibraryFunctions) {
    // Every function is visited, regardless of earlier results.
    const bool ModifiedThisOne = RunOnFunction(M, DM, F);
    AnyFunctionModified = AnyFunctionModified || ModifiedThisOne;
  }
  return AnyFunctionModified;
}
// One row of the RecordTypeData table: static properties of a step record
// type. Fields are aggregate-initialised in declaration order.
struct RecordTypeDatum {
// The record type this row describes (the lookup key used by FindDatum).
DebugShaderModifierRecordType Type;
// Size of the value payload for this record type; CountBlockPayloadBytes
// sums these into a byte count.
uint32_t PayloadSize;
// Token that TypeString returns for this record type in the textual report.
const char *AsString;
};
// Table of the step record types known to the report: for each, its payload
// size and its report token. Ret and void steps carry no payload; float and
// uint32 carry 4; uint64 and double carry 8.
static const RecordTypeDatum RecordTypeData[] = {
{DebugShaderModifierRecordTypeDXILStepRet, 0, "r"},
{DebugShaderModifierRecordTypeDXILStepVoid, 0, "v"},
{DebugShaderModifierRecordTypeDXILStepFloat, 4, "f"},
{DebugShaderModifierRecordTypeDXILStepUint32, 4, "3"},
{DebugShaderModifierRecordTypeDXILStepUint64, 8, "6"},
{DebugShaderModifierRecordTypeDXILStepDouble, 8, "d"}};
// Looks up the RecordTypeData row for RecordType. Returns a pointer to the
// matching row, or std::nullopt if the table has no row for that type.
std::optional<RecordTypeDatum const *>
FindDatum(DebugShaderModifierRecordType RecordType) {
  RecordTypeDatum const *const TableEnd = std::end(RecordTypeData);
  RecordTypeDatum const *const Match = std::find_if(
      std::begin(RecordTypeData), TableEnd,
      [RecordType](RecordTypeDatum const &Row) {
        return Row.Type == RecordType;
      });
  if (Match == TableEnd)
    return std::nullopt;
  return Match;
}
uint32_t DxilDebugInstrumentation::CountBlockPayloadBytes(
std::vector<InstructionToInstrument> const &IsAndTs) {
uint32_t count = 0;
for (auto const &IandT : IsAndTs) {
auto datum = FindDatum(IandT.ValueType);
if (datum)
count += (*datum)->PayloadSize;
}
return count;
}
// Returns the report token for the record type of IandT. A type without a
// RecordTypeData row is unexpected: it asserts, and falls back to "v".
const char *TypeString(InstructionAndType const &IandT) {
  auto const Row = FindDatum(IandT.Type);
  if (!Row) {
    assert(false);
    return "v";
  }
  return (*Row)->AsString;
}
// Starting at I, walks forward past PHI nodes and returns the first
// instruction that is not a PHI (I itself if it is not one).
// NOTE(review): relies on a non-PHI instruction following within the block.
Instruction *FindFirstNonPhiInstruction(Instruction *I) {
  Instruction *Cursor = I;
  for (; llvm::isa<llvm::PHINode>(Cursor); Cursor = Cursor->getNextNode()) {
  }
  return Cursor;
}
// This function reports a textual representation of the format
// of the debug data that will be output by the instructions
// added by this pass.
// The string has one or more lines of the exemplary form
// Block#3:5,f,22,a;7,f,22,s,20;9,f,22,s,20;10,f,23,a;12,f,23,s,21;
// The integer after the Block# is the first instruction number in the
// block.
// Instructions are delimited by ; The fields within the instruction
// (delimited by ,) are, in order:
// -instruction ordinal
// -data type (r=ret, v=void, f=float, 3=int32, 6=int64, d=double)
// -scalar register number
// -alloca/scalar indicator:
// r == ret instruction
// a == scalar is being created and assigned a value, and that
// value is in the debug output.
// s == Existing scalar is being assigned via static alloca index.
// Index is appended to this instruction record. No
// corresponding data in the debug output.
// d == A dynamic index added to the static base index. Base index
// is appended to this record. The corresponding debug entry is
// the dynamic index into that alloca.
// v == A void terminator or other void-valued instruction. No
// corresponding data in the debug output.
// If indicator is "s", a string of the form base+index for the alloca
// store location.
// If indicator is "d", a single integer denoting the base for the alloca
// store.
// Scans BB without modifying it. Appends this block's line of the textual
// report to OSOverride, and returns the ordinal of the block's first numbered
// instruction together with the list of instructions for which a value must
// be written to debug memory (and where that write is to be inserted).
// HlslOP is not used.
DxilDebugInstrumentation::BlockInstrumentationData
DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock(
    BasicBlock &BB, OP *HlslOP,
    llvm::SmallPtrSetImpl<Value *> const &RayQueryHandles) {
  (void)HlslOP;
  BlockInstrumentationData Result{};
  *OSOverride << "Block#";

  bool HaveBlockOrdinal = false;
  for (Instruction &Inst : BB.getInstList()) {
    // The block is labelled with the first instruction number found in it.
    if (!HaveBlockOrdinal) {
      std::uint32_t InstNum;
      if (pix_dxil::PixDxilInstNum::FromInst(&Inst, &InstNum)) {
        *OSOverride << std::to_string(InstNum) << ":";
        Result.FirstInstructionOrdinalInBlock = InstNum;
        HaveBlockOrdinal = true;
      }
    }

    // A null builder context means "classify only, emit nothing".
    auto Step = addStepDebugEntry(nullptr, &Inst, RayQueryHandles);
    if (!Step)
      continue;

    InstructionToInstrument Pending{};
    Pending.ValueType = Step->Type;

    // Instrumentation cannot go after a terminator or between PHIs, so in
    // those cases it is placed before the first non-PHI instruction instead.
    Instruction *Anchor = FindFirstNonPhiInstruction(&Inst);
    const bool MustInsertBefore =
        Anchor->isTerminator() || llvm::isa<llvm::PHINode>(Inst);
    if (MustInsertBefore)
      Pending.InstructionBeforeWhichToAddInstrumentation = Anchor;
    else
      Pending.InstructionAfterWhichToAddInstrumentation = Anchor;

    const char *IndexingToken = nullptr;
    std::optional<std::string> RegisterOrStaticIndex;
    if (Step->Type == DebugShaderModifierRecordTypeDXILStepRet) {
      IndexingToken = "r";
    } else if (Step->Type == DebugShaderModifierRecordTypeDXILStepVoid) {
      IndexingToken = "v"; // void instruction, no debug output required
    } else if (Step->AllocaWriteIndex == nullptr) {
      IndexingToken = "a"; // meaning an SSA assignment
      // todo: Can SSA Values be assigned a literal constant?
      Pending.ValueToWriteToDebugMemory = Step->Inst;
    } else if (ConstantInt *StaticIndex =
                   dyn_cast<ConstantInt>(Step->AllocaWriteIndex)) {
      IndexingToken = "s"; // static indexing, no debug output required
      RegisterOrStaticIndex = std::to_string(Step->AllocaBase) + "+" +
                              std::to_string(StaticIndex->getLimitedValue());
    } else {
      IndexingToken = "d"; // dynamic indexing
      RegisterOrStaticIndex = std::to_string(Step->AllocaBase);
      Pending.ValueToWriteToDebugMemory = Step->AllocaWriteIndex;
    }

    // Emit this instruction's fields, comma-separated, terminated by ';'.
    *OSOverride << std::to_string(Step->InstructionOrdinal) << ",";
    *OSOverride << TypeString(*Step) << ",";
    *OSOverride << std::to_string(Step->RegisterNumber) << ",";
    *OSOverride << IndexingToken;
    if (RegisterOrStaticIndex)
      *OSOverride << "," << *RegisterOrStaticIndex;
    if (Step->ConstantAllocaStoreValue)
      *OSOverride << "," << std::to_string(*Step->ConstantAllocaStoreValue);
    *OSOverride << ";";

    // Only instructions with a value to record need instrumentation.
    if (Pending.ValueToWriteToDebugMemory)
      Result.Instructions.push_back(std::move(Pending));
  }

  *OSOverride << "\n";
  return Result;
}
// Instruments a single function.
//
// Returns false without touching the function if its shader kind is not one
// of those listed in the first switch below. Otherwise:
//   1. creates the debug UAV handle and emits the per-invocation prolog
//      (system values, invocation selection, counter setup, start marker) at
//      the first non-alloca insertion point;
//   2. for every original basic block, writes the block's line of the report
//      and, if the block is within [m_FirstInstruction, m_LastInstruction),
//      emits a block record: header, invocation id, first instruction
//      ordinal, then one value per instrumented instruction;
//   3. re-emits the DXIL resources and returns true.
bool DxilDebugInstrumentation::RunOnFunction(Module &M, DxilModule &DM,
llvm::Function *function) {
DXIL::ShaderKind shaderKind =
PIXPassHelpers::GetFunctionShaderKind(DM, function);
// Only these shader kinds are instrumented.
switch (shaderKind) {
case DXIL::ShaderKind::Amplification:
case DXIL::ShaderKind::Mesh:
case DXIL::ShaderKind::Vertex:
case DXIL::ShaderKind::Geometry:
case DXIL::ShaderKind::Pixel:
case DXIL::ShaderKind::Compute:
case DXIL::ShaderKind::RayGeneration:
case DXIL::ShaderKind::Hull:
case DXIL::ShaderKind::Domain:
case DXIL::ShaderKind::Intersection:
case DXIL::ShaderKind::AnyHit:
case DXIL::ShaderKind::ClosestHit:
case DXIL::ShaderKind::Miss:
break;
default:
return false;
}
llvm::SmallPtrSet<Value *, 16> RayQueryHandles;
PIXPassHelpers::FindRayQueryHandlesForFunction(function, RayQueryHandles);
// The prolog is emitted at the first non-alloca insertion point.
Instruction *firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt(function);
IRBuilder<> Builder(firstInsertionPt);
LLVMContext &Ctx = M.getContext();
OP *HlslOP = DM.GetOP();
BuilderContext BC{M, DM, Ctx, HlslOP, Builder};
auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()];
// PIX binds two UAVs when running this instrumentation: one for raygen
// shaders and another for the hitgroups and miss shaders. Since PIX invokes
// this pass at the library level, which may contain examples of both types,
// PIX can't really specify which UAV index to use per-shader. This pass
// therefore just has to know this:
constexpr unsigned int RayGenUAVRegister = 0;
constexpr unsigned int HitGroupAndMissUAVRegister = 1;
// Every kind not listed in the switch below uses the raygen register.
unsigned int UAVRegisterId = RayGenUAVRegister;
switch (shaderKind) {
case DXIL::ShaderKind::ClosestHit:
case DXIL::ShaderKind::Intersection:
case DXIL::ShaderKind::AnyHit:
case DXIL::ShaderKind::Miss:
UAVRegisterId = HitGroupAndMissUAVRegister;
break;
}
values.UAVHandle = PIXPassHelpers::CreateUAV(DM, Builder, UAVRegisterId,
"PIX_DebugUAV_Handle");
auto SystemValues = addRequiredSystemValues(BC, shaderKind);
addInvocationSelectionProlog(BC, SystemValues, shaderKind);
determineLimitANDAndInitializeCounter(BC);
addInvocationStartMarker(BC);
// Instrument original instructions:
for (auto &BB : function->getBasicBlockList()) {
// Blocks listed in AddedBlocksToIgnoreForInstrumentation are skipped.
if (std::find(values.AddedBlocksToIgnoreForInstrumentation.begin(),
values.AddedBlocksToIgnoreForInstrumentation.end(),
&BB) == values.AddedBlocksToIgnoreForInstrumentation.end()) {
// Note: this call writes the block's report line even if the block is
// then skipped by the instruction-range check below.
auto BlockInstrumentation =
FindInstrumentableInstructionsInBlock(BB, BC.HlslOP, RayQueryHandles);
if (BlockInstrumentation.FirstInstructionOrdinalInBlock <
m_FirstInstruction ||
BlockInstrumentation.FirstInstructionOrdinalInBlock >=
m_LastInstruction)
continue;
uint32_t BlockPayloadBytes =
CountBlockPayloadBytes(BlockInstrumentation.Instructions);
// If the block has no instructions which require debug output,
// we will still write an empty block header at the end of that
// block (i.e. before the terminator) so that the instrumentation
// at least indicates that flow control went through the block.
Instruction *BlockInstrumentationStart = (BB).getTerminator();
if (!BlockInstrumentation.Instructions.empty()) {
// Otherwise the header goes immediately before the anchor instruction
// of the first instrumented instruction.
auto const &First = BlockInstrumentation.Instructions[0];
if (First.InstructionAfterWhichToAddInstrumentation != nullptr)
BlockInstrumentationStart =
First.InstructionAfterWhichToAddInstrumentation;
else if (First.InstructionBeforeWhichToAddInstrumentation != nullptr)
BlockInstrumentationStart =
First.InstructionBeforeWhichToAddInstrumentation;
else {
assert(false);
continue;
}
}
// This Builder shadows the function-level one and is positioned at the
// start of this block's instrumentation.
IRBuilder<> Builder(BlockInstrumentationStart);
BuilderContext BCForBlock{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
DebugShaderModifierRecordDXILBlock step = {};
auto FullRecordSize =
static_cast<uint32_t>(sizeof(step) + BlockPayloadBytes);
// A record this large is rejected: it is reported as a static overflow
// and no further blocks of this function are instrumented.
if (FullRecordSize >= (m_UAVSize / 4) - 1) {
*OSOverride << "StaticOverflow:" << std::to_string(FullRecordSize)
<< "\n";
break;
}
// One reservation covers the block header plus all of the block's values.
reserveDebugEntrySpace(BCForBlock, FullRecordSize);
step.Header.Details.CountOfInstructions =
static_cast<uint16_t>(BlockInstrumentation.Instructions.size());
step.Header.Details.Type =
static_cast<uint8_t>(DebugShaderModifierRecordTypeDXILStepBlock);
addDebugEntryValue(BCForBlock,
BCForBlock.HlslOP->GetU32Const(step.Header.u32Header));
addDebugEntryValue(BCForBlock, values.InvocationId);
addDebugEntryValue(
BCForBlock, BCForBlock.HlslOP->GetU32Const(
BlockInstrumentation.FirstInstructionOrdinalInBlock));
// Write each instrumented value at its own insertion point: just after
// the "after" anchor, or just before the "before" anchor.
for (auto &Inst : BlockInstrumentation.Instructions) {
Instruction *BuilderInstruction;
if (Inst.InstructionAfterWhichToAddInstrumentation != nullptr)
BuilderInstruction =
Inst.InstructionAfterWhichToAddInstrumentation->getNextNode();
else if (Inst.InstructionBeforeWhichToAddInstrumentation != nullptr)
BuilderInstruction = Inst.InstructionBeforeWhichToAddInstrumentation;
else {
assert(false);
continue;
}
IRBuilder<> Builder(BuilderInstruction);
BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder};
addDebugEntryValue(BC2, Inst.ValueToWriteToDebugMemory);
}
}
}
DM.ReEmitDxilResources();
return true;
}
// Definition of the pass's static ID member.
char DxilDebugInstrumentation::ID = 0;
// Factory for the debug instrumentation pass. Returns a newly allocated pass
// object; the caller receives the raw pointer.
ModulePass *llvm::createDxilDebugInstrumentationPass() {
  ModulePass *NewPass = new DxilDebugInstrumentation();
  return NewPass;
}
// Registers the pass under the command-line name
// "hlsl-dxil-debug-instrumentation" with the description below.
INITIALIZE_PASS(DxilDebugInstrumentation, "hlsl-dxil-debug-instrumentation",
"HLSL DXIL debug instrumentation for PIX", false, false)