| /////////////////////////////////////////////////////////////////////////////// |
| // // |
| // ExecutionTest.cpp // |
| // Copyright (C) Microsoft Corporation. All rights reserved. // |
| // This file is distributed under the University of Illinois Open Source // |
| // License. See LICENSE.TXT for details. // |
| // // |
| // These tests run by executing compiled programs, and thus involve more // |
| // moving parts, like the runtime and drivers. // |
| // // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| // We need to keep & fix these warnings to integrate smoothly with HLK |
| #pragma warning(error: 4100 4146 4242 4244 4267 4701 4389) |
| |
| #include <algorithm> |
| #include <memory> |
| #include <array> |
| #include <vector> |
| #include <string> |
| #include <map> |
| #include <unordered_set> |
| #include <strstream> |
| #include <iomanip> |
| #include "dxc/Test/CompilationResult.h" |
| #include "dxc/Test/HLSLTestData.h" |
| #include <Shlwapi.h> |
| #include <atlcoll.h> |
| #include <locale> |
| #include <algorithm> |
| #include <bitset> |
| |
| #undef _read |
| #include "WexTestClass.h" |
| #include "dxc/Test/HlslTestUtils.h" |
| #include "dxc/Test/DxcTestUtils.h" |
| #include "dxc/Support/Global.h" |
| #include "dxc/Support/WinIncludes.h" |
| #include "dxc/Support/FileIOHelper.h" |
| #include "dxc/Support/Unicode.h" |
| |
| // |
| // d3d12.h and dxgi1_4.h are included in the Windows 10 SDK |
| // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899120(v=vs.85).aspx |
| // https://developer.microsoft.com/en-US/windows/downloads/windows-10-sdk |
| // |
| #include <d3d12.h> |
| #include <dxgi1_4.h> |
| #include <DXGIDebug.h> |
| #include "dxc/Support/d3dx12.h" |
| #include <DirectXMath.h> |
| #include <strsafe.h> |
| #include <d3dcompiler.h> |
| #include <wincodec.h> |
| #include "ShaderOpTest.h" |
| #include <libloaderapi.h> |
| |
| #pragma comment(lib, "d3dcompiler.lib") |
| #pragma comment(lib, "windowscodecs.lib") |
| #pragma comment(lib, "dxguid.lib") |
| #pragma comment(lib, "version.lib") |
| |
| // A more recent Windows SDK than currently required is needed for these. |
| typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)( |
| UINT NumFeatures, |
| __in_ecount(NumFeatures) const IID* pIIDs, |
| __in_ecount_opt(NumFeatures) void* pConfigurationStructs, |
| __in_ecount_opt(NumFeatures) UINT* pConfigurationStructSizes); |
| |
| static const GUID D3D12ExperimentalShaderModelsID = { /* 76f5573e-f13a-40f5-b297-81ce9e18933f */ |
| 0x76f5573e, |
| 0xf13a, |
| 0x40f5, |
| { 0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f } |
| }; |
| |
| // Used to create D3D12SDKConfiguration to enable AgilitySDK programmatically. |
| typedef HRESULT(WINAPI *D3D12GetInterfaceFn)(REFCLSID rclsid, REFIID riid, void **ppvDebug); |
| |
| #ifndef __ID3D12SDKConfiguration_INTERFACE_DEFINED__ |
| // Copied from AgilitySDK D3D12.h to programmatically enable when in developer mode. |
| #define __ID3D12SDKConfiguration_INTERFACE_DEFINED__ |
| |
| EXTERN_C const GUID DECLSPEC_SELECTANY IID_ID3D12SDKConfiguration = {0xe9eb5314,0x33aa,0x42b2, {0xa7,0x18,0xd7,0x7f,0x58,0xb1,0xf1,0xc7}}; |
| EXTERN_C const GUID DECLSPEC_SELECTANY CLSID_D3D12SDKConfiguration = {0x7cda6aca, 0xa03e, 0x49c8, {0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce}}; |
| |
| MIDL_INTERFACE("e9eb5314-33aa-42b2-a718-d77f58b1f1c7") |
| ID3D12SDKConfiguration : public IUnknown |
| { |
| public: |
| virtual HRESULT STDMETHODCALLTYPE SetSDKVersion( |
| UINT SDKVersion, |
| _In_z_ LPCSTR SDKPath) = 0; |
| }; |
| #endif /* __ID3D12SDKConfiguration_INTERFACE_DEFINED__ */ |
| |
| using namespace DirectX; |
| using namespace hlsl_test; |
| |
| |
// Returns true when `val` compares equal to at least one element of `s`.
//
// The sequence is taken by const reference: this avoids copying the whole
// container on every lookup and also lets built-in arrays be searched (taken
// by value they would decay to a pointer, which std::cbegin/std::cend reject).
template <typename TSequence, typename T>
static bool contains(const TSequence &s, const T &val) {
  return std::cend(s) != std::find(std::cbegin(s), std::cend(s), val);
}
| |
// Iterator-range overload: reports whether any element of [b, e) compares
// equal to `val`. An empty range yields false.
template <typename InputIterator, typename T>
static bool contains(InputIterator b, InputIterator e, const T &val) {
  const InputIterator match = std::find(b, e, val);
  return match != e;
}
| |
| static HRESULT ReportLiveObjects() { |
| CComPtr<IDXGIDebug1> pDebug; |
| IFR(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&pDebug))); |
| IFR(pDebug->ReportLiveObjects(DXGI_DEBUG_ALL, DXGI_DEBUG_RLO_ALL)); |
| return S_OK; |
| } |
| |
| static void WriteInfoQueueMessages(void *pStrCtx, st::OutputStringFn pOutputStrFn, ID3D12InfoQueue *pInfoQueue) { |
| bool allMessagesOK = true; |
| UINT64 count = pInfoQueue->GetNumStoredMessages(); |
| CAtlArray<BYTE> message; |
| for (UINT64 i = 0; i < count; ++i) { |
| // 'GetMessageA' rather than 'GetMessage' is an artifact of user32 headers. |
| SIZE_T msgLen = 0; |
| if (FAILED(pInfoQueue->GetMessageA(i, nullptr, &msgLen))) { |
| allMessagesOK = false; |
| continue; |
| } |
| if (message.GetCount() < msgLen) { |
| if (!message.SetCount(msgLen)) { |
| allMessagesOK = false; |
| continue; |
| } |
| } |
| D3D12_MESSAGE *pMessage = (D3D12_MESSAGE *)message.GetData(); |
| if (FAILED(pInfoQueue->GetMessageA(i, pMessage, &msgLen))) { |
| allMessagesOK = false; |
| continue; |
| } |
| CA2W msgW(pMessage->pDescription, CP_ACP); |
| pOutputStrFn(pStrCtx, msgW.m_psz); |
| pOutputStrFn(pStrCtx, L"\r\n"); |
| } |
| if (!allMessagesOK) { |
| pOutputStrFn(pStrCtx, L"Failed to retrieve some messages.\r\n"); |
| } |
| } |
| |
| class CComContext { |
| private: |
| bool m_init; |
| public: |
| CComContext() : m_init(false) {} |
| ~CComContext() { Dispose(); } |
| void Dispose() { if (!m_init) return; m_init = false; CoUninitialize(); } |
| HRESULT Init() { HRESULT hr = CoInitializeEx(0, COINIT_MULTITHREADED); if (SUCCEEDED(hr)) { m_init = true; } return hr; } |
| }; |
| |
| static void SavePixelsToFile(LPCVOID pPixels, DXGI_FORMAT format, UINT32 m_width, UINT32 m_height, LPCWSTR pFileName) { |
| CComContext ctx; |
| CComPtr<IWICImagingFactory> pFactory; |
| CComPtr<IWICBitmap> pBitmap; |
| CComPtr<IWICBitmapEncoder> pEncoder; |
| CComPtr<IWICBitmapFrameEncode> pFrameEncode; |
| CComPtr<hlsl::AbstractMemoryStream> pStream; |
| CComPtr<IMalloc> pMalloc; |
| |
| struct PF { |
| DXGI_FORMAT Format; |
| GUID PixelFormat; |
| UINT32 PixelSize; |
| bool operator==(DXGI_FORMAT F) const { |
| return F == Format; |
| } |
| } Vals[] = { |
| // Add more pixel format mappings as needed. |
| { DXGI_FORMAT_R8G8B8A8_UNORM, GUID_WICPixelFormat32bppRGBA, 4 } |
| }; |
| PF *pFormat = std::find(Vals, Vals + _countof(Vals), format); |
| |
| VERIFY_SUCCEEDED(ctx.Init()); |
| VERIFY_SUCCEEDED(CoCreateInstance(CLSID_WICImagingFactory, NULL, CLSCTX_INPROC_SERVER, IID_IWICImagingFactory, (LPVOID*)&pFactory)); |
| VERIFY_SUCCEEDED(CoGetMalloc(1, &pMalloc)); |
| VERIFY_SUCCEEDED(hlsl::CreateMemoryStream(pMalloc, &pStream)); |
| VERIFY_ARE_NOT_EQUAL(pFormat, Vals + _countof(Vals)); |
| VERIFY_SUCCEEDED(pFactory->CreateBitmapFromMemory(m_width, m_height, pFormat->PixelFormat, m_width * pFormat->PixelSize, m_width * m_height * pFormat->PixelSize, (BYTE *)pPixels, &pBitmap)); |
| VERIFY_SUCCEEDED(pFactory->CreateEncoder(GUID_ContainerFormatBmp, nullptr, &pEncoder)); |
| VERIFY_SUCCEEDED(pEncoder->Initialize(pStream, WICBitmapEncoderNoCache)); |
| VERIFY_SUCCEEDED(pEncoder->CreateNewFrame(&pFrameEncode, nullptr)); |
| VERIFY_SUCCEEDED(pFrameEncode->Initialize(nullptr)); |
| VERIFY_SUCCEEDED(pFrameEncode->WriteSource(pBitmap, nullptr)); |
| VERIFY_SUCCEEDED(pFrameEncode->Commit()); |
| VERIFY_SUCCEEDED(pEncoder->Commit()); |
| hlsl::WriteBinaryFile(pFileName, pStream->GetPtr(), pStream->GetPtrSize()); |
| } |
| |
| // Checks if the given warp version supports the given operation. |
| bool IsValidWarpDllVersion(unsigned int minBuildNumber) { |
| HMODULE pLibrary = LoadLibrary("D3D10Warp.dll"); |
| if (pLibrary) { |
| char path[MAX_PATH]; |
| DWORD length = GetModuleFileName(pLibrary, path, MAX_PATH); |
| if (length) { |
| DWORD dwVerHnd = 0; |
| DWORD dwVersionInfoSize = GetFileVersionInfoSize(path, &dwVerHnd); |
| std::unique_ptr<int[]> VffInfo(new int[dwVersionInfoSize]); |
| if (GetFileVersionInfo(path, NULL, dwVersionInfoSize, VffInfo.get())) { |
| LPVOID versionInfo; |
| UINT size; |
| if (VerQueryValue(VffInfo.get(), "\\", &versionInfo, &size)) { |
| if (size) { |
| VS_FIXEDFILEINFO *verInfo = (VS_FIXEDFILEINFO *)versionInfo; |
| unsigned int warpBuildNumber = verInfo->dwFileVersionLS >> 16 & 0xffff; |
| if (verInfo->dwSignature == 0xFEEF04BD && warpBuildNumber >= minBuildNumber) { |
| return true; |
| } |
| } |
| } |
| } |
| } |
| FreeLibrary(pLibrary); |
| } |
| return false; |
| } |
| |
// Fallback declarations used only when WDK_NTDDI_VERSION is at or below
// NTDDI_WIN10_RS2: supplies the D3D12_FEATURE_D3D12_OPTIONS3 feature id, the
// NTDDI_WIN10_RS3 version constant (needed by the guard of the next section),
// and the enum/struct types used with that feature query.
// NOTE(review): presumably mirrors the definitions in newer d3d12.h /
// sdkddkver.h headers - confirm the values against the SDK when updating.
| #if WDK_NTDDI_VERSION <= NTDDI_WIN10_RS2 |
| #define D3D12_FEATURE_D3D12_OPTIONS3 ((D3D12_FEATURE)21) |
| #define NTDDI_WIN10_RS3 0x0A000004 /* ABRACADABRA_WIN10_RS3 */ |
| typedef |
| enum D3D12_COMMAND_LIST_SUPPORT_FLAGS |
| { |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_NONE = 0, |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_DIRECT = (1 << D3D12_COMMAND_LIST_TYPE_DIRECT), |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_BUNDLE = (1 << D3D12_COMMAND_LIST_TYPE_BUNDLE), |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_COMPUTE = (1 << D3D12_COMMAND_LIST_TYPE_COMPUTE), |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_COPY = (1 << D3D12_COMMAND_LIST_TYPE_COPY), |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_DECODE = (1 << 4), |
| D3D12_COMMAND_LIST_SUPPORT_FLAG_VIDEO_PROCESS = (1 << 5) |
| } D3D12_COMMAND_LIST_SUPPORT_FLAGS; |
| |
| typedef |
| enum D3D12_VIEW_INSTANCING_TIER |
| { |
| D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED = 0, |
| D3D12_VIEW_INSTANCING_TIER_1 = 1, |
| D3D12_VIEW_INSTANCING_TIER_2 = 2, |
| D3D12_VIEW_INSTANCING_TIER_3 = 3 |
| } D3D12_VIEW_INSTANCING_TIER; |
| |
// Result structure passed to CheckFeatureSupport for
// D3D12_FEATURE_D3D12_OPTIONS3; every field is an output.
| typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 |
| { |
| _Out_ BOOL CopyQueueTimestampQueriesSupported; |
| _Out_ BOOL CastingFullyTypedFormatSupported; |
| _Out_ DWORD WriteBufferImmediateSupportFlags; |
| _Out_ D3D12_VIEW_INSTANCING_TIER ViewInstancingTier; |
| _Out_ BOOL BarycentricsSupported; |
| } D3D12_FEATURE_DATA_D3D12_OPTIONS3; |
| #endif |
| |
// Fallback declarations used only when WDK_NTDDI_VERSION is at or below
// NTDDI_WIN10_RS3: supplies the D3D12_FEATURE_D3D12_OPTIONS4 feature id and
// the enum/struct types used with that feature query.
// NOTE(review): presumably mirrors the definitions in newer d3d12.h headers -
// confirm the values against the SDK when updating.
| #if WDK_NTDDI_VERSION <= NTDDI_WIN10_RS3 |
| #define D3D12_FEATURE_D3D12_OPTIONS4 ((D3D12_FEATURE)23) |
| typedef enum D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER |
| { |
| D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_0, |
| D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER_1, |
| } D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER; |
| |
// Result structure for D3D12_FEATURE_D3D12_OPTIONS4; every field is an output.
| typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS4 |
| { |
| _Out_ BOOL ReservedBufferPlacementSupported; |
| _Out_ D3D12_SHARED_RESOURCE_COMPATIBILITY_TIER SharedResourceCompatibilityTier; |
| _Out_ BOOL Native16BitShaderOpsSupported; |
| } D3D12_FEATURE_DATA_D3D12_OPTIONS4; |
| |
| #endif |
| |
| // Virtual class to compute the expected result given a set of inputs |
| struct TableParameter; |
| |
| class ExecutionTest { |
| public: |
| // By default, ignore these tests, which require a recent build to run properly. |
| BEGIN_TEST_CLASS(ExecutionTest) |
| TEST_CLASS_PROPERTY(L"Parallel", L"true") |
| TEST_CLASS_PROPERTY(L"Ignore", L"true") |
| TEST_METHOD_PROPERTY(L"Priority", L"0") |
| END_TEST_CLASS() |
| TEST_CLASS_SETUP(ExecutionTestClassSetup) |
| |
| TEST_METHOD(BasicComputeTest); |
| TEST_METHOD(BasicTriangleTest); |
| TEST_METHOD(BasicTriangleOpTest); |
| |
| TEST_METHOD(BasicTriangleOpTestHalf); |
| |
| TEST_METHOD(OutOfBoundsTest); |
| TEST_METHOD(SaturateTest); |
| TEST_METHOD(SignTest); |
| TEST_METHOD(Int64Test); |
| TEST_METHOD(LifetimeIntrinsicTest) |
| TEST_METHOD(WaveIntrinsicsTest); |
| TEST_METHOD(WaveIntrinsicsDDITest); |
| TEST_METHOD(WaveIntrinsicsInPSTest); |
| TEST_METHOD(WaveSizeTest); |
| TEST_METHOD(PartialDerivTest); |
| TEST_METHOD(DerivativesTest); |
| TEST_METHOD(ComputeSampleTest); |
| TEST_METHOD(ATOProgOffset); |
| TEST_METHOD(ATOSampleCmpLevelTest); |
| TEST_METHOD(ATOWriteMSAATest); |
| TEST_METHOD(ATORawGather); |
| TEST_METHOD(AtomicsTest); |
| TEST_METHOD(Atomics64Test); |
| TEST_METHOD(AtomicsRawHeap64Test); |
| TEST_METHOD(AtomicsTyped64Test); |
| TEST_METHOD(AtomicsShared64Test); |
| TEST_METHOD(AtomicsFloatTest); |
| TEST_METHOD(HelperLaneTest); |
| TEST_METHOD(HelperLaneTestWave); |
| TEST_METHOD(SignatureResourcesTest) |
| TEST_METHOD(DynamicResourcesTest) |
| TEST_METHOD(DynamicResourcesDynamicIndexingTest) |
| |
| TEST_METHOD(QuadReadTest) |
| TEST_METHOD(QuadAnyAll); |
| |
| TEST_METHOD(CBufferTestHalf); |
| |
| TEST_METHOD(BasicShaderModel61); |
| TEST_METHOD(BasicShaderModel63); |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsActiveIntTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsActiveIntTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsActiveUintTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsActiveUintTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsPrefixIntTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsPrefixIntTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsPrefixUintTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsPrefixUintTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsSM65IntTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsMultiPrefixIntTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(WaveIntrinsicsSM65UintTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#WaveIntrinsicsMultiPrefixUintTable") |
| END_TEST_METHOD() |
| |
| // TAEF data-driven tests. |
| BEGIN_TEST_METHOD(UnaryFloatOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryFloatOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryFloatOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryFloatOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryFloatOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryFloatOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(UnaryHalfOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryHalfOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryHalfOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryHalfOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryHalfOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryHalfOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(UnaryIntOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryIntOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryIntOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryIntOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryIntOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryIntOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(UnaryUintOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryUintOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryUintOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryUintOpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryUintOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryUintOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(UnaryInt16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryInt16OpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryInt16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryInt16OpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryInt16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryInt16OpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(UnaryUint16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#UnaryUint16OpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(BinaryUint16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#BinaryUint16OpTable") |
| END_TEST_METHOD() |
| BEGIN_TEST_METHOD(TertiaryUint16OpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#TertiaryUint16OpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(DotTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#DotOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(Dot2AddHalfTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#Dot2AddHalfOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(Dot4AddI8PackedTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#Dot4AddI8PackedOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(Dot4AddU8PackedTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#Dot4AddU8PackedOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(Msad4Test) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#Msad4Table") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(DenormBinaryFloatOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#DenormBinaryFloatOpTable") |
| END_TEST_METHOD() |
| |
| BEGIN_TEST_METHOD(DenormTertiaryFloatOpTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#DenormTertiaryFloatOpTable") |
| END_TEST_METHOD() |
| |
| TEST_METHOD(BarycentricsTest); |
| |
| TEST_METHOD(ComputeRawBufferLdStI32); |
| TEST_METHOD(ComputeRawBufferLdStFloat); |
| |
| TEST_METHOD(ComputeRawBufferLdStI64); |
| TEST_METHOD(ComputeRawBufferLdStDouble); |
| |
| TEST_METHOD(ComputeRawBufferLdStI16); |
| TEST_METHOD(ComputeRawBufferLdStHalf); |
| |
| TEST_METHOD(GraphicsRawBufferLdStI32); |
| TEST_METHOD(GraphicsRawBufferLdStFloat); |
| |
| TEST_METHOD(GraphicsRawBufferLdStI64); |
| TEST_METHOD(GraphicsRawBufferLdStDouble); |
| |
| TEST_METHOD(GraphicsRawBufferLdStI16); |
| TEST_METHOD(GraphicsRawBufferLdStHalf); |
| |
| BEGIN_TEST_METHOD(PackUnpackTest) |
| TEST_METHOD_PROPERTY(L"DataSource", L"Table:ShaderOpArithTable.xml#PackUnpackOpTable") |
| END_TEST_METHOD() |
| |
| dxc::DxcDllSupport m_support; |
| VersionSupportInfo m_ver; |
| |
| bool m_D3DInitCompleted = false; |
| bool m_ExperimentalModeEnabled = false; |
| bool m_AgilitySDKEnabled = false; |
| |
| const float ClearColor[4] = { 0.0f, 0.2f, 0.4f, 1.0f }; |
| |
| bool DivergentClassSetup() { |
| // Run this only once. |
| if (!m_D3DInitCompleted) { |
| m_D3DInitCompleted = true; |
| |
| HMODULE hRuntime = LoadLibraryW(L"d3d12.dll"); |
| if (hRuntime == NULL) |
| return false; |
| // Do not: FreeLibrary(hRuntime); |
| // If we actually free the library, it defeats the purpose of |
| // EnableAgilitySDK and EnableExperimentalMode. |
| |
| HRESULT hr; |
| hr = EnableAgilitySDK(hRuntime); |
| if (FAILED(hr)) { |
| LogCommentFmt(L"Unable to enable Agility SDK - 0x%08x.", hr); |
| } else if (hr == S_FALSE) { |
| LogCommentFmt(L"Agility SDK not enabled."); |
| } else { |
| LogCommentFmt(L"Agility SDK enabled."); |
| } |
| |
| hr = EnableExperimentalMode(hRuntime); |
| if (FAILED(hr)) { |
| LogCommentFmt(L"Unable to enable shader experimental mode - 0x%08x.", hr); |
| } else if (hr == S_FALSE) { |
| LogCommentFmt(L"Experimental mode not enabled."); |
| } else { |
| LogCommentFmt(L"Experimental mode enabled."); |
| } |
| |
| hr = EnableDebugLayer(); |
| if (FAILED(hr)) { |
| LogCommentFmt(L"Unable to enable debug layer - 0x%08x.", hr); |
| } else if (hr == S_FALSE) { |
| LogCommentFmt(L"Debug layer not enabled."); |
| } else { |
| LogCommentFmt(L"Debug layer enabled."); |
| } |
| } |
| |
| return true; |
| } |
| |
| // Do not remove the following line - it is used by TranslateExecutionTest.py |
| // MARKER: ExecutionTest/DxilConf Shared Implementation Start |
| |
| // This is defined in d3d.h for Windows 10 Anniversary Edition SDK, but we only |
| // require the Windows 10 SDK. |
| typedef enum D3D_SHADER_MODEL { |
| D3D_SHADER_MODEL_5_1 = 0x51, |
| D3D_SHADER_MODEL_6_0 = 0x60, |
| D3D_SHADER_MODEL_6_1 = 0x61, |
| D3D_SHADER_MODEL_6_2 = 0x62, |
| D3D_SHADER_MODEL_6_3 = 0x63, |
| D3D_SHADER_MODEL_6_4 = 0x64, |
| D3D_SHADER_MODEL_6_5 = 0x65, |
| D3D_SHADER_MODEL_6_6 = 0x66, |
| D3D_SHADER_MODEL_6_7 = 0x67, |
| } D3D_SHADER_MODEL; |
| |
| static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_7; |
| |
| bool UseDxbc() { |
| #ifdef _HLK_CONF |
| return false; |
| #else |
| return GetTestParamBool(L"DXBC"); |
| #endif |
| } |
| |
// Default handed to GetTestParamUseWARP when choosing the device: true in
// regular builds, false when _HLK_CONF is defined.
bool UseWarpByDefault() {
#ifndef _HLK_CONF
  return true;
#else
  return false;
#endif
}
| |
// Whether debug interfaces (e.g. the device info queue) should be used.
// Currently unconditional.
bool UseDebugIfaces() {
  return true;
}
| |
| bool SaveImages() { |
| return GetTestParamBool(L"SaveImages"); |
| } |
| |
| // Base class used by raw gather test for polymorphic assignments |
| struct RawGatherTexture { |
| // Set Element <i> to a format-appropriate value derived from 2D coords <x,y> |
| virtual void SetElement(int i, int x, int y) = 0; |
| // Retrieve pointer to the elements |
| virtual void *GetElements() = 0; |
| // Get dimensions/format |
| virtual unsigned GetXDim() = 0; |
| virtual unsigned GetYDim() = 0; |
| virtual DXGI_FORMAT GetFormat() = 0; |
| }; |
| |
| template<typename GatherType> |
| void DoRawGatherTest(ID3D12Device *pDevice, RawGatherTexture *rawTex, DXGI_FORMAT viewFormat); |
| void RunResourceTest(ID3D12Device *pDevice, const char *pShader, const wchar_t *sm, bool isDynamic); |
| |
| template <class T1, class T2> |
| void WaveIntrinsicsActivePrefixTest(TableParameter *pParameterList, |
| size_t numParameter, bool isPrefix); |
| |
| template <typename T> |
| void WaveIntrinsicsMultiPrefixOpTest(TableParameter *pParameterList, |
| size_t numParameters); |
| |
| void BasicTriangleTestSetup(LPCSTR OpName, LPCWSTR FileName, D3D_SHADER_MODEL testModel); |
| |
| void RunBasicShaderModelTest(D3D_SHADER_MODEL shaderModel); |
| |
| enum class RawBufferLdStType { |
| I32, |
| Float, |
| I64, |
| Double, |
| I16, |
| Half |
| }; |
| |
| template <class Ty> |
| struct RawBufferLdStTestData { |
| Ty v1, v2[2], v3[3], v4[4]; |
| }; |
| |
| template <class Ty> |
| struct RawBufferLdStUavData { |
| RawBufferLdStTestData<Ty> input, output, srvOut; |
| }; |
| |
| template <class Ty> |
| void RunComputeRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, |
| const char *shaderOpName, const RawBufferLdStTestData<Ty> &testData); |
| |
| template <class Ty> |
| void RunGraphicsRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, |
| const char *shaderOpName, const RawBufferLdStTestData<Ty> &testData); |
| |
| template <class Ty> |
| void VerifyRawBufferLdStTestResults(const std::shared_ptr<st::ShaderOpTest> test, const RawBufferLdStTestData<Ty> &testData); |
| |
| bool SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, CComPtr<ID3D12Device> &pDevice, |
| CComPtr<IStream> &pStream, char *&sTy, char *&additionalOptions); |
| |
| template <class Ty> |
| void RunBasicShaderModelTest(CComPtr<ID3D12Device> pDevice, const char *pShaderModelStr, const char *pShader, Ty *pInputDataPairs, unsigned inputDataCount); |
| |
| template <class Ty> |
| const wchar_t* BasicShaderModelTest_GetFormatString(); |
| |
| void CompileFromText(LPCSTR pText, LPCWSTR pEntryPoint, LPCWSTR pTargetProfile, ID3DBlob **ppBlob, LPCWSTR *pOptions = nullptr, int numOptions = 0) { |
| VERIFY_SUCCEEDED(m_support.Initialize()); |
| CComPtr<IDxcCompiler> pCompiler; |
| CComPtr<IDxcLibrary> pLibrary; |
| CComPtr<IDxcBlobEncoding> pTextBlob; |
| CComPtr<IDxcOperationResult> pResult; |
| HRESULT resultCode; |
| VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcCompiler, &pCompiler)); |
| VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary)); |
| VERIFY_SUCCEEDED(pLibrary->CreateBlobWithEncodingFromPinned(pText, (UINT32)strlen(pText), CP_UTF8, &pTextBlob)); |
| VERIFY_SUCCEEDED(pCompiler->Compile(pTextBlob, L"hlsl.hlsl", pEntryPoint, pTargetProfile, pOptions, numOptions, nullptr, 0, nullptr, &pResult)); |
| VERIFY_SUCCEEDED(pResult->GetStatus(&resultCode)); |
| if (FAILED(resultCode)) { |
| CComPtr<IDxcBlobEncoding> errors; |
| VERIFY_SUCCEEDED(pResult->GetErrorBuffer(&errors)); |
| #ifndef _HLK_CONF |
| LogCommentFmt(L"Failed to compile shader: %s", BlobToWide(errors).data()); |
| #endif |
| } |
| VERIFY_SUCCEEDED(resultCode); |
| VERIFY_SUCCEEDED(pResult->GetResult((IDxcBlob **)ppBlob)); |
| } |
| |
| void CreateCommandQueue(ID3D12Device *pDevice, LPCWSTR pName, ID3D12CommandQueue **ppCommandQueue, D3D12_COMMAND_LIST_TYPE type) { |
| D3D12_COMMAND_QUEUE_DESC queueDesc = {}; |
| queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; |
| queueDesc.Type = type; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(ppCommandQueue))); |
| VERIFY_SUCCEEDED((*ppCommandQueue)->SetName(pName)); |
| } |
| |
| void CreateComputeCommandQueue(ID3D12Device *pDevice, LPCWSTR pName, ID3D12CommandQueue **ppCommandQueue) { |
| CreateCommandQueue(pDevice, pName, ppCommandQueue, D3D12_COMMAND_LIST_TYPE_COMPUTE); |
| } |
| |
| void CreateComputePSO(ID3D12Device *pDevice, ID3D12RootSignature *pRootSignature, LPCSTR pShader, LPCWSTR pTargetProfile, ID3D12PipelineState **ppComputeState, LPCWSTR *pOptions = nullptr, int numOptions = 0) { |
| CComPtr<ID3DBlob> pComputeShader; |
| |
| // Load and compile shaders. |
| if (UseDxbc()) { |
| #ifndef _HLK_CONF |
| DXBCFromText(pShader, L"main", pTargetProfile, &pComputeShader); |
| #endif |
| } |
| else { |
| CompileFromText(pShader, L"main", pTargetProfile, &pComputeShader, pOptions, numOptions); |
| } |
| |
| // Describe and create the compute pipeline state object (PSO). |
| D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {}; |
| computePsoDesc.pRootSignature = pRootSignature; |
| computePsoDesc.CS = CD3DX12_SHADER_BYTECODE(pComputeShader); |
| |
| VERIFY_SUCCEEDED(pDevice->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(ppComputeState))); |
| } |
| |
| bool CreateDevice(_COM_Outptr_ ID3D12Device **ppDevice, |
| D3D_SHADER_MODEL testModel = D3D_SHADER_MODEL_6_0, bool skipUnsupported = true) { |
| if (testModel > HIGHEST_SHADER_MODEL) { |
| UINT minor = (UINT)testModel & 0x0f; |
| LogCommentFmt(L"Installed SDK does not support " |
| L"shader model 6.%1u", minor); |
| |
| if (skipUnsupported) { |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| return false; |
| } |
| CComPtr<IDXGIFactory4> factory; |
| CComPtr<ID3D12Device> pDevice; |
| |
| *ppDevice = nullptr; |
| |
| VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&factory))); |
| if (GetTestParamUseWARP(UseWarpByDefault())) { |
| CComPtr<IDXGIAdapter> warpAdapter; |
| VERIFY_SUCCEEDED(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); |
| HRESULT createHR = D3D12CreateDevice(warpAdapter, D3D_FEATURE_LEVEL_11_0, |
| IID_PPV_ARGS(&pDevice)); |
| if (FAILED(createHR)) { |
| LogCommentFmt(L"The available version of WARP does not support d3d12."); |
| |
| if (skipUnsupported) { |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| return false; |
| } |
| } else { |
| CComPtr<IDXGIAdapter1> hardwareAdapter; |
| WEX::Common::String AdapterValue; |
| HRESULT hr = WEX::TestExecution::RuntimeParameters::TryGetValue(L"Adapter", |
| AdapterValue); |
| if (SUCCEEDED(hr)) { |
| GetHardwareAdapter(factory, AdapterValue, &hardwareAdapter); |
| } else { |
| WEX::Logging::Log::Comment( |
| L"Using default hardware adapter with D3D12 support."); |
| } |
| VERIFY_SUCCEEDED(D3D12CreateDevice(hardwareAdapter, D3D_FEATURE_LEVEL_11_0, |
| IID_PPV_ARGS(&pDevice))); |
| } |
| // retrieve adapter information |
| LUID adapterID = pDevice->GetAdapterLuid(); |
| CComPtr<IDXGIAdapter> adapter; |
| factory->EnumAdapterByLuid(adapterID, IID_PPV_ARGS(&adapter)); |
| DXGI_ADAPTER_DESC AdapterDesc; |
| VERIFY_SUCCEEDED(adapter->GetDesc(&AdapterDesc)); |
| LogCommentFmt(L"Using Adapter:%s", AdapterDesc.Description); |
| |
| if (pDevice == nullptr) |
| return false; |
| |
| if (!UseDxbc()) { |
| // Check for DXIL support. |
| typedef struct D3D12_FEATURE_DATA_SHADER_MODEL { |
| _Inout_ D3D_SHADER_MODEL HighestShaderModel; |
| } D3D12_FEATURE_DATA_SHADER_MODEL; |
| const UINT D3D12_FEATURE_SHADER_MODEL = 7; |
| D3D12_FEATURE_DATA_SHADER_MODEL SMData; |
| SMData.HighestShaderModel = testModel; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, |
| &SMData, sizeof(SMData))) || |
| SMData.HighestShaderModel < testModel) { |
| UINT minor = (UINT)testModel & 0x0f; |
| LogCommentFmt(L"The selected device does not support " |
| L"shader model 6.%1u", minor); |
| |
| if (skipUnsupported) { |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| return false; |
| } |
| } |
| |
| if (UseDebugIfaces()) { |
| CComPtr<ID3D12InfoQueue> pInfoQueue; |
| if (SUCCEEDED(pDevice->QueryInterface(&pInfoQueue))) { |
| pInfoQueue->SetMuteDebugOutput(FALSE); |
| } |
| } |
| |
| *ppDevice = pDevice.Detach(); |
| return true; |
| } |
| |
| void CreateGraphicsCommandQueue(ID3D12Device *pDevice, ID3D12CommandQueue **ppCommandQueue) { |
| D3D12_COMMAND_QUEUE_DESC queueDesc = {}; |
| queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; |
| queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(ppCommandQueue))); |
| } |
| |
| void CreateGraphicsCommandQueueAndList( |
| ID3D12Device *pDevice, ID3D12CommandQueue **ppCommandQueue, |
| ID3D12CommandAllocator **ppAllocator, |
| ID3D12GraphicsCommandList **ppCommandList, ID3D12PipelineState *pPSO) { |
| CreateGraphicsCommandQueue(pDevice, ppCommandQueue); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator( |
| D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(ppAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList( |
| 0, D3D12_COMMAND_LIST_TYPE_DIRECT, *ppAllocator, pPSO, |
| IID_PPV_ARGS(ppCommandList))); |
| } |
| |
| void CreateGraphicsPSO(ID3D12Device *pDevice, |
| D3D12_INPUT_LAYOUT_DESC *pInputLayout, |
| ID3D12RootSignature *pRootSignature, LPCSTR pShaders, |
| ID3D12PipelineState **ppPSO) { |
| CComPtr<ID3DBlob> vertexShader; |
| CComPtr<ID3DBlob> pixelShader; |
| |
| if (UseDxbc()) { |
| #ifndef _HLK_CONF |
| DXBCFromText(pShaders, L"VSMain", L"vs_6_0", &vertexShader); |
| DXBCFromText(pShaders, L"PSMain", L"ps_6_0", &pixelShader); |
| #endif |
| } else { |
| CompileFromText(pShaders, L"VSMain", L"vs_6_0", &vertexShader); |
| CompileFromText(pShaders, L"PSMain", L"ps_6_0", &pixelShader); |
| } |
| |
| // Describe and create the graphics pipeline state object (PSO). |
| D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; |
| psoDesc.InputLayout = *pInputLayout; |
| psoDesc.pRootSignature = pRootSignature; |
| psoDesc.VS = CD3DX12_SHADER_BYTECODE(vertexShader); |
| psoDesc.PS = CD3DX12_SHADER_BYTECODE(pixelShader); |
| psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); |
| psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); |
| psoDesc.DepthStencilState.DepthEnable = FALSE; |
| psoDesc.DepthStencilState.StencilEnable = FALSE; |
| psoDesc.SampleMask = UINT_MAX; |
| psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; |
| psoDesc.NumRenderTargets = 1; |
| psoDesc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; |
| psoDesc.SampleDesc.Count = 1; |
| VERIFY_SUCCEEDED( |
| pDevice->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(ppPSO))); |
| } |
| |
| void CreateRenderTargetAndReadback(ID3D12Device *pDevice, |
| ID3D12DescriptorHeap *pHeap, UINT width, |
| UINT height, |
| ID3D12Resource **ppRenderTarget, |
| ID3D12Resource **ppBuffer) { |
| const DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; |
| const size_t formatElementSize = 4; |
| CComPtr<ID3D12Resource> pRenderTarget; |
| CComPtr<ID3D12Resource> pBuffer; |
| |
| CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle( |
| pHeap->GetCPUDescriptorHandleForHeapStart()); |
| CD3DX12_HEAP_PROPERTIES rtHeap(D3D12_HEAP_TYPE_DEFAULT); |
| CD3DX12_RESOURCE_DESC rtDesc( |
| CD3DX12_RESOURCE_DESC::Tex2D(format, width, height)); |
| CD3DX12_CLEAR_VALUE rtClearVal(format, ClearColor); |
| rtDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &rtHeap, D3D12_HEAP_FLAG_NONE, &rtDesc, D3D12_RESOURCE_STATE_COPY_DEST, |
| &rtClearVal, IID_PPV_ARGS(&pRenderTarget))); |
| pDevice->CreateRenderTargetView(pRenderTarget, nullptr, rtvHandle); |
| // rtvHandle.Offset(1, rtvDescriptorSize); // Not needed for a single |
| // resource. |
| |
| CD3DX12_HEAP_PROPERTIES readHeap(D3D12_HEAP_TYPE_READBACK); |
| CD3DX12_RESOURCE_DESC readDesc( |
| CD3DX12_RESOURCE_DESC::Buffer(width * height * formatElementSize)); |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &readHeap, D3D12_HEAP_FLAG_NONE, &readDesc, |
| D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pBuffer))); |
| |
| *ppRenderTarget = pRenderTarget.Detach(); |
| *ppBuffer = pBuffer.Detach(); |
| } |
| |
| void CreateRootSignatureFromDesc(ID3D12Device *pDevice, |
| const D3D12_ROOT_SIGNATURE_DESC *pDesc, |
| ID3D12RootSignature **pRootSig) { |
| CComPtr<ID3DBlob> signature; |
| CComPtr<ID3DBlob> error; |
| VERIFY_SUCCEEDED(D3D12SerializeRootSignature(pDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error)); |
| VERIFY_SUCCEEDED(pDevice->CreateRootSignature( |
| 0, signature->GetBufferPointer(), signature->GetBufferSize(), |
| IID_PPV_ARGS(pRootSig))); |
| } |
| |
| void CreateRootSignatureFromRanges(ID3D12Device *pDevice, ID3D12RootSignature **pRootSig, |
| CD3DX12_DESCRIPTOR_RANGE *resRanges, UINT resCt, |
| CD3DX12_DESCRIPTOR_RANGE *sampRanges = nullptr, UINT sampCt = 0, |
| D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) { |
| UINT paramCt = 0; |
| CD3DX12_ROOT_PARAMETER rootParameters[2]; |
| rootParameters[paramCt++].InitAsDescriptorTable(resCt, resRanges, D3D12_SHADER_VISIBILITY_ALL); |
| if (sampCt) |
| rootParameters[paramCt++].InitAsDescriptorTable(sampCt, sampRanges, D3D12_SHADER_VISIBILITY_ALL); |
| |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init(paramCt, rootParameters, 0, nullptr, flags); |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, pRootSig); |
| } |
| |
| void CreateRtvDescriptorHeap(ID3D12Device *pDevice, UINT numDescriptors, |
| ID3D12DescriptorHeap **pRtvHeap, UINT *rtvDescriptorSize) { |
| D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; |
| rtvHeapDesc.NumDescriptors = numDescriptors; |
| rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; |
| rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; |
| VERIFY_SUCCEEDED( |
| pDevice->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(pRtvHeap))); |
| |
| if (rtvDescriptorSize != nullptr) { |
| *rtvDescriptorSize = pDevice->GetDescriptorHandleIncrementSize( |
| D3D12_DESCRIPTOR_HEAP_TYPE_RTV); |
| } |
| } |
| |
#if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU
// Fills <desc1> from <desc0>: every field the two structures share is copied
// and the DESC1-only SamplerFeedbackMipRegion is value-initialized.
void CopyDesc0ToDesc1(D3D12_RESOURCE_DESC1 &desc1, const D3D12_RESOURCE_DESC &desc0) {
  // Geometry
  desc1.Dimension        = desc0.Dimension;
  desc1.Alignment        = desc0.Alignment;
  desc1.Width            = desc0.Width;
  desc1.Height           = desc0.Height;
  desc1.DepthOrArraySize = desc0.DepthOrArraySize;
  desc1.MipLevels        = desc0.MipLevels;
  // Format and sampling
  desc1.Format           = desc0.Format;
  desc1.SampleDesc       = desc0.SampleDesc;
  // Layout and usage
  desc1.Layout           = desc0.Layout;
  desc1.Flags            = desc0.Flags;
  // Field that only exists in D3D12_RESOURCE_DESC1
  desc1.SamplerFeedbackMipRegion = {};
}
#endif
| |
| // Create resources for the given <resDesc> described main resource |
| // creating and returning the resource, the upload resource, |
| // and the readback resource if requested, populating with <values> of size |
| // <valueSizeInBytes> using <pCommandList> and <pDevice> |
| // A pointer to a single <castFormat> target may be specified |
| // where CreateCommittedResource3 is available |
| void CreateTestResources(ID3D12Device *pDevice, |
| ID3D12GraphicsCommandList *pCommandList, LPCVOID values, |
| UINT64 valueSizeInBytes, D3D12_RESOURCE_DESC resDesc, |
| ID3D12Resource **ppResource, |
| ID3D12Resource **ppUploadResource, |
| ID3D12Resource **ppReadBuffer = nullptr, |
| DXGI_FORMAT *castFormat = nullptr) { |
| CComPtr<ID3D12Resource> pResource; |
| CComPtr<ID3D12Resource> pReadBuffer; |
| CComPtr<ID3D12Resource> pUploadResource; |
| D3D12_SUBRESOURCE_DATA transferData; |
| D3D12_HEAP_PROPERTIES defaultHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); |
| D3D12_HEAP_PROPERTIES uploadHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); |
| D3D12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes); |
| CD3DX12_HEAP_PROPERTIES readHeap(D3D12_HEAP_TYPE_READBACK); |
| CD3DX12_RESOURCE_DESC readDesc(CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes)); |
| |
| pDevice->GetCopyableFootprints(&resDesc, 0, 1/*mipleveles*/, 0, nullptr, nullptr, nullptr, &uploadBufferDesc.Width); |
| uploadBufferDesc.Height = 1; |
| |
| #if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU |
| if (castFormat) { |
| CComPtr<ID3D12Device10> pDevice10; |
| // Copy resDesc0 to resDesc1 zeroing anything new |
| D3D12_RESOURCE_DESC1 resDesc1 = {0}; |
| CopyDesc0ToDesc1(resDesc1, resDesc); |
| VERIFY_SUCCEEDED(pDevice->QueryInterface(IID_PPV_ARGS(&pDevice10))); |
| VERIFY_SUCCEEDED(pDevice10->CreateCommittedResource3( |
| &defaultHeapProperties, |
| D3D12_HEAP_FLAG_NONE, |
| &resDesc1, |
| D3D12_BARRIER_LAYOUT_COPY_DEST, |
| nullptr, |
| nullptr, |
| 1, castFormat, |
| IID_PPV_ARGS(&pResource))); |
| } else |
| #else |
| UNREFERENCED_PARAMETER(castFormat); |
| #endif |
| { |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &defaultHeapProperties, |
| D3D12_HEAP_FLAG_NONE, |
| &resDesc, |
| D3D12_RESOURCE_STATE_COPY_DEST, |
| nullptr, |
| IID_PPV_ARGS(&pResource))); |
| } |
| |
| if (ppUploadResource) |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &uploadHeapProperties, |
| D3D12_HEAP_FLAG_NONE, |
| &uploadBufferDesc, |
| D3D12_RESOURCE_STATE_GENERIC_READ, |
| nullptr, |
| IID_PPV_ARGS(&pUploadResource))); |
| |
| if (ppReadBuffer) |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &readHeap, D3D12_HEAP_FLAG_NONE, &readDesc, |
| D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pReadBuffer))); |
| |
| if (ppUploadResource) { |
| transferData.pData = values; |
| transferData.RowPitch = (LONG_PTR)(valueSizeInBytes/resDesc.Height); |
| transferData.SlicePitch = (LONG_PTR)valueSizeInBytes; |
| |
| UpdateSubresources<1>(pCommandList, pResource.p, pUploadResource.p, 0, 0, 1, &transferData); |
| if (resDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) |
| RecordTransitionBarrier(pCommandList, pResource, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); |
| else |
| RecordTransitionBarrier(pCommandList, pResource, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COMMON); |
| } |
| |
| *ppResource = pResource.Detach(); |
| if (ppUploadResource) |
| *ppUploadResource = pUploadResource.Detach(); |
| if (ppReadBuffer) |
| *ppReadBuffer = pReadBuffer.Detach(); |
| } |
| |
| void CreateTestUavs(ID3D12Device *pDevice, |
| ID3D12GraphicsCommandList *pCommandList, LPCVOID values, |
| UINT64 valueSizeInBytes, ID3D12Resource **ppUavResource, |
| ID3D12Resource **ppUploadResource = nullptr, |
| ID3D12Resource **ppReadBuffer = nullptr) { |
| D3D12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); |
| CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, bufferDesc, |
| ppUavResource, ppUploadResource, ppReadBuffer); |
| |
| } |
| |
| // Create and return descriptor heaps for the given device |
| // with the given number of resources and samples. |
| // using some reasonable defaults |
| void CreateDefaultDescHeaps(ID3D12Device *pDevice, |
| int NumResources, int NumSamplers, |
| ID3D12DescriptorHeap **ppResHeap, ID3D12DescriptorHeap **ppSampHeap) { |
| // Describe and create descriptor heaps. |
| ID3D12DescriptorHeap *pResHeap, *pSampHeap; |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = NumResources; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pResHeap))); |
| |
| heapDesc.NumDescriptors = NumSamplers; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pSampHeap))); |
| |
| *ppResHeap = pResHeap; |
| *ppSampHeap = pSampHeap; |
| } |
| |
| void CreateSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &baseHandle, |
| DXGI_FORMAT format, D3D12_SRV_DIMENSION viewDimension, UINT numElements, UINT stride, |
| const CComPtr<ID3D12Resource> pResource) { |
| UINT descriptorSize = pDevice->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); |
| // Create SRV |
| D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; |
| srvDesc.Format = format; |
| srvDesc.ViewDimension = viewDimension; |
| srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; |
| switch (viewDimension) { |
| case D3D12_SRV_DIMENSION_BUFFER: |
| srvDesc.Buffer.FirstElement = 0; |
| srvDesc.Buffer.NumElements = numElements; |
| srvDesc.Buffer.StructureByteStride = stride; |
| if (format == DXGI_FORMAT_R32_TYPELESS && stride == 0) |
| srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; |
| else |
| srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; |
| break; |
| case D3D12_SRV_DIMENSION_TEXTURE1D: |
| srvDesc.Texture1D.MostDetailedMip = 0; |
| srvDesc.Texture1D.MipLevels = 1; |
| srvDesc.Texture1D.ResourceMinLODClamp = 0; |
| break; |
| case D3D12_SRV_DIMENSION_TEXTURE2D: |
| srvDesc.Texture2D.MostDetailedMip = 0; |
| srvDesc.Texture2D.MipLevels = 1; |
| srvDesc.Texture2D.PlaneSlice = 0; |
| srvDesc.Texture2D.ResourceMinLODClamp = 0; |
| break; |
| } |
| pDevice->CreateShaderResourceView(pResource, &srvDesc, baseHandle); |
| baseHandle.Offset(descriptorSize); |
| } |
| |
| |
| void CreateRawSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, const CComPtr<ID3D12Resource> pResource) { |
| CreateSRV(pDevice, heapStart, DXGI_FORMAT_R32_TYPELESS, D3D12_SRV_DIMENSION_BUFFER, numElements, 0, pResource); |
| } |
| |
| void CreateStructSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, UINT stride, const CComPtr<ID3D12Resource> pResource) { |
| CreateSRV(pDevice, heapStart, DXGI_FORMAT_UNKNOWN, D3D12_SRV_DIMENSION_BUFFER, numElements, stride, pResource); |
| } |
| |
| void CreateTypedSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateSRV(pDevice, heapStart, format, D3D12_SRV_DIMENSION_BUFFER, numElements, 0, pResource); |
| } |
| |
| void CreateTex1DSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateSRV(pDevice, heapStart, format, D3D12_SRV_DIMENSION_TEXTURE1D, numElements, 0, pResource); |
| } |
| |
| void CreateTex2DSRV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateSRV(pDevice, heapStart, format, D3D12_SRV_DIMENSION_TEXTURE2D, 0/*numElements*/, 0/*stride*/, pResource); |
| } |
| |
| void CreateUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &baseHandle, |
| DXGI_FORMAT format, D3D12_UAV_DIMENSION viewDimension, UINT numElements, UINT stride, |
| const CComPtr<ID3D12Resource> pResource) { |
| UINT descriptorSize = pDevice->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); |
| D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; |
| uavDesc.Format = format; |
| uavDesc.ViewDimension = viewDimension; |
| switch (viewDimension) { |
| case D3D12_UAV_DIMENSION_BUFFER: |
| uavDesc.Buffer.FirstElement = 0; |
| uavDesc.Buffer.NumElements = numElements; |
| uavDesc.Buffer.StructureByteStride = stride; |
| if (format == DXGI_FORMAT_R32_TYPELESS && stride == 0) |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; |
| else |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; |
| break; |
| case D3D12_UAV_DIMENSION_TEXTURE1D: |
| uavDesc.Texture1D.MipSlice = 0; |
| break; |
| case D3D12_UAV_DIMENSION_TEXTURE2D: |
| uavDesc.Texture2D.MipSlice = 0; |
| uavDesc.Texture2D.PlaneSlice = 0; |
| break; |
| case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: |
| uavDesc.Texture2DArray.MipSlice = 0; |
| uavDesc.Texture2DArray.PlaneSlice = 0; |
| uavDesc.Texture2DArray.FirstArraySlice = 0; |
| uavDesc.Texture2DArray.ArraySize = numElements; |
| break; |
| default: |
| break; |
| } |
| pDevice->CreateUnorderedAccessView(pResource, nullptr, &uavDesc, baseHandle); |
| baseHandle.Offset(descriptorSize); |
| } |
| |
| void CreateRawUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, DXGI_FORMAT_R32_TYPELESS, D3D12_UAV_DIMENSION_BUFFER, numElements, 0/*stride*/, pResource); |
| } |
| |
| void CreateStructUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, UINT stride, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, DXGI_FORMAT_UNKNOWN, D3D12_UAV_DIMENSION_BUFFER, numElements, stride, pResource); |
| } |
| |
| void CreateTypedUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, format, D3D12_UAV_DIMENSION_BUFFER, numElements, 0/*stride*/, pResource); |
| } |
| |
| void CreateTex1DUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, format, D3D12_UAV_DIMENSION_TEXTURE1D, 0/*numElements*/, 0/*stride*/, pResource); |
| } |
| |
| void CreateTex2DUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, format, D3D12_UAV_DIMENSION_TEXTURE2D, 0/*numElements*/, 0/*stride*/, pResource); |
| } |
| |
| void CreateTex2DArrayUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| UINT numElements, DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, format, D3D12_UAV_DIMENSION_TEXTURE2DARRAY, numElements, 0/*stride*/, pResource); |
| } |
| |
| void CreateTex2DMSUAV(ID3D12Device *pDevice, CD3DX12_CPU_DESCRIPTOR_HANDLE &heapStart, |
| DXGI_FORMAT format, const CComPtr<ID3D12Resource> pResource) { |
| CreateUAV(pDevice, heapStart, format, (D3D12_UAV_DIMENSION)6 /*D3D12_UAV_DIMENSION_TEXTURE2DMS*/, 0 /*numElements*/, 0/*stride*/, pResource); |
| } |
| |
| // Create Samplers for <pDevice> given the filter and border color information provided |
| // using some reasonable defaults |
| void CreateDefaultSamplers(ID3D12Device *pDevice, D3D12_CPU_DESCRIPTOR_HANDLE heapStart, |
| D3D12_FILTER filters[], float *perSamplerBorderColors, int NumSamplers) { |
| |
| CD3DX12_CPU_DESCRIPTOR_HANDLE sampHandle(heapStart); |
| UINT descriptorSize = pDevice->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); |
| D3D12_SAMPLER_DESC sampDesc = {}; |
| sampDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; |
| D3D12_TEXTURE_ADDRESS_MODE addrMode = perSamplerBorderColors? D3D12_TEXTURE_ADDRESS_MODE_BORDER : D3D12_TEXTURE_ADDRESS_MODE_CLAMP; |
| sampDesc.AddressU = sampDesc.AddressV = sampDesc.AddressW = addrMode; |
| sampDesc.MipLODBias = 0; |
| sampDesc.MaxAnisotropy = 1; |
| sampDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_EQUAL; |
| sampDesc.MinLOD = 0; |
| sampDesc.MaxLOD = 0; |
| |
| for (int i = 0; i < NumSamplers; i++) { |
| sampDesc.Filter = filters[i]; |
| if (perSamplerBorderColors) { |
| for (int j = 0; j < 4; j++) |
| sampDesc.BorderColor[j] = perSamplerBorderColors[i]; |
| } |
| |
| pDevice->CreateSampler(&sampDesc, sampHandle); |
| sampHandle = sampHandle.Offset(descriptorSize); |
| } |
| } |
| |
| template <typename TVertex, int len> |
| void CreateVertexBuffer(ID3D12Device *pDevice, TVertex(&vertices)[len], |
| ID3D12Resource **ppVertexBuffer, |
| D3D12_VERTEX_BUFFER_VIEW *pVertexBufferView) { |
| size_t vertexBufferSize = sizeof(vertices); |
| CComPtr<ID3D12Resource> pVertexBuffer; |
| CD3DX12_HEAP_PROPERTIES heapProps(D3D12_HEAP_TYPE_UPLOAD); |
| CD3DX12_RESOURCE_DESC bufferDesc( |
| CD3DX12_RESOURCE_DESC::Buffer(vertexBufferSize)); |
| VERIFY_SUCCEEDED(pDevice->CreateCommittedResource( |
| &heapProps, D3D12_HEAP_FLAG_NONE, &bufferDesc, |
| D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, |
| IID_PPV_ARGS(&pVertexBuffer))); |
| |
| UINT8 *pVertexDataBegin; |
| CD3DX12_RANGE readRange(0, 0); |
| VERIFY_SUCCEEDED(pVertexBuffer->Map( |
| 0, &readRange, reinterpret_cast<void **>(&pVertexDataBegin))); |
| memcpy(pVertexDataBegin, vertices, vertexBufferSize); |
| pVertexBuffer->Unmap(0, nullptr); |
| |
| // Initialize the vertex buffer view. |
| pVertexBufferView->BufferLocation = pVertexBuffer->GetGPUVirtualAddress(); |
| pVertexBufferView->StrideInBytes = sizeof(TVertex); |
| pVertexBufferView->SizeInBytes = (UINT)vertexBufferSize; |
| |
| *ppVertexBuffer = pVertexBuffer.Detach(); |
| } |
| |
| // Requires Anniversary Edition headers, so simplifying things for current setup. |
| const UINT D3D12_FEATURE_D3D12_OPTIONS1 = 8; |
| struct D3D12_FEATURE_DATA_D3D12_OPTIONS1 { |
| BOOL WaveOps; |
| UINT WaveLaneCountMin; |
| UINT WaveLaneCountMax; |
| UINT TotalLaneCount; |
| BOOL ExpandedComputeResourceStates; |
| BOOL Int64ShaderOps; |
| }; |
| |
| bool IsDeviceBasicAdapter(ID3D12Device *pDevice) { |
| CComPtr<IDXGIFactory4> factory; |
| VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&factory))); |
| LUID adapterID = pDevice->GetAdapterLuid(); |
| CComPtr<IDXGIAdapter1> adapter; |
| factory->EnumAdapterByLuid(adapterID, IID_PPV_ARGS(&adapter)); |
| DXGI_ADAPTER_DESC1 AdapterDesc; |
| VERIFY_SUCCEEDED(adapter->GetDesc1(&AdapterDesc)); |
| return (AdapterDesc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) || |
| (AdapterDesc.VendorId == 0x1414 && |
| (AdapterDesc.DeviceId == 0x8c || AdapterDesc.DeviceId == 0x8d)); |
| } |
| |
| bool DoesDeviceSupportInt64(ID3D12Device *pDevice) { |
| D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS1, &O, sizeof(O)))) |
| return false; |
| return O.Int64ShaderOps != FALSE; |
| } |
| |
| bool DoesDeviceSupportDouble(ID3D12Device *pDevice) { |
| D3D12_FEATURE_DATA_D3D12_OPTIONS O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS, &O, sizeof(O)))) |
| return false; |
| return O.DoublePrecisionFloatShaderOps != FALSE; |
| } |
| |
| bool DoesDeviceSupportWaveOps(ID3D12Device *pDevice) { |
| D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS1, &O, sizeof(O)))) |
| return false; |
| return O.WaveOps != FALSE; |
| } |
| |
| bool DoesDeviceSupportBarycentrics(ID3D12Device *pDevice) { |
| D3D12_FEATURE_DATA_D3D12_OPTIONS3 O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS3, &O, sizeof(O)))) |
| return false; |
| return O.BarycentricsSupported != FALSE; |
| } |
| |
| bool DoesDeviceSupportNative16bitOps(ID3D12Device *pDevice) { |
| D3D12_FEATURE_DATA_D3D12_OPTIONS4 O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS4, &O, sizeof(O)))) |
| return false; |
| return O.Native16BitShaderOpsSupported != FALSE; |
| } |
| |
| bool DoesDeviceSupportMeshShaders(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_VB) && WDK_NTDDI_VERSION >= NTDDI_WIN10_VB |
| D3D12_FEATURE_DATA_D3D12_OPTIONS7 O7; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS7, &O7, sizeof(O7)))) |
| return false; |
| return O7.MeshShaderTier != D3D12_MESH_SHADER_TIER_NOT_SUPPORTED; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportRayTracing(ID3D12Device *pDevice) { |
| #if WDK_NTDDI_VERSION > NTDDI_WIN10_RS4 |
| D3D12_FEATURE_DATA_D3D12_OPTIONS5 O5; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS5, &O5, sizeof(O5)))) |
| return false; |
| return O5.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| |
| bool DoesDeviceSupportMeshAmpDerivatives(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_FE) && WDK_NTDDI_VERSION >= NTDDI_WIN10_FE |
| D3D12_FEATURE_DATA_D3D12_OPTIONS7 O7; |
| D3D12_FEATURE_DATA_D3D12_OPTIONS9 O9; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS7, &O7, sizeof(O7))) || |
| FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS9, &O9, sizeof(O9)))) |
| return false; |
| return O7.MeshShaderTier != D3D12_MESH_SHADER_TIER_NOT_SUPPORTED && |
| O9.DerivativesInMeshAndAmplificationShadersSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportTyped64Atomics(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_FE) && WDK_NTDDI_VERSION >= NTDDI_WIN10_FE |
| D3D12_FEATURE_DATA_D3D12_OPTIONS9 O9; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS9, &O9, sizeof(O9)))) |
| return false; |
| return O9.AtomicInt64OnTypedResourceSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportHeap64Atomics(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_CO) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CO |
| D3D12_FEATURE_DATA_D3D12_OPTIONS11 O11; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS11, &O11, sizeof(O11)))) |
| return false; |
| return O11.AtomicInt64OnDescriptorHeapResourceSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportShared64Atomics(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_FE) && WDK_NTDDI_VERSION >= NTDDI_WIN10_FE |
| D3D12_FEATURE_DATA_D3D12_OPTIONS9 O9; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS9, &O9, sizeof(O9)))) |
| return false; |
| return O9.AtomicInt64OnGroupSharedSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportAdvancedTexOps(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU |
| D3D12_FEATURE_DATA_D3D12_OPTIONS14 O14; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS14, &O14, sizeof(O14)))) |
| return false; |
| return O14.AdvancedTextureOpsSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportWritableMSAA(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU |
| D3D12_FEATURE_DATA_D3D12_OPTIONS14 O14; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS14, &O14, sizeof(O14)))) |
| return false; |
| return O14.WriteableMSAATexturesSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportEnhancedBarriers(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU |
| D3D12_FEATURE_DATA_D3D12_OPTIONS12 O12; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS12, &O12, sizeof(O12)))) |
| return false; |
| return O12.EnhancedBarriersSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool DoesDeviceSupportRelaxedFormatCasting(ID3D12Device *pDevice) { |
| #if defined(NTDDI_WIN10_CU) && WDK_NTDDI_VERSION >= NTDDI_WIN10_CU |
| D3D12_FEATURE_DATA_D3D12_OPTIONS12 O12; |
| if (!DoesDeviceSupportEnhancedBarriers(pDevice)) |
| return false; |
| |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS12, &O12, sizeof(O12)))) |
| return false; |
| return O12.RelaxedFormatCastingSupported != FALSE; |
| #else |
| UNREFERENCED_PARAMETER(pDevice); |
| return false; |
| #endif |
| } |
| |
| bool IsFallbackPathEnabled(){ |
| // Enable fallback paths with: /p:"EnableFallback=1" |
| UINT EnableFallbackValue = 0; |
| WEX::TestExecution::RuntimeParameters::TryGetValue(L"EnableFallback", EnableFallbackValue); |
| return EnableFallbackValue != 0; |
| } |
| |
| #ifndef _HLK_CONF |
| void DXBCFromText(LPCSTR pText, LPCWSTR pEntryPoint, LPCWSTR pTargetProfile, ID3DBlob **ppBlob) { |
| CW2A pEntryPointA(pEntryPoint, CP_UTF8); |
| CW2A pTargetProfileA(pTargetProfile, CP_UTF8); |
| CComPtr<ID3DBlob> pErrors; |
| D3D_SHADER_MACRO d3dMacro[2]; |
| ZeroMemory(d3dMacro, sizeof(d3dMacro)); |
| d3dMacro[0].Definition = "1"; |
| d3dMacro[0].Name = "USING_DXBC"; |
| HRESULT hr = D3DCompile(pText, strlen(pText), "hlsl.hlsl", d3dMacro, nullptr, pEntryPointA, pTargetProfileA, 0, 0, ppBlob, &pErrors); |
| if (pErrors != nullptr) { |
| CA2W errors((char *)pErrors->GetBufferPointer(), CP_ACP); |
| LogCommentFmt(L"Compilation failure: %s", errors.m_szBuffer); |
| } |
| VERIFY_SUCCEEDED(hr); |
| } |
| #endif |
| |
| HRESULT EnableDebugLayer() { |
| // The debug layer does net yet validate DXIL programs that require rewriting, |
| // but basic logging should work properly. |
| HRESULT hr = S_FALSE; |
| if (UseDebugIfaces()) { |
| CComPtr<ID3D12Debug> debugController; |
| hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)); |
| if (SUCCEEDED(hr)) { |
| debugController->EnableDebugLayer(); |
| hr = S_OK; |
| } |
| } |
| return hr; |
| } |
| |
| static std::wstring GetModuleName() { |
| wchar_t moduleName[MAX_PATH+1] = {0}; |
| DWORD length = GetModuleFileNameW(NULL, moduleName, MAX_PATH); |
| if (length == 0 || length == MAX_PATH) { |
| return std::wstring(); // Error condition |
| } |
| return std::wstring(moduleName, length); |
| } |
| |
| static std::wstring ComputeSDKFullPath(std::wstring SDKPath) { |
| std::wstring modulePath = GetModuleName(); |
| size_t pos = modulePath.rfind('\\'); |
| if (pos == std::wstring::npos) |
| return SDKPath; |
| if (SDKPath.substr(0, 2) != L".\\") |
| return SDKPath; |
| return modulePath.substr(0, pos) + SDKPath.substr(1); |
| } |
| |
| static UINT GetD3D12SDKVersion(std::wstring SDKPath) { |
| // Try to automatically get the D3D12SDKVersion from the DLL |
| UINT SDKVersion = 0; |
| std::wstring D3DCorePath = ComputeSDKFullPath(SDKPath); |
| D3DCorePath.append(L"D3D12Core.dll"); |
| HMODULE hCore = LoadLibraryW(D3DCorePath.c_str()); |
| if (hCore) { |
| if (UINT *pSDKVersion = (UINT*)GetProcAddress(hCore, "D3D12SDKVersion")) |
| SDKVersion = *pSDKVersion; |
| FreeModule(hCore); |
| } |
| return SDKVersion; |
| } |
| |
| static HRESULT EnableAgilitySDK(HMODULE hRuntime, UINT SDKVersion, |
| LPCWSTR SDKPath) { |
| D3D12GetInterfaceFn pD3D12GetInterface = |
| (D3D12GetInterfaceFn)GetProcAddress(hRuntime, "D3D12GetInterface"); |
| CComPtr<ID3D12SDKConfiguration> pD3D12SDKConfiguration; |
| IFR(pD3D12GetInterface(CLSID_D3D12SDKConfiguration, |
| IID_PPV_ARGS(&pD3D12SDKConfiguration))); |
| IFR(pD3D12SDKConfiguration->SetSDKVersion(SDKVersion, CW2A(SDKPath))); |
| |
| // Currently, it appears that the SetSDKVersion will succeed even when |
| // D3D12Core is not found, or its version doesn't match. When that's the |
| // case, will cause a failure in the very next thing that actually requires |
| // D3D12Core.dll to be loaded instead. So, we attempt to clear experimental |
| // features next, which is a valid use case and a no-op at this point. This |
| // requires D3D12Core to be loaded. If this fails, we know the AgilitySDK |
| // setting actually failed. |
| D3D12EnableExperimentalFeaturesFn pD3D12EnableExperimentalFeatures = |
| (D3D12EnableExperimentalFeaturesFn)GetProcAddress( |
| hRuntime, "D3D12EnableExperimentalFeatures"); |
| if (pD3D12EnableExperimentalFeatures == nullptr) { |
| // If this failed, D3D12 must be too old for AgilitySDK. But if that's |
| // the case, creating D3D12SDKConfiguration should have failed. So while |
| // this case shouldn't be hit, fail if it is. |
| return HRESULT_FROM_WIN32(GetLastError()); |
| } |
| return pD3D12EnableExperimentalFeatures(0, nullptr, nullptr, nullptr); |
| } |
| |
| static HRESULT EnableExperimentalShaderModels(HMODULE hRuntime) { |
| D3D12EnableExperimentalFeaturesFn pD3D12EnableExperimentalFeatures = |
| (D3D12EnableExperimentalFeaturesFn)GetProcAddress( |
| hRuntime, "D3D12EnableExperimentalFeatures"); |
| if (pD3D12EnableExperimentalFeatures == nullptr) { |
| return HRESULT_FROM_WIN32(GetLastError()); |
| } |
| return pD3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModelsID, |
| nullptr, nullptr); |
| } |
| |
| static HRESULT EnableExperimentalShaderModels() { |
| HMODULE hRuntime = LoadLibraryW(L"d3d12.dll"); |
| if (hRuntime == NULL) |
| return E_FAIL; |
| return EnableExperimentalShaderModels(hRuntime); |
| } |
| |
| HRESULT EnableAgilitySDK(HMODULE hRuntime) { |
| // D3D12SDKVersion > 1 will use provided version, otherwise, auto-detect. |
| // D3D12SDKVersion == 1 means fail if we can't auto-detect. |
| UINT SDKVersion = 0; |
| WEX::TestExecution::RuntimeParameters::TryGetValue( |
| L"D3D12SDKVersion", SDKVersion); |
| |
| // SDKPath must be relative path from .exe, which means relative to |
| // TE.exe location, and must start with ".\\", such as with the |
| // default: ".\\D3D12\\" |
| WEX::Common::String SDKPath; |
| if (SUCCEEDED(WEX::TestExecution::RuntimeParameters::TryGetValue( |
| L"D3D12SDKPath", SDKPath))) { |
| // Make sure path ends in backslash |
| if (!SDKPath.IsEmpty() && SDKPath.Right(1) != "\\") { |
| SDKPath.Append("\\"); |
| } |
| } |
| if (SDKPath.IsEmpty()) { |
| SDKPath = L".\\D3D12\\"; |
| } |
| |
| bool mustFind = SDKVersion > 0; |
| if (SDKVersion <= 1) { |
| // lookup version from D3D12Core.dll |
| SDKVersion = GetD3D12SDKVersion((LPCWSTR)SDKPath); |
| if (mustFind && SDKVersion == 0) { |
| LogErrorFmt(L"Agility SDK not found in relative path: %s", (LPCWSTR)SDKPath); |
| return E_FAIL; |
| } |
| } |
| |
| // Not found, not asked for. |
| if (SDKVersion == 0) |
| return S_FALSE; |
| |
| HRESULT hr= EnableAgilitySDK(hRuntime, SDKVersion, (LPCWSTR)SDKPath); |
| if (FAILED(hr)) { |
| // If SDKVersion provided, fail if not successful. |
| // 1 means we should find it, and fill in the version automatically. |
| if (mustFind) { |
| LogErrorFmt(L"Failed to set Agility SDK version %d at path: %s", SDKVersion, (LPCWSTR)SDKPath); |
| return hr; |
| } |
| return S_FALSE; |
| } |
| if (hr == S_OK) { |
| LogCommentFmt(L"Agility SDK version set to: %d", SDKVersion); |
| m_AgilitySDKEnabled = true; |
| } |
| return hr; |
| } |
| |
| HRESULT EnableExperimentalMode(HMODULE hRuntime) { |
| if (m_ExperimentalModeEnabled) { |
| return S_OK; |
| } |
| |
| bool bExperimentalShaderModels = GetTestParamBool(L"ExperimentalShaders"); |
| |
| HRESULT hr = S_FALSE; |
| if (bExperimentalShaderModels) { |
| hr = EnableExperimentalShaderModels(hRuntime); |
| if (SUCCEEDED(hr)) { |
| m_ExperimentalModeEnabled = true; |
| } |
| } |
| |
| return hr; |
| } |
| |
| struct FenceObj { |
| HANDLE m_fenceEvent = NULL; |
| CComPtr<ID3D12Fence> m_fence; |
| UINT64 m_fenceValue; |
| ~FenceObj() { |
| if (m_fenceEvent) CloseHandle(m_fenceEvent); |
| } |
| }; |
| |
| void InitFenceObj(ID3D12Device *pDevice, FenceObj *pObj) { |
| pObj->m_fenceValue = 1; |
| VERIFY_SUCCEEDED(pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, |
| IID_PPV_ARGS(&pObj->m_fence))); |
| // Create an event handle to use for frame synchronization. |
| pObj->m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); |
| if (pObj->m_fenceEvent == nullptr) { |
| VERIFY_SUCCEEDED(HRESULT_FROM_WIN32(GetLastError())); |
| } |
| } |
| |
| void ReadHlslDataIntoNewStream(LPCWSTR relativePath, IStream **ppStream) { |
| VERIFY_SUCCEEDED(m_support.Initialize()); |
| CComPtr<IDxcLibrary> pLibrary; |
| CComPtr<IDxcBlobEncoding> pBlob; |
| CComPtr<IStream> pStream; |
| std::wstring path = GetPathToHlslDataFile(relativePath); |
| VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary)); |
| VERIFY_SUCCEEDED(pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob)); |
| VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream)); |
| *ppStream = pStream.Detach(); |
| } |
| |
| void RecordRenderAndReadback(ID3D12GraphicsCommandList *pList, |
| ID3D12DescriptorHeap *pRtvHeap, |
| UINT rtvDescriptorSize, |
| UINT instanceCount, |
| D3D12_VERTEX_BUFFER_VIEW *pVertexBufferView, |
| ID3D12RootSignature *pRootSig, |
| ID3D12Resource *pRenderTarget, |
| ID3D12Resource *pReadBuffer) { |
| D3D12_RESOURCE_DESC rtDesc = pRenderTarget->GetDesc(); |
| D3D12_VIEWPORT viewport; |
| D3D12_RECT scissorRect; |
| |
| memset(&viewport, 0, sizeof(viewport)); |
| viewport.Height = (float)rtDesc.Height; |
| viewport.Width = (float)rtDesc.Width; |
| viewport.MaxDepth = 1.0f; |
| memset(&scissorRect, 0, sizeof(scissorRect)); |
| scissorRect.right = (long)rtDesc.Width; |
| scissorRect.bottom = rtDesc.Height; |
| if (pRootSig != nullptr) { |
| pList->SetGraphicsRootSignature(pRootSig); |
| } |
| pList->RSSetViewports(1, &viewport); |
| pList->RSSetScissorRects(1, &scissorRect); |
| |
| // Indicate that the buffer will be used as a render target. |
| RecordTransitionBarrier(pList, pRenderTarget, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_RENDER_TARGET); |
| |
| CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(pRtvHeap->GetCPUDescriptorHandleForHeapStart(), 0, rtvDescriptorSize); |
| pList->OMSetRenderTargets(1, &rtvHandle, FALSE, nullptr); |
| |
| pList->ClearRenderTargetView(rtvHandle, ClearColor, 0, nullptr); |
| pList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); |
| pList->IASetVertexBuffers(0, 1, pVertexBufferView); |
| pList->DrawInstanced(3, instanceCount, 0, 0); |
| |
| // Transition to copy source and copy into read-back buffer. |
| RecordTransitionBarrier(pList, pRenderTarget, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| |
| // Copy into read-back buffer. |
| UINT64 rowPitch = rtDesc.Width * 4; |
| if (rowPitch % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) |
| rowPitch += D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - (rowPitch % D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); |
| D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint; |
| Footprint.Offset = 0; |
| Footprint.Footprint = CD3DX12_SUBRESOURCE_FOOTPRINT(DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)rtDesc.Width, rtDesc.Height, 1, (UINT)rowPitch); |
| CD3DX12_TEXTURE_COPY_LOCATION DstLoc(pReadBuffer, Footprint); |
| CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(pRenderTarget, 0); |
| pList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); |
| } |
| |
| void RunRWByteBufferComputeTest(ID3D12Device *pDevice, LPCSTR shader, std::vector<uint32_t> &values); |
| void RunLifetimeIntrinsicTest(ID3D12Device *pDevice, LPCSTR shader, D3D_SHADER_MODEL shaderModel, bool useLibTarget, LPCWSTR *pOptions, int numOptions, std::vector<uint32_t> &values); |
| void RunLifetimeIntrinsicComputeTest(ID3D12Device *pDevice, LPCSTR pShader, CComPtr<ID3D12DescriptorHeap>& pUavHeap, CComPtr<ID3D12RootSignature>& pRootSignature, |
| LPCWSTR pTargetProfile, LPCWSTR *pOptions, int numOptions, std::vector<uint32_t> &values); |
| void RunLifetimeIntrinsicLibTest(ID3D12Device *pDevice0, LPCSTR pShader, CComPtr<ID3D12RootSignature>& pRootSignature, |
| LPCWSTR pTargetProfile, LPCWSTR *pOptions, int numOptions); |
| |
| void SetDescriptorHeap(ID3D12GraphicsCommandList *pCommandList, ID3D12DescriptorHeap *pHeap) { |
| ID3D12DescriptorHeap *const pHeaps[1] = { pHeap }; |
| pCommandList->SetDescriptorHeaps(1, pHeaps); |
| } |
| |
| void WaitForSignal(ID3D12CommandQueue *pCQ, FenceObj &FO) { |
| ::WaitForSignal(pCQ, FO.m_fence, FO.m_fenceEvent, FO.m_fenceValue++); |
| } |
| }; |
// HLSL text that, when USING_DXBC is defined at shader compile time, supplies
// placeholder definitions for the wave and quad intrinsics listed below.
// The placeholders return fixed values or echo an argument; they do not
// implement the real intrinsic semantics.
// NOTE(review): presumably intended to be prepended to shader source so the
// same text also compiles on a DXBC path - confirm against the macro's users.
#define WAVE_INTRINSIC_DXBC_GUARD                                  \
  "#ifdef USING_DXBC\r\n"                                          \
  "uint WaveGetLaneIndex() { return 1; }\r\n"                      \
  "uint WaveReadLaneFirst(uint u) { return u; }\r\n"               \
  "bool WaveIsFirstLane() { return true; }\r\n"                    \
  "uint WaveGetLaneCount() { return 1; }\r\n"                      \
  "uint WaveReadLaneAt(uint n, uint u) { return u; }\r\n"          \
  "bool WaveActiveAnyTrue(bool b) { return b; }\r\n"               \
  "bool WaveActiveAllTrue(bool b) { return false; }\r\n"           \
  "uint WaveActiveAllEqual(uint u) { return u; }\r\n"              \
  "uint4 WaveActiveBallot(bool b) { return 1; }\r\n"               \
  "uint WaveActiveCountBits(uint u) { return 1; }\r\n"             \
  "uint WaveActiveSum(uint u) { return 1; }\r\n"                   \
  "uint WaveActiveProduct(uint u) { return 1; }\r\n"               \
  "uint WaveActiveBitAnd(uint u) { return 1; }\r\n"                \
  "uint WaveActiveBitOr(uint u) { return 1; }\r\n"                 \
  "uint WaveActiveBitXor(uint u) { return 1; }\r\n"                \
  "uint WaveActiveMin(uint u) { return 1; }\r\n"                   \
  "uint WaveActiveMax(uint u) { return 1; }\r\n"                   \
  "uint WavePrefixCountBits(uint u) { return 1; }\r\n"             \
  "uint WavePrefixSum(uint u) { return 1; }\r\n"                   \
  "uint WavePrefixProduct(uint u) { return 1; }\r\n"               \
  "uint QuadReadLaneAt(uint a, uint u) { return 1; }\r\n"          \
  "uint QuadReadAcrossX(uint u) { return 1; }\r\n"                 \
  "uint QuadReadAcrossY(uint u) { return 1; }\r\n"                 \
  "uint QuadReadAcrossDiagonal(uint u) { return 1; }\r\n"          \
  "#endif\r\n"
| |
// Resizes `values` to `count` elements and fills it with the ascending
// pattern 0, 1, 2, ... (each element holds its own index, truncated to
// 32 bits).
static void SetupComputeValuePattern(std::vector<uint32_t> &values,
                                     size_t count) {
  values.resize(count);
  uint32_t nextValue = 0;
  for (uint32_t &slot : values)
    slot = nextValue++;
}
| |
| bool ExecutionTest::ExecutionTestClassSetup() { |
| return DivergentClassSetup(); |
| } |
| |
| void ExecutionTest::RunRWByteBufferComputeTest(ID3D12Device *pDevice, LPCSTR pShader, std::vector<uint32_t> &values) { |
| static const int DispatchGroupX = 1; |
| static const int DispatchGroupY = 1; |
| static const int DispatchGroupZ = 1; |
| |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12DescriptorHeap> pUavHeap; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| UINT uavDescriptorSize; |
| FenceObj FO; |
| |
| const UINT valueSizeInBytes = (UINT)values.size() * sizeof(uint32_t); |
| CreateComputeCommandQueue(pDevice, L"RunRWByteBufferComputeTest Command Queue", &pCommandQueue); |
| InitFenceObj(pDevice, &FO); |
| |
| // Describe and create a UAV descriptor heap. |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 1; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pUavHeap))); |
| uavDescriptorSize = pDevice->GetDescriptorHandleIncrementSize(heapDesc.Type); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| { |
| CD3DX12_DESCRIPTOR_RANGE ranges[1]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 0); |
| |
| CD3DX12_ROOT_PARAMETER rootParameters[1]; |
| rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); |
| |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init(_countof(rootParameters), rootParameters, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE); |
| |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSignature); |
| } |
| |
| // Create pipeline state object. |
| CComPtr<ID3D12PipelineState> pComputeState; |
| CreateComputePSO(pDevice, pRootSignature, pShader, L"cs_6_0", &pComputeState); |
| |
| // Create a command allocator and list for compute. |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, IID_PPV_ARGS(&pCommandList))); |
| pCommandList->SetName(L"ExecutionTest::RunRWByteButterComputeTest Command List"); |
| |
| // Set up UAV resource. |
| CComPtr<ID3D12Resource> pUavResource; |
| CComPtr<ID3D12Resource> pReadBuffer; |
| CComPtr<ID3D12Resource> pUploadResource; |
| CreateTestUavs(pDevice, pCommandList, values.data(), valueSizeInBytes, &pUavResource, &pUploadResource, &pReadBuffer); |
| VERIFY_SUCCEEDED(pUavResource->SetName(L"RunRWByteBufferComputeText UAV")); |
| VERIFY_SUCCEEDED(pReadBuffer->SetName(L"RunRWByteBufferComputeText UAV Read Buffer")); |
| VERIFY_SUCCEEDED(pUploadResource->SetName(L"RunRWByteBufferComputeText UAV Upload Buffer")); |
| |
| // Close the command list and execute it to perform the GPU setup. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| VERIFY_SUCCEEDED(pCommandAllocator->Reset()); |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); |
| |
| // Run the compute shader and copy the results back to readable memory. |
| { |
| D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; |
| uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; |
| uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; |
| uavDesc.Buffer.FirstElement = 0; |
| uavDesc.Buffer.NumElements = (UINT)values.size(); |
| uavDesc.Buffer.StructureByteStride = 0; |
| uavDesc.Buffer.CounterOffsetInBytes = 0; |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; |
| CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(pUavHeap->GetCPUDescriptorHandleForHeapStart()); |
| CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandleGpu(pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| pDevice->CreateUnorderedAccessView(pUavResource, nullptr, &uavDesc, uavHandle); |
| SetDescriptorHeap(pCommandList, pUavHeap); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| pCommandList->SetComputeRootDescriptorTable(0, uavHandleGpu); |
| } |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| RecordTransitionBarrier(pCommandList, pUavResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->CopyResource(pReadBuffer, pUavResource); |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| { |
| MappedData mappedData(pReadBuffer, valueSizeInBytes); |
| uint32_t *pData = (uint32_t *)mappedData.data(); |
| memcpy(values.data(), pData, (size_t)valueSizeInBytes); |
| } |
| WaitForSignal(pCommandQueue, FO); |
| } |
| |
| void ExecutionTest::RunLifetimeIntrinsicComputeTest(ID3D12Device *pDevice, LPCSTR pShader, CComPtr<ID3D12DescriptorHeap>& pUavHeap, CComPtr<ID3D12RootSignature>& pRootSignature, |
| LPCWSTR pTargetProfile, LPCWSTR *pOptions, int numOptions, std::vector<uint32_t> &values) { |
| // Create command queue. |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CreateComputeCommandQueue(pDevice, L"RunLifetimeIntrinsicTest Command Queue", &pCommandQueue); |
| |
| FenceObj FO; |
| InitFenceObj(pDevice, &FO); |
| |
| // Compile shader "main" and create pipeline state object. |
| CComPtr<ID3D12PipelineState> pComputeState; |
| CreateComputePSO(pDevice, pRootSignature, pShader, pTargetProfile, &pComputeState, pOptions, numOptions); |
| |
| // Create a command allocator and list for compute. |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, IID_PPV_ARGS(&pCommandList))); |
| pCommandList->SetName(L"ExecutionTest::RunLifetimeIntrinsicTest Command List"); |
| |
| // Set up UAV resource. |
| const UINT valueSizeInBytes = (UINT)values.size() * sizeof(uint32_t); |
| CComPtr<ID3D12Resource> pUavResource; |
| CComPtr<ID3D12Resource> pReadBuffer; |
| CComPtr<ID3D12Resource> pUploadResource; |
| CreateTestUavs(pDevice, pCommandList, values.data(), valueSizeInBytes, &pUavResource, &pUploadResource, &pReadBuffer); |
| VERIFY_SUCCEEDED(pUavResource->SetName(L"RunLifetimeIntrinsicTest UAV")); |
| VERIFY_SUCCEEDED(pReadBuffer->SetName(L"RunLifetimeIntrinsicTest UAV Read Buffer")); |
| VERIFY_SUCCEEDED(pUploadResource->SetName(L"RunLifetimeIntrinsicTest UAV Upload Buffer")); |
| |
| // Close the command list and execute it to perform the GPU setup. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| VERIFY_SUCCEEDED(pCommandAllocator->Reset()); |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); |
| |
| // Run the compute shader and copy the results back to readable memory. |
| { |
| D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; |
| uavDesc.Format = DXGI_FORMAT_R32_TYPELESS; |
| uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; |
| uavDesc.Buffer.FirstElement = 0; |
| uavDesc.Buffer.NumElements = (UINT)values.size(); |
| uavDesc.Buffer.StructureByteStride = 0; |
| uavDesc.Buffer.CounterOffsetInBytes = 0; |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; |
| CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(pUavHeap->GetCPUDescriptorHandleForHeapStart()); |
| CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandleGpu(pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| pDevice->CreateUnorderedAccessView(pUavResource, nullptr, &uavDesc, uavHandle); |
| SetDescriptorHeap(pCommandList, pUavHeap); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| pCommandList->SetComputeRootDescriptorTable(0, uavHandleGpu); |
| } |
| |
| static const int DispatchGroupX = 1; |
| static const int DispatchGroupY = 1; |
| static const int DispatchGroupZ = 1; |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| RecordTransitionBarrier(pCommandList, pUavResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->CopyResource(pReadBuffer, pUavResource); |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| { |
| MappedData mappedData(pReadBuffer, valueSizeInBytes); |
| uint32_t *pData = (uint32_t *)mappedData.data(); |
| memcpy(values.data(), pData, (size_t)valueSizeInBytes); |
| } |
| WaitForSignal(pCommandQueue, FO); |
| } |
| |
| void ExecutionTest::RunLifetimeIntrinsicLibTest(ID3D12Device *pDevice0, LPCSTR pShader, CComPtr<ID3D12RootSignature>& pRootSignature, |
| LPCWSTR pTargetProfile, LPCWSTR *pOptions, int numOptions) { |
| CComPtr<ID3D12Device5> pDevice; |
| VERIFY_SUCCEEDED(pDevice0->QueryInterface(IID_PPV_ARGS(&pDevice))); |
| |
| // Create command queue. |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CreateCommandQueue(pDevice, L"RunLifetimeIntrinsicTest Command Queue", &pCommandQueue, D3D12_COMMAND_LIST_TYPE_DIRECT); |
| |
| FenceObj FO; |
| InitFenceObj(pDevice, &FO); |
| |
| // Compile raygen shader. |
| CComPtr<ID3DBlob> pShaderLib; |
| CompileFromText(pShader, L"RayGen", pTargetProfile, &pShaderLib, pOptions, numOptions); |
| |
| // Describe and create the RT pipeline state object (RTPSO). |
| CD3DX12_STATE_OBJECT_DESC stateObjectDesc(D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE); |
| auto lib = stateObjectDesc.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>(); |
| CD3DX12_SHADER_BYTECODE byteCode(pShaderLib); |
| lib->SetDXILLibrary(&byteCode); |
| lib->DefineExport(L"RayGen"); |
| |
| const int payloadCount = 4; |
| const int attributeCount = 2; |
| const int maxRecursion = 2; |
| stateObjectDesc.CreateSubobject<CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT>()->Config(payloadCount * sizeof(float), attributeCount * sizeof(float)); |
| stateObjectDesc.CreateSubobject<CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT>()->Config(maxRecursion); |
| |
| // Create (local!) root sig subobject and associate with shader. |
| auto localRootSigSubObj = stateObjectDesc.CreateSubobject<CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT>(); |
| localRootSigSubObj->SetRootSignature(pRootSignature); |
| auto x = stateObjectDesc.CreateSubobject<CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT>(); |
| x->SetSubobjectToAssociate(*localRootSigSubObj); |
| x->AddExport(L"RayGen"); |
| |
| CComPtr<ID3D12StateObject> pStateObject; |
| VERIFY_SUCCEEDED(pDevice->CreateStateObject(stateObjectDesc, IID_PPV_ARGS(&pStateObject))); |
| |
| // Create a command allocator and list. |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| CComPtr<ID3D12GraphicsCommandList4> pCommandList; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&pCommandAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, pCommandAllocator, nullptr, IID_PPV_ARGS(&pCommandList))); |
| pCommandList->SetPipelineState1(pStateObject); |
| pCommandList->SetName(L"ExecutionTest::RunLifetimeIntrinsicTest Command List"); |
| |
| // Close the command list and execute it to kick-off compilation in the driver. |
| // NOTE: We don't care about anything else, so we're not setting up any resources and don't actually execute the shader. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| } |
| |
| void ExecutionTest::RunLifetimeIntrinsicTest(ID3D12Device *pDevice, LPCSTR pShader, D3D_SHADER_MODEL shaderModel, bool useLibTarget, |
| LPCWSTR *pOptions, int numOptions, std::vector<uint32_t> &values) { |
| LPCWSTR pTargetProfile; |
| switch (shaderModel) { |
| default: pTargetProfile = useLibTarget ? L"lib_6_3" : L"cs_6_0"; break; // Default to 6.3 for lib, 6.0 otherwise. |
| case D3D_SHADER_MODEL_6_0: pTargetProfile = useLibTarget ? L"lib_6_0" : L"cs_6_0"; break; |
| case D3D_SHADER_MODEL_6_3: pTargetProfile = useLibTarget ? L"lib_6_3" : L"cs_6_3"; break; |
| case D3D_SHADER_MODEL_6_5: pTargetProfile = useLibTarget ? L"lib_6_5" : L"cs_6_5"; break; |
| case D3D_SHADER_MODEL_6_6: pTargetProfile = useLibTarget ? L"lib_6_6" : L"cs_6_6"; break; |
| } |
| |
| // Describe a UAV descriptor heap. |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 1; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| |
| // Create the UAV descriptor heap. |
| CComPtr<ID3D12DescriptorHeap> pUavHeap; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pUavHeap))); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| { |
| CD3DX12_DESCRIPTOR_RANGE ranges[1]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 0); |
| |
| CD3DX12_ROOT_PARAMETER rootParameters[1]; |
| rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); |
| |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| D3D12_ROOT_SIGNATURE_FLAGS rootSigFlag = useLibTarget ? D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE : D3D12_ROOT_SIGNATURE_FLAG_NONE; |
| rootSignatureDesc.Init(_countof(rootParameters), rootParameters, 0, nullptr, rootSigFlag); |
| |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSignature); |
| } |
| |
| if (useLibTarget) { |
| RunLifetimeIntrinsicLibTest(pDevice, pShader, pRootSignature, pTargetProfile, |
| pOptions, numOptions); |
| } else { |
| RunLifetimeIntrinsicComputeTest(pDevice, pShader, pUavHeap, pRootSignature, pTargetProfile, |
| pOptions, numOptions, values); |
| } |
| } |
| |
| TEST_F(ExecutionTest, LifetimeIntrinsicTest) { |
| // The only thing we test here is that existence of lifetime intrinsics or |
| // their fallback replacement (store undef or store zeroinitializer) do not |
| // cause any issues in the runtime and driver stack. |
| // The easiest way to force placement of intrinsics is to create an array in |
| // a local scope that is dynamically indexed. It must not be optimized away, |
| // so we do some bogus initialization that prevents this. Since all the code |
| // is guarded by a conditional that is dynamically always false, the actual |
| // effect of the shader is that the same value that was read is written back. |
| static const char* pShader = R"( |
| RWByteAddressBuffer g_bab : register(u0); |
| |
| void fn(uint GI) { |
| const uint addr = GI * 4; |
| const int val = g_bab.Load(addr); |
| int res = val; |
| if (val < 0) { // Never true. |
| int arr[200]; |
| for (int i = 0; i < 200; ++i) { |
| arr[i] = arr[val - i]; |
| } |
| res += arr[val]; |
| } |
| g_bab.Store(addr, (uint)res); |
| } |
| |
| [numthreads(8,8,1)] |
| void main(uint GI : SV_GroupIndex) { |
| fn(GI); |
| } |
| |
| [shader("raygeneration")] |
| void RayGen() { |
| const uint d = DispatchRaysIndex().x; |
| const uint g = g > 64 ? 63 : g; |
| fn(g); |
| } |
| )"; |
| static const int NumThreadsX = 8; |
| static const int NumThreadsY = 8; |
| static const int NumThreadsZ = 1; |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| static const int DispatchGroupCount = 1; |
| |
| CComPtr<ID3D12Device> pDevice; |
| bool bSM_6_6_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6, false); |
| bool bSM_6_3_Supported = bSM_6_6_Supported; |
| if (!bSM_6_6_Supported) { |
| // Try 6.3 for downlevel DXR case |
| bSM_6_3_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_3, false); |
| } |
| if (!bSM_6_3_Supported) { |
| // Otherwise, 6.0 better be supported for compute case |
| VERIFY_IS_TRUE(CreateDevice(&pDevice, D3D_SHADER_MODEL_6_0, false)); |
| } |
| bool bDXRSupported = bSM_6_3_Supported && DoesDeviceSupportRayTracing(pDevice); |
| |
| if (GetTestParamUseWARP(UseWarpByDefault()) || IsDeviceBasicAdapter(pDevice)) { |
| WEX::Logging::Log::Comment(L"WARP has a known issue with LifetimeIntrinsicTest."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!bSM_6_6_Supported) { |
| WEX::Logging::Log::Comment(L"Native lifetime markers skipped, device does not support SM 6.6"); |
| } |
| if (!bDXRSupported) { |
| WEX::Logging::Log::Comment(L"DXR lifetime tests skipped, device does not support DXR"); |
| } |
| |
| std::vector<uint32_t> values; |
| SetupComputeValuePattern(values, ThreadsPerGroup * DispatchGroupCount); |
| |
| // Run a number of tests for different configurations that will cause |
| // lifetime intrinsics to be: |
| // - placed directly |
| // - translated to an undef store |
| // - translated to a zeroinitializer store |
| // against compute and DXR targets, downlevel and SM 6.6: |
| // - downlevel: cs_6_0, lib_6_3 (DXR) |
| // - cs_6_6, lib_6_6 (DXR) |
| |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| |
| LPCWSTR optsBase[] = {L"-enable-lifetime-markers"}; |
| LPCWSTR optsZeroStore[] = {L"-enable-lifetime-markers", L"-force-zero-store-lifetimes"}; |
| |
| WEX::Logging::Log::Comment(L"==== cs_6_0 with default translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_0, false, |
| optsBase, _countof(optsBase), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| |
| if (bDXRSupported) { |
| WEX::Logging::Log::Comment(L"==== DXR lib_6_3 with default translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_3, true, |
| optsBase, _countof(optsBase), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| } |
| |
| WEX::Logging::Log::Comment(L"==== cs_6_0 with zeroinitializer translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_0, false, |
| optsZeroStore, _countof(optsZeroStore), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| |
| if (bDXRSupported) { |
| WEX::Logging::Log::Comment(L"==== DXR lib_6_3 with zeroinitializer translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_3, true, |
| optsZeroStore, _countof(optsZeroStore), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| } |
| |
| if (bSM_6_6_Supported) { |
| WEX::Logging::Log::Comment(L"==== cs_6_6 with zeroinitializer translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_6, false, |
| optsZeroStore, _countof(optsZeroStore), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| |
| if (bDXRSupported) { |
| WEX::Logging::Log::Comment(L"==== DXR lib_6_6 with zeroinitializer translation"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_6, true, |
| optsZeroStore, _countof(optsZeroStore), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| } |
| |
| WEX::Logging::Log::Comment(L"==== cs_6_6 with native lifetime markers"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_6, false, |
| optsBase, _countof(optsBase), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| |
| if (bDXRSupported) { |
| WEX::Logging::Log::Comment(L"==== DXR lib_6_6 with native lifetime markers"); |
| RunLifetimeIntrinsicTest(pDevice, pShader, D3D_SHADER_MODEL_6_6, true, |
| optsBase, _countof(optsBase), values); |
| VERIFY_ARE_EQUAL(values[1], (uint32_t)1); |
| } |
| } |
| } |
| |
| TEST_F(ExecutionTest, BasicComputeTest) { |
| #ifndef _HLK_CONF |
| // |
| // BasicComputeTest is a simple compute shader that can be used as the basis |
| // for more interesting compute execution tests. |
| // The HLSL is compatible with shader models <=5.1 to allow using the DXBC |
| // rendering code paths for comparison. |
| // |
| static const char pShader[] = |
| "RWByteAddressBuffer g_bab : register(u0);\r\n" |
| "[numthreads(8,8,1)]\r\n" |
| "void main(uint GI : SV_GroupIndex) {" |
| " uint addr = GI * 4;\r\n" |
| " uint val = g_bab.Load(addr);\r\n" |
| " DeviceMemoryBarrierWithGroupSync();\r\n" |
| " g_bab.Store(addr, val + 1);\r\n" |
| "}"; |
| static const int NumThreadsX = 8; |
| static const int NumThreadsY = 8; |
| static const int NumThreadsZ = 1; |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| static const int DispatchGroupCount = 1; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::vector<uint32_t> values; |
| SetupComputeValuePattern(values, ThreadsPerGroup * DispatchGroupCount); |
| VERIFY_ARE_EQUAL(values[0], (uint32_t)0); |
| RunRWByteBufferComputeTest(pDevice, pShader, values); |
| VERIFY_ARE_EQUAL(values[0], (uint32_t)1); |
| #endif |
| } |
| |
| TEST_F(ExecutionTest, BasicTriangleTest) { |
| #ifndef _HLK_CONF |
| static const UINT FrameCount = 2; |
| static const UINT m_width = 320; |
| static const UINT m_height = 200; |
| static const float m_aspectRatio = static_cast<float>(m_width) / static_cast<float>(m_height); |
| |
| struct Vertex { |
| XMFLOAT3 position; |
| XMFLOAT4 color; |
| }; |
| |
| // Pipeline objects. |
| CComPtr<ID3D12Device> pDevice; |
| CComPtr<ID3D12Resource> pRenderTarget; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12RootSignature> pRootSig; |
| CComPtr<ID3D12DescriptorHeap> pRtvHeap; |
| CComPtr<ID3D12PipelineState> pPipelineState; |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| CComPtr<ID3D12Resource> pReadBuffer; |
| UINT rtvDescriptorSize; |
| |
| CComPtr<ID3D12Resource> pVertexBuffer; |
| D3D12_VERTEX_BUFFER_VIEW vertexBufferView; |
| |
| // Synchronization objects. |
| FenceObj FO; |
| |
| // Shaders. |
| static const char pShaders[] = |
| "struct PSInput {\r\n" |
| " float4 position : SV_POSITION;\r\n" |
| " float4 color : COLOR;\r\n" |
| "};\r\n\r\n" |
| "PSInput VSMain(float4 position : POSITION, float4 color : COLOR) {\r\n" |
| " PSInput result;\r\n" |
| "\r\n" |
| " result.position = position;\r\n" |
| " result.color = color;\r\n" |
| " return result;\r\n" |
| "}\r\n\r\n" |
| "float4 PSMain(PSInput input) : SV_TARGET {\r\n" |
| " return 1; //input.color;\r\n" |
| "};\r\n"; |
| |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| struct BasicTestChecker { |
| CComPtr<ID3D12Device> m_pDevice; |
| CComPtr<ID3D12InfoQueue> m_pInfoQueue; |
| bool m_OK = false; |
| void SetOK(bool value) { m_OK = value; } |
| BasicTestChecker(ID3D12Device *pDevice) : m_pDevice(pDevice) { |
| if (FAILED(m_pDevice.QueryInterface(&m_pInfoQueue))) |
| return; |
| m_pInfoQueue->PushEmptyStorageFilter(); |
| m_pInfoQueue->PushEmptyRetrievalFilter(); |
| } |
| ~BasicTestChecker() { |
| if (!m_OK && m_pInfoQueue != nullptr) { |
| UINT64 count = m_pInfoQueue->GetNumStoredMessages(); |
| bool invalidBytecodeFound = false; |
| CAtlArray<BYTE> m_pBytes; |
| for (UINT64 i = 0; i < count; ++i) { |
| SIZE_T len = 0; |
| if (FAILED(m_pInfoQueue->GetMessageA(i, nullptr, &len))) |
| continue; |
| if (m_pBytes.GetCount() < len && !m_pBytes.SetCount(len)) |
| continue; |
| D3D12_MESSAGE *pMsg = (D3D12_MESSAGE *)m_pBytes.GetData(); |
| if (FAILED(m_pInfoQueue->GetMessageA(i, pMsg, &len))) |
| continue; |
| if (pMsg->ID == D3D12_MESSAGE_ID_CREATEVERTEXSHADER_INVALIDSHADERBYTECODE || |
| pMsg->ID == D3D12_MESSAGE_ID_CREATEPIXELSHADER_INVALIDSHADERBYTECODE) { |
| invalidBytecodeFound = true; |
| break; |
| } |
| } |
| if (invalidBytecodeFound) { |
| LogCommentFmt(L"%s", L"Found an invalid bytecode message. This " |
| L"typically indicates that experimental mode " |
| L"is not set up properly."); |
| if (!GetTestParamBool(L"ExperimentalShaders")) { |
| LogCommentFmt(L"Note that the ExperimentalShaders test parameter isn't set."); |
| } |
| } |
| else { |
| LogCommentFmt(L"Did not find corrupt pixel or vertex shaders in " |
| L"queue - dumping complete queue."); |
| WriteInfoQueueMessages(nullptr, OutputFn, m_pInfoQueue); |
| } |
| } |
| } |
| static void __stdcall OutputFn(void *pCtx, const wchar_t *pMsg) { |
| UNREFERENCED_PARAMETER(pCtx); |
| LogCommentFmt(L"%s", pMsg); |
| } |
| }; |
| BasicTestChecker BTC(pDevice); |
| { |
| InitFenceObj(pDevice, &FO); |
| CreateRtvDescriptorHeap(pDevice, FrameCount, &pRtvHeap, &rtvDescriptorSize); |
| CreateRenderTargetAndReadback(pDevice, pRtvHeap, m_width, m_height, &pRenderTarget, &pReadBuffer); |
| |
| // Create an empty root signature. |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init( |
| 0, nullptr, 0, nullptr, |
| D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT); |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSig); |
| |
| // Create the pipeline state, which includes compiling and loading shaders. |
| // Define the vertex input layout. |
| D3D12_INPUT_ELEMENT_DESC inputElementDescs[] = { |
| {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, |
| D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}, |
| {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, |
| D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}}; |
| D3D12_INPUT_LAYOUT_DESC InputLayout = { inputElementDescs, _countof(inputElementDescs) }; |
| CreateGraphicsPSO(pDevice, &InputLayout, pRootSig, pShaders, &pPipelineState); |
| |
| CreateGraphicsCommandQueueAndList(pDevice, &pCommandQueue, |
| &pCommandAllocator, &pCommandList, |
| pPipelineState); |
| |
| // Define the geometry for a triangle. |
| Vertex triangleVertices[] = { |
| { { 0.0f, 0.25f * m_aspectRatio, 0.0f },{ 1.0f, 0.0f, 0.0f, 1.0f } }, |
| { { 0.25f, -0.25f * m_aspectRatio, 0.0f },{ 0.0f, 1.0f, 0.0f, 1.0f } }, |
| { { -0.25f, -0.25f * m_aspectRatio, 0.0f },{ 0.0f, 0.0f, 1.0f, 1.0f } } }; |
| |
| CreateVertexBuffer(pDevice, triangleVertices, &pVertexBuffer, &vertexBufferView); |
| WaitForSignal(pCommandQueue, FO); |
| } |
| |
| // Render and execute the command list. |
| RecordRenderAndReadback(pCommandList, pRtvHeap, rtvDescriptorSize, 1, |
| &vertexBufferView, pRootSig, pRenderTarget, |
| pReadBuffer); |
| VERIFY_SUCCEEDED(pCommandList->Close()); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| |
| // Wait for previous frame. |
| WaitForSignal(pCommandQueue, FO); |
| |
| // At this point, we've verified that execution succeeded with DXIL. |
| BTC.SetOK(true); |
| |
| // Read back to CPU and examine contents. |
| { |
| MappedData data(pReadBuffer, m_width * m_height * 4); |
| const uint32_t *pPixels = (uint32_t *)data.data(); |
| if (SaveImages()) { |
| SavePixelsToFile(pPixels, DXGI_FORMAT_R8G8B8A8_UNORM, m_width, m_height, L"basic.bmp"); |
| } |
| uint32_t top = pPixels[m_width / 2]; // Top center. |
| uint32_t mid = pPixels[m_width / 2 + m_width * (m_height / 2)]; // Middle center. |
| VERIFY_ARE_EQUAL(0xff663300, top); // clear color |
| VERIFY_ARE_EQUAL(0xffffffff, mid); // white |
| } |
| #endif |
| } |
| |
| TEST_F(ExecutionTest, Int64Test) { |
| static const char pShader[] = |
| "RWByteAddressBuffer g_bab : register(u0);\r\n" |
| "[numthreads(8,8,1)]\r\n" |
| "void main(uint GI : SV_GroupIndex) {" |
| " uint addr = GI * 4;\r\n" |
| " uint val = g_bab.Load(addr);\r\n" |
| " uint64_t u64 = val;\r\n" |
| " u64 *= val;\r\n" |
| " g_bab.Store(addr, (uint)(u64 >> 32));\r\n" |
| "}"; |
| static const int NumThreadsX = 8; |
| static const int NumThreadsY = 8; |
| static const int NumThreadsZ = 1; |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| static const int DispatchGroupCount = 1; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| return; |
| } |
| std::vector<uint32_t> values; |
| SetupComputeValuePattern(values, ThreadsPerGroup * DispatchGroupCount); |
| VERIFY_ARE_EQUAL(values[0], (uint32_t)0); |
| RunRWByteBufferComputeTest(pDevice, pShader, values); |
| VERIFY_ARE_EQUAL(values[0], (uint32_t)0); |
| } |
| |
| TEST_F(ExecutionTest, SignTest) { |
| static const char pShader[] = |
| "RWByteAddressBuffer g_bab : register(u0);\r\n" |
| "[numthreads(8,1,1)]\r\n" |
| "void main(uint GI : SV_GroupIndex) {" |
| " uint addr = GI * 4;\r\n" |
| " int val = g_bab.Load(addr);\r\n" |
| " g_bab.Store(addr, (uint)(sign(val)));\r\n" |
| "}"; |
| static const int NumThreadsX = 8; |
| static const int NumThreadsY = 1; |
| static const int NumThreadsZ = 1; |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| static const int DispatchGroupCount = 1; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| const uint32_t neg1 = (uint32_t)-1; |
| uint32_t origValues[] = { (uint32_t)-3, (uint32_t)-2, neg1, 0, 1, 2, 3, 4 }; |
| std::vector<uint32_t> values(origValues, origValues + _countof(origValues)); |
| |
| RunRWByteBufferComputeTest(pDevice, pShader, values); |
| VERIFY_ARE_EQUAL(values[0], neg1); |
| VERIFY_ARE_EQUAL(values[1], neg1); |
| VERIFY_ARE_EQUAL(values[2], neg1); |
| VERIFY_ARE_EQUAL(values[3], (uint32_t)0); |
| VERIFY_ARE_EQUAL(values[4], (uint32_t)1); |
| VERIFY_ARE_EQUAL(values[5], (uint32_t)1); |
| VERIFY_ARE_EQUAL(values[6], (uint32_t)1); |
| VERIFY_ARE_EQUAL(values[7], (uint32_t)1); |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsDDITest) { |
| #ifndef _HLK_CONF |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; |
| if (FAILED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS1, &O, sizeof(O)))) |
| return; |
| bool waveSupported = O.WaveOps; |
| UINT laneCountMin = O.WaveLaneCountMin; |
| UINT laneCountMax = O.WaveLaneCountMax; |
| LogCommentFmt(L"WaveOps %i, WaveLaneCountMin %u, WaveLaneCountMax %u", waveSupported, laneCountMin, laneCountMax); |
| VERIFY_IS_TRUE(laneCountMin <= laneCountMax); |
| if (waveSupported) { |
| VERIFY_IS_TRUE(laneCountMin > 0 && laneCountMax > 0); |
| } |
| else { |
| VERIFY_IS_TRUE(laneCountMin == 0 && laneCountMax == 0); |
| } |
| #endif |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsTest) { |
| #ifndef _HLK_CONF |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| struct PerThreadData { |
| uint32_t id, flags, laneIndex, laneCount, firstLaneId, preds, firstlaneX, lane1X; |
| uint32_t allBC, allSum, allProd, allAND, allOR, allXOR, allMin, allMax; |
| uint32_t pfBC, pfSum, pfProd; |
| uint32_t ballot[4]; |
| uint32_t diver; // divergent value, used in calculation |
| int32_t i_diver; // divergent value, used in calculation |
| int32_t i_allMax, i_allMin, i_allSum, i_allProd; |
| int32_t i_pfSum, i_pfProd; |
| }; |
| static const char pShader[] = |
| WAVE_INTRINSIC_DXBC_GUARD |
| "struct PerThreadData {\r\n" |
| " uint id, flags, laneIndex, laneCount, firstLaneId, preds, firstlaneX, lane1X;\r\n" |
| " uint allBC, allSum, allProd, allAND, allOR, allXOR, allMin, allMax;\r\n" |
| " uint pfBC, pfSum, pfProd;\r\n" |
| " uint4 ballot;\r\n" |
| " uint diver;\r\n" |
| " int i_diver;\r\n" |
| " int i_allMax, i_allMin, i_allSum, i_allProd;\r\n" |
| " int i_pfSum, i_pfProd;\r\n" |
| "};\r\n" |
| "RWStructuredBuffer<PerThreadData> g_sb : register(u0);\r\n" |
| "[numthreads(8,8,1)]\r\n" |
| "void main(uint GI : SV_GroupIndex, uint3 GTID : SV_GroupThreadID) {" |
| " PerThreadData pts = g_sb[GI];\r\n" |
| " uint diver = GTID.x + 2;\r\n" |
| " pts.diver = diver;\r\n" |
| " pts.flags = 0;\r\n" |
| " pts.preds = 0;\r\n" |
| " if (WaveIsFirstLane()) pts.flags |= 1;\r\n" |
| " pts.laneIndex = WaveGetLaneIndex();\r\n" |
| " pts.laneCount = WaveGetLaneCount();\r\n" |
| " pts.firstLaneId = WaveReadLaneFirst(pts.id);\r\n" |
| " pts.preds |= ((WaveActiveAnyTrue(diver == 1) ? 1 : 0) << 0);\r\n" |
| " pts.preds |= ((WaveActiveAllTrue(diver == 1) ? 1 : 0) << 1);\r\n" |
| " pts.preds |= ((WaveActiveAllEqual(diver) ? 1 : 0) << 2);\r\n" |
| " pts.preds |= ((WaveActiveAllEqual(GTID.z) ? 1 : 0) << 3);\r\n" |
| " pts.preds |= ((WaveActiveAllEqual(WaveReadLaneFirst(diver)) ? 1 : 0) << 4);\r\n" |
| " pts.ballot = WaveActiveBallot(diver > 3);\r\n" |
| " pts.firstlaneX = WaveReadLaneFirst(GTID.x);\r\n" |
| " pts.lane1X = WaveReadLaneAt(GTID.x, 1);\r\n" |
| "\r\n" |
| " pts.allBC = WaveActiveCountBits(diver > 3);\r\n" |
| " pts.allSum = WaveActiveSum(diver);\r\n" |
| " pts.allProd = WaveActiveProduct(diver);\r\n" |
| " pts.allAND = WaveActiveBitAnd(diver);\r\n" |
| " pts.allOR = WaveActiveBitOr(diver);\r\n" |
| " pts.allXOR = WaveActiveBitXor(diver);\r\n" |
| " pts.allMin = WaveActiveMin(diver);\r\n" |
| " pts.allMax = WaveActiveMax(diver);\r\n" |
| "\r\n" |
| " pts.pfBC = WavePrefixCountBits(diver > 3);\r\n" |
| " pts.pfSum = WavePrefixSum(diver);\r\n" |
| " pts.pfProd = WavePrefixProduct(diver);\r\n" |
| "\r\n" |
| " int i_diver = pts.i_diver;\r\n" |
| " pts.i_allMax = WaveActiveMax(i_diver);\r\n" |
| " pts.i_allMin = WaveActiveMin(i_diver);\r\n" |
| " pts.i_allSum = WaveActiveSum(i_diver);\r\n" |
| " pts.i_allProd = WaveActiveProduct(i_diver);\r\n" |
| " pts.i_pfSum = WavePrefixSum(i_diver);\r\n" |
| " pts.i_pfProd = WavePrefixProduct(i_diver);\r\n" |
| "\r\n" |
| " g_sb[GI] = pts;\r\n" |
| "}"; |
| static const int NumtheadsX = 8; |
| static const int NumtheadsY = 8; |
| static const int NumtheadsZ = 1; |
| static const int ThreadsPerGroup = NumtheadsX * NumtheadsY * NumtheadsZ; |
| static const int DispatchGroupCount = 1; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| std::vector<PerThreadData> values; |
| values.resize(ThreadsPerGroup * DispatchGroupCount); |
| for (size_t i = 0; i < values.size(); ++i) { |
| memset(&values[i], 0, sizeof(PerThreadData)); |
| values[i].id = (uint32_t)i; |
| values[i].i_diver = (int)i; |
| values[i].i_diver *= (i % 2) ? 1 : -1; |
| } |
| |
| static const int DispatchGroupX = 1; |
| static const int DispatchGroupY = 1; |
| static const int DispatchGroupZ = 1; |
| |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12DescriptorHeap> pUavHeap; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| UINT uavDescriptorSize; |
| FenceObj FO; |
| bool dxbc = UseDxbc(); |
| |
| const size_t valueSizeInBytes = values.size() * sizeof(PerThreadData); |
| CreateComputeCommandQueue(pDevice, L"WaveIntrinsicsTest Command Queue", &pCommandQueue); |
| InitFenceObj(pDevice, &FO); |
| |
| // Describe and create a UAV descriptor heap. |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 1; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pUavHeap))); |
| uavDescriptorSize = pDevice->GetDescriptorHandleIncrementSize(heapDesc.Type); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| { |
| CD3DX12_DESCRIPTOR_RANGE ranges[1]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, 0); |
| |
| CD3DX12_ROOT_PARAMETER rootParameters[1]; |
| rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); |
| |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init(_countof(rootParameters), rootParameters, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE); |
| |
| CComPtr<ID3DBlob> signature; |
| CComPtr<ID3DBlob> error; |
| VERIFY_SUCCEEDED(D3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error)); |
| VERIFY_SUCCEEDED(pDevice->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&pRootSignature))); |
| } |
| |
| // Create pipeline state object. |
| CComPtr<ID3D12PipelineState> pComputeState; |
| CreateComputePSO(pDevice, pRootSignature, pShader, L"cs_6_0", &pComputeState); |
| |
| // Create a command allocator and list for compute. |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, IID_PPV_ARGS(&pCommandList))); |
| |
| // Set up UAV resource. |
| CComPtr<ID3D12Resource> pUavResource; |
| CComPtr<ID3D12Resource> pReadBuffer; |
| CComPtr<ID3D12Resource> pUploadResource; |
| CreateTestUavs(pDevice, pCommandList, values.data(), (UINT)valueSizeInBytes, &pUavResource, &pUploadResource, &pReadBuffer); |
| |
| // Close the command list and execute it to perform the GPU setup. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| VERIFY_SUCCEEDED(pCommandAllocator->Reset()); |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); |
| |
| // Run the compute shader and copy the results back to readable memory. |
| { |
| D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; |
| uavDesc.Format = DXGI_FORMAT_UNKNOWN; |
| uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; |
| uavDesc.Buffer.FirstElement = 0; |
| uavDesc.Buffer.NumElements = (UINT)values.size(); |
| uavDesc.Buffer.StructureByteStride = sizeof(PerThreadData); |
| uavDesc.Buffer.CounterOffsetInBytes = 0; |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; |
| CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(pUavHeap->GetCPUDescriptorHandleForHeapStart()); |
| CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandleGpu(pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| pDevice->CreateUnorderedAccessView(pUavResource, nullptr, &uavDesc, uavHandle); |
| SetDescriptorHeap(pCommandList, pUavHeap); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| pCommandList->SetComputeRootDescriptorTable(0, uavHandleGpu); |
| } |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| RecordTransitionBarrier(pCommandList, pUavResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->CopyResource(pReadBuffer, pUavResource); |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| { |
| MappedData mappedData(pReadBuffer, (UINT)valueSizeInBytes); |
| PerThreadData *pData = (PerThreadData *)mappedData.data(); |
| memcpy(values.data(), pData, valueSizeInBytes); |
| |
| // Gather some general data. |
| // The 'firstLaneId' captures a unique number per first-lane per wave. |
| // Counting the number distinct firstLaneIds gives us the number of waves. |
| std::vector<uint32_t> firstLaneIds; |
| for (size_t i = 0; i < values.size(); ++i) { |
| PerThreadData &pts = values[i]; |
| uint32_t firstLaneId = pts.firstLaneId; |
| if (!contains(firstLaneIds, firstLaneId)) { |
| firstLaneIds.push_back(firstLaneId); |
| } |
| } |
| |
| // Waves should cover 4 threads or more. |
| LogCommentFmt(L"Found %u distinct lane ids: %u", firstLaneIds.size()); |
| if (!dxbc) { |
| VERIFY_IS_GREATER_THAN_OR_EQUAL(values.size() / 4, firstLaneIds.size()); |
| } |
| |
| // Now, group threads into waves. |
| std::map<uint32_t, std::unique_ptr<std::vector<PerThreadData *> > > waves; |
| for (size_t i = 0; i < firstLaneIds.size(); ++i) { |
| waves[firstLaneIds[i]] = std::make_unique<std::vector<PerThreadData *> >(); |
| } |
| for (size_t i = 0; i < values.size(); ++i) { |
| PerThreadData &pts = values[i]; |
| std::unique_ptr<std::vector<PerThreadData *> > &wave = waves[pts.firstLaneId]; |
| wave->push_back(&pts); |
| } |
| |
| // Verify that all the wave values are coherent across the wave. |
| for (size_t i = 0; i < values.size(); ++i) { |
| PerThreadData &pts = values[i]; |
| std::unique_ptr<std::vector<PerThreadData *> > &wave = waves[pts.firstLaneId]; |
| // Sort the lanes by increasing lane ID. |
| struct LaneIdOrderPred { |
| bool operator()(PerThreadData *a, PerThreadData *b) { |
| return a->laneIndex < b->laneIndex; |
| } |
| }; |
| std::sort(wave.get()->begin(), wave.get()->end(), LaneIdOrderPred()); |
| |
| // Verify some interesting properties of the first lane. |
| uint32_t pfBC, pfSum, pfProd; |
| int32_t i_pfSum, i_pfProd; |
| int32_t i_allMax, i_allMin; |
| { |
| PerThreadData *ptdFirst = wave->front(); |
| VERIFY_IS_TRUE(0 != (ptdFirst->flags & 1)); // FirstLane sets this bit. |
| VERIFY_IS_TRUE(0 == ptdFirst->pfBC); |
| VERIFY_IS_TRUE(0 == ptdFirst->pfSum); |
| VERIFY_IS_TRUE(1 == ptdFirst->pfProd); |
| VERIFY_IS_TRUE(0 == ptdFirst->i_pfSum); |
| VERIFY_IS_TRUE(1 == ptdFirst->i_pfProd); |
| pfBC = (ptdFirst->diver > 3) ? 1 : 0; |
| pfSum = ptdFirst->diver; |
| pfProd = ptdFirst->diver; |
| i_pfSum = ptdFirst->i_diver; |
| i_pfProd = ptdFirst->i_diver; |
| i_allMax = i_allMin = ptdFirst->i_diver; |
| } |
| |
| // Calculate values which take into consideration all lanes. |
| uint32_t preds = 0; |
| preds |= 1 << 1; // AllTrue starts true, switches to false if needed. |
| preds |= 1 << 2; // AllEqual starts true, switches to false if needed. |
| preds |= 1 << 3; // WaveActiveAllEqual(GTID.z) is always true |
| preds |= 1 << 4; // (WaveActiveAllEqual(WaveReadLaneFirst(diver)) is always true |
| uint32_t ballot[4] = { 0, 0, 0, 0 }; |
| int32_t i_allSum = 0, i_allProd = 1; |
| for (size_t n = 0; n < wave->size(); ++n) { |
| std::vector<PerThreadData *> &lanes = *wave.get(); |
| // pts.preds |= ((WaveActiveAnyTrue(diver == 1) ? 1 : 0) << 0); |
| if (lanes[n]->diver == 1) preds |= (1 << 0); |
| // pts.preds |= ((WaveActiveAllTrue(diver == 1) ? 1 : 0) << 1); |
| if (lanes[n]->diver != 1) preds &= ~(1 << 1); |
| // pts.preds |= ((WaveActiveAllEqual(diver) ? 1 : 0) << 2); |
| if (lanes[0]->diver != lanes[n]->diver) preds &= ~(1 << 2); |
| // pts.ballot = WaveActiveBallot(diver > 3);\r\n" |
| if (lanes[n]->diver > 3) { |
| // This is the uint4 result layout: |
| // .x -> bits 0 .. 31 |
| // .y -> bits 32 .. 63 |
| // .z -> bits 64 .. 95 |
| // .w -> bits 96 ..127 |
| uint32_t component = lanes[n]->laneIndex / 32; |
| uint32_t bit = lanes[n]->laneIndex % 32; |
| ballot[component] |= 1 << bit; |
| } |
| i_allMax = std::max(lanes[n]->i_diver, i_allMax); |
| i_allMin = std::min(lanes[n]->i_diver, i_allMin); |
| i_allProd *= lanes[n]->i_diver; |
| i_allSum += lanes[n]->i_diver; |
| } |
| |
| for (size_t n = 1; n < wave->size(); ++n) { |
| // 'All' operations are uniform across the wave. |
| std::vector<PerThreadData *> &lanes = *wave.get(); |
| VERIFY_IS_TRUE(0 == (lanes[n]->flags & 1)); // non-firstlanes do not set this bit |
| VERIFY_ARE_EQUAL(lanes[0]->allBC, lanes[n]->allBC); |
| VERIFY_ARE_EQUAL(lanes[0]->allSum, lanes[n]->allSum); |
| VERIFY_ARE_EQUAL(lanes[0]->allProd, lanes[n]->allProd); |
| VERIFY_ARE_EQUAL(lanes[0]->allAND, lanes[n]->allAND); |
| VERIFY_ARE_EQUAL(lanes[0]->allOR, lanes[n]->allOR); |
| VERIFY_ARE_EQUAL(lanes[0]->allXOR, lanes[n]->allXOR); |
| VERIFY_ARE_EQUAL(lanes[0]->allMin, lanes[n]->allMin); |
| VERIFY_ARE_EQUAL(lanes[0]->allMax, lanes[n]->allMax); |
| VERIFY_ARE_EQUAL(i_allMax, lanes[n]->i_allMax); |
| VERIFY_ARE_EQUAL(i_allMin, lanes[n]->i_allMin); |
| VERIFY_ARE_EQUAL(i_allProd, lanes[n]->i_allProd); |
| VERIFY_ARE_EQUAL(i_allSum, lanes[n]->i_allSum); |
| |
| // first-lane reads and uniform reads are uniform across the wave. |
| VERIFY_ARE_EQUAL(lanes[0]->firstlaneX, lanes[n]->firstlaneX); |
| VERIFY_ARE_EQUAL(lanes[0]->lane1X, lanes[n]->lane1X); |
| |
| // the lane count is uniform across the wave. |
| VERIFY_ARE_EQUAL(lanes[0]->laneCount, lanes[n]->laneCount); |
| |
| // The predicates are uniform across the wave. |
| VERIFY_ARE_EQUAL(lanes[n]->preds, preds); |
| |
| // the lane index is distinct per thread. |
| for (size_t prior = 0; prior < n; ++prior) { |
| VERIFY_ARE_NOT_EQUAL(lanes[prior]->laneIndex, lanes[n]->laneIndex); |
| } |
| // Ballot results are uniform across the wave. |
| VERIFY_ARE_EQUAL(0, memcmp(ballot, lanes[n]->ballot, sizeof(ballot))); |
| |
| // Keep running total of prefix calculation. Prefix values are exclusive to |
| // the executing lane. |
| VERIFY_ARE_EQUAL(pfBC, lanes[n]->pfBC); |
| VERIFY_ARE_EQUAL(pfSum, lanes[n]->pfSum); |
| VERIFY_ARE_EQUAL(pfProd, lanes[n]->pfProd); |
| VERIFY_ARE_EQUAL(i_pfSum, lanes[n]->i_pfSum); |
| VERIFY_ARE_EQUAL(i_pfProd, lanes[n]->i_pfProd); |
| pfBC += (lanes[n]->diver > 3) ? 1 : 0; |
| pfSum += lanes[n]->diver; |
| pfProd *= lanes[n]->diver; |
| i_pfSum += lanes[n]->i_diver; |
| i_pfProd *= lanes[n]->i_diver; |
| } |
| // TODO: add divergent branching and verify that the otherwise uniform values properly diverge |
| } |
| |
| // Compare each value of each per-thread element. |
| for (size_t i = 0; i < values.size(); ++i) { |
| PerThreadData &pts = values[i]; |
| VERIFY_ARE_EQUAL(i, pts.id); // ID is unchanged. |
| } |
| } |
| #endif |
| } |
| |
| // This test is assuming that the adapter implements WaveReadLaneFirst correctly |
| TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| struct Vertex { |
| XMFLOAT3 position; |
| }; |
| |
| struct PerPixelData { |
| XMFLOAT4 position; |
| uint32_t id, flags, laneIndex, laneCount, firstLaneId, sum1; |
| uint32_t id0, id1, id2, id3; |
| uint32_t acrossX, acrossY, acrossDiag, quadActiveCount; |
| }; |
| |
| const UINT RTWidth = 128; |
| const UINT RTHeight = 128; |
| |
| // Shaders. |
| static const char pShaders[] = |
| WAVE_INTRINSIC_DXBC_GUARD |
| "struct PSInput {\r\n" |
| " float4 position : SV_POSITION;\r\n" |
| "};\r\n\r\n" |
| "PSInput VSMain(float4 position : POSITION) {\r\n" |
| " PSInput result;\r\n" |
| "\r\n" |
| " result.position = position;\r\n" |
| " return result;\r\n" |
| "}\r\n\r\n" |
| "uint pos_to_id(float4 pos) { return pos.x * 128 + pos.y; }\r\n" |
| "struct PerPixelData {\r\n" |
| " float4 position;\r\n" |
| " uint id, flags, laneIndex, laneCount, firstLaneId, sum1;\r\n" |
| " uint id0, id1, id2, id3;\r\n" |
| " uint acrossX, acrossY, acrossDiag, quadActiveCount;\r\n" |
| "};\r\n" |
| "AppendStructuredBuffer<PerPixelData> g_sb : register(u1);\r\n" |
| "float4 PSMain(PSInput input) : SV_TARGET {\r\n" |
| " uint one = 1;\r\n" |
| " PerPixelData d;\r\n" |
| " d.position = input.position;\r\n" |
| " d.id = pos_to_id(input.position);\r\n" |
| " d.flags = 0;\r\n" |
| " if (WaveIsFirstLane()) d.flags |= 1;\r\n" |
| " d.laneIndex = WaveGetLaneIndex();\r\n" |
| " d.laneCount = WaveGetLaneCount();\r\n" |
| " d.firstLaneId = WaveReadLaneFirst(d.id);\r\n" |
| " d.sum1 = WaveActiveSum(one);\r\n" |
| " d.id0 = QuadReadLaneAt(d.id, 0);\r\n" |
| " d.id1 = QuadReadLaneAt(d.id, 1);\r\n" |
| " d.id2 = QuadReadLaneAt(d.id, 2);\r\n" |
| " d.id3 = QuadReadLaneAt(d.id, 3);\r\n" |
| " d.acrossX = QuadReadAcrossX(d.id);\r\n" |
| " d.acrossY = QuadReadAcrossY(d.id);\r\n" |
| " d.acrossDiag = QuadReadAcrossDiagonal(d.id);\r\n" |
| " d.quadActiveCount = one + QuadReadAcrossX(one) + QuadReadAcrossY(one) + QuadReadAcrossDiagonal(one);\r\n" |
| " g_sb.Append(d);\r\n" |
| " return 1;\r\n" |
| "};\r\n"; |
| |
| CComPtr<ID3D12Device> pDevice; |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12DescriptorHeap> pUavHeap, pRtvHeap; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| CComPtr<ID3D12PipelineState> pPSO; |
| CComPtr<ID3D12Resource> pRenderTarget, pReadBuffer; |
| UINT uavDescriptorSize, rtvDescriptorSize; |
| CComPtr<ID3D12Resource> pVertexBuffer; |
| D3D12_VERTEX_BUFFER_VIEW vertexBufferView; |
| |
| if (!CreateDevice(&pDevice)) |
| return; |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| FenceObj FO; |
| InitFenceObj(pDevice, &FO); |
| |
| // Describe and create a UAV descriptor heap. |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 1; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pUavHeap))); |
| uavDescriptorSize = pDevice->GetDescriptorHandleIncrementSize(heapDesc.Type); |
| |
| CreateRtvDescriptorHeap(pDevice, 1, &pRtvHeap, &rtvDescriptorSize); |
| CreateRenderTargetAndReadback(pDevice, pRtvHeap, RTHeight, RTWidth, &pRenderTarget, &pReadBuffer); |
| |
| // Create root signature: one UAV. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| { |
| CD3DX12_DESCRIPTOR_RANGE ranges[1]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1, 0, 0); |
| |
| CD3DX12_ROOT_PARAMETER rootParameters[1]; |
| rootParameters[0].InitAsDescriptorTable(1, &ranges[0], D3D12_SHADER_VISIBILITY_ALL); |
| |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init(_countof(rootParameters), rootParameters, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT); |
| |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSignature); |
| } |
| |
| D3D12_INPUT_ELEMENT_DESC elementDesc[] = { |
| {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, |
| D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}}; |
| D3D12_INPUT_LAYOUT_DESC InputLayout = {elementDesc, _countof(elementDesc)}; |
| CreateGraphicsPSO(pDevice, &InputLayout, pRootSignature, pShaders, &pPSO); |
| |
| CreateGraphicsCommandQueueAndList(pDevice, &pCommandQueue, &pCommandAllocator, |
| &pCommandList, pPSO); |
| |
| // Single triangle covering half the target. |
| Vertex vertices[] = { |
| { { -1.0f, 1.0f, 0.0f } }, |
| { { 1.0f, 1.0f, 0.0f } }, |
| { { -1.0f, -1.0f, 0.0f } } }; |
| const UINT TriangleCount = _countof(vertices) / 3; |
| |
| CreateVertexBuffer(pDevice, vertices, &pVertexBuffer, &vertexBufferView); |
| |
| bool dxbc = UseDxbc(); |
| |
| // Set up UAV resource. |
| std::vector<PerPixelData> values; |
| values.resize(RTWidth * RTHeight * 2); |
| UINT valueSizeInBytes = (UINT)values.size() * sizeof(PerPixelData); |
| memset(values.data(), 0, valueSizeInBytes); |
| CComPtr<ID3D12Resource> pUavResource; |
| CComPtr<ID3D12Resource> pUavReadBuffer; |
| CComPtr<ID3D12Resource> pUploadResource; |
| CreateTestUavs(pDevice, pCommandList, values.data(), valueSizeInBytes, &pUavResource, &pUploadResource, &pUavReadBuffer); |
| |
| // Set up the append counter resource. |
| CComPtr<ID3D12Resource> pUavCounterResource; |
| CComPtr<ID3D12Resource> pReadCounterBuffer; |
| CComPtr<ID3D12Resource> pUploadCounterResource; |
| BYTE zero[sizeof(UINT)] = { 0 }; |
| CreateTestUavs(pDevice, pCommandList, zero, sizeof(zero), &pUavCounterResource, &pUploadCounterResource, &pReadCounterBuffer); |
| |
| // Close the command list and execute it to perform the GPU setup. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| VERIFY_SUCCEEDED(pCommandAllocator->Reset()); |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pPSO)); |
| |
| pCommandList->SetGraphicsRootSignature(pRootSignature); |
| SetDescriptorHeap(pCommandList, pUavHeap); |
| { |
| D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; |
| uavDesc.Format = DXGI_FORMAT_UNKNOWN; |
| uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; |
| uavDesc.Buffer.FirstElement = 0; |
| uavDesc.Buffer.NumElements = (UINT)values.size(); |
| uavDesc.Buffer.StructureByteStride = sizeof(PerPixelData); |
| uavDesc.Buffer.CounterOffsetInBytes = 0; |
| uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; |
| CD3DX12_CPU_DESCRIPTOR_HANDLE uavHandle(pUavHeap->GetCPUDescriptorHandleForHeapStart()); |
| CD3DX12_GPU_DESCRIPTOR_HANDLE uavHandleGpu(pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| pDevice->CreateUnorderedAccessView(pUavResource, pUavCounterResource, &uavDesc, uavHandle); |
| pCommandList->SetGraphicsRootDescriptorTable(0, uavHandleGpu); |
| } |
| RecordRenderAndReadback(pCommandList, pRtvHeap, rtvDescriptorSize, TriangleCount, &vertexBufferView, nullptr, pRenderTarget, pReadBuffer); |
| RecordTransitionBarrier(pCommandList, pUavResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| RecordTransitionBarrier(pCommandList, pUavCounterResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->CopyResource(pUavReadBuffer, pUavResource); |
| pCommandList->CopyResource(pReadCounterBuffer, pUavCounterResource); |
| VERIFY_SUCCEEDED(pCommandList->Close()); |
| LogCommentFmt(L"Rendering to %u by %u", RTWidth, RTHeight); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| { |
| MappedData data(pReadBuffer, RTWidth * RTHeight * 4); |
| const uint32_t *pPixels = (uint32_t *)data.data(); |
| if (SaveImages()) { |
| SavePixelsToFile(pPixels, DXGI_FORMAT_R8G8B8A8_UNORM, RTWidth, RTHeight, L"psintrin.bmp"); |
| } |
| } |
| |
| uint32_t appendCount; |
| { |
| MappedData mappedData(pReadCounterBuffer, sizeof(uint32_t)); |
| appendCount = *((uint32_t *)mappedData.data()); |
| LogCommentFmt(L"%u elements in append buffer", appendCount); |
| } |
| |
| { |
| MappedData mappedData(pUavReadBuffer, (UINT32)values.size()); |
| PerPixelData *pData = (PerPixelData *)mappedData.data(); |
| memcpy(values.data(), pData, valueSizeInBytes); |
| |
| // DXBC is handy to test pipeline setup, but interesting functions are |
| // stubbed out, so there is no point in further validation. |
| if (dxbc) |
| return; |
| |
| uint32_t maxActiveLaneCount = 0; |
| uint32_t maxLaneCount = 0; |
| for (uint32_t i = 0; i < appendCount; ++i) { |
| maxActiveLaneCount = std::max(maxActiveLaneCount, values[i].sum1); |
| maxLaneCount = std::max(maxLaneCount, values[i].laneCount); |
| } |
| |
| uint32_t peerOfHelperLanes = 0; |
| for (uint32_t i = 0; i < appendCount; ++i) { |
| if (values[i].sum1 != maxActiveLaneCount) { |
| ++peerOfHelperLanes; |
| } |
| } |
| |
| LogCommentFmt( |
| L"Found: %u threads. Waves reported up to %u total lanes, up " |
| L"to %u active lanes, and %u threads had helper/inactive lanes.", |
| appendCount, maxLaneCount, maxActiveLaneCount, peerOfHelperLanes); |
| |
| // Group threads into quad invocations. |
| uint32_t singlePixelCount = 0; |
| uint32_t multiPixelCount = 0; |
| std::unordered_set<uint32_t> ids; |
| std::multimap<uint32_t, PerPixelData *> idGroups; |
| std::multimap<uint32_t, PerPixelData *> firstIdGroups; |
| for (uint32_t i = 0; i < appendCount; ++i) { |
| ids.insert(values[i].id); |
| idGroups.insert(std::make_pair(values[i].id, &values[i])); |
| firstIdGroups.insert(std::make_pair(values[i].firstLaneId, &values[i])); |
| } |
| for (uint32_t id : ids) { |
| if (idGroups.count(id) == 1) |
| ++singlePixelCount; |
| else |
| ++multiPixelCount; |
| } |
| LogCommentFmt(L"%u pixels were processed by a single thread. %u invocations were for shared pixels.", |
| singlePixelCount, multiPixelCount); |
| |
| // Multiple threads may have tried to shade the same pixel. (Is this true even if we have only one triangle?) |
| // Where every pixel is distinct, it's very straightforward to validate. |
| { |
| auto cur = firstIdGroups.begin(), end = firstIdGroups.end(); |
| while (cur != end) { |
| bool simpleWave = true; |
| uint32_t firstId = (*cur).first; |
| auto groupEnd = cur; |
| while (groupEnd != end && (*groupEnd).first == firstId) { |
| if (idGroups.count((*groupEnd).second->id) > 1) |
| simpleWave = false; |
| ++groupEnd; |
| } |
| if (simpleWave) { |
| // Break the wave into quads. |
| struct QuadData { |
| unsigned count; |
| PerPixelData *data[4]; |
| }; |
| std::map<uint32_t, QuadData> quads; |
| for (auto i = cur; i != groupEnd; ++i) { |
| // assuming that it is a simple wave, idGroups has a unique id for each entry. |
| uint32_t laneId = (*i).second->id; |
| uint32_t laneIds[4] = {(*i).second->id0, (*i).second->id1, |
| (*i).second->id2, (*i).second->id3}; |
| // Since this is a simple wave, each lane has an unique id and |
| // therefore should not have any ids in there. |
| VERIFY_IS_TRUE(quads.find(laneId) == quads.end()); |
| // check if QuadReadLaneAt is returning same values in a single quad. |
| bool newQuad = true; |
| for (unsigned quadIndex = 0; quadIndex < 4; ++quadIndex) { |
| auto match = quads.find(laneIds[quadIndex]); |
| if (match != quads.end()) { |
| (*match).second.data[(*match).second.count++] = (*i).second; |
| newQuad = false; |
| break; |
| } |
| auto quadMemberData = idGroups.find(laneIds[quadIndex]); |
| if (quadMemberData != idGroups.end()) { |
| VERIFY_IS_TRUE((*quadMemberData).second->id0 == laneIds[0]); |
| VERIFY_IS_TRUE((*quadMemberData).second->id1 == laneIds[1]); |
| VERIFY_IS_TRUE((*quadMemberData).second->id2 == laneIds[2]); |
| VERIFY_IS_TRUE((*quadMemberData).second->id3 == laneIds[3]); |
| } |
| } |
| if (newQuad) { |
| QuadData qdata; |
| qdata.count = 1; |
| qdata.data[0] = (*i).second; |
| quads.insert(std::make_pair(laneId, qdata)); |
| } |
| } |
| for (auto quadPair : quads) { |
| unsigned count = quadPair.second.count; |
| // There could be only one pixel data on the edge of the triangle |
| if (count < 2) continue; |
| PerPixelData **data = quadPair.second.data; |
| bool isTop[4]; |
| bool isLeft[4]; |
| PerPixelData helperData; |
| memset(&helperData, sizeof(helperData), 0); |
| PerPixelData *layout[4]; // tl,tr,bl,br |
| memset(layout, sizeof(layout), 0); |
| auto fnToLayout = [&](bool top, bool left) -> PerPixelData ** { |
| int idx = top ? 0 : 2; |
| idx += left ? 0 : 1; |
| return &layout[idx]; |
| }; |
| auto fnToLayoutData = [&](bool top, bool left) -> PerPixelData * { |
| PerPixelData **pResult = fnToLayout(top, left); |
| if (*pResult == nullptr) return &helperData; |
| return *pResult; |
| }; |
| VERIFY_IS_TRUE(count <= 4); |
| if (count == 2) { |
| isTop[0] = data[0]->position.y < data[1]->position.y; |
| isTop[1] = (data[0]->position.y == data[1]->position.y) ? isTop[0] : !isTop[0]; |
| isLeft[0] = data[0]->position.x < data[1]->position.x; |
| isLeft[1] = (data[0]->position.x == data[1]->position.x) ? isLeft[0] : !isLeft[0]; |
| } |
| else { |
| // with at least three samples, we have distinct x and y coordinates. |
| float left = std::min(data[0]->position.x, data[1]->position.x); |
| left = std::min(data[2]->position.x, left); |
| float top = std::min(data[0]->position.y, data[1]->position.y); |
| top = std::min(data[2]->position.y, top); |
| for (unsigned i = 0; i < count; ++i) { |
| isTop[i] = data[i]->position.y == top; |
| isLeft[i] = data[i]->position.x == left; |
| } |
| } |
| for (unsigned i = 0; i < count; ++i) { |
| *(fnToLayout(isTop[i], isLeft[i])) = data[i]; |
| } |
| |
| // Finally, we have a proper quad reconstructed. Validate. |
| for (unsigned i = 0; i < count; ++i) { |
| PerPixelData *d = data[i]; |
| VERIFY_ARE_EQUAL(d->id0, fnToLayoutData(true, true)->id); |
| VERIFY_ARE_EQUAL(d->id1, fnToLayoutData(true, false)->id); |
| VERIFY_ARE_EQUAL(d->id2, fnToLayoutData(false, true)->id); |
| VERIFY_ARE_EQUAL(d->id3, fnToLayoutData(false, false)->id); |
| VERIFY_ARE_EQUAL(d->acrossX, fnToLayoutData(isTop[i], !isLeft[i])->id); |
| VERIFY_ARE_EQUAL(d->acrossY, fnToLayoutData(!isTop[i], isLeft[i])->id); |
| VERIFY_ARE_EQUAL(d->acrossDiag, fnToLayoutData(!isTop[i], !isLeft[i])->id); |
| VERIFY_ARE_EQUAL(d->quadActiveCount, count); |
| } |
| } |
| } |
| cur = groupEnd; |
| } |
| } |
| |
| // TODO: provide validation for quads where the same pixel was shaded multiple times |
| // |
| // Consider: for pixels that were shaded multiple times, check whether |
| // some grouping of threads into quads satisfies all value requirements. |
| } |
| } |
| |
| struct ShaderOpTestResult { |
| st::ShaderOp *ShaderOp; |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet; |
| std::shared_ptr<st::ShaderOpTest> Test; |
| }; |
| |
// Plain record of four floats, laid out in declaration order.
// NOTE(review): the "_o" suffix presumably marks values written back by a
// shader - confirm against the code that fills this struct.
struct SPrimitives {
  float f_float;    // first value
  float f_float2;   // second value
  float f_float_o;  // counterpart of f_float (see note above)
  float f_float2_o; // counterpart of f_float2 (see note above)
};
| |
| std::shared_ptr<ShaderOpTestResult> |
| RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, |
| LPCSTR pName, |
| st::ShaderOpTest::TInitCallbackFn pInitCallback, |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet) { |
| st::ShaderOp *pShaderOp; |
| if (pName == nullptr) { |
| if (ShaderOpSet->ShaderOps.size() != 1) { |
| VERIFY_FAIL(L"Expected a single shader operation."); |
| } |
| pShaderOp = ShaderOpSet->ShaderOps[0].get(); |
| } |
| else { |
| pShaderOp = ShaderOpSet->GetShaderOp(pName); |
| } |
| if (pShaderOp == nullptr) { |
| std::string msg = "Unable to find shader op "; |
| msg += pName; |
| msg += "; available ops"; |
| const char sep = ':'; |
| for (auto &pAvailOp : ShaderOpSet->ShaderOps) { |
| msg += sep; |
| msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; |
| } |
| CA2W msgWide(msg.c_str()); |
| VERIFY_FAIL(msgWide.m_psz); |
| } |
| |
| // This won't actually be used since we're supplying the device, |
| // but let's make it consistent. |
| pShaderOp->UseWarpDevice = GetTestParamUseWARP(true); |
| |
| std::shared_ptr<st::ShaderOpTest> test = std::make_shared<st::ShaderOpTest>(); |
| test->SetDxcSupport(&support); |
| test->SetInitCallback(pInitCallback); |
| test->SetDevice(pDevice); |
| test->RunShaderOp(pShaderOp); |
| |
| std::shared_ptr<ShaderOpTestResult> result = |
| std::make_shared<ShaderOpTestResult>(); |
| result->ShaderOpSet = ShaderOpSet; |
| result->Test = test; |
| result->ShaderOp = pShaderOp; |
| return result; |
| } |
| |
| std::shared_ptr<ShaderOpTestResult> |
| RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, |
| IStream *pStream, LPCSTR pName, |
| st::ShaderOpTest::TInitCallbackFn pInitCallback) { |
| DXASSERT_NOMSG(pStream != nullptr); |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, ShaderOpSet); |
| } |
| |
| TEST_F(ExecutionTest, OutOfBoundsTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| // Single operation test at the moment. |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr); |
| MappedData data; |
| // Read back to CPU and examine contents - should get pure red. |
| { |
| MappedData data; |
| test->Test->GetReadBackData("RTarget", &data); |
| const uint32_t *pPixels = (uint32_t *)data.data(); |
| uint32_t first = *pPixels; |
| VERIFY_ARE_EQUAL(0xff0000ff, first); // pure red - only first component is read |
| } |
| } |
| |
| TEST_F(ExecutionTest, SaturateTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| // Single operation test at the moment. |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr); |
| MappedData data; |
| test->Test->GetReadBackData("U0", &data); |
| const float *pValues = (float *)data.data(); |
| // Everything is zero except for 1.5f and +Inf, which saturate to 1.0f |
| const float ExpectedCases[9] = { |
| 0.0f, 0.0f, 0.0f, 0.0f, // -inf, -1.5, -denorm, -0 |
| 0.0f, 0.0f, 1.0f, 1.0f, // 0, denorm, 1.5f, inf |
| 0.0f // nan |
| }; |
| for (size_t i = 0; i < _countof(ExpectedCases); ++i) { |
| VERIFY_IS_TRUE(ifdenorm_flushf_eq(*pValues, ExpectedCases[i])); |
| ++pValues; |
| } |
| } |
| |
| void ExecutionTest::BasicTriangleTestSetup(LPCSTR ShaderOpName, LPCWSTR FileName, D3D_SHADER_MODEL testModel) { |
| #ifdef _HLK_CONF |
| UNREFERENCED_PARAMETER(ShaderOpName); |
| UNREFERENCED_PARAMETER(FileName); |
| UNREFERENCED_PARAMETER(testModel); |
| #else |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| // Single operation test at the moment. |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, testModel)) |
| return; |
| |
| // As this is used, 6.2 requirement always comes with requiring native 16-bit ops |
| if (testModel == D3D_SHADER_MODEL_6_2 && !DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, ShaderOpName, nullptr); |
| MappedData data; |
| D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; |
| UINT width = (UINT)D.Width; |
| UINT height = D.Height; |
| test->Test->GetReadBackData("RTarget", &data); |
| const uint32_t *pPixels = (uint32_t *)data.data(); |
| if (SaveImages()) { |
| SavePixelsToFile(pPixels, DXGI_FORMAT_R8G8B8A8_UNORM, 320, 200, FileName); |
| } |
| uint32_t top = pPixels[width / 2]; // Top center. |
| uint32_t mid = pPixels[width / 2 + width * (height / 2)]; // Middle center. |
| VERIFY_ARE_EQUAL(0xff663300, top); // clear color |
| VERIFY_ARE_EQUAL(0xffffffff, mid); // white |
| |
| // This is the basic validation test for shader operations, so it's good to |
| // check this here at least for this one test case. |
| data.reset(); |
| test.reset(); |
| ReportLiveObjects(); |
| #endif |
| } |
| |
| TEST_F(ExecutionTest, BasicTriangleOpTest) { |
| BasicTriangleTestSetup("Triangle", L"basic-triangle.bmp", D3D_SHADER_MODEL_6_0); |
| } |
| |
| TEST_F(ExecutionTest, BasicTriangleOpTestHalf) { |
| BasicTriangleTestSetup("TriangleHalf", L"basic-triangle-half.bmp", D3D_SHADER_MODEL_6_2); |
| } |
| |
| void VerifyDerivResults(const float *pPixels, UINT offsetCenter) { |
| |
| // pixel at the center |
| float CenterDDXFine = pPixels[offsetCenter]; |
| float CenterDDYFine = pPixels[offsetCenter + 1]; |
| float CenterDDXCoarse = pPixels[offsetCenter + 2]; |
| float CenterDDYCoarse = pPixels[offsetCenter + 3]; |
| |
| LogCommentFmt( |
| L"center ddx_fine: %8f, ddy_fine: %8f, ddx_coarse: %8f, ddy_coarse: %8f", |
| CenterDDXFine, CenterDDYFine, CenterDDXCoarse, CenterDDYCoarse); |
| |
| // The texture for the 9 pixels in the center should look like the following |
| |
| // 256 32 64 |
| // 2048 256 512 |
| // 1 .125 .25 |
| |
| // In D3D12 there is no guarantee of how the adapter is grouping 2x2 pixels |
| // So for fine derivatives there can be up to two possible results for the center pixel, |
| // while for coarse derivatives there can be up to six possible results. |
| int ulpTolerance = 1; |
| // 512 - 256 or 2048 - 256 |
| bool left = CompareFloatULP(CenterDDXFine, -1792.0f, ulpTolerance); |
| VERIFY_IS_TRUE(left || CompareFloatULP(CenterDDXFine, 256.0f, ulpTolerance)); |
| // 256 - 32 or 256 - .125 |
| bool top = CompareFloatULP(CenterDDYFine, 224.0f, ulpTolerance); |
| VERIFY_IS_TRUE(top || CompareFloatULP(CenterDDYFine, -255.875, ulpTolerance)); |
| |
| if (top && left) { |
| VERIFY_IS_TRUE((CompareFloatULP(CenterDDXCoarse, -224.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDXCoarse, -1792.0f, ulpTolerance)) && |
| (CompareFloatULP(CenterDDYCoarse, 224.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDYCoarse, 1792.0f, ulpTolerance))); |
| } |
| else if (top) { // top right quad |
| VERIFY_IS_TRUE((CompareFloatULP(CenterDDXCoarse, 256.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDXCoarse, 32.0f, ulpTolerance)) && |
| (CompareFloatULP(CenterDDYCoarse, 224.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDYCoarse, 448.0f, ulpTolerance))); |
| } |
| else if (left) { // bottom left quad |
| VERIFY_IS_TRUE((CompareFloatULP(CenterDDXCoarse, -1792.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDXCoarse, -.875f, ulpTolerance)) && |
| (CompareFloatULP(CenterDDYCoarse, -2047.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDYCoarse, -255.875f, ulpTolerance))); |
| } |
| else { // bottom right |
| VERIFY_IS_TRUE((CompareFloatULP(CenterDDXCoarse, 256.0f, ulpTolerance) || |
| CompareFloatULP(CenterDDXCoarse, .125f, ulpTolerance)) && |
| (CompareFloatULP(CenterDDYCoarse, -255.875f, ulpTolerance) || |
| CompareFloatULP(CenterDDYCoarse, -511.75f, ulpTolerance))); |
| } |
| } |
| |
| // Rendering two right triangles forming a square and assigning a texture value |
| // for each pixel to calculate derivates. |
| TEST_F(ExecutionTest, PartialDerivTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "DerivFine", nullptr); |
| MappedData data; |
| D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; |
| UINT width = (UINT)D.Width; |
| UINT height = D.Height; |
| UINT pixelSize = GetByteSizeForFormat(D.Format) / 4; |
| |
| test->Test->GetReadBackData("RTarget", &data); |
| const float *pPixels = (float *)data.data(); |
| |
| UINT centerIndex = (UINT64)width * height / 2 - width / 2; |
| UINT offsetCenter = centerIndex * pixelSize; |
| |
| VerifyDerivResults(pPixels, offsetCenter); |
| } |
| |
// Three dispatch dimensions, initialised positionally as {width, height, depth}.
struct Dispatch {
  int width;
  int height;
  int depth;
};
| |
| std::shared_ptr<st::ShaderOpTest> |
| RunDispatch(ID3D12Device *pDevice, dxc::DxcDllSupport &support, |
| st::ShaderOp *pShaderOp, const Dispatch D) { |
| char compilerOptions[256]; |
| |
| std::shared_ptr<st::ShaderOpTest> test = std::make_shared<st::ShaderOpTest>(); |
| test->SetDxcSupport(&support); |
| test->SetInitCallback(nullptr); |
| test->SetDevice(pDevice); |
| |
| // format compiler args |
| VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), |
| "-D DISPATCHX=%d -D DISPATCHY=%d -D DISPATCHZ=%d ", |
| D.width, D.height, D.depth)); |
| |
| for (st::ShaderOpShader &S : pShaderOp->Shaders) |
| S.Arguments = compilerOptions; |
| |
| pShaderOp->DispatchX = D.width; |
| pShaderOp->DispatchY = D.height; |
| pShaderOp->DispatchZ = D.depth; |
| |
| test->RunShaderOp(pShaderOp); |
| |
| return test; |
| } |
| |
| TEST_F(ExecutionTest, DerivativesTest) { |
| const UINT pixelSize = 4; // always float4 |
| |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("Derivatives"); |
| |
| std::vector<Dispatch> dispatches = |
| { |
| {40, 1, 1}, |
| {1000, 1, 1}, |
| {32, 32, 1}, |
| {16, 64, 1}, |
| {4, 12, 4}, |
| {4, 64, 1}, |
| {16, 16, 3}, |
| {32, 8, 2} |
| }; |
| |
| std::vector<Dispatch> meshDispatches = |
| { |
| {60, 1, 1}, |
| {128, 1, 1}, |
| {8, 8, 1}, |
| {32, 8, 1}, |
| {8, 16, 4}, |
| {8, 64, 1}, |
| {8, 8, 3}, |
| }; |
| |
| std::vector<Dispatch> badDispatches = |
| { |
| {16, 3, 1}, |
| {2, 16, 1}, |
| {33, 1, 1} |
| }; |
| |
| pShaderOp->UseWarpDevice = GetTestParamUseWARP(true); |
| LPCSTR CS = pShaderOp->CS; |
| |
| MappedData data; |
| |
| for (Dispatch &D : dispatches) { |
| // Test Compute Shader |
| std::shared_ptr<st::ShaderOpTest> test = RunDispatch(pDevice, m_support, pShaderOp, D); |
| |
| test->GetReadBackData("U0", &data); |
| |
| float *pPixels = (float *)data.data();; |
| |
| UINT centerIndex = 0; |
| if (D.height == 1) { |
| centerIndex = (((UINT64)(D.width * D.height * D.depth) / 2) & ~0xF) + 10; |
| } else { |
| // To find roughly the center for compute, divide the height and width in half, |
| // truncate to the previous multiple of 4 to get to the start of the repeating pattern |
| // and then add 2 rows to get to the second row of quads and 2 to get to the first texel |
| // of the second row of that quad row |
| UINT centerRow = ((D.height/2UL) & ~0x3) + 2; |
| UINT centerCol = ((D.width/2UL) & ~0x3) + 2; |
| centerIndex = centerRow * D.width + centerCol; |
| } |
| UINT offsetCenter = centerIndex * pixelSize; |
| LogCommentFmt(L"Verifying derivatives in compute shader results"); |
| VerifyDerivResults(pPixels, offsetCenter); |
| } |
| |
| if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { |
| // Disable CS so mesh goes forward |
| pShaderOp->CS = nullptr; |
| |
| for (Dispatch &D : meshDispatches) { |
| std::shared_ptr<st::ShaderOpTest> test = RunDispatch(pDevice, m_support, pShaderOp, D); |
| |
| test->GetReadBackData("U1", &data); |
| const float *pPixels = (float *)data.data(); |
| UINT centerIndex = (((UINT64)(D.width * D.height * D.depth)/2) & ~0xF) + 10; |
| UINT offsetCenter = centerIndex * pixelSize; |
| LogCommentFmt(L"Verifying derivatives in mesh shader results"); |
| VerifyDerivResults(pPixels, offsetCenter); |
| |
| test->GetReadBackData("U2", &data); |
| pPixels = (float *)data.data(); |
| LogCommentFmt(L"Verifying derivatives in amplification shader results"); |
| VerifyDerivResults(pPixels, offsetCenter); |
| } |
| } |
| |
| // Final tests with invalid dispatch size just to make sure they run |
| for (Dispatch &D : badDispatches) { |
| // Test Compute Shader |
| pShaderOp->CS = CS; |
| std::shared_ptr<st::ShaderOpTest> test = RunDispatch(pDevice, m_support, pShaderOp, D); |
| |
| if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { |
| pShaderOp->CS = nullptr; |
| test = RunDispatch(pDevice, m_support, pShaderOp, D); |
| } |
| } |
| } |
| |
| // Verify the results for the quad starting with the given index |
| void VerifyQuadReadResults(const UINT *pPixels, UINT quadIndex) { |
| for (UINT i = 0; i < 4; i++) { |
| UINT ix = quadIndex + i; |
| UINT lix = pPixels[4*ix]; |
| VERIFY_ARE_EQUAL(pPixels[4*ix + 1], (lix^1));// ReadAcrossX |
| VERIFY_ARE_EQUAL(pPixels[4*ix + 2], (lix^2));// ReadAcrossY |
| VERIFY_ARE_EQUAL(pPixels[4*ix + 3], (lix^3));// ReadAcrossDiagonal |
| } |
| } |
| |
| |
| TEST_F(ExecutionTest, QuadReadTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| if (GetTestParamUseWARP(UseWarpByDefault()) || IsDeviceBasicAdapter(pDevice)) { |
| WEX::Logging::Log::Comment(L"WARP does not support QuadRead in compute shaders."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("QuadRead"); |
| LPCSTR CS = pShaderOp->CS; |
| |
| struct Dispatch { |
| int x, y, z; |
| int mx, my, mz; |
| }; |
| //std::vector<std::tuple<int, int, int, int, int>> dispatches = |
| std::vector<Dispatch> dispatches = |
| { |
| {32, 32, 1, 8, 8, 1}, |
| {64, 4, 1, 64, 2, 1}, |
| {64, 1, 1, 64, 1, 1}, |
| {16, 16, 3, 4, 4, 3}, |
| }; |
| |
| for (Dispatch &D : dispatches) { |
| |
| UINT width = D.x; |
| UINT height = D.y; |
| UINT depth = D.z; |
| |
| UINT mwidth = D.mx; |
| UINT mheight = D.my; |
| UINT mdepth = D.mz; |
| // format compiler args |
| char compilerOptions[256]; |
| VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), |
| "-D DISPATCHX=%d -D DISPATCHY=%d -D DISPATCHZ=%d " |
| "-D MESHDISPATCHX=%d -D MESHDISPATCHY=%d -D MESHDISPATCHZ=%d", |
| width, height, depth, mwidth, mheight, mdepth)); |
| |
| for (st::ShaderOpShader &S : pShaderOp->Shaders) |
| S.Arguments = compilerOptions; |
| |
| pShaderOp->DispatchX = width; |
| pShaderOp->DispatchY = height; |
| pShaderOp->DispatchZ = depth; |
| |
| // Test Compute Shader |
| pShaderOp->CS = CS; |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, ShaderOpSet); |
| MappedData data; |
| |
| test->Test->GetReadBackData("U0", &data); |
| const UINT *pPixels = (UINT *)data.data(); |
| |
| // To find roughly the center for compute, divide the pixel count in half |
| // and truncate to next lowest power of 4 to start at a quad |
| UINT offsetCenter = ((UINT64)(width * height * depth)/2) & ~0x3; |
| |
| // Test first, second and center quads |
| LogCommentFmt(L"Verifying QuadRead* in compute shader results"); |
| VerifyQuadReadResults(pPixels, 0); |
| VerifyQuadReadResults(pPixels, 4); |
| VerifyQuadReadResults(pPixels, offsetCenter); |
| |
| if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { |
| offsetCenter = ((UINT64)(mwidth * mheight * mdepth)/2) & ~0x3; |
| |
| // Disable CS so mesh goes forward |
| pShaderOp->CS = nullptr; |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, ShaderOpSet); |
| test->Test->GetReadBackData("U1", &data); |
| pPixels = (UINT *)data.data(); |
| // Test first, second and center quads |
| LogCommentFmt(L"Verifying QuadRead* in mesh shader results"); |
| VerifyQuadReadResults(pPixels, 0); |
| VerifyQuadReadResults(pPixels, 4); |
| VerifyQuadReadResults(pPixels, offsetCenter); |
| |
| test->Test->GetReadBackData("U2", &data); |
| pPixels = (UINT *)data.data(); |
| // Test first, second and center quads |
| LogCommentFmt(L"Verifying QuadRead* in amplification shader results"); |
| VerifyQuadReadResults(pPixels, 0); |
| VerifyQuadReadResults(pPixels, 4); |
| VerifyQuadReadResults(pPixels, offsetCenter); |
| } |
| } |
| } |
| |
| void VerifySampleResults(const UINT *pPixels, UINT width) { |
| UINT xlod = 0; |
| UINT ylod = 0; |
| // Each pixel contains 4 samples and 4 LOD calculations. |
| // 2 of these (called 'left' and 'right') have X values that vary and a constant Y |
| // 2 others (called 'top' and 'bot') have Y values that vary and a constant X |
| // Only of the X variant sample results and one of the Y variant results |
| // are actually reported for the pixel. |
| // The other 2 serve as "helpers" to the other pixels in the quad. |
| // On the left side of the quad, the 'left' samples are reported. |
| // Op the top of the quad, the 'top' samples are reported and so on. |
| // The varying coordinate values alternate between zero and a |
| // value whose magnitude increases with the index. |
| // As a result, the LOD level should steadily increas. |
| // Due to vagaries of implementation, the same derivatives |
| // in both directions might result in different levels for different locations |
| // in the quad. So only comparisons between sample results and LOD calculations |
| // and ensuring that the LOD increased and reaches the max can be tested reliably. |
| for (unsigned i = 0; i < width; i++) { |
| // CalculateLOD and Sample from texture with mip levels containing LOD index should match |
| VERIFY_ARE_EQUAL(pPixels[4*i + 0], pPixels[4*i + 1]); |
| VERIFY_ARE_EQUAL(pPixels[4*i + 2], pPixels[4*i + 3]); |
| // Make sure LODs are ever climbing as magnitudes increase |
| VERIFY_IS_TRUE(pPixels[4*i] >= xlod); |
| xlod = pPixels[4*i]; |
| VERIFY_IS_TRUE(pPixels[4*i + 2] >= ylod); |
| ylod = pPixels[4*i + 2]; |
| } |
| // Make sure we reached the max lod level for both tracks |
| VERIFY_ARE_EQUAL(xlod, 6u); |
| VERIFY_ARE_EQUAL(ylod, 6u); |
| } |
| |
| TEST_F(ExecutionTest, ComputeSampleTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("ComputeSample"); |
| |
| // Initialize texture with the LOD number in each corresponding mip level |
| auto SampleInitFn = [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(pShaderOp); |
| VERIFY_ARE_EQUAL(0, _stricmp(Name, "T0")); |
| D3D12_RESOURCE_DESC &texDesc = pShaderOp->GetResourceByName("T0")->Desc; |
| UINT texWidth = (UINT)texDesc.Width; |
| UINT texHeight = (UINT)texDesc.Height; |
| size_t size = sizeof(float) * texWidth * texHeight * 2; |
| Data.resize(size); |
| float *pPrimitives = (float *)Data.data(); |
| float lod = 0.0; |
| int ix = 0; |
| while (texHeight > 0 && texWidth > 0) { |
| if(!texHeight) texHeight = 1; |
| if(!texWidth) texWidth = 1; |
| for (size_t j = 0; j < texHeight; ++j) { |
| for (size_t i = 0; i < texWidth; ++i) { |
| pPrimitives[ix++] = lod; |
| } |
| } |
| lod += 1.0; |
| texHeight >>= 1; |
| texWidth >>= 1; |
| } |
| }; |
| LPCSTR CS2 = nullptr, AS2 = nullptr, MS2 = nullptr; |
| for (st::ShaderOpShader &S : pShaderOp->Shaders) { |
| if (!strcmp(S.Name, "CS2")) CS2 = S.Name; |
| if (!strcmp(S.Name, "AS2")) AS2 = S.Name; |
| if (!strcmp(S.Name, "MS2")) MS2 = S.Name; |
| } |
| |
| // Test 1D compute shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); |
| MappedData data; |
| |
| test->Test->GetReadBackData("U0", &data); |
| const UINT *pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 84*4); |
| |
| // Test 2D compute shader |
| pShaderOp->CS = CS2; |
| |
| test.reset(); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); |
| |
| test->Test->GetReadBackData("U0", &data); |
| pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 84*4); |
| |
| |
| if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { |
| // Disable CS so mesh goes forward |
| pShaderOp->CS = nullptr; |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); |
| test->Test->GetReadBackData("U1", &data); |
| pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 116); |
| |
| test->Test->GetReadBackData("U2", &data); |
| pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 84); |
| |
| pShaderOp->AS = AS2; |
| pShaderOp->MS = MS2; |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); |
| test->Test->GetReadBackData("U1", &data); |
| pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 116); |
| |
| test->Test->GetReadBackData("U2", &data); |
| pPixels = (UINT *)data.data(); |
| |
| VerifySampleResults(pPixels, 84); |
| } |
| } |
| |
| TEST_F(ExecutionTest, ATOWriteMSAATest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| // #define WRITEMSAA_FALLBACK |
| |
| CComPtr<ID3D12Device> pDevice; |
| #ifdef WRITEMSAA_FALLBACK |
| D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_6; |
| #else |
| D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; |
| #endif |
| if (!CreateDevice(&pDevice, sm)) |
| return; |
| |
| #ifndef WRITEMSAA_FALLBACK |
| if (!DoesDeviceSupportAdvancedTexOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support Advanced Texture Operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportWritableMSAA(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support Writable MSAA."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| #endif |
| |
| static const char pWriteShader[] = |
| "#define SAMPLES 4\n" |
| "RWStructuredBuffer<float> g_out : register(u0);\n" |
| "#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)\n" |
| "RWTexture2DMS<float, 4> g_texms : register(u1);\n" |
| "#else\n" |
| "RWTexture2DArray<float> g_texms : register(u1);\n" |
| "#endif\n" |
| "[NumThreads(32, 32, 1)]\n" |
| "void main(uint3 id : SV_GroupThreadID) {\n" |
| " for(uint i = 0; i < SAMPLES; i++) {\n" |
| "#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)\n" |
| " g_texms.sample[i][id.xy] = id.x*id.y*(i+1);\n" |
| "#else\n" |
| " g_texms[uint3(id.xy, i)] = id.x*id.y*(i+1);\n" |
| "#endif\n" |
| " }\n" |
| "}"; |
| |
| static const char pCopyShader[] = |
| "#define SAMPLES 4\n" |
| "RWStructuredBuffer<float> g_out : register(u0);\n" |
| "#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)\n" |
| "RWTexture2DMS<float, 4> g_texms : register(u1);\n" |
| "#else\n" |
| "RWTexture2DArray<float> g_texms : register(u1);\n" |
| "#endif\n" |
| "[NumThreads(32, 32, 1)]\n" |
| " void main(uint3 id : SV_GroupThreadID) {\n" |
| " for(uint i = 0; i < SAMPLES; i++) {\n" |
| "#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)\n" |
| " g_out[i*32*32 + id.y*32 + id.x] = g_texms.sample[i][id.xy];\n" |
| "#else\n" |
| " g_out[i*32*32 + id.y*32 + id.x] = g_texms[uint3(id.xy, i)];\n" |
| "#endif\n" |
| " }" |
| "}"; |
| |
| static const int NumThreadsX = 32; |
| static const int NumThreadsY = 32; |
| |
| #ifdef WRITEMSAA_FALLBACK |
| static const int NumSamples = 4; |
| static const int ArraySize = 4; |
| #else |
| static const int NumSamples = 4; |
| static const int ArraySize = 1; |
| #endif |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY; |
| const size_t valueSize = NumSamples * ThreadsPerGroup; |
| const size_t valueSizeInBytes = valueSize * sizeof(float); |
| |
| static const int DispatchGroupX = 1; |
| static const int DispatchGroupY = 1; |
| static const int DispatchGroupZ = 1; |
| |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| FenceObj FO; |
| |
| CreateComputeCommandQueue(pDevice, L"WriteMSAA Queue", &pCommandQueue); |
| InitFenceObj(pDevice, &FO); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| CD3DX12_DESCRIPTOR_RANGE ranges[2]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0); |
| ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1, 0); |
| |
| CreateRootSignatureFromRanges(pDevice, &pRootSignature, ranges, 2); |
| |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| |
| // Create command list and resources |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, |
| pCommandAllocator, nullptr, IID_PPV_ARGS(&pCommandList))); |
| |
| // Set up Output Resource |
| CComPtr<ID3D12Resource> pOutputResource; |
| CComPtr<ID3D12Resource> pOutputReadBuffer; |
| CComPtr<ID3D12Resource> pOutputUploadResource; |
| |
| float outVals[valueSize]; |
| int ix = 0; |
| for (int i = 0; i < NumSamples; i++) |
| for (int j = 0; j < NumThreadsY; j++) |
| for (int k = 0; k < NumThreadsX; k++) |
| outVals[ix++] = (float)ix + 5; |
| CreateTestUavs(pDevice, pCommandList, outVals, sizeof(outVals), &pOutputResource, |
| &pOutputUploadResource, &pOutputReadBuffer); |
| |
| // Set up texture Resource. |
| CComPtr<ID3D12Resource> pUavResource; |
| float values[valueSize]; |
| memset(values, 0xc, valueSizeInBytes); |
| |
| |
| #ifdef WRITEMSAA_FALLBACK |
| int numsamp = 1; |
| #else |
| int numsamp = NumSamples; |
| #endif |
| |
| D3D12_RESOURCE_DESC tex2dDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, |
| NumThreadsX, NumThreadsY, ArraySize, 1, numsamp, 0, |
| D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS | D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); |
| CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, tex2dDesc, |
| &pUavResource, nullptr); |
| |
| // Close the command list and execute it to perform the resource uploads |
| pCommandList->Close(); |
| ID3D12CommandList *ppCommandLists[] = { pCommandList }; |
| pCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| WaitForSignal(pCommandQueue, FO); |
| |
| // Create shaders |
| #ifdef WRITEMSAA_FALLBACK |
| const wchar_t *target = L"cs_6_6"; |
| #else |
| const wchar_t *target = L"cs_6_7"; |
| #endif |
| |
| CComPtr<ID3D12PipelineState> pWritePSO; |
| CreateComputePSO(pDevice, pRootSignature, pWriteShader, target, &pWritePSO); |
| CComPtr<ID3D12PipelineState> pCopyPSO; |
| CreateComputePSO(pDevice, pRootSignature, pCopyShader, target, &pCopyPSO); |
| |
| // Reset commandlist to write PSO |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pWritePSO)); |
| |
| // Describe and create a UAV descriptor heap. |
| CComPtr<ID3D12DescriptorHeap> pUavHeap; |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 2; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pUavHeap))); |
| |
| CD3DX12_CPU_DESCRIPTOR_HANDLE cpuHandle(pUavHeap->GetCPUDescriptorHandleForHeapStart()); |
| CreateStructUAV(pDevice, cpuHandle, valueSize, sizeof(float), pOutputResource); |
| #ifdef WRITEMSAA_FALLBACK |
| CreateTex2DArrayUAV(pDevice, cpuHandle, NumSamples, DXGI_FORMAT_R32_FLOAT, pUavResource); |
| #else |
| CreateTex2DMSUAV(pDevice, cpuHandle, DXGI_FORMAT_R32_FLOAT, pUavResource); |
| #endif |
| |
| // Set Heaps, Rootsignature and table |
| ID3D12DescriptorHeap *const pHeaps[1] = { pUavHeap }; |
| pCommandList->SetDescriptorHeaps(1, pHeaps); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| pCommandList->SetComputeRootDescriptorTable(0, pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| |
| // dispatch and close write shader |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| pCommandList->Close(); |
| |
| pCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| WaitForSignal(pCommandQueue, FO); |
| |
| // Create copy command list |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pCopyPSO)); |
| |
| // Set Rootsignature and descriptor tables |
| SetDescriptorHeap(pCommandList, pUavHeap); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| |
| pCommandList->SetComputeRootDescriptorTable(0, pUavHeap->GetGPUDescriptorHandleForHeapStart()); |
| |
| // Run Copy shader and copy the results back to readable memory |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| |
| CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(pOutputResource, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->ResourceBarrier(1, &barrier); |
| pCommandList->CopyResource(pOutputReadBuffer, pOutputResource); |
| |
| pCommandList->Close(); |
| |
| pCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| WaitForSignal(pCommandQueue, FO); |
| |
| MappedData mappedData(pOutputReadBuffer, valueSize*sizeof(float)); |
| float *pData = (float *)mappedData.data(); |
| ix = 0; |
| for (int i = 0; i < NumSamples; i++) |
| for (int j = 0; j < NumThreadsY; j++) |
| for (int k = 0; k < NumThreadsX; k++) |
| VERIFY_ARE_EQUAL(pData[ix++], j*k*(i+1)); |
| } |
| |
// Used to determine how an out of bounds offset should be converted.
// Only the low four bits of the offset are kept and then sign-extended, so the
// result always lies in [-8, 7] (e.g. -9 becomes 7 and 8 becomes -8).
// Implemented with mask/xor/subtract instead of shifting a signed value so
// that negative inputs are handled without relying on signed-shift behavior,
// and the argument is parenthesized so compound expressions expand correctly.
#define CLAMPOFFSET(offset) ((int)((((unsigned)(offset)) & 0xFu) ^ 0x8u) - 8)
| |
| // Determine if the values in pPixels correspond to the expected locations encoded into a uint |
| // based on the coordinates and offsets that were provided. |
| void VerifyProgOffsetResults(unsigned *pPixels, bool bCheckDeriv) { |
| // Check that each element matches the expected value given the offset |
| unsigned ix = 0; |
| int coords[18] = {100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950}; |
| int offsets[18] = {CLAMPOFFSET(-9), -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, CLAMPOFFSET(8)}; |
| for (unsigned y = 0; y < _countof(coords); y++) { |
| for (unsigned x = 0; x < _countof(coords); x++) { |
| unsigned cmp = (coords[y] + offsets[y])*1000 + coords[x] + offsets[x]; |
| if (bCheckDeriv) { |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+0], cmp); // Sample |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+1], 1U); // SampleCmp |
| } |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+2], 1U); // SampleCmpLevel |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+3], 1U); // SampleCmpLevelZero |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+4], cmp); // Load |
| if (bCheckDeriv) { |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+5], cmp); // SampleBias |
| } |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+6], cmp); // SampleGrad |
| VERIFY_ARE_EQUAL(pPixels[2*4*ix+7], cmp); // SampleLevel |
| ix++; |
| } |
| } |
| } |
| |
| // Fills a 1000x1000 float texture with index values increasing in row-major order |
| // The shader then uses non-immediate offsets extending from -9 to 8 to access these using |
| // Load, Sample, SampleCmp and variants thereof. |
| // The test verifies that the locations accessed correspond to where they should. |
| TEST_F(ExecutionTest, ATOProgOffset) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("ProgOffset"); |
| |
| auto SampleInitFn = [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(pShaderOp); |
| D3D12_RESOURCE_DESC &texDesc = pShaderOp->GetResourceByName(Name)->Desc; |
| UINT texWidth = (UINT)texDesc.Width; |
| UINT texHeight = (UINT)texDesc.Height; |
| size_t size = sizeof(float) * texWidth * texHeight; |
| Data.resize(size); |
| float *pPrimitives = (float *)Data.data(); |
| int ix = 0; |
| for (size_t j = 0; j < texHeight; ++j) { |
| for (size_t i = 0; i < texWidth; ++i) { |
| pPrimitives[ix] = float(ix); |
| ix++; |
| } |
| } |
| }; |
| |
| bool bTestsSkipped = true; |
| D3D_SHADER_MODEL TestShaderModels[] = {D3D_SHADER_MODEL_6_5, |
| D3D_SHADER_MODEL_6_6, |
| D3D_SHADER_MODEL_6_7}; |
| for (unsigned i = 0; i < _countof(TestShaderModels); i++) { |
| D3D_SHADER_MODEL sm = TestShaderModels[i]; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm, /*skipUnsupported*/false)) { |
| LogCommentFmt(L"Device does not support shader model 6.%1u", |
| ((UINT)sm & 0x0f)); |
| break; |
| } |
| if (sm >= D3D_SHADER_MODEL_6_7 && !DoesDeviceSupportAdvancedTexOps(pDevice)) { |
| LogCommentFmt(L"Device does not support Advanced Texture Ops"); |
| break; |
| } |
| |
| bool bSupportMSASDeriv = DoesDeviceSupportMeshAmpDerivatives(pDevice); |
| |
| bool bCheckDerivCS = sm >= D3D_SHADER_MODEL_6_6; |
| bool bCheckDerivMSAS = bCheckDerivCS && bSupportMSASDeriv; |
| |
| if (bCheckDerivCS && !bSupportMSASDeriv) { |
| LogCommentFmt(L"Device does not support derivatives in Mesh and Amplification shaders"); |
| } |
| |
| switch (sm) { |
| case D3D_SHADER_MODEL_6_5: |
| pShaderOp->CS = pShaderOp->GetString("CS"); |
| pShaderOp->PS = pShaderOp->GetString("PS"); |
| pShaderOp->MS = pShaderOp->GetString("MS"); |
| pShaderOp->AS = pShaderOp->GetString("AS"); |
| break; |
| case D3D_SHADER_MODEL_6_6: |
| pShaderOp->CS = pShaderOp->GetString("CS66"); |
| pShaderOp->PS = pShaderOp->GetString("PS"); |
| if (bCheckDerivMSAS) { |
| pShaderOp->MS = pShaderOp->GetString("MS66D"); |
| pShaderOp->AS = pShaderOp->GetString("AS66D"); |
| } else { |
| pShaderOp->MS = pShaderOp->GetString("MS66"); |
| pShaderOp->AS = pShaderOp->GetString("AS66"); |
| } |
| break; |
| case D3D_SHADER_MODEL_6_7: |
| pShaderOp->CS = pShaderOp->GetString("CS67"); |
| pShaderOp->PS = pShaderOp->GetString("PS67"); |
| if (bCheckDerivMSAS) { |
| pShaderOp->MS = pShaderOp->GetString("MS67D"); |
| pShaderOp->AS = pShaderOp->GetString("AS67D"); |
| } else { |
| pShaderOp->MS = pShaderOp->GetString("MS67"); |
| pShaderOp->AS = pShaderOp->GetString("AS67"); |
| } |
| break; |
| } |
| |
| // Test compute shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet); |
| MappedData data; |
| |
| test->Test->GetReadBackData("U0", &data); |
| VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivCS); |
| |
| // Disable CS so graphics shaders go forward |
| pShaderOp->CS = nullptr; |
| |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet); |
| |
| // PS |
| test->Test->GetReadBackData("U0", &data); |
| VerifyProgOffsetResults((UINT*)data.data(), true); |
| |
| // MS |
| test->Test->GetReadBackData("U1", &data); |
| VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivMSAS); |
| |
| // AS |
| test->Test->GetReadBackData("U2", &data); |
| VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivMSAS); |
| } |
| |
| // Disable MS so PS goes forward |
| pShaderOp->MS = nullptr; |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet); |
| |
| test->Test->GetReadBackData("U0", &data); |
| VerifyProgOffsetResults((UINT*)data.data(), true); |
| |
| bTestsSkipped = false; |
| } |
| |
| if (bTestsSkipped) { |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| } |
| |
| // A mipmapped texture containing the value of LOD at each location in each |
| // level is used to sample at each level using SampleCmpLevel and confirm |
| // that the correct level is used for the comparison. |
| TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_7)) |
| return; |
| |
| if (!DoesDeviceSupportAdvancedTexOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support Advanced Texture Operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("SampleCmpLevel"); |
| |
| // Initialize texture with the LOD number in each corresponding mip level |
| auto SampleInitFn = [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(pShaderOp); |
| D3D12_RESOURCE_DESC &texDesc = pShaderOp->GetResourceByName(Name)->Desc; |
| UINT texWidth = (UINT)texDesc.Width; |
| UINT texHeight = (UINT)texDesc.Height; |
| size_t size = sizeof(float) * texWidth * texHeight * 2; |
| Data.resize(size); |
| float *pPrimitives = (float *)Data.data(); |
| float val = 0.5; |
| int ix = 0; |
| while (texHeight > 0 && texWidth > 0) { |
| if(!texHeight) texHeight = 1; |
| if(!texWidth) texWidth = 1; |
| for (size_t j = 0; j < texHeight; ++j) { |
| for (size_t i = 0; i < texWidth; ++i) { |
| pPrimitives[ix++] = val; |
| } |
| } |
| val += 1.0; |
| texHeight >>= 1; |
| texWidth >>= 1; |
| } |
| }; |
| |
| // Test compute shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", SampleInitFn, ShaderOpSet); |
| MappedData data; |
| |
| test->Test->GetReadBackData("U0", &data); |
| const UINT *pPixels = (UINT *)data.data(); |
| |
| // Check that each LOD matches what's expected |
| unsigned count = 2*7; |
| // Since the results consist of a boolean, which should be true followed by the result of a sampcmplvl, |
| // the only result expected is 1. |
| for (unsigned i = 0; i < count; i++) |
| VERIFY_ARE_EQUAL(pPixels[i], 1U); |
| |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| // Disable CS so mesh goes forward |
| pShaderOp->CS = nullptr; |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", SampleInitFn, ShaderOpSet); |
| |
| test->Test->GetReadBackData("U0", &data); |
| pPixels = (UINT *)data.data(); |
| |
| for (unsigned i = 0; i < count; i++) |
| VERIFY_ARE_EQUAL(pPixels[i], 1U); |
| |
| test->Test->GetReadBackData("U1", &data); |
| pPixels = (UINT *)data.data(); |
| |
| for (unsigned i = 0; i < count; i++) |
| VERIFY_ARE_EQUAL(pPixels[i], 1U); |
| |
| test->Test->GetReadBackData("U2", &data); |
| pPixels = (UINT *)data.data(); |
| |
| for (unsigned i = 0; i < count; i++) |
| VERIFY_ARE_EQUAL(pPixels[i], 1U); |
| } |
| } |
| |
// Integer texel with a single red channel that is RSize bits wide.
template <unsigned RSize>
struct IntR {
  unsigned R : RSize;

  // Stores the red value (truncated to RSize bits). Green, blue and alpha do
  // not exist in this layout and are deliberately ignored.
  void SetChannels(unsigned R, unsigned G, unsigned B, unsigned A) {
    (void)G;
    (void)B;
    (void)A;
    this->R = R;
  }

  // Bit width of each channel; zero means the channel is not present.
  static unsigned GetRSize() { return RSize; }
  static unsigned GetGSize() { return 0; }
  static unsigned GetBSize() { return 0; }
  static unsigned GetASize() { return 0; }
};
| |
// Integer texel with red and green channels of RSize and GSize bits.
template <unsigned RSize, unsigned GSize>
struct IntRG {
  unsigned R : RSize;
  unsigned G : GSize;

  // Stores red and green (each truncated to its bit width). Blue and alpha do
  // not exist in this layout and are deliberately ignored.
  void SetChannels(unsigned R, unsigned G, unsigned B, unsigned A) {
    (void)B;
    (void)A;
    this->R = R;
    this->G = G;
  }

  // Bit width of each channel; zero means the channel is not present.
  static unsigned GetRSize() { return RSize; }
  static unsigned GetGSize() { return GSize; }
  static unsigned GetBSize() { return 0; }
  static unsigned GetASize() { return 0; }
};
| |
// Integer texel with red, green and blue channels of the given bit widths.
template <unsigned RSize, unsigned GSize, unsigned BSize>
struct IntRGB {
  unsigned R : RSize;
  unsigned G : GSize;
  unsigned B : BSize;

  // Stores red, green and blue (each truncated to its bit width). Alpha does
  // not exist in this layout and is deliberately ignored.
  void SetChannels(unsigned R, unsigned G, unsigned B, unsigned A) {
    (void)A;
    this->R = R;
    this->G = G;
    this->B = B;
  }

  // Bit width of each channel; zero means the channel is not present.
  static unsigned GetRSize() { return RSize; }
  static unsigned GetGSize() { return GSize; }
  static unsigned GetBSize() { return BSize; }
  static unsigned GetASize() { return 0; }
};
| |
| |
// Integer texel with all four channels present, each with its own bit width.
template <unsigned RSize, unsigned GSize, unsigned BSize, unsigned ASize>
struct IntRGBA {
  unsigned R : RSize;
  unsigned G : GSize;
  unsigned B : BSize;
  unsigned A : ASize;

  // Stores every channel, each truncated to the width of its bit-field.
  void SetChannels(unsigned R, unsigned G, unsigned B, unsigned A) {
    this->A = A;
    this->B = B;
    this->G = G;
    this->R = R;
  }

  // Bit width of each channel.
  static unsigned GetRSize() { return RSize; }
  static unsigned GetGSize() { return GSize; }
  static unsigned GetBSize() { return BSize; }
  static unsigned GetASize() { return ASize; }
};
| |
| struct IntRGBA10XRA2UNORM { |
| uint32_t RGBA; |
| void SetChannels(float R, float G, float B, float A) { |
| uint32_t ur, ug, ub, ua; |
| // Conversion values taken from XR documentation |
| ur = GetMantissa(R*510+385); |
| ub = GetMantissa(B*510+385); |
| ug = GetMantissa(G*510+385); |
| ua = (uint32_t)A; |
| |
| // Cast off all but the 10 MSB and shift for packing |
| ur = (ur&0x7fE000) >> 13; |
| ug = (ur&0x7fE000) >> 3; |
| ub = (ur&0x7fE000) << 7; |
| ua = (ua&0x3) << 30; |
| |
| RGBA = ur | ug | ub | ua; |
| } |
| }; |
| |
// Texel holding one 32-bit float red channel.
struct Float32R {
  float R;

  // Stores red unchanged; green, blue and alpha have no storage in this
  // layout and are deliberately ignored.
  void SetChannels(float R, float G, float B, float A) {
    (void)G;
    (void)B;
    (void)A;
    this->R = R;
  }
};
| |
// Texel holding 32-bit float red and green channels.
struct Float32RG {
  float R, G;

  // Stores red and green unchanged; blue and alpha have no storage in this
  // layout and are deliberately ignored.
  void SetChannels(float R, float G, float B, float A) {
    (void)B;
    (void)A;
    this->R = R;
    this->G = G;
  }
};
| |
| struct Float16R { |
| uint16_t R; |
| void SetChannels(float R, float G, float B, float A) { |
| this->R = ConvertFloat32ToFloat16(R); |
| UNREFERENCED_PARAMETER(G); |
| UNREFERENCED_PARAMETER(B); |
| UNREFERENCED_PARAMETER(A); |
| } |
| }; |
| |
| struct Float16RG { |
| uint16_t R, G; |
| void SetChannels(float R, float G, float B, float A) { |
| this->R = ConvertFloat32ToFloat16(R); |
| this->G = ConvertFloat32ToFloat16(G); |
| UNREFERENCED_PARAMETER(B); |
| UNREFERENCED_PARAMETER(A); |
| } |
| }; |
| |
| // No Float16RGB needed |
| |
| struct Float16RGBA { |
| uint16_t R, G, B, A; |
| void SetChannels(float R, float G, float B, float A) { |
| this->R = ConvertFloat32ToFloat16(R); |
| this->G = ConvertFloat32ToFloat16(G); |
| this->B = ConvertFloat32ToFloat16(B); |
| this->A = ConvertFloat32ToFloat16(A); |
| } |
| }; |
| |
| struct FloatR11G11B10 { |
| uint32_t RGB; |
| void SetChannels(float R, float G, float B, float A) { |
| uint32_t ur, ug, ub; |
| // Shift and mask so as to place R: 0-10, G: 11-21, B: 22-31 |
| // Sign and lesser-significant mantissa bits are truncated |
| ur = (ConvertFloat32ToFloat16(R) >> 4) & 0x000007FF; |
| ug = (ConvertFloat32ToFloat16(G) << 7) & 0x003FF800; |
| ub = (ConvertFloat32ToFloat16(B) << 17) & 0xFFC00000; |
| UNREFERENCED_PARAMETER(A); |
| RGB = ur | ug | ub; |
| } |
| }; |
| |
| struct FloatRGBE { |
| uint32_t RGBE; |
| // Conversion logic taken from miniengine PixelPacking header |
| void SetChannels(UINT R, UINT G, UINT B, UINT A) { |
| union { uint32_t i; float f; } ur, ug, ub, maxChannel, nextPow2; |
| ur.f = (float)R; |
| ug.f = (float)G; |
| ub.f = (float)B; |
| maxChannel.f = std::max(ur.f, std::max(ug.f, ub.f)); |
| // nextPow2 has to have the biggest exponent plus 1 (and nothing in the mantissa) |
| nextPow2.i = (maxChannel.i + 0x800000) & 0x7F800000; |
| |
| // By adding nextPow2, all channels have the same exponent, shifting their mantissa bits |
| // to the right to accomodate it. This also shifts in the implicit '1' bit of all channels. |
| // The largest channel will always have the high bit set. |
| ur.f += nextPow2.f; |
| ug.f += nextPow2.f; |
| ub.f += nextPow2.f; |
| UNREFERENCED_PARAMETER(A); |
| |
| ur.i = (ur.i << 9) >> 23; |
| ug.i = (ug.i << 9) >> 23; |
| ub.i = (ub.i << 9) >> 23; |
| |
| uint32_t e = ConvertFloat32ToFloat16(nextPow2.f) << 17; |
| RGBE = ur.i | ug.i << 9 | ub.i << 18 | e; |
| } |
| |
| static unsigned GetRSize() { return 9; } |
| static unsigned GetGSize() { return 9; } |
| static unsigned GetBSize() { return 9; } |
| static unsigned GetASize() { return 0; } |
| }; |
| |
| template <typename RGBAType, unsigned xdim, unsigned ydim> |
| struct RawFloatTexture : public ExecutionTest::RawGatherTexture { |
| DXGI_FORMAT m_format; |
| RGBAType RGBA[xdim*ydim]; |
| RawFloatTexture(DXGI_FORMAT format) : m_format(format) {} |
| // Set i'th element to floatified x,y and some derived values |
| virtual void SetElement(int i, int x, int y) override { |
| float r = (float)x; |
| float g = (float)y; |
| // provide some different values just to fill in b and a |
| float b = (float)(x + y)*0.5f; |
| float a = (float)(x + y)*0.1f; |
| RGBA[i].SetChannels(r, g, b, a); |
| } |
| virtual void *GetElements() { return (void*)RGBA; } |
| virtual unsigned GetXDim() { return xdim; } |
| virtual unsigned GetYDim() { return ydim; } |
| virtual DXGI_FORMAT GetFormat() override { return m_format; }; |
| }; |
| |
| template <unsigned xdim, unsigned ydim> |
| struct RawFloatR11G11B10ATexture : public ExecutionTest::RawGatherTexture { |
| FloatR11G11B10 RGBA[xdim*ydim]; |
| // Set i'th element to floatified x,y and some derived values |
| virtual void SetElement(int i, int x, int y) override { |
| float r = (float)x; |
| float g = (float)y; |
| float b = (float)(x + y)*0.5f; |
| RGBA[i].SetChannels(r, g, b, 0); |
| } |
| virtual void *GetElements() { return (void*)RGBA; } |
| virtual unsigned GetXDim() { return xdim; } |
| virtual unsigned GetYDim() { return ydim; } |
| virtual DXGI_FORMAT GetFormat() override { return DXGI_FORMAT_R11G11B10_FLOAT; }; |
| }; |
| |
| template <typename RGBAType, unsigned xdim, unsigned ydim> |
| struct RawIntTexture : public ExecutionTest::RawGatherTexture { |
| bool m_isSigned; |
| bool m_isNorm; |
| unsigned m_maxVal; |
| DXGI_FORMAT m_format; |
| RGBAType RGBA[xdim*ydim]; |
| RawIntTexture(bool isSigned, bool isNorm, int maxVal, DXGI_FORMAT format) |
| : m_isSigned(isSigned), m_isNorm(isNorm), m_maxVal(maxVal + 2), m_format(format) { |
| if (isSigned) |
| m_maxVal /= 2; |
| } |
| // Set i'th element to values scaled per max dimentions for norms, shifted for signed |
| // but otherwise just the x and y values themselves |
| virtual void SetElement(int i, int x, int y) override { |
| double fr = x; |
| double fg = y; |
| // provide some different values just to fill in b and a |
| double fb = x + 2; |
| double fa = y + 2; |
| // If signed, get some unsigned values in there |
| if (m_isSigned) { |
| fr -= m_maxVal; |
| fg -= m_maxVal; |
| fb -= m_maxVal; |
| fa -= m_maxVal; |
| } |
| // If normalized, scale to given range |
| if (m_isNorm) { |
| fr /= m_maxVal; |
| fg /= m_maxVal; |
| fb /= m_maxVal; |
| fa /= m_maxVal; |
| |
| fr *= (1 << (RGBAType::GetRSize() - m_isSigned - 1)); |
| fg *= (1 << (RGBAType::GetGSize() - m_isSigned - 1)); |
| fb *= (1 << (RGBAType::GetBSize() - m_isSigned - 1)); |
| fa *= (1 << (RGBAType::GetASize() - 1)); |
| } |
| RGBA[i].SetChannels((UINT)fr, (UINT)fg, (UINT)fb, (UINT)fa); |
| } |
| virtual void *GetElements() { return (void*)RGBA; } |
| virtual unsigned GetXDim() { return xdim; } |
| virtual unsigned GetYDim() { return ydim; } |
| virtual DXGI_FORMAT GetFormat() override { return m_format; }; |
| }; |
| |
| template <unsigned xdim, unsigned ydim> |
| struct RawR10G10B10XRA2Texture : public ExecutionTest::RawGatherTexture { |
| unsigned m_maxVal; |
| DXGI_FORMAT m_format; |
| IntRGBA10XRA2UNORM RGBA[xdim*ydim]; |
| RawR10G10B10XRA2Texture(int maxVal, DXGI_FORMAT format) |
| : m_maxVal((maxVal + 2)/2), m_format(format) {} |
| // Set i'th element to values scaled and shifted for available range |
| virtual void SetElement(int i, int x, int y) override { |
| double fr = x; |
| double fg = y; |
| // provide some different values just to fill in b and a |
| double fb = x + 2; |
| double fa = y + 2; |
| |
| // Shift RGB to valid range which will be -0.75 - 1.25 |
| fr -= m_maxVal*.75; |
| fg -= m_maxVal*.75; |
| fb -= m_maxVal*.75; |
| |
| // normalize to something that will fit in the limited range |
| fr /= m_maxVal; |
| fg /= m_maxVal; |
| fb /= m_maxVal; |
| fa /= m_maxVal*2; |
| |
| fa *= 3; // scale to max in range |
| |
| RGBA[i].SetChannels((float)fr, (float)fg, (float)fb, (float)fa); |
| } |
| virtual void *GetElements() { return (void*)RGBA; } |
| virtual unsigned GetXDim() { return xdim; } |
| virtual unsigned GetYDim() { return ydim; } |
| virtual DXGI_FORMAT GetFormat() override { return m_format; }; |
| }; |
| |
| //#define RAWGATHER_FALLBACK // Enable to use pre-6.7 fallback mechanisms to vet raw gather tests |
| |
| // Create a single resource of <resFormat> and alias it to a view of <viewFormat> |
| // Then execute a shader that uses raw gather to copy the values into a UAV |
| // Verify that the UAV has the same values as passed in. |
| template<typename GatherType> |
| void ExecutionTest::DoRawGatherTest(ID3D12Device *pDevice, RawGatherTexture *rawTex, DXGI_FORMAT viewFormat) { |
| |
| DXGI_FORMAT resFormat = rawTex->GetFormat(); |
| #ifdef RAWGATHER_FALLBACK |
| // There is no uint64 version of Gather, so 64-bit fallback needs to use Loads |
| const char shaderTemplate64[] = |
| "Texture2D<uint%d_t> g_tex : register(t0);\n" |
| "RWStructuredBuffer<uint%d_t> g_out : register(u0);\n" |
| "SamplerState g_samp : register(s0);\n" |
| "[NumThreads(32, 32, 1)]\n" |
| "void main(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {\n" |
| " //uint%d_t4 res = g_tex.%s(g_samp, (id.xy+0.5)/31.0);\n" |
| " g_out[4*ix+0] = g_tex.Load(uint3(id.x, id.y+1, 0));\n" |
| " g_out[4*ix+1] = g_tex.Load(uint3(id.x+1, id.y+1, 0));\n" |
| " g_out[4*ix+2] = g_tex.Load(uint3(id.x+1, id.y, 0));\n" |
| " g_out[4*ix+3] = g_tex.Load(uint3(id.x, id.y, 0));\n" |
| "}"; |
| #endif |
| const char shaderTemplate[] = |
| "Texture2D<uint%d_t> g_tex : register(t0);\n" |
| "RWStructuredBuffer<uint%d_t> g_out : register(u0);\n" |
| "SamplerState g_samp : register(s0);\n" |
| "[NumThreads(32, 32, 1)]\n" |
| "void main(uint3 id : SV_GroupThreadID, uint ix : SV_GroupIndex) {\n" |
| " uint%d_t4 res = g_tex.%s(g_samp, (id.xy+0.5)/31.0);\n" |
| " g_out[4*ix+0] = res.x;\n" |
| " g_out[4*ix+1] = res.y;\n" |
| " g_out[4*ix+2] = res.z;\n" |
| " g_out[4*ix+3] = res.w;\n" |
| "}"; |
| |
| char pShader[sizeof(shaderTemplate) + 200]; // A little padding to account for variations |
| UINT uintSize = sizeof(GatherType)*8; // bytes to bits |
| |
| const char *gatherFuncName = "GatherRaw"; |
| #ifdef RAWGATHER_FALLBACK |
| gatherFuncName = "Gather"; |
| if (sizeof(GatherType) == 8) |
| VERIFY_IS_GREATER_THAN(sprintf(pShader, shaderTemplate64, uintSize, uintSize, uintSize, gatherFuncName), 0); |
| else |
| #endif |
| VERIFY_IS_GREATER_THAN(sprintf(pShader, shaderTemplate, uintSize, uintSize, uintSize, gatherFuncName), 0); |
| |
| const UINT xDim = rawTex->GetXDim(); |
| const UINT yDim = rawTex->GetYDim(); |
| const UINT valueSize = xDim * yDim; |
| const UINT valueSizeInBytes = valueSize * sizeof(GatherType); |
| |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| FenceObj FO; |
| |
| CreateComputeCommandQueue(pDevice, L"RawGather Queue", &pCommandQueue); |
| InitFenceObj(pDevice, &FO); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| CD3DX12_DESCRIPTOR_RANGE ranges[2]; |
| CD3DX12_DESCRIPTOR_RANGE srange[1]; |
| ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0); |
| ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0); |
| srange[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, 0, 0); |
| |
| CreateRootSignatureFromRanges(pDevice, &pRootSignature, ranges, 2, srange, 1); |
| |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| |
| // Create command list and resources |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, |
| pCommandAllocator, nullptr, IID_PPV_ARGS(&pCommandList))); |
| |
| // Set up castable format list (of one) if possible, or else just alias the |
| // formats with the expectation that unsupported cases won't be used by the caller |
| DXGI_FORMAT *castableFmt = nullptr; |
| if (DoesDeviceSupportEnhancedBarriers(pDevice)) |
| castableFmt = &viewFormat; |
| else |
| resFormat = viewFormat; |
| |
| // Set up texture to be raw gathered from |
| CComPtr<ID3D12Resource> pTexResource; |
| CComPtr<ID3D12Resource> pTexUploadResource; |
| int ix = 0; |
| for (UINT y = 0; y < yDim; y++) |
| for (UINT x = 0; x < xDim; x++) |
| rawTex->SetElement(ix++, x, y); |
| D3D12_RESOURCE_DESC tex2dDesc = CD3DX12_RESOURCE_DESC::Tex2D(resFormat, xDim, yDim, 1/* sampCt */, 1/* mipCt */); |
| |
| CreateTestResources(pDevice, pCommandList, rawTex->GetElements(), valueSizeInBytes, tex2dDesc, |
| &pTexResource, &pTexUploadResource, |
| nullptr /*pReadBufer*/, castableFmt); |
| |
| // Set up Output Resource |
| CComPtr<ID3D12Resource> pOutputResource; |
| CComPtr<ID3D12Resource> pOutputReadBuffer; |
| CComPtr<ID3D12Resource> pOutputUploadResource; |
| |
| // 4x because gather produces four result values |
| GatherType *outVals = new GatherType[valueSize*4]; |
| memset(outVals, 0xd, valueSizeInBytes*4); // 0xd to give a sentinal value for failures |
| CreateTestUavs(pDevice, pCommandList, outVals, valueSizeInBytes*4, &pOutputResource, |
| &pOutputUploadResource, &pOutputReadBuffer); |
| delete[] outVals; |
| |
| // Close the command list and execute it to perform the resource uploads |
| pCommandList->Close(); |
| ID3D12CommandList *ppCommandLists[] = { pCommandList }; |
| pCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| WaitForSignal(pCommandQueue, FO); |
| |
| // Create shaders |
| #ifdef RAWGATHER_FALLBACK |
| const wchar_t *target = L"cs_6_2"; |
| #else |
| const wchar_t *target = L"cs_6_7"; |
| #endif |
| |
| LPCWSTR opts[] = {L"-enable-16bit-types"}; |
| |
| CComPtr<ID3D12PipelineState> pPSO; |
| CreateComputePSO(pDevice, pRootSignature, pShader, target, &pPSO, opts, _countof(opts)); |
| |
| // Reset commandlist to shader PSO |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pPSO)); |
| |
| // Describe and create a resource descriptor heap. |
| CComPtr<ID3D12DescriptorHeap> pResHeap; |
| CComPtr<ID3D12DescriptorHeap> pSampHeap; |
| D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; |
| heapDesc.NumDescriptors = 2; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; |
| heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pResHeap))); |
| |
| // Describe and create a sampler descriptor heap. |
| heapDesc.NumDescriptors = 1; |
| heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; |
| VERIFY_SUCCEEDED(pDevice->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&pSampHeap))); |
| |
| CD3DX12_CPU_DESCRIPTOR_HANDLE cpuHandle(pResHeap->GetCPUDescriptorHandleForHeapStart()); |
| CreateTex2DSRV(pDevice, cpuHandle, viewFormat, pTexResource); |
| CreateStructUAV(pDevice, cpuHandle, 4*valueSize, sizeof(GatherType), pOutputResource); |
| |
| D3D12_FILTER filters[] = {D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, |
| D3D12_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT}; |
| CreateDefaultSamplers(pDevice, pSampHeap->GetCPUDescriptorHandleForHeapStart(), |
| filters, nullptr /*perSampleBorderColors*/, 1); |
| |
| // Set Heaps, Rootsignature and table |
| ID3D12DescriptorHeap *const pHeaps[2] = { pResHeap, pSampHeap }; |
| pCommandList->SetDescriptorHeaps(2, pHeaps); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| pCommandList->SetComputeRootDescriptorTable(0, pResHeap->GetGPUDescriptorHandleForHeapStart()); |
| pCommandList->SetComputeRootDescriptorTable(1, pSampHeap->GetGPUDescriptorHandleForHeapStart()); |
| |
| // dispatch and close shader |
| pCommandList->Dispatch(1, 1, 1); |
| |
| // Copy the results back to readable memory |
| CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(pOutputResource, |
| D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->ResourceBarrier(1, &barrier); |
| pCommandList->CopyResource(pOutputReadBuffer, pOutputResource); |
| |
| pCommandList->Close(); |
| |
| pCommandQueue->ExecuteCommandLists(1, ppCommandLists); |
| WaitForSignal(pCommandQueue, FO); |
| |
| MappedData mappedData(pOutputReadBuffer, 4*valueSizeInBytes); |
| GatherType *pData = (GatherType*)mappedData.data(); |
| GatherType *texVals = (GatherType*)rawTex->GetElements(); |
| UINT yCt = yDim; |
| UINT xCt = xDim; |
| #ifdef RAWGATHER_FALLBACK |
| // 64-bit fallback uses Load, which doesn't support clamp addressing. so don't test it |
| if (sizeof(GatherType) == 8) { |
| yCt--; |
| xCt--; |
| } |
| #endif |
| for (UINT y = 0; y < yCt; y++) { |
| UINT yp1 = y+1>=yDim?y:y+1; |
| for (UINT x = 0; x < xCt; x++) { |
| UINT xp1 = x+1>=xDim?x:x+1; |
| // Because this order may be unexpected, I'll quote the spec: |
| // "The four samples that would contribute to filtering are placed into xyzw |
| // in counter clockwise order starting with the sample to the lower left" |
| VERIFY_ARE_EQUAL(pData[4*(32*y + x)+0], texVals[yp1*xDim + x]); |
| VERIFY_ARE_EQUAL(pData[4*(32*y + x)+1], texVals[yp1*xDim + xp1]); |
| VERIFY_ARE_EQUAL(pData[4*(32*y + x)+2], texVals[y*xDim + xp1]); |
| VERIFY_ARE_EQUAL(pData[4*(32*y + x)+3], texVals[y*xDim + x]); |
| } |
| } |
| } |
| |
| // Create textures of various types and alias them to the unsigned integer format |
| // that has the same element size and initializes them with various values, |
| // The shader code copies the results of raw gather to an unsigned integer UAV |
| // The UAV contents are compared to the values assigned to the texture |
| // A few levels of support are available: |
| // pre-6.7 fallback - fakey hand waving to make it look like it's doing the right thing |
| // 6.7 support only - No casting ability of resources to views beyond native support, but GatherRaw is available |
| // 6.7 + Enh. Barriers - Same formats can be cast as in native, but use new createcommittedresource3() |
| // 6.7 + Enh. Barriers + Relaxed Cast - All format casting and raw gathering of all |
| TEST_F(ExecutionTest, ATORawGather) { |
| |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| #ifdef RAWGATHER_FALLBACK |
| D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_6; |
| #else |
| D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; |
| #endif |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm)) |
| return; |
| |
| #ifndef RAWGATHER_FALLBACK |
| if (!DoesDeviceSupportAdvancedTexOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support Advanced Texture Operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| #endif |
| |
| static const int NumThreadsX = 32; |
| static const int NumThreadsY = 32; |
| static const int ThreadsPerGroup = NumThreadsX * NumThreadsY; |
| |
| // Create an array of texture variants with the raw texture base class |
| // Then plug them into DoRawGather to perform the test and evaluate the results for each |
| RawIntTexture<IntRG<32, 32>, NumThreadsX, NumThreadsY> R32G32_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R32G32_TYPELESS); |
| RawIntTexture<IntRG<32, 32>, NumThreadsX, NumThreadsY> R32G32_UINT(false, false, NumThreadsX, DXGI_FORMAT_R32G32_UINT); |
| RawIntTexture<IntRG<32, 32>, NumThreadsX, NumThreadsY> R32G32_SINT(true, false, NumThreadsX, DXGI_FORMAT_R32G32_SINT); |
| |
| RawIntTexture<IntRGBA<16, 16, 16, 16>, NumThreadsX, NumThreadsY> R16G16B16A16_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R16G16B16A16_TYPELESS); |
| RawIntTexture<IntRGBA<16, 16, 16, 16>, NumThreadsX, NumThreadsY> R16G16B16A16_UINT(false, false, NumThreadsX, DXGI_FORMAT_R16G16B16A16_UINT); |
| RawIntTexture<IntRGBA<16, 16, 16, 16>, NumThreadsX, NumThreadsY> R16G16B16A16_SINT(true, false, NumThreadsX, DXGI_FORMAT_R16G16B16A16_SINT); |
| RawIntTexture<IntRGBA<16, 16, 16, 16>, NumThreadsX, NumThreadsY> R16G16B16A16_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R16G16B16A16_UNORM); |
| RawIntTexture<IntRGBA<16, 16, 16, 16>, NumThreadsX, NumThreadsY> R16G16B16A16_SNORM(true, true, NumThreadsX, DXGI_FORMAT_R16G16B16A16_SNORM); |
| RawFloatTexture<Float16RGBA, NumThreadsX, NumThreadsY> R16G16B16A16_FLOAT(DXGI_FORMAT_R16G16B16A16_FLOAT); |
| RawFloatTexture<Float32RG, NumThreadsX, NumThreadsY> R32G32_FLOAT(DXGI_FORMAT_R32G32_FLOAT); |
| |
| RawGatherTexture *Int64Textures[] = { |
| &R32G32_TYPELESS, |
| &R32G32_UINT, |
| &R32G32_SINT, |
| &R16G16B16A16_TYPELESS, |
| &R16G16B16A16_UINT, |
| &R16G16B16A16_SINT, |
| &R16G16B16A16_UNORM, |
| &R16G16B16A16_SNORM, |
| &R16G16B16A16_FLOAT, |
| &R32G32_FLOAT}; |
| |
| RawIntTexture<IntR<32>, NumThreadsX, NumThreadsY> R32_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R32_TYPELESS); |
| RawIntTexture<IntR<32>, NumThreadsX, NumThreadsY> R32_SINT(true, false, NumThreadsX, DXGI_FORMAT_R32_SINT); |
| RawIntTexture<IntR<32>, NumThreadsX, NumThreadsY> R32_UINT(true, false, NumThreadsX, DXGI_FORMAT_R32_UINT); |
| |
| RawIntTexture<IntRGBA<10, 10, 10, 2>, NumThreadsX, NumThreadsY> R10G10B10A2_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R10G10B10A2_TYPELESS); |
| RawIntTexture<IntRGBA<10, 10, 10, 2>, NumThreadsX, NumThreadsY> R10G10B10A2_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R10G10B10A2_UNORM); |
| RawIntTexture<IntRGBA<10, 10, 10, 2>, NumThreadsX, NumThreadsY> R10G10B10A2_UINT(false, false, NumThreadsX, DXGI_FORMAT_R10G10B10A2_UINT); |
| RawR10G10B10XRA2Texture<NumThreadsX, NumThreadsY> R10G10B10A2_XR_BIAS_A2_UNORM(NumThreadsX, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM); |
| RawIntTexture<FloatRGBE, NumThreadsX, NumThreadsY> R9G9B9E5_SHAREDEXP(false, false, NumThreadsX, DXGI_FORMAT_R9G9B9E5_SHAREDEXP); |
| |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R8G8B8A8_TYPELESS); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R8G8B8A8_UNORM); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_UNORM_SRGB(false, true, NumThreadsX, DXGI_FORMAT_R8G8B8A8_UNORM); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_UINT(false, false, NumThreadsX, DXGI_FORMAT_R8G8B8A8_UINT); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_SNORM(true, true, NumThreadsX, DXGI_FORMAT_R8G8B8A8_SNORM); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> R8G8B8A8_SINT(true, false, NumThreadsX, DXGI_FORMAT_R8G8B8A8_SINT); |
| |
| RawIntTexture<IntRG<16, 16>, NumThreadsX, NumThreadsY> R16G16_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R16G16_TYPELESS); |
| RawIntTexture<IntRG<16, 16>, NumThreadsX, NumThreadsY> R16G16_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R16G16_UNORM); |
| RawIntTexture<IntRG<16, 16>, NumThreadsX, NumThreadsY> R16G16_UINT(false, false, NumThreadsX, DXGI_FORMAT_R16G16_UINT); |
| RawIntTexture<IntRG<16, 16>, NumThreadsX, NumThreadsY> R16G16_SNORM(true, true, NumThreadsX, DXGI_FORMAT_R16G16_SNORM); |
| RawIntTexture<IntRG<16, 16>, NumThreadsX, NumThreadsY> R16G16_SINT(true, false, NumThreadsX, DXGI_FORMAT_R16G16_SINT); |
| |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8A8_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_B8G8R8A8_TYPELESS); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8A8_UNORM(false, true, NumThreadsX, DXGI_FORMAT_B8G8R8A8_UNORM); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8A8_UNORM_SRGB(false, true, NumThreadsX, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB); |
| |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8X8_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_B8G8R8X8_TYPELESS); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8X8_UNORM(false, true, NumThreadsX, DXGI_FORMAT_B8G8R8X8_UNORM); |
| RawIntTexture<IntRGBA<8, 8, 8, 8>, NumThreadsX, NumThreadsY> B8G8R8X8_UNORM_SRGB(false, true, NumThreadsX, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB); |
| |
| RawFloatTexture<Float32R, NumThreadsX, NumThreadsY> R32_FLOAT(DXGI_FORMAT_R32_FLOAT); |
| RawFloatR11G11B10ATexture<NumThreadsX, NumThreadsY> R11G11B10_FLOAT; |
| RawFloatTexture<Float16RG, NumThreadsX, NumThreadsY> R16G16_FLOAT(DXGI_FORMAT_R16G16_FLOAT); |
| |
| RawGatherTexture *Int32Textures[] = { |
| &R32_TYPELESS, |
| &R32_UINT, |
| &R32_SINT, |
| &R10G10B10A2_TYPELESS, |
| &R10G10B10A2_UNORM, |
| &R10G10B10A2_UINT, |
| &R10G10B10A2_XR_BIAS_A2_UNORM, |
| &R9G9B9E5_SHAREDEXP, |
| &R8G8B8A8_TYPELESS, |
| &R8G8B8A8_UNORM, |
| &R8G8B8A8_UNORM_SRGB, |
| &R8G8B8A8_UINT, |
| &R8G8B8A8_SNORM, |
| &R8G8B8A8_SINT, |
| &R16G16_TYPELESS, |
| &R16G16_UNORM, |
| &R16G16_UINT, |
| &R16G16_SNORM, |
| &R16G16_SINT, |
| &B8G8R8A8_TYPELESS, |
| &B8G8R8A8_UNORM, |
| &B8G8R8A8_UNORM_SRGB, |
| &B8G8R8X8_TYPELESS, |
| &B8G8R8X8_UNORM, |
| &B8G8R8X8_UNORM_SRGB, |
| &R32_FLOAT, |
| &R11G11B10_FLOAT, |
| &R16G16_FLOAT}; |
| |
| RawIntTexture<IntR<16>, NumThreadsX, NumThreadsY> R16_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R16_TYPELESS); |
| RawIntTexture<IntR<16>, NumThreadsX, NumThreadsY> R16_SINT(true, false, NumThreadsX, DXGI_FORMAT_R16_SINT); |
| RawIntTexture<IntR<16>, NumThreadsX, NumThreadsY> R16_UINT(true, false, NumThreadsX, DXGI_FORMAT_R16_UINT); |
| RawIntTexture<IntR<16>, NumThreadsX, NumThreadsY> R16_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R16_UNORM); |
| RawIntTexture<IntR<16>, NumThreadsX, NumThreadsY> R16_SNORM(true, true, NumThreadsX, DXGI_FORMAT_R16_SNORM); |
| RawFloatTexture<Float16R, NumThreadsX, NumThreadsY> R16_FLOAT(DXGI_FORMAT_R16_FLOAT); |
| |
| RawIntTexture<IntRG<8, 8>, NumThreadsX, NumThreadsY> R8G8_TYPELESS(false, false, NumThreadsX, DXGI_FORMAT_R8G8_TYPELESS); |
| RawIntTexture<IntRG<8, 8>, NumThreadsX, NumThreadsY> R8G8_UINT(false, false, NumThreadsX, DXGI_FORMAT_R8G8_UINT); |
| RawIntTexture<IntRG<8, 8>, NumThreadsX, NumThreadsY> R8G8_SINT(true, false, NumThreadsX, DXGI_FORMAT_R8G8_SINT); |
| RawIntTexture<IntRG<8, 8>, NumThreadsX, NumThreadsY> R8G8_UNORM(false, true, NumThreadsX, DXGI_FORMAT_R8G8_UNORM); |
| RawIntTexture<IntRG<8, 8>, NumThreadsX, NumThreadsY> R8G8_SNORM(true, true, NumThreadsX, DXGI_FORMAT_R8G8_SNORM); |
| RawIntTexture<IntRGB<5, 6, 5>, NumThreadsX, NumThreadsY> B5G6R5_UNORM(false, true, NumThreadsX, DXGI_FORMAT_B5G6R5_UNORM); |
| RawIntTexture<IntRGBA<5, 5, 5, 1>, NumThreadsX, NumThreadsY> B5G5R5A1_UNORM(false, true, NumThreadsX, DXGI_FORMAT_B5G5R5A1_UNORM); |
| RawIntTexture<IntRGBA<4, 4, 4, 4>, NumThreadsX, NumThreadsY> B4G4R4A4_UNORM(false, true, NumThreadsX, DXGI_FORMAT_B4G4R4A4_UNORM); |
| |
| RawGatherTexture *Int16Textures[] = { |
| &R16_TYPELESS, |
| &R16_UINT, |
| &R16_SINT, |
| &R16_UNORM, |
| &R16_SNORM, |
| &R8G8_TYPELESS, |
| &R8G8_UINT, |
| &R8G8_SINT, |
| &R8G8_UNORM, |
| &R8G8_SNORM, |
| &B5G6R5_UNORM, |
| &B5G5R5A1_UNORM, |
| &B4G4R4A4_UNORM, |
| &R16_FLOAT}; |
| |
| bool canCast = DoesDeviceSupportRelaxedFormatCasting(pDevice); |
| int int32Ct = canCast? _countof(Int32Textures) : 3; // The first three are already castable to UINT32 |
| |
| for (int i = 0; i < int32Ct; i++) { |
| DoRawGatherTest<uint32_t>(pDevice, Int32Textures[i], DXGI_FORMAT_R32_UINT); |
| } |
| |
| if (DoesDeviceSupportNative16bitOps(pDevice)) { |
| int int16Ct = canCast? _countof(Int16Textures) : 5; // The first five are already castable to UINT16 |
| for (int i = 0; i < int16Ct; i++) { |
| DoRawGatherTest<uint16_t>(pDevice, Int16Textures[i], DXGI_FORMAT_R16_UINT); |
| } |
| } |
| if (DoesDeviceSupportInt64(pDevice)) { |
| int int64Ct = canCast? _countof(Int64Textures) : 3; // The first three are already castable to UINT64 |
| for (int i = 0; i < int64Ct; i++) { |
| DoRawGatherTest<uint64_t>(pDevice, Int64Textures[i], DXGI_FORMAT_R32G32_UINT); |
| } |
| } |
| } |
| |
| // Executing a simple binop to verify shadel model 6.1 support; runs with |
| // ShaderModel61.CoreRequirement |
| TEST_F(ExecutionTest, BasicShaderModel61) { |
| RunBasicShaderModelTest(D3D_SHADER_MODEL_6_1); |
| } |
| |
| // Executing a simple binop to verify shadel model 6.3 support; runs with |
| // ShaderModel63.CoreRequirement |
| TEST_F(ExecutionTest, BasicShaderModel63) { |
| RunBasicShaderModelTest(D3D_SHADER_MODEL_6_3); |
| } |
| |
| void ExecutionTest::RunBasicShaderModelTest(D3D_SHADER_MODEL shaderModel) { |
| |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, shaderModel)) { |
| return; |
| } |
| |
| char *pShaderModelStr; |
| if (shaderModel == D3D_SHADER_MODEL_6_1) { |
| pShaderModelStr = "cs_6_1"; |
| } else if (shaderModel == D3D_SHADER_MODEL_6_3) { |
| pShaderModelStr = "cs_6_3"; |
| } else { |
| DXASSERT_NOMSG("Invalid Shader Model Parameter"); |
| pShaderModelStr = nullptr; |
| } |
| |
| const char shaderTemplate[] = |
| "struct SBinaryOp { %s input1; %s input2; %s output; };" |
| "RWStructuredBuffer<SBinaryOp> g_buf : register(u0);" |
| "[numthreads(8,8,1)]" |
| "void main(uint GI : SV_GroupIndex) {" |
| " SBinaryOp l = g_buf[GI];" |
| " l.output = l.input1 + l.input2;" |
| " g_buf[GI] = l;" |
| "}"; |
| char shader[sizeof(shaderTemplate) + 50]; |
| |
| // Run simple shader with float data types |
| char* sTy = "float"; |
| float inputFloatPairs[] = { 1.5f, -2.8f, 3.23e-5f, 6.0f, 181.621f, 14.978f }; |
| VERIFY_IS_TRUE(sprintf(shader, shaderTemplate, sTy, sTy, sTy) > 0); |
| WEX::Logging::Log::Comment(L"BasicShaderModel float"); |
| RunBasicShaderModelTest<float>(pDevice, pShaderModelStr, shader, inputFloatPairs, sizeof(inputFloatPairs) / (2 * sizeof(float))); |
| |
| // Run simple shader with double data types |
| if (DoesDeviceSupportDouble(pDevice)) { |
| sTy = "double"; |
| double inputDoublePairs[] = { 1.5891020, -2.8, 3.23e-5, 1 / 3, 181.91621, 14.654978 }; |
| VERIFY_IS_TRUE(sprintf(shader, shaderTemplate, sTy, sTy, sTy) > 0); |
| WEX::Logging::Log::Comment(L"BasicShaderModel double"); |
| RunBasicShaderModelTest<double>(pDevice, pShaderModelStr, shader, inputDoublePairs, sizeof(inputDoublePairs) / (2 * sizeof(double))); |
| } |
| else { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support double operations."); |
| } |
| |
| // Run simple shader with int64 types |
| if (DoesDeviceSupportInt64(pDevice)) { |
| sTy = "int64_t"; |
| int64_t inputInt64Pairs[] = { 1, -100, 6814684, -9814810, 654, 1021248900 }; |
| VERIFY_IS_TRUE(sprintf(shader, shaderTemplate, sTy, sTy, sTy) > 0); |
| WEX::Logging::Log::Comment(L"BasicShaderModel int64_t"); |
| RunBasicShaderModelTest<int64_t>(pDevice, pShaderModelStr, shader, inputInt64Pairs, sizeof(inputInt64Pairs) / (2 * sizeof(int64_t))); |
| } |
| else { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| } |
| } |
| |
| template <class Ty> |
| const wchar_t* ExecutionTest::BasicShaderModelTest_GetFormatString() { |
| DXASSERT_NOMSG("Unsupported type"); |
| return ""; |
| } |
| |
| template <> |
| const wchar_t* ExecutionTest::BasicShaderModelTest_GetFormatString<float>() { |
| return L"element #%u: input1 = %6.8f, input1 = %6.8f, output = %6.8f, expected = %6.8f"; |
| } |
| |
| template <> |
| const wchar_t* ExecutionTest::BasicShaderModelTest_GetFormatString<double>() { |
| return BasicShaderModelTest_GetFormatString<float>(); |
| } |
| |
| template <> |
| const wchar_t* ExecutionTest::BasicShaderModelTest_GetFormatString<int64_t>() { |
| return L"element #%u: input1 = %ld, input1 = %ld, output = %ld, expected = %ld"; |
| } |
| |
| template <class Ty> |
| void ExecutionTest::RunBasicShaderModelTest(CComPtr<ID3D12Device> pDevice, const char *pShaderModelStr, const char *pShader, |
| Ty *pInputDataPairs, unsigned inputDataCount) { |
| struct SBinaryOp { |
| Ty input1; |
| Ty input2; |
| Ty output; |
| }; |
| |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryFPOp", |
| // this callbacked is called when the test is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(Name); |
| pShaderOp->Shaders.at(0).Target = pShaderModelStr; |
| pShaderOp->Shaders.at(0).Text = pShader; |
| size_t size = sizeof(SBinaryOp) * inputDataCount; |
| Data.resize(size); |
| SBinaryOp *pPrimitives = (SBinaryOp*)Data.data(); |
| Ty *pIn = pInputDataPairs; |
| for (size_t i = 0; i < inputDataCount; i++, pIn += 2) { |
| SBinaryOp *p = &pPrimitives[i]; |
| p->input1 = pIn[0]; |
| p->input2 = pIn[1]; |
| } |
| }); |
| |
| VERIFY_SUCCEEDED(S_OK); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryFPOp", &data); |
| SBinaryOp *pPrimitives = (SBinaryOp*)data.data(); |
| |
| const wchar_t* formatStr = BasicShaderModelTest_GetFormatString<Ty>(); |
| Ty *pIn = pInputDataPairs; |
| |
| for (unsigned i = 0; i < inputDataCount; i++, pIn += 2) { |
| Ty expValue = pIn[0] + pIn[1]; |
| SBinaryOp *p = &pPrimitives[i]; |
| |
| LogCommentFmt(formatStr, i, pIn[0], pIn[1], p->output, expValue); |
| VERIFY_ARE_EQUAL(p->output, expValue); |
| } |
| } |
| |
| |
// Resource structures for data-driven tests.
// Each struct is one buffer element; field order and types define the layout.

// One float input, one float result.
struct SUnaryFPOp {
  float input;
  float output;
};

// Two float inputs, up to two float results.
struct SBinaryFPOp {
  float input1;
  float input2;
  float output1;
  float output2;
};

// Three float inputs, one float result.
struct STertiaryFPOp {
  float input1;
  float input2;
  float input3;
  float output;
};
| |
// 16-bit variants of the unary/binary/tertiary op records. The fields are
// stored as uint16_t (presumably raw half-float bit patterns, as C++ has no
// native half type - confirm against the callers).

struct SUnaryHalfOp {
  uint16_t input;
  uint16_t output;
};

struct SBinaryHalfOp {
  uint16_t input1;
  uint16_t input2;
  uint16_t output1;
  uint16_t output2;
};

struct STertiaryHalfOp {
  uint16_t input1;
  uint16_t input2;
  uint16_t input3;
  uint16_t output;
};
| |
// 32-bit signed / unsigned integer op records. int32_t / uint32_t are the
// same types as the `int` / `unsigned int` these were declared with.

struct SUnaryIntOp {
  int32_t input;
  int32_t output;
};

struct SUnaryUintOp {
  uint32_t input;
  uint32_t output;
};

struct SBinaryIntOp {
  int32_t input1;
  int32_t input2;
  int32_t output1;
  int32_t output2;
};

struct STertiaryIntOp {
  int32_t input1;
  int32_t input2;
  int32_t input3;
  int32_t output;
};

struct SBinaryUintOp {
  uint32_t input1;
  uint32_t input2;
  uint32_t output1;
  uint32_t output2;
};

struct STertiaryUintOp {
  uint32_t input1;
  uint32_t input2;
  uint32_t input3;
  uint32_t output;
};
| |
// 16-bit signed / unsigned integer op records. int16_t / uint16_t are the
// same types as the `short` / `unsigned short` these were declared with.

struct SUnaryInt16Op {
  int16_t input;
  int16_t output;
};

struct SUnaryUint16Op {
  uint16_t input;
  uint16_t output;
};

struct SBinaryInt16Op {
  int16_t input1;
  int16_t input2;
  int16_t output1;
  int16_t output2;
};

struct STertiaryInt16Op {
  int16_t input1;
  int16_t input2;
  int16_t input3;
  int16_t output;
};

struct SBinaryUint16Op {
  uint16_t input1;
  uint16_t input2;
  uint16_t output1;
  uint16_t output2;
};

struct STertiaryUint16Op {
  uint16_t input1;
  uint16_t input2;
  uint16_t input3;
  uint16_t output;
};
| // representation for HLSL float vectors |
| struct SDotOp { |
| XMFLOAT4 input1; |
| XMFLOAT4 input2; |
| float o_dot2; |
| float o_dot3; |
| float o_dot4; |
| }; |
| |
| struct Half2 |
| { |
| uint16_t x; |
| uint16_t y; |
| |
| Half2() = default; |
| |
| Half2(const Half2&) = default; |
| Half2& operator=(const Half2&) = default; |
| |
| Half2(Half2&&) = default; |
| Half2& operator=(Half2&&) = default; |
| |
| constexpr Half2(uint16_t _x, uint16_t _y) : x(_x), y(_y) {} |
| explicit Half2(_In_reads_(2) const uint16_t *pArray) : x(pArray[0]), y(pArray[1]) {} |
| }; |
| |
| struct SDot2AddHalfOp { |
| Half2 input1; |
| Half2 input2; |
| float acc; |
| float result; |
| }; |
| |
// Record for the signed packed dot4-add test: two 32-bit packed inputs, a
// signed accumulator and a signed result.
struct SDot4AddI8PackedOp {
  uint32_t input1;
  uint32_t input2;
  int32_t acc;
  int32_t result;
};

// Record for the unsigned packed dot4-add test: same shape, with an unsigned
// accumulator and result.
struct SDot4AddU8PackedOp {
  uint32_t input1;
  uint32_t input2;
  uint32_t acc;
  uint32_t result;
};
| |
| struct SMsad4 { |
| unsigned int ref; |
| XMUINT2 src; |
| XMUINT4 accum; |
| XMUINT4 result; |
| }; |
| |
// Packed outputs of the pack/unpack test: one 32-bit word per source element
// type, first for the plain variants and then for the clamped ones.
struct SPackUnpackOpOutPacked {
  uint32_t packedUint32;
  uint32_t packedInt32;
  uint32_t packedUint16;
  uint32_t packedInt16;

  uint32_t packedClampedUint32;
  uint32_t packedClampedInt32;
  uint32_t packedClampedUint16;
  uint32_t packedClampedInt16;
};

// Unpacked outputs of the pack/unpack test: four elements per element type,
// first for the plain variants and then for the clamped ones.
struct SPackUnpackOpOutUnpacked {
  std::array<uint32_t, 4> outputUint32;
  std::array<int32_t, 4> outputInt32;
  std::array<uint16_t, 4> outputUint16;
  std::array<int16_t, 4> outputInt16;

  std::array<uint32_t, 4> outputClampedUint32;
  std::array<int32_t, 4> outputClampedInt32;
  std::array<uint16_t, 4> outputClampedUint16;
  std::array<int16_t, 4> outputClampedInt16;
};
| |
| |
| // Parameter representation for taef data-driven tests |
| struct TableParameter { |
| LPCWSTR m_name; |
| enum TableParameterType { |
| INT8, |
| INT16, |
| INT32, |
| UINT, |
| FLOAT, |
| HALF, |
| DOUBLE, |
| STRING, |
| BOOL, |
| INT8_TABLE, |
| INT16_TABLE, |
| INT32_TABLE, |
| FLOAT_TABLE, |
| HALF_TABLE, |
| DOUBLE_TABLE, |
| STRING_TABLE, |
| UINT8_TABLE, |
| UINT16_TABLE, |
| UINT32_TABLE, |
| BOOL_TABLE |
| }; |
| TableParameterType m_type; |
| bool m_required; // required parameter |
| int8_t m_int8; |
| int16_t m_int16; |
| int m_int32; |
| unsigned int m_uint; |
| float m_float; |
| uint16_t m_half; // no such thing as half type in c++. Use int16 instead |
| double m_double; |
| bool m_bool; |
| WEX::Common::String m_str; |
| std::vector<int8_t> m_int8Table; |
| std::vector<int16_t> m_int16Table; |
| std::vector<int> m_int32Table; |
| std::vector<uint8_t> m_uint8Table; |
| std::vector<uint16_t> m_uint16Table; |
| std::vector<unsigned int> m_uint32Table; |
| std::vector<float> m_floatTable; |
| std::vector<uint16_t> m_halfTable; // no such thing as half type in c++ |
| std::vector<double> m_doubleTable; |
| std::vector<bool> m_boolTable; |
| std::vector<WEX::Common::String> m_StringTable; |
| }; |
| |
| class TableParameterHandler { |
| private: |
| HRESULT ParseTableRow(); |
| public: |
| TableParameter* m_table; |
| size_t m_tableSize; |
| TableParameterHandler(TableParameter *pTable, size_t size) : m_table(pTable), m_tableSize(size) { |
| clearTableParameter(); |
| VERIFY_SUCCEEDED(ParseTableRow()); |
| } |
| |
| TableParameter* GetTableParamByName(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &m_table[i]; |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| void clearTableParameter() { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| m_table[i].m_int32 = 0; |
| m_table[i].m_uint = 0; |
| m_table[i].m_double = 0; |
| m_table[i].m_bool = false; |
| m_table[i].m_str = WEX::Common::String(); |
| } |
| } |
| |
| template <class T1> |
| std::vector<T1> *GetDataArray(LPCWSTR name) { |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<int> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_int32Table); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<int8_t> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_int8Table); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<int16_t> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_int16Table); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<unsigned int> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_uint32Table); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<float> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_floatTable); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| // TODO: uin16_t may be used to represent two different types when we introduce uint16 |
| template <> |
| std::vector<uint16_t> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_halfTable); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<double> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_doubleTable); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| template <> |
| std::vector<bool> *GetDataArray(LPCWSTR name) { |
| for (size_t i = 0; i < m_tableSize; ++i) { |
| if (_wcsicmp(name, m_table[i].m_name) == 0) { |
| return &(m_table[i].m_boolTable); |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); |
| return nullptr; |
| } |
| |
| }; |
| |
| static TableParameter UnaryFPOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| { L"Warp.Version", TableParameter::UINT, false } |
| }; |
| |
| static TableParameter BinaryFPOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Input2", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::FLOAT_TABLE, false }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter TertiaryFPOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Input2", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Input3", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter UnaryHalfOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| { L"Warp.Version", TableParameter::UINT, false } |
| }; |
| |
| static TableParameter BinaryHalfOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Input2", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::HALF_TABLE, false }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter TertiaryHalfOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Input2", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Input3", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::HALF_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter UnaryIntOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter UnaryUintOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter BinaryIntOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::INT32_TABLE, false }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter TertiaryIntOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Input3", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter BinaryUintOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::UINT32_TABLE, false }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter TertiaryUintOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input3", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter UnaryInt16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter UnaryUint16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter BinaryInt16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Input2", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::INT16_TABLE, false }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter TertiaryInt16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Input2", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Input3", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT16_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter BinaryUint16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::UINT16_TABLE, false }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter TertiaryUint16OpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Input3", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT16_TABLE, true }, |
| { L"Validation.Tolerance", TableParameter::INT32, true }, |
| }; |
| |
| static TableParameter DotOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected3", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter Dot2AddHalfOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input3", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::FLOAT_TABLE, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter Dot4AddI8PackedOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input3", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::INT32_TABLE, true }, |
| }; |
| |
| static TableParameter Dot4AddU8PackedOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input2", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Input3", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::UINT32_TABLE, true }, |
| }; |
| |
| static TableParameter Msad4OpParameters[] = { |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| { L"Validation.Input1", TableParameter::UINT32_TABLE, true}, |
| { L"Validation.Input2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input3", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::STRING_TABLE, true } |
| }; |
| |
| static TableParameter WaveIntrinsicsActiveIntParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.NumInputSet", TableParameter::UINT, true }, |
| { L"Validation.InputSet1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.InputSet2", TableParameter::INT32_TABLE, false }, |
| { L"Validation.InputSet3", TableParameter::INT32_TABLE, false }, |
| { L"Validation.InputSet4", TableParameter::INT32_TABLE, false } |
| }; |
| |
| static TableParameter WaveIntrinsicsPrefixIntParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.NumInputSet", TableParameter::UINT, true }, |
| { L"Validation.InputSet1", TableParameter::INT32_TABLE, true }, |
| { L"Validation.InputSet2", TableParameter::INT32_TABLE, false }, |
| { L"Validation.InputSet3", TableParameter::INT32_TABLE, false }, |
| { L"Validation.InputSet4", TableParameter::INT32_TABLE, false } |
| }; |
| |
| static TableParameter WaveIntrinsicsActiveUintParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.NumInputSet", TableParameter::UINT, true }, |
| { L"Validation.InputSet1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.InputSet2", TableParameter::UINT32_TABLE, false }, |
| { L"Validation.InputSet3", TableParameter::UINT32_TABLE, false }, |
| { L"Validation.InputSet4", TableParameter::UINT32_TABLE, false } |
| }; |
| |
| static TableParameter WaveIntrinsicsPrefixUintParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.NumInputSet", TableParameter::UINT, true }, |
| { L"Validation.InputSet1", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.InputSet2", TableParameter::UINT32_TABLE, false }, |
| { L"Validation.InputSet3", TableParameter::UINT32_TABLE, false }, |
| { L"Validation.InputSet4", TableParameter::UINT32_TABLE, false } |
| }; |
| |
| static TableParameter WaveIntrinsicsMultiPrefixIntParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Keys", TableParameter::INT32_TABLE, true }, |
| { L"Validation.Values", TableParameter::INT32_TABLE, true }, |
| }; |
| |
| static TableParameter WaveIntrinsicsMultiPrefixUintParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Keys", TableParameter::UINT32_TABLE, true }, |
| { L"Validation.Values", TableParameter::UINT32_TABLE, true }, |
| }; |
| |
| static TableParameter WaveIntrinsicsActiveBoolParameters[] = { |
| { L"ShaderOp.Name", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.NumInputSet", TableParameter::UINT, true }, |
| { L"Validation.InputSet1", TableParameter::BOOL_TABLE, true }, |
| { L"Validation.InputSet2", TableParameter::BOOL_TABLE, false }, |
| { L"Validation.InputSet3", TableParameter::BOOL_TABLE, false }, |
| }; |
| |
| static TableParameter CBufferTestHalfParameters[] = { |
| { L"Validation.InputSet", TableParameter::HALF_TABLE, true }, |
| }; |
| |
| static TableParameter DenormBinaryFPOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::STRING_TABLE, false }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter DenormTertiaryFPOpParameters[] = { |
| { L"ShaderOp.Target", TableParameter::STRING, true }, |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"ShaderOp.Arguments", TableParameter::STRING, true }, |
| { L"Validation.Input1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input2", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Input3", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected1", TableParameter::STRING_TABLE, true }, |
| { L"Validation.Expected2", TableParameter::STRING_TABLE, false }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::DOUBLE, true }, |
| }; |
| |
| static TableParameter PackUnpackOpParameters[] = { |
| { L"ShaderOp.Text", TableParameter::STRING, true }, |
| { L"Validation.Type", TableParameter::STRING, true }, |
| { L"Validation.Tolerance", TableParameter::UINT, true }, |
| { L"Validation.Input", TableParameter::UINT32_TABLE, true }, |
| }; |
| |
// Recognizes a literal bit pattern written with a "0x" (hexadecimal) or "0b"
// (binary) prefix. Spaces anywhere in `str` are ignored.
//
// On a match the parsed number, truncated to 16 bits, is stored in `*value`
// and true is returned. Otherwise false is returned and `*value` is left
// untouched. `const wchar_t *` is the same type as PCWSTR.
static bool IsHexString(const wchar_t *str, uint16_t *value) {
  std::wstring text(str);
  text.erase(std::remove(text.begin(), text.end(), L' '), text.end());
  const wchar_t *literal = text.c_str();

  if (wcsncmp(literal, L"0x", 2) == 0) {
    // Base 16 accepts (and skips) the "0x" prefix itself.
    *value = static_cast<uint16_t>(wcstol(literal, nullptr, 16));
    return true;
  }
  if (wcsncmp(literal, L"0b", 2) == 0) {
    // wcstol has no notion of a "0b" prefix (with base 0 it would stop after
    // the leading '0' and yield 0), so skip the prefix and parse in base 2.
    *value = static_cast<uint16_t>(wcstol(literal + 2, nullptr, 2));
    return true;
  }
  return false;
}
| |
| static HRESULT ParseDataToFloat(PCWSTR str, float &value) { |
| std::wstring wString(str); |
| wString.erase(std::remove(wString.begin(), wString.end(), L' '), wString.end()); |
| PCWSTR wstr = wString.data(); |
| if (_wcsicmp(wstr, L"NaN") == 0) { |
| value = NAN; |
| } else if (_wcsicmp(wstr, L"-inf") == 0) { |
| value = -(INFINITY); |
| } else if (_wcsicmp(wstr, L"inf") == 0) { |
| value = INFINITY; |
| } else if (_wcsicmp(wstr, L"-denorm") == 0) { |
| value = -(FLT_MIN / 2); |
| } else if (_wcsicmp(wstr, L"denorm") == 0) { |
| value = FLT_MIN / 2; |
| } else if (_wcsicmp(wstr, L"-0.0f") == 0 || _wcsicmp(wstr, L"-0.0") == 0 || |
| _wcsicmp(wstr, L"-0") == 0) { |
| value = -0.0f; |
| } else if (_wcsicmp(wstr, L"0.0f") == 0 || _wcsicmp(wstr, L"0.0") == 0 || |
| _wcsicmp(wstr, L"0") == 0) { |
| value = 0.0f; |
| } else if (_wcsnicmp(wstr, L"0x", 2) == 0) { // For hex values, take values literally |
| unsigned temp_i = std::stoul(wstr, nullptr, 16); |
| value = (float&)temp_i; |
| } |
| else { |
| // evaluate the expression of wstring |
| double val = _wtof(wstr); |
| if (val == 0) { |
| LogErrorFmt(L"Failed to parse parameter %s to float", wstr); |
| return E_FAIL; |
| } |
| value = (float)val; |
| } |
| return S_OK; |
| } |
| |
| static HRESULT ParseDataToInt(PCWSTR str, int &value) { |
| std::wstring wString(str); |
| wString.erase(std::remove(wString.begin(), wString.end(), L' '), wString.end()); |
| PCWSTR wstr = wString.data(); |
| // evaluate the expression of string |
| if (_wcsicmp(wstr, L"0.0") == 0 || _wcsicmp(wstr, L"0") == 0) { |
| value = 0; |
| return S_OK; |
| } |
| int val = _wtoi(wstr); |
| if (val == 0) { |
| LogErrorFmt(L"Failed to parse parameter %s to int", wstr); |
| return E_FAIL; |
| } |
| value = val; |
| return S_OK; |
| } |
| |
| static HRESULT ParseDataToUint(PCWSTR str, unsigned int &value) { |
| std::wstring wString(str); |
| wString.erase(std::remove(wString.begin(), wString.end(), L' '), wString.end()); |
| PCWSTR wstr = wString.data(); |
| // evaluate the expression of string |
| if (_wcsicmp(wstr, L"0") == 0 || _wcsicmp(wstr, L"0x00000000") == 0) { |
| value = 0; |
| return S_OK; |
| } |
| wchar_t *end; |
| unsigned int val = std::wcstoul(wstr, &end, 0); |
| if (val == 0) { |
| LogErrorFmt(L"Failed to parse parameter %s to int", wstr); |
| return E_FAIL; |
| } |
| value = val; |
| return S_OK; |
| } |
| |
| static HRESULT ParseDataToVectorFloat(PCWSTR str, float *ptr, size_t count) { |
| std::wstring wstr(str); |
| size_t curPosition = 0; |
| // parse a string of dot product separated by commas |
| for (size_t i = 0; i < count; ++i) { |
| size_t nextPosition = wstr.find(L",", curPosition); |
| if (FAILED(ParseDataToFloat( |
| wstr.substr(curPosition, nextPosition - curPosition).data(), |
| *(ptr + i)))) { |
| return E_FAIL; |
| } |
| curPosition = nextPosition + 1; |
| } |
| return S_OK; |
| } |
| |
| static HRESULT ParseDataToVectorHalf(PCWSTR str, uint16_t *ptr, size_t count) { |
| std::wstring wstr(str); |
| size_t curPosition = 0; |
| // parse a string of dot product separated by commas |
| for (size_t i = 0; i < count; ++i) { |
| size_t nextPosition = wstr.find(L",", curPosition); |
| float floatValue; |
| if (FAILED(ParseDataToFloat( |
| wstr.substr(curPosition, nextPosition - curPosition).data(), floatValue))) { |
| return E_FAIL; |
| } |
| *(ptr + i) = ConvertFloat32ToFloat16(floatValue); |
| curPosition = nextPosition + 1; |
| } |
| return S_OK; |
| } |
| |
| static HRESULT ParseDataToVectorUint(PCWSTR str, unsigned int *ptr, size_t count) { |
| std::wstring wstr(str); |
| size_t curPosition = 0; |
| // parse a string of dot product separated by commas |
| for (size_t i = 0; i < count; ++i) { |
| size_t nextPosition = wstr.find(L",", curPosition); |
| if (FAILED(ParseDataToUint( |
| wstr.substr(curPosition, nextPosition - curPosition).data(), |
| *(ptr + i)))) { |
| return E_FAIL; |
| } |
| curPosition = nextPosition + 1; |
| } |
| return S_OK; |
| } |
| |
| HRESULT TableParameterHandler::ParseTableRow() { |
| TableParameter *table = m_table; |
| for (unsigned int i = 0; i < m_tableSize; ++i) { |
| switch (table[i].m_type) { |
| case TableParameter::INT8: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_int32)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int16 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_int8 = (int8_t)(table[i].m_int32); |
| break; |
| case TableParameter::INT16: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_int32)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int16 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_int16 = (short)(table[i].m_int32); |
| break; |
| case TableParameter::INT32: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_int32)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| break; |
| case TableParameter::UINT: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_uint)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| break; |
| case TableParameter::DOUBLE: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, table[i].m_double)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| break; |
| case TableParameter::STRING: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_str)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| break; |
| case TableParameter::BOOL: |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, |
| table[i].m_str)) && table[i].m_bool) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| break; |
| case TableParameter::INT8_TABLE: { |
| WEX::TestExecution::TestDataArray<int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| // TryGetValue does not suppport reading from int8 |
| table[i].m_int8Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_int8Table[j] = (int8_t)tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::INT16_TABLE: { |
| WEX::TestExecution::TestDataArray<int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| // TryGetValue does not suppport reading from int8 |
| table[i].m_int16Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_int16Table[j] = (int16_t)tempTable[j]; |
| } |
| break; |
| }case TableParameter::INT32_TABLE: { |
| WEX::TestExecution::TestDataArray<int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_int32Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_int32Table[j] = tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::UINT8_TABLE: { |
| WEX::TestExecution::TestDataArray<int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| // TryGetValue does not suppport reading from int8 |
| table[i].m_int8Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_int8Table[j] = (uint8_t)tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::UINT16_TABLE: { |
| WEX::TestExecution::TestDataArray<int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| // TryGetValue does not suppport reading from int8 |
| table[i].m_uint16Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_uint16Table[j] = (uint16_t)tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::UINT32_TABLE: { |
| WEX::TestExecution::TestDataArray<unsigned int> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_uint32Table.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_uint32Table[j] = tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::FLOAT_TABLE: { |
| WEX::TestExecution::TestDataArray<WEX::Common::String> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_floatTable.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| ParseDataToFloat(tempTable[j], table[i].m_floatTable[j]); |
| } |
| break; |
| } |
| case TableParameter::HALF_TABLE: { |
| WEX::TestExecution::TestDataArray<WEX::Common::String> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_halfTable.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| uint16_t value = 0; |
| if (IsHexString(tempTable[j], &value)) { |
| table[i].m_halfTable[j] = value; |
| } |
| else { |
| float val; |
| ParseDataToFloat(tempTable[j], val); |
| if (isdenorm(val)) |
| table[i].m_halfTable[j] = signbit(val) ? Float16NegDenorm : Float16PosDenorm; |
| else |
| table[i].m_halfTable[j] = ConvertFloat32ToFloat16(val); |
| } |
| } |
| break; |
| } |
| case TableParameter::DOUBLE_TABLE: { |
| WEX::TestExecution::TestDataArray<double> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_doubleTable.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_doubleTable[j] = tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::BOOL_TABLE: { |
| WEX::TestExecution::TestDataArray<bool> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_boolTable.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_boolTable[j] = tempTable[j]; |
| } |
| break; |
| } |
| case TableParameter::STRING_TABLE: { |
| WEX::TestExecution::TestDataArray<WEX::Common::String> tempTable; |
| if (FAILED(WEX::TestExecution::TestData::TryGetValue( |
| table[i].m_name, tempTable)) && table[i].m_required) { |
| // TryGetValue does not suppport reading from int8 |
| LogErrorFmt(L"Failed to get %s", table[i].m_name); |
| return E_FAIL; |
| } |
| table[i].m_StringTable.resize(tempTable.GetSize()); |
| for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { |
| table[i].m_StringTable[j] = tempTable[j]; |
| } |
| break; |
| } |
| default: |
| DXASSERT_NOMSG("Invalid Parameter Type"); |
| } |
| if (errno == ERANGE) { |
| LogErrorFmt(L"got out of range value for table %s", table[i].m_name); |
| return E_FAIL; |
| } |
| } |
| return S_OK; |
| } |
| |
| static void VerifyOutputWithExpectedValueInt(int output, int ref, int tolerance) { |
| VERIFY_IS_TRUE(output - ref <= tolerance && ref - output <= tolerance); |
| } |
| |
| static void VerifyOutputWithExpectedValueUInt(uint32_t output, uint32_t ref, uint32_t tolerance) { |
| VERIFY_IS_TRUE(output - ref <= tolerance && ref - output <= tolerance); |
| } |
| |
| static void VerifyOutputWithExpectedValueUInt4(XMUINT4 output, XMUINT4 ref) { |
| VERIFY_ARE_EQUAL(output.x, ref.x); |
| VERIFY_ARE_EQUAL(output.y, ref.y); |
| VERIFY_ARE_EQUAL(output.z, ref.z); |
| VERIFY_ARE_EQUAL(output.w, ref.w); |
| } |
| |
| static void VerifyOutputWithExpectedValueFloat( |
| float output, float ref, LPCWSTR type, double tolerance, |
| hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { |
| if (_wcsicmp(type, L"Relative") == 0) { |
| VERIFY_IS_TRUE(CompareFloatRelativeEpsilon(output, ref, (int)tolerance, mode)); |
| } else if (_wcsicmp(type, L"Epsilon") == 0) { |
| VERIFY_IS_TRUE(CompareFloatEpsilon(output, ref, (float)tolerance, mode)); |
| } else if (_wcsicmp(type, L"ULP") == 0) { |
| VERIFY_IS_TRUE(CompareFloatULP(output, ref, (int)tolerance, mode)); |
| } else { |
| LogErrorFmt(L"Failed to read comparison type %S", type); |
| } |
| } |
| |
| static bool CompareOutputWithExpectedValueFloat( |
| float output, float ref, LPCWSTR type, double tolerance, |
| hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { |
| if (_wcsicmp(type, L"Relative") == 0) { |
| return CompareFloatRelativeEpsilon(output, ref, (int)tolerance, mode); |
| } else if (_wcsicmp(type, L"Epsilon") == 0) { |
| return CompareFloatEpsilon(output, ref, (float)tolerance, mode); |
| } else if (_wcsicmp(type, L"ULP") == 0) { |
| return CompareFloatULP(output, ref, (int)tolerance, mode); |
| } else { |
| LogErrorFmt(L"Failed to read comparison type %S", type); |
| return false; |
| } |
| } |
| |
| static void VerifyOutputWithExpectedValueHalf( |
| uint16_t output, uint16_t ref, LPCWSTR type, double tolerance) { |
| if (_wcsicmp(type, L"Relative") == 0) { |
| VERIFY_IS_TRUE(CompareHalfRelativeEpsilon(output, ref, (int)tolerance)); |
| } |
| else if (_wcsicmp(type, L"Epsilon") == 0) { |
| VERIFY_IS_TRUE(CompareHalfEpsilon(output, ref, (float)tolerance)); |
| } |
| else if (_wcsicmp(type, L"ULP") == 0) { |
| VERIFY_IS_TRUE(CompareHalfULP(output, ref, (float)tolerance)); |
| } |
| else { |
| LogErrorFmt(L"Failed to read comparison type %S", type); |
| } |
| } |
| |
| TEST_F(ExecutionTest, UnaryFloatOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| int tableSize = sizeof(UnaryFPOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(UnaryFPOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| unsigned int WarpVersion = handler.GetTableParamByName(L"Warp.Version")->m_uint; |
| if (GetTestParamUseWARP(true) && !IsValidWarpDllVersion(WarpVersion)) { |
| return; |
| } |
| |
| std::vector<float> *Validation_Input = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_floatTable); |
| std::vector<float> *Validation_Expected = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_floatTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| |
| size_t count = Validation_Input->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "UnaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); |
| size_t size = sizeof(SUnaryFPOp) * count; |
| Data.resize(size); |
| SUnaryFPOp *pPrimitives = (SUnaryFPOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SUnaryFPOp *p = &pPrimitives[i]; |
| p->input = (*Validation_Input)[i % Validation_Input->size()]; |
| } |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SUnaryFPOp", &data); |
| |
| SUnaryFPOp *pPrimitives = (SUnaryFPOp*)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| SUnaryFPOp *p = &pPrimitives[i]; |
| float val = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt( |
| L"element #%u, input = %6.8f, output = %6.8f, expected = %6.8f", i, |
| p->input, p->output, val); |
| VerifyOutputWithExpectedValueFloat(p->output, val, Validation_Type, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, BinaryFloatOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| int tableSize = sizeof(BinaryFPOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(BinaryFPOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<float> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_floatTable); |
| std::vector<float> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_floatTable); |
| |
| std::vector<float> *Validation_Expected1 = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_floatTable); |
| |
| std::vector<float> *Validation_Expected2 = |
| &(handler.GetTableParamByName(L"Validation.Expected2")->m_floatTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); |
| size_t size = sizeof(SBinaryFPOp) * count; |
| Data.resize(size); |
| SBinaryFPOp *pPrimitives = (SBinaryFPOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SBinaryFPOp *p = &pPrimitives[i]; |
| p->input1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| p->input2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryFPOp", &data); |
| |
| SBinaryFPOp *pPrimitives = (SBinaryFPOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| unsigned numExpected = Validation_Expected2->size() == 0 ? 1 : 2; |
| if (numExpected == 2) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryFPOp *p = &pPrimitives[i]; |
| float val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| float val2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output1 = " |
| L"%6.8f, expected1 = %6.8f, output2 = %6.8f, expected2 = %6.8f", |
| i, p->input1, p->input2, p->output1, val1, p->output2, |
| val2); |
| VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type, |
| Validation_Tolerance); |
| VerifyOutputWithExpectedValueFloat(p->output2, val2, Validation_Type, |
| Validation_Tolerance); |
| } |
| } |
| else if (numExpected == 1) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryFPOp *p = &pPrimitives[i]; |
| float val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output1 = " |
| L"%6.8f, expected1 = %6.8f", |
| i, p->input1, p->input2, p->output1, val1); |
| VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type, |
| Validation_Tolerance); |
| } |
| } |
| else { |
| LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
| } |
| } |
| |
| TEST_F(ExecutionTest, TertiaryFloatOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| |
| int tableSize = sizeof(TertiaryFPOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(TertiaryFPOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<float> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_floatTable); |
| std::vector<float> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_floatTable); |
| std::vector<float> *Validation_Input3 = |
| &(handler.GetTableParamByName(L"Validation.Input3")->m_floatTable); |
| |
| std::vector<float> *Validation_Expected = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_floatTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "TertiaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); |
| size_t size = sizeof(STertiaryFPOp) * count; |
| Data.resize(size); |
| STertiaryFPOp *pPrimitives = (STertiaryFPOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| STertiaryFPOp *p = &pPrimitives[i]; |
| p->input1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| p->input2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| p->input3 = (*Validation_Input3)[i % Validation_Input3->size()]; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("STertiaryFPOp", &data); |
| |
| STertiaryFPOp *pPrimitives = (STertiaryFPOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| for (unsigned i = 0; i < count; ++i) { |
| STertiaryFPOp *p = &pPrimitives[i]; |
| float val = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output1 = " |
| L"%6.8f, expected = %6.8f", |
| i, p->input1, p->input2, p->input3, p->output, val); |
| VerifyOutputWithExpectedValueFloat(p->output, val, Validation_Type, |
| Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, UnaryHalfOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
| return; |
| } |
| |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| // Read data from the table |
| int tableSize = sizeof(UnaryHalfOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(UnaryHalfOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
| unsigned int WarpVersion = handler.GetTableParamByName(L"Warp.Version")->m_uint; |
| if (GetTestParamUseWARP(true) && !IsValidWarpDllVersion(WarpVersion)) { |
| return; |
| } |
| |
| std::vector<uint16_t> *Validation_Input = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_halfTable); |
| std::vector<uint16_t> *Validation_Expected = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_halfTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| |
| size_t count = Validation_Input->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "UnaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryFPOp")); |
| size_t size = sizeof(SUnaryHalfOp) * count; |
| Data.resize(size); |
| SUnaryHalfOp *pPrimitives = (SUnaryHalfOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SUnaryHalfOp *p = &pPrimitives[i]; |
| p->input = (*Validation_Input)[i % Validation_Input->size()]; |
| } |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SUnaryFPOp", &data); |
| |
| SUnaryHalfOp *pPrimitives = (SUnaryHalfOp*)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| SUnaryHalfOp *p = &pPrimitives[i]; |
| uint16_t expected = (*Validation_Expected)[i % Validation_Input->size()]; |
| LogCommentFmt(L"element #%u, input = %6.8f(0x%04x), output = " |
| L"%6.8f(0x%04x), expected = %6.8f(0x%04x)", |
| i, ConvertFloat16ToFloat32(p->input), p->input, |
| ConvertFloat16ToFloat32(p->output), p->output, |
| ConvertFloat16ToFloat32(expected), expected); |
| VerifyOutputWithExpectedValueHalf(p->output, expected, Validation_Type, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, BinaryHalfOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
| return; |
| } |
| |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| // Read data from the table |
| int tableSize = sizeof(BinaryHalfOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(BinaryHalfOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
| std::vector<uint16_t> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_halfTable); |
| std::vector<uint16_t> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_halfTable); |
| |
| std::vector<uint16_t> *Validation_Expected1 = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_halfTable); |
| |
| std::vector<uint16_t> *Validation_Expected2 = |
| &(handler.GetTableParamByName(L"Validation.Expected2")->m_halfTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); |
| size_t size = sizeof(SBinaryHalfOp) * count; |
| Data.resize(size); |
| SBinaryHalfOp *pPrimitives = (SBinaryHalfOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SBinaryHalfOp *p = &pPrimitives[i]; |
| p->input1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| p->input2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryFPOp", &data); |
| |
| SBinaryHalfOp *pPrimitives = (SBinaryHalfOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| unsigned numExpected = Validation_Expected2->size() == 0 ? 1 : 2; |
| if (numExpected == 2) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryHalfOp *p = &pPrimitives[i]; |
| uint16_t expected1 = (*Validation_Expected1)[i % Validation_Input1->size()]; |
| uint16_t expected2 = (*Validation_Expected2)[i % Validation_Input2->size()]; |
| LogCommentFmt(L"element #%u, input1 = %6.8f(0x%04x), input2 = %6.8f(0x%04x), output1 = " |
| L"%6.8f(0x%04x), expected1 = %6.8f(0x%04x), output2 = %6.8f(0x%04x), expected2 = %6.8f(0x%04x)", |
| i, ConvertFloat16ToFloat32(p->input1), p->input1, |
| ConvertFloat16ToFloat32(p->input2), p->input2, |
| ConvertFloat16ToFloat32(p->output1), p->output1, |
| ConvertFloat16ToFloat32(p->output2), p->output2, |
| ConvertFloat16ToFloat32(expected1), expected1, |
| ConvertFloat16ToFloat32(expected2), expected2); |
| VerifyOutputWithExpectedValueHalf(p->output1, expected1, Validation_Type, Validation_Tolerance); |
| VerifyOutputWithExpectedValueHalf(p->output2, expected2, Validation_Type, Validation_Tolerance); |
| } |
| } |
| else if (numExpected == 1) { |
| for (unsigned i = 0; i < count; ++i) { |
| uint16_t expected = (*Validation_Expected1)[i % Validation_Input1->size()]; |
| SBinaryHalfOp *p = &pPrimitives[i]; |
| LogCommentFmt(L"element #%u, input = %6.8f(0x%04x), output = " |
| L"%6.8f(0x%04x), expected = %6.8f(0x%04x)", |
| i, ConvertFloat16ToFloat32(p->input1), p->input1, |
| ConvertFloat16ToFloat32(p->output1), p->output1, |
| ConvertFloat16ToFloat32(expected), expected); |
| VerifyOutputWithExpectedValueHalf(p->output1, expected, Validation_Type, Validation_Tolerance); |
| } |
| } |
| else { |
| LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
| } |
| } |
| |
| TEST_F(ExecutionTest, TertiaryHalfOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
| return; |
| } |
| |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| // Read data from the table |
| int tableSize = sizeof(TertiaryHalfOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(TertiaryHalfOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
| std::vector<uint16_t> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_halfTable); |
| std::vector<uint16_t> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_halfTable); |
| std::vector<uint16_t> *Validation_Input3 = |
| &(handler.GetTableParamByName(L"Validation.Input3")->m_halfTable); |
| |
| std::vector<uint16_t> *Validation_Expected = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_halfTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "TertiaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); |
| size_t size = sizeof(STertiaryHalfOp) * count; |
| Data.resize(size); |
| STertiaryHalfOp *pPrimitives = (STertiaryHalfOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| STertiaryHalfOp *p = &pPrimitives[i]; |
| p->input1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| p->input2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| p->input3 = (*Validation_Input3)[i % Validation_Input3->size()]; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("STertiaryFPOp", &data); |
| |
| STertiaryHalfOp *pPrimitives = (STertiaryHalfOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| for (unsigned i = 0; i < count; ++i) { |
| STertiaryHalfOp *p = &pPrimitives[i]; |
| uint16_t expected = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input1 = %6.8f(0x%04x), input2 = %6.8f(0x%04x), input3 = %6.8f(0x%04x), output = " |
| L"%6.8f(0x%04x), expected = %6.8f(0x%04x)", |
| i, ConvertFloat16ToFloat32(p->input1), p->input1, |
| ConvertFloat16ToFloat32(p->input2), p->input2, |
| ConvertFloat16ToFloat32(p->input3), p->input3, |
| ConvertFloat16ToFloat32(p->output), p->output, |
| ConvertFloat16ToFloat32(expected), expected); |
| VerifyOutputWithExpectedValueHalf(p->output, expected, Validation_Type, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, UnaryIntOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| |
| int tableSize = sizeof(UnaryIntOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(UnaryIntOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<int> *Validation_Input = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_int32Table; |
| std::vector<int> *Validation_Expected = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_int32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "UnaryIntOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); |
| size_t size = sizeof(SUnaryIntOp) * count; |
| Data.resize(size); |
| SUnaryIntOp *pPrimitives = (SUnaryIntOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SUnaryIntOp *p = &pPrimitives[i]; |
| int val = (*Validation_Input)[i % Validation_Input->size()]; |
| p->input = val; |
| } |
| // use shader data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SUnaryIntOp", &data); |
| |
| SUnaryIntOp *pPrimitives = (SUnaryIntOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| SUnaryIntOp *p = &pPrimitives[i]; |
| int val = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input = %11i(0x%08x), output = %11i(0x%08x), " |
| L"expected = %11i(0x%08x)", |
| i, p->input, p->input, p->output, p->output, val, val); |
| VerifyOutputWithExpectedValueInt(p->output, val, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, UnaryUintOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| |
| int tableSize = sizeof(UnaryUintOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(UnaryUintOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<unsigned int> *Validation_Input = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Expected = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_uint32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "UnaryUintOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); |
| size_t size = sizeof(SUnaryUintOp) * count; |
| Data.resize(size); |
| SUnaryUintOp *pPrimitives = (SUnaryUintOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SUnaryUintOp *p = &pPrimitives[i]; |
| unsigned int val = (*Validation_Input)[i % Validation_Input->size()]; |
| p->input = val; |
| } |
| // use shader data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SUnaryUintOp", &data); |
| |
| SUnaryUintOp *pPrimitives = (SUnaryUintOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| SUnaryUintOp *p = &pPrimitives[i]; |
| unsigned int val = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input = %11u(0x%08x), output = %11u(0x%08x), " |
| L"expected = %11u(0x%08x)", |
| i, p->input, p->input, p->output, p->output, val, val); |
| VerifyOutputWithExpectedValueInt(p->output, val, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, BinaryIntOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| size_t tableSize = sizeof(BinaryIntOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(BinaryIntOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| |
| std::vector<int> *Validation_Input1 = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_int32Table; |
| std::vector<int> *Validation_Input2 = |
| &handler.GetTableParamByName(L"Validation.Input2")->m_int32Table; |
| std::vector<int> *Validation_Expected1 = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_int32Table; |
| std::vector<int> *Validation_Expected2 = |
| &handler.GetTableParamByName(L"Validation.Expected2")->m_int32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input1->size(); |
| |
| size_t numExpected = Validation_Expected2->size() == 0 ? 1 : 2; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryIntOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); |
| size_t size = sizeof(SBinaryIntOp) * count; |
| Data.resize(size); |
| SBinaryIntOp *pPrimitives = (SBinaryIntOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SBinaryIntOp *p = &pPrimitives[i]; |
| int val1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| int val2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| p->input1 = val1; |
| p->input2 = val2; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryIntOp", &data); |
| |
| SBinaryIntOp *pPrimitives = (SBinaryIntOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| if (numExpected == 2) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryIntOp *p = &pPrimitives[i]; |
| int val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| int val2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11i(0x%08x), input2 = " |
| L"%11i(0x%08x), output1 = " |
| L"%11i(0x%08x), expected1 = %11i(0x%08x), output2 = " |
| L"%11i(0x%08x), expected2 = %11i(0x%08x)", |
| i, p->input1, p->input1, p->input2, p->input2, p->output1, |
| p->output1, val1, val1, p->output2, p->output2, val2, |
| val2); |
| VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
| VerifyOutputWithExpectedValueInt(p->output2, val2, Validation_Tolerance); |
| } |
| } |
| else if (numExpected == 1) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryIntOp *p = &pPrimitives[i]; |
| int val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11i(0x%08x), input2 = " |
| L"%11i(0x%08x), output = " |
| L"%11i(0x%08x), expected = %11i(0x%08x)", i, |
| p->input1, p->input1, p->input2, p->input2, |
| p->output1, p->output1, val1, val1); |
| VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
| } |
| } |
| else { |
| LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
| } |
| } |
| |
| TEST_F(ExecutionTest, TertiaryIntOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| size_t tableSize = sizeof(TertiaryIntOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(TertiaryIntOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<int> *Validation_Input1 = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_int32Table; |
| std::vector<int> *Validation_Input2 = |
| &handler.GetTableParamByName(L"Validation.Input2")->m_int32Table; |
| std::vector<int> *Validation_Input3 = |
| &handler.GetTableParamByName(L"Validation.Input3")->m_int32Table; |
| std::vector<int> *Validation_Expected = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_int32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "TertiaryIntOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); |
| size_t size = sizeof(STertiaryIntOp) * count; |
| Data.resize(size); |
| STertiaryIntOp *pPrimitives = (STertiaryIntOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| STertiaryIntOp *p = &pPrimitives[i]; |
| int val1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| int val2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| int val3 = (*Validation_Input3)[i % Validation_Input3->size()]; |
| p->input1 = val1; |
| p->input2 = val2; |
| p->input3 = val3; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("STertiaryIntOp", &data); |
| |
| STertiaryIntOp *pPrimitives = (STertiaryIntOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| STertiaryIntOp *p = &pPrimitives[i]; |
| int val1 = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11i(0x%08x), input2 = " |
| L"%11i(0x%08x), input3= %11i(0x%08x), output = " |
| L"%11i(0x%08x), expected = %11i(0x%08x)", |
| i, p->input1, p->input1, p->input2, p->input2, |
| p->input3, p->input3, p->output, p->output, val1, |
| val1); |
| VerifyOutputWithExpectedValueInt(p->output, val1, Validation_Tolerance); |
| } |
| } |
| |
| TEST_F(ExecutionTest, BinaryUintOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| size_t tableSize = sizeof(BinaryUintOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(BinaryUintOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| |
| std::vector<unsigned int> *Validation_Input1 = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Input2 = |
| &handler.GetTableParamByName(L"Validation.Input2")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Expected1 = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Expected2 = |
| &handler.GetTableParamByName(L"Validation.Expected2")->m_uint32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input1->size(); |
| int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryUintOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); |
| size_t size = sizeof(SBinaryUintOp) * count; |
| Data.resize(size); |
| SBinaryUintOp *pPrimitives = (SBinaryUintOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SBinaryUintOp *p = &pPrimitives[i]; |
| unsigned int val1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| unsigned int val2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| p->input1 = val1; |
| p->input2 = val2; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryUintOp", &data); |
| |
| SBinaryUintOp *pPrimitives = (SBinaryUintOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| if (numExpected == 2) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryUintOp *p = &pPrimitives[i]; |
| unsigned int val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| unsigned int val2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11u(0x%08x), input2 = " |
| L"%11u(0x%08x), output1 = " |
| L"%11u(0x%08x), expected1 = %11u(0x%08x), output2 = " |
| L"%11u(0x%08x), expected2 = %11u(0x%08x)", |
| i, p->input1, p->input1, p->input2, p->input2, p->output1, |
| p->output1, val1, val1, p->output2, p->output2, val2, |
| val2); |
| VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
| VerifyOutputWithExpectedValueInt(p->output2, val2, Validation_Tolerance); |
| } |
| } |
| else if (numExpected == 1) { |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryUintOp *p = &pPrimitives[i]; |
| unsigned int val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11u(0x%08x), input2 = " |
| L"%11u(0x%08x), output = " |
| L"%11u(0x%08x), expected = %11u(0x%08x)", i, |
| p->input1, p->input1, p->input2, p->input2, |
| p->output1, p->output1, val1, val1); |
| VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
| } |
| } |
| else { |
| LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
| } |
| } |
| |
| TEST_F(ExecutionTest, TertiaryUintOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| // Read data from the table |
| size_t tableSize = sizeof(TertiaryUintOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(TertiaryUintOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<unsigned int> *Validation_Input1 = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Input2 = |
| &handler.GetTableParamByName(L"Validation.Input2")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Input3 = |
| &handler.GetTableParamByName(L"Validation.Input3")->m_uint32Table; |
| std::vector<unsigned int> *Validation_Expected = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_uint32Table; |
| int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
| size_t count = Validation_Input1->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "TertiaryUintOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); |
| size_t size = sizeof(STertiaryUintOp) * count; |
| Data.resize(size); |
| STertiaryUintOp *pPrimitives = (STertiaryUintOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| STertiaryUintOp *p = &pPrimitives[i]; |
| unsigned int val1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| unsigned int val2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| unsigned int val3 = (*Validation_Input3)[i % Validation_Input3->size()]; |
| p->input1 = val1; |
| p->input2 = val2; |
| p->input3 = val3; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("STertiaryUintOp", &data); |
| |
| STertiaryUintOp *pPrimitives = (STertiaryUintOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (unsigned i = 0; i < count; ++i) { |
| STertiaryUintOp *p = &pPrimitives[i]; |
| unsigned int val1 = (*Validation_Expected)[i % Validation_Expected->size()]; |
| LogCommentFmt(L"element #%u, input1 = %11u(0x%08x), input2 = " |
| L"%11u(0x%08x), input3 = %11u(0x%08x), output = " |
| L"%11u(0x%08x), expected = %11u(0x%08x)", i, |
| p->input1, p->input1, p->input2, p->input2, p->input3, p->input3, |
| p->output, p->output, val1, val1); |
| VerifyOutputWithExpectedValueInt(p->output, val1, Validation_Tolerance); |
| } |
| } |
| |
| // 16 bit integer type tests |
| // Table-driven check of a unary operation on 16-bit signed integers: feeds |
| // Validation.Input1 through the "UnaryIntOp" shader op and compares every |
| // read-back output with Validation.Expected1 within Validation.Tolerance. |
| TEST_F(ExecutionTest, UnaryInt16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) |
|     return; |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Shader description and validation data come from the parameter table. |
|   int tableSize = sizeof(UnaryInt16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(UnaryInt16OpParameters, tableSize); |
| |
|   CW2A shaderTarget(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A shaderText(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A shaderArgs(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<short> &inputs = |
|       handler.GetTableParamByName(L"Validation.Input1")->m_int16Table; |
|   std::vector<short> &expected = |
|       handler.GetTableParamByName(L"Validation.Expected1")->m_int16Table; |
|   int tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t numElements = inputs.size(); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "UnaryIntOp", |
|       // Invoked while the test creates its resources: fills the input data |
|       // and substitutes the shader taken from the table. |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryIntOp")); |
|         Data.resize(sizeof(SUnaryInt16Op) * numElements); |
|         SUnaryInt16Op *elements = (SUnaryInt16Op *)Data.data(); |
|         for (size_t idx = 0; idx < numElements; ++idx) |
|           elements[idx].input = inputs[idx % inputs.size()]; |
| |
|         auto &shader = pShaderOp->Shaders.at(0); |
|         shader.Target = shaderTarget.m_psz; |
|         shader.Text = shaderText.m_psz; |
|         shader.Arguments = shaderArgs.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SUnaryIntOp", &data); |
| |
|   // Log and verify each element; the expected table wraps around if shorter. |
|   SUnaryInt16Op *results = (SUnaryInt16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (unsigned i = 0; i < numElements; ++i) { |
|     SUnaryInt16Op &r = results[i]; |
|     short want = expected[i % expected.size()]; |
|     LogCommentFmt(L"element #%u, input = %5hi(0x%08x), output = %5hi(0x%08x), " |
|                   L"expected = %5hi(0x%08x)", |
|                   i, r.input, r.input, r.output, r.output, want, want); |
|     VerifyOutputWithExpectedValueInt(r.output, want, tolerance); |
|   } |
| } |
| |
| // Table-driven check of a unary operation on 16-bit unsigned integers: feeds |
| // Validation.Input1 through the "UnaryUintOp" shader op and compares every |
| // read-back output with Validation.Expected1 within Validation.Tolerance. |
| TEST_F(ExecutionTest, UnaryUint16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) |
|     return; |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Shader description and validation data come from the parameter table. |
|   int tableSize = sizeof(UnaryUint16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(UnaryUint16OpParameters, tableSize); |
| |
|   CW2A shaderTarget(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A shaderText(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A shaderArgs(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<unsigned short> &inputs = |
|       handler.GetTableParamByName(L"Validation.Input1")->m_uint16Table; |
|   std::vector<unsigned short> &expected = |
|       handler.GetTableParamByName(L"Validation.Expected1")->m_uint16Table; |
|   int tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t numElements = inputs.size(); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "UnaryUintOp", |
|       // Invoked while the test creates its resources: fills the input data |
|       // and substitutes the shader taken from the table. |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SUnaryUintOp")); |
|         Data.resize(sizeof(SUnaryUint16Op) * numElements); |
|         SUnaryUint16Op *elements = (SUnaryUint16Op *)Data.data(); |
|         for (size_t idx = 0; idx < numElements; ++idx) |
|           elements[idx].input = inputs[idx % inputs.size()]; |
| |
|         auto &shader = pShaderOp->Shaders.at(0); |
|         shader.Target = shaderTarget.m_psz; |
|         shader.Text = shaderText.m_psz; |
|         shader.Arguments = shaderArgs.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SUnaryUintOp", &data); |
| |
|   // Log and verify each element; the expected table wraps around if shorter. |
|   SUnaryUint16Op *results = (SUnaryUint16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (unsigned i = 0; i < numElements; ++i) { |
|     SUnaryUint16Op &r = results[i]; |
|     unsigned short want = expected[i % expected.size()]; |
|     LogCommentFmt(L"element #%u, input = %5hu(0x%08x), output = %5hu(0x%08x), " |
|                   L"expected = %5hu(0x%08x)", |
|                   i, r.input, r.input, r.output, r.output, want, want); |
|     VerifyOutputWithExpectedValueInt(r.output, want, tolerance); |
|   } |
| } |
| |
| // Table-driven check of a binary operation on 16-bit signed integers. Feeds |
| // Validation.Input1/Input2 through the "BinaryIntOp" shader op and compares |
| // the read-back outputs with Validation.Expected1 and, when that table is not |
| // empty, Validation.Expected2, within Validation.Tolerance. |
| TEST_F(ExecutionTest, BinaryInt16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
|     return; |
|   } |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Read data from the table |
|   size_t tableSize = sizeof(BinaryInt16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(BinaryInt16OpParameters, tableSize); |
| |
|   CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<short> *Validation_Input1 = |
|       &handler.GetTableParamByName(L"Validation.Input1")->m_int16Table; |
|   std::vector<short> *Validation_Input2 = |
|       &handler.GetTableParamByName(L"Validation.Input2")->m_int16Table; |
|   std::vector<short> *Validation_Expected1 = |
|       &handler.GetTableParamByName(L"Validation.Expected1")->m_int16Table; |
|   std::vector<short> *Validation_Expected2 = |
|       &handler.GetTableParamByName(L"Validation.Expected2")->m_int16Table; |
|   int Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t count = Validation_Input1->size(); |
| |
|   // An empty Expected2 table means the operation has a single output. |
|   // Declared as int (as BinaryUint16OpTest does) so that it matches the %i |
|   // specifier in the error message at the end of the test; passing a size_t |
|   // through a variadic formatter for %i is a type mismatch. |
|   int numExpected = Validation_Expected2->empty() ? 1 : 2; |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "BinaryIntOp", |
|       // this callback is called when the test |
|       // is creating the resource to run the test |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryIntOp")); |
|         size_t size = sizeof(SBinaryInt16Op) * count; |
|         Data.resize(size); |
|         SBinaryInt16Op *pPrimitives = (SBinaryInt16Op *)Data.data(); |
|         for (size_t i = 0; i < count; ++i) { |
|           SBinaryInt16Op *p = &pPrimitives[i]; |
|           // Input2 wraps around when it is shorter than Input1. |
|           p->input1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
|           p->input2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
|         } |
| |
|         // use shader from data table |
|         pShaderOp->Shaders.at(0).Target = Target.m_psz; |
|         pShaderOp->Shaders.at(0).Text = Text.m_psz; |
|         pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SBinaryIntOp", &data); |
| |
|   SBinaryInt16Op *pPrimitives = (SBinaryInt16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
| |
|   if (numExpected == 2) { |
|     // Dual-output operation: verify both outputs of every element. |
|     for (unsigned i = 0; i < count; ++i) { |
|       SBinaryInt16Op *p = &pPrimitives[i]; |
|       short val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
|       short val2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
|       LogCommentFmt(L"element #%u, input1 = %5hi(0x%08x), input2 = " |
|                     L"%5hi(0x%08x), output1 = " |
|                     L"%5hi(0x%08x), expected1 = %5hi(0x%08x), output2 = " |
|                     L"%5hi(0x%08x), expected2 = %5hi(0x%08x)", |
|                     i, p->input1, p->input1, p->input2, p->input2, p->output1, |
|                     p->output1, val1, val1, p->output2, p->output2, val2, |
|                     val2); |
|       VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
|       VerifyOutputWithExpectedValueInt(p->output2, val2, Validation_Tolerance); |
|     } |
|   } |
|   else if (numExpected == 1) { |
|     // Single-output operation: only output1 is checked. |
|     for (unsigned i = 0; i < count; ++i) { |
|       SBinaryInt16Op *p = &pPrimitives[i]; |
|       short val1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
|       LogCommentFmt(L"element #%u, input1 = %5hi(0x%08x), input2 = " |
|                     L"%5hi(0x%08x), output = " |
|                     L"%5hi(0x%08x), expected = %5hi(0x%08x)", i, |
|                     p->input1, p->input1, p->input2, p->input2, |
|                     p->output1, p->output1, val1, val1); |
|       VerifyOutputWithExpectedValueInt(p->output1, val1, Validation_Tolerance); |
|     } |
|   } |
|   else { |
|     LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
|   } |
| } |
| |
| // Table-driven check of a three-input operation on 16-bit signed integers: |
| // feeds Validation.Input1/2/3 through the "TertiaryIntOp" shader op and |
| // compares every read-back output with Validation.Expected1 within |
| // Validation.Tolerance. |
| TEST_F(ExecutionTest, TertiaryInt16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) |
|     return; |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Shader description and validation data come from the parameter table. |
|   size_t tableSize = sizeof(TertiaryInt16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(TertiaryInt16OpParameters, tableSize); |
| |
|   CW2A shaderTarget(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A shaderText(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A shaderArgs(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<short> &inA = |
|       handler.GetTableParamByName(L"Validation.Input1")->m_int16Table; |
|   std::vector<short> &inB = |
|       handler.GetTableParamByName(L"Validation.Input2")->m_int16Table; |
|   std::vector<short> &inC = |
|       handler.GetTableParamByName(L"Validation.Input3")->m_int16Table; |
|   std::vector<short> &expected = |
|       handler.GetTableParamByName(L"Validation.Expected1")->m_int16Table; |
|   int tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t numElements = inA.size(); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "TertiaryIntOp", |
|       // Invoked while the test creates its resources: fills the input data |
|       // and substitutes the shader taken from the table. |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryIntOp")); |
|         Data.resize(sizeof(STertiaryInt16Op) * numElements); |
|         STertiaryInt16Op *elements = (STertiaryInt16Op *)Data.data(); |
|         for (size_t idx = 0; idx < numElements; ++idx) { |
|           STertiaryInt16Op &e = elements[idx]; |
|           // Each input table wraps around independently. |
|           e.input1 = inA[idx % inA.size()]; |
|           e.input2 = inB[idx % inB.size()]; |
|           e.input3 = inC[idx % inC.size()]; |
|         } |
| |
|         auto &shader = pShaderOp->Shaders.at(0); |
|         shader.Target = shaderTarget.m_psz; |
|         shader.Text = shaderText.m_psz; |
|         shader.Arguments = shaderArgs.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("STertiaryIntOp", &data); |
| |
|   // Log and verify each element; the expected table wraps around if shorter. |
|   STertiaryInt16Op *results = (STertiaryInt16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (unsigned i = 0; i < numElements; ++i) { |
|     STertiaryInt16Op &r = results[i]; |
|     short want = expected[i % expected.size()]; |
|     LogCommentFmt(L"element #%u, input1 = %11i(0x%08x), input2 = " |
|                   L"%11i(0x%08x), input3= %11i(0x%08x), output = " |
|                   L"%11i(0x%08x), expected = %11i(0x%08x)", |
|                   i, r.input1, r.input1, r.input2, r.input2, |
|                   r.input3, r.input3, r.output, r.output, want, |
|                   want); |
|     VerifyOutputWithExpectedValueInt(r.output, want, tolerance); |
|   } |
| } |
| |
| // Table-driven check of a binary operation on 16-bit unsigned integers. Feeds |
| // Validation.Input1/Input2 through the "BinaryUintOp" shader op and compares |
| // the read-back outputs with Validation.Expected1 and, when that table is not |
| // empty, Validation.Expected2, within Validation.Tolerance. |
| TEST_F(ExecutionTest, BinaryUint16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) |
|     return; |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Shader description and validation data come from the parameter table. |
|   size_t tableSize = sizeof(BinaryUint16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(BinaryUint16OpParameters, tableSize); |
| |
|   CW2A shaderTarget(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A shaderText(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A shaderArgs(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<unsigned short> &lhs = |
|       handler.GetTableParamByName(L"Validation.Input1")->m_uint16Table; |
|   std::vector<unsigned short> &rhs = |
|       handler.GetTableParamByName(L"Validation.Input2")->m_uint16Table; |
|   std::vector<unsigned short> &expected1 = |
|       handler.GetTableParamByName(L"Validation.Expected1")->m_uint16Table; |
|   std::vector<unsigned short> &expected2 = |
|       handler.GetTableParamByName(L"Validation.Expected2")->m_uint16Table; |
|   int tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t numElements = lhs.size(); |
|   // An empty second expected table means the operation has one output. |
|   int numExpected = expected2.size() == 0 ? 1 : 2; |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "BinaryUintOp", |
|       // Invoked while the test creates its resources: fills the input data |
|       // and substitutes the shader taken from the table. |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryUintOp")); |
|         Data.resize(sizeof(SBinaryUint16Op) * numElements); |
|         SBinaryUint16Op *elements = (SBinaryUint16Op *)Data.data(); |
|         for (size_t idx = 0; idx < numElements; ++idx) { |
|           SBinaryUint16Op &e = elements[idx]; |
|           e.input1 = lhs[idx % lhs.size()]; |
|           e.input2 = rhs[idx % rhs.size()]; |
|         } |
| |
|         auto &shader = pShaderOp->Shaders.at(0); |
|         shader.Target = shaderTarget.m_psz; |
|         shader.Text = shaderText.m_psz; |
|         shader.Arguments = shaderArgs.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SBinaryUintOp", &data); |
| |
|   SBinaryUint16Op *results = (SBinaryUint16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   switch (numExpected) { |
|   case 2: |
|     // Dual-output operation: verify both outputs of every element. |
|     for (unsigned i = 0; i < numElements; ++i) { |
|       SBinaryUint16Op &r = results[i]; |
|       unsigned short want1 = expected1[i % expected1.size()]; |
|       unsigned short want2 = expected2[i % expected2.size()]; |
|       LogCommentFmt(L"element #%u, input1 = %5hu(0x%08x), input2 = " |
|                     L"%5hu(0x%08x), output1 = " |
|                     L"%5hu(0x%08x), expected1 = %5hu(0x%08x), output2 = " |
|                     L"%5hu(0x%08x), expected2 = %5hu(0x%08x)", |
|                     i, r.input1, r.input1, r.input2, r.input2, r.output1, |
|                     r.output1, want1, want1, r.output2, r.output2, want2, |
|                     want2); |
|       VerifyOutputWithExpectedValueInt(r.output1, want1, tolerance); |
|       VerifyOutputWithExpectedValueInt(r.output2, want2, tolerance); |
|     } |
|     break; |
|   case 1: |
|     // Single-output operation: only output1 is checked. |
|     for (unsigned i = 0; i < numElements; ++i) { |
|       SBinaryUint16Op &r = results[i]; |
|       unsigned short want1 = expected1[i % expected1.size()]; |
|       LogCommentFmt(L"element #%u, input1 = %5hu(0x%08x), input2 = " |
|                     L"%5hu(0x%08x), output = " |
|                     L"%5hu(0x%08x), expected = %5hu(0x%08x)", i, |
|                     r.input1, r.input1, r.input2, r.input2, |
|                     r.output1, r.output1, want1, want1); |
|       VerifyOutputWithExpectedValueInt(r.output1, want1, tolerance); |
|     } |
|     break; |
|   default: |
|     LogErrorFmt(L"Unexpected number of expected values for operation %i", numExpected); |
|     break; |
|   } |
| } |
| |
| // Table-driven check of a three-input operation on 16-bit unsigned integers: |
| // feeds Validation.Input1/2/3 through the "TertiaryUintOp" shader op and |
| // compares every read-back output with Validation.Expected1 within |
| // Validation.Tolerance. |
| TEST_F(ExecutionTest, TertiaryUint16OpTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.2 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) |
|     return; |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   // Shader description and validation data come from the parameter table. |
|   size_t tableSize = sizeof(TertiaryUint16OpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(TertiaryUint16OpParameters, tableSize); |
| |
|   CW2A shaderTarget(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A shaderText(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A shaderArgs(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<unsigned short> &inA = |
|       handler.GetTableParamByName(L"Validation.Input1")->m_uint16Table; |
|   std::vector<unsigned short> &inB = |
|       handler.GetTableParamByName(L"Validation.Input2")->m_uint16Table; |
|   std::vector<unsigned short> &inC = |
|       handler.GetTableParamByName(L"Validation.Input3")->m_uint16Table; |
|   std::vector<unsigned short> &expected = |
|       handler.GetTableParamByName(L"Validation.Expected1")->m_uint16Table; |
|   int tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; |
|   size_t numElements = inA.size(); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "TertiaryUintOp", |
|       // Invoked while the test creates its resources: fills the input data |
|       // and substitutes the shader taken from the table. |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryUintOp")); |
|         Data.resize(sizeof(STertiaryUint16Op) * numElements); |
|         STertiaryUint16Op *elements = (STertiaryUint16Op *)Data.data(); |
|         for (size_t idx = 0; idx < numElements; ++idx) { |
|           STertiaryUint16Op &e = elements[idx]; |
|           // Each input table wraps around independently. |
|           e.input1 = inA[idx % inA.size()]; |
|           e.input2 = inB[idx % inB.size()]; |
|           e.input3 = inC[idx % inC.size()]; |
|         } |
| |
|         auto &shader = pShaderOp->Shaders.at(0); |
|         shader.Target = shaderTarget.m_psz; |
|         shader.Text = shaderText.m_psz; |
|         shader.Arguments = shaderArgs.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("STertiaryUintOp", &data); |
| |
|   // Log and verify each element; the expected table wraps around if shorter. |
|   STertiaryUint16Op *results = (STertiaryUint16Op *)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (unsigned i = 0; i < numElements; ++i) { |
|     STertiaryUint16Op &r = results[i]; |
|     unsigned short want = expected[i % expected.size()]; |
|     LogCommentFmt(L"element #%u, input1 = %5hu(0x%08x), input2 = " |
|                   L"%5hu(0x%08x), input3 = %5hu(0x%08x), output = " |
|                   L"%5hu(0x%08x), expected = %5hu(0x%08x)", i, |
|                   r.input1, r.input1, r.input2, r.input2, r.input3, r.input3, |
|                   r.output, r.output, want, want); |
|     VerifyOutputWithExpectedValueInt(r.output, want, tolerance); |
|   } |
| } |
| |
| // Table-driven check of the dot2/dot3/dot4 results of the "DotOp" shader op. |
| // Each row of Validation.Input1/Input2 is parsed into a 4-float vector, and |
| // the three read-back results are compared with Validation.Expected1/2/3 |
| // using Validation.Type and Validation.Tolerance. |
| TEST_F(ExecutionTest, DotTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice)) { |
|     return; |
|   } |
| |
|   int tableSize = sizeof(DotOpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(DotOpParameters, tableSize); |
| |
|   CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
|   std::vector<WEX::Common::String> *Validation_Input1 = |
|       &handler.GetTableParamByName(L"Validation.Input1")->m_StringTable; |
|   std::vector<WEX::Common::String> *Validation_Input2 = |
|       &handler.GetTableParamByName(L"Validation.Input2")->m_StringTable; |
|   std::vector<WEX::Common::String> *Validation_dot2 = |
|       &handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable; |
|   std::vector<WEX::Common::String> *Validation_dot3 = |
|       &handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable; |
|   std::vector<WEX::Common::String> *Validation_dot4 = |
|       &handler.GetTableParamByName(L"Validation.Expected3")->m_StringTable; |
| |
|   PCWSTR Validation_type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
|   double tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
|   size_t count = Validation_Input1->size(); |
| |
|   // Every table below is indexed directly by element number (no wrap-around), |
|   // so each must hold at least as many rows as Input1; fail up front instead |
|   // of reading past the end of a shorter table. |
|   VERIFY_IS_TRUE(Validation_Input2->size() >= count); |
|   VERIFY_IS_TRUE(Validation_dot2->size() >= count); |
|   VERIFY_IS_TRUE(Validation_dot3->size() >= count); |
|   VERIFY_IS_TRUE(Validation_dot4->size() >= count); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "DotOp", |
|       // this callback is called when the test |
|       // is creating the resource to run the test |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SDotOp")); |
|         size_t size = sizeof(SDotOp) * count; |
|         Data.resize(size); |
|         SDotOp *pPrimitives = (SDotOp*)Data.data(); |
|         for (size_t i = 0; i < count; ++i) { |
|           SDotOp *p = &pPrimitives[i]; |
|           XMFLOAT4 val1,val2; |
|           VERIFY_SUCCEEDED(ParseDataToVectorFloat((*Validation_Input1)[i], |
|                                                   (float *)&val1, 4)); |
|           VERIFY_SUCCEEDED(ParseDataToVectorFloat((*Validation_Input2)[i], |
|                                                   (float *)&val2, 4)); |
|           p->input1 = val1; |
|           p->input2 = val2; |
|         } |
|         // use shader from data table |
|         pShaderOp->Shaders.at(0).Target = Target.m_psz; |
|         pShaderOp->Shaders.at(0).Text = Text.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SDotOp", &data); |
| |
|   SDotOp *pPrimitives = (SDotOp*)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (size_t i = 0; i < count; ++i) { |
|     SDotOp *p = &pPrimitives[i]; |
|     float dot2, dot3, dot4; |
|     VERIFY_SUCCEEDED(ParseDataToFloat((*Validation_dot2)[i], dot2)); |
|     VERIFY_SUCCEEDED(ParseDataToFloat((*Validation_dot3)[i], dot3)); |
|     VERIFY_SUCCEEDED(ParseDataToFloat((*Validation_dot4)[i], dot4)); |
|     // %u expects an unsigned int; the size_t index is narrowed explicitly so |
|     // the variadic argument matches its specifier. |
|     LogCommentFmt( |
|         L"element #%u, input1 = (%f, %f, %f, %f), input2 = (%f, %f, " |
|         L"%f, %f), \n dot2 = %f, dot2_expected = %f, dot3 = %f, " |
|         L"dot3_expected = %f, dot4 = %f, dot4_expected = %f", |
|         static_cast<unsigned>(i), p->input1.x, p->input1.y, p->input1.z, |
|         p->input1.w, p->input2.x, p->input2.y, p->input2.z, p->input2.w, |
|         p->o_dot2, dot2, p->o_dot3, dot3, p->o_dot4, dot4); |
|     VerifyOutputWithExpectedValueFloat(p->o_dot2, dot2, Validation_type, |
|                                        tolerance); |
|     VerifyOutputWithExpectedValueFloat(p->o_dot3, dot3, Validation_type, |
|                                        tolerance); |
|     VerifyOutputWithExpectedValueFloat(p->o_dot4, dot4, Validation_type, |
|                                        tolerance); |
|   } |
| } |
| |
| // Table-driven check of the "Dot2AddHalfOp" shader op. Each row supplies two |
| // half-precision 2-vectors (Validation.Input1/Input2) and a float accumulator |
| // (Validation.Input3); the read-back result is compared with |
| // Validation.Expected1 using Validation.Type and Validation.Tolerance. |
| TEST_F(ExecutionTest, Dot2AddHalfTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.4 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { |
|     return; |
|   } |
| |
|   // Without native 16-bit support the test is reported as skipped. |
|   if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
|     WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
|     WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
|     return; |
|   } |
| |
|   int tableSize = sizeof(Dot2AddHalfOpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(Dot2AddHalfOpParameters, tableSize); |
| |
|   CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
|   CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
|   std::vector<WEX::Common::String> *validation_input1 = |
|       &handler.GetTableParamByName(L"Validation.Input1")->m_StringTable; |
|   std::vector<WEX::Common::String> *validation_input2 = |
|       &handler.GetTableParamByName(L"Validation.Input2")->m_StringTable; |
|   std::vector<float> *validation_acc = &handler.GetTableParamByName(L"Validation.Input3")->m_floatTable; |
|   std::vector<float> *validation_result = &handler.GetTableParamByName(L"Validation.Expected1")->m_floatTable; |
| |
|   PCWSTR Validation_type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
|   double tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
|   size_t count = validation_input1->size(); |
| |
|   // Every table below is indexed directly by element number (no wrap-around), |
|   // so each must hold at least as many rows as Input1; fail up front instead |
|   // of reading past the end of a shorter table. |
|   VERIFY_IS_TRUE(validation_input2->size() >= count); |
|   VERIFY_IS_TRUE(validation_acc->size() >= count); |
|   VERIFY_IS_TRUE(validation_result->size() >= count); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "Dot2AddHalfOp", |
|       // this callback is called when the test |
|       // is creating the resource to run the test |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SDot2AddHalfOp")); |
|         size_t size = sizeof(SDot2AddHalfOp) * count; |
|         Data.resize(size); |
|         SDot2AddHalfOp *pPrimitives = (SDot2AddHalfOp*)Data.data(); |
|         for (size_t i = 0; i < count; ++i) { |
|           SDot2AddHalfOp *p = &pPrimitives[i]; |
|           Half2 val1,val2; |
|           VERIFY_SUCCEEDED(ParseDataToVectorHalf((*validation_input1)[i], |
|                                                  (uint16_t *)&val1, 2)); |
|           VERIFY_SUCCEEDED(ParseDataToVectorHalf((*validation_input2)[i], |
|                                                  (uint16_t *)&val2, 2)); |
|           p->input1 = val1; |
|           p->input2 = val2; |
|           p->acc = (*validation_acc)[i]; |
|         } |
|         // use shader from data table |
|         pShaderOp->Shaders.at(0).Target = Target.m_psz; |
|         pShaderOp->Shaders.at(0).Text = Text.m_psz; |
|         pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SDot2AddHalfOp", &data); |
| |
|   SDot2AddHalfOp *pPrimitives = (SDot2AddHalfOp*)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (size_t i = 0; i < count; ++i) { |
|     SDot2AddHalfOp *p = &pPrimitives[i]; |
|     float expectedResult = (*validation_result)[i]; |
|     // The half inputs are widened to float only for logging. |
|     float input1x = ConvertFloat16ToFloat32(p->input1.x); |
|     float input1y = ConvertFloat16ToFloat32(p->input1.y); |
|     float input2x = ConvertFloat16ToFloat32(p->input2.x); |
|     float input2y = ConvertFloat16ToFloat32(p->input2.y); |
|     // %u expects an unsigned int; the size_t index is narrowed explicitly so |
|     // the variadic argument matches its specifier. |
|     LogCommentFmt( |
|         L"element #%u, input1 = (%f, %f), input2 = (%f, %f), acc = %f\n" |
|         L"result = %f, result_expected = %f", |
|         static_cast<unsigned>(i), input1x, input1y, input2x, input2y, p->acc, |
|         p->result, expectedResult); |
|     VerifyOutputWithExpectedValueFloat(p->result, expectedResult, Validation_type, tolerance); |
|   } |
| } |
| |
| // Table-driven check of the "Dot4AddI8PackedOp" shader op. Each row supplies |
| // two 32-bit packed inputs (Validation.Input1/Input2) and a signed 32-bit |
| // accumulator (Validation.Input3); the read-back result must equal |
| // Validation.Expected1 exactly (tolerance 0). |
| TEST_F(ExecutionTest, Dot4AddI8PackedTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.4 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { |
|     return; |
|   } |
| |
|   int tableSize = sizeof(Dot4AddI8PackedOpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(Dot4AddI8PackedOpParameters, tableSize); |
| |
|   CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
|   std::vector<uint32_t> *validation_input1 = &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
|   std::vector<uint32_t> *validation_input2 = &handler.GetTableParamByName(L"Validation.Input2")->m_uint32Table; |
|   std::vector<int32_t> *validation_acc = &handler.GetTableParamByName(L"Validation.Input3")->m_int32Table; |
|   std::vector<int32_t> *validation_result = &handler.GetTableParamByName(L"Validation.Expected1")->m_int32Table; |
| |
|   size_t count = validation_input1->size(); |
| |
|   // Every table below is indexed directly by element number (no wrap-around), |
|   // so each must hold at least as many rows as Input1; fail up front instead |
|   // of reading past the end of a shorter table. |
|   VERIFY_IS_TRUE(validation_input2->size() >= count); |
|   VERIFY_IS_TRUE(validation_acc->size() >= count); |
|   VERIFY_IS_TRUE(validation_result->size() >= count); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "Dot4AddI8PackedOp", |
|       // this callback is called when the test |
|       // is creating the resource to run the test |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SDot4AddI8PackedOp")); |
|         size_t size = sizeof(SDot4AddI8PackedOp) * count; |
|         Data.resize(size); |
|         SDot4AddI8PackedOp *pPrimitives = (SDot4AddI8PackedOp*)Data.data(); |
|         for (size_t i = 0; i < count; ++i) { |
|           SDot4AddI8PackedOp *p = &pPrimitives[i]; |
|           p->input1 = (*validation_input1)[i]; |
|           p->input2 = (*validation_input2)[i]; |
|           p->acc = (*validation_acc)[i]; |
|         } |
|         // use shader from data table |
|         pShaderOp->Shaders.at(0).Target = Target.m_psz; |
|         pShaderOp->Shaders.at(0).Text = Text.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SDot4AddI8PackedOp", &data); |
| |
|   SDot4AddI8PackedOp *pPrimitives = (SDot4AddI8PackedOp*)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (size_t i = 0; i < count; ++i) { |
|     SDot4AddI8PackedOp *p = &pPrimitives[i]; |
|     int32_t expectedResult = (*validation_result)[i]; |
|     // %u expects an unsigned int; the size_t index is narrowed explicitly so |
|     // the variadic argument matches its specifier. |
|     LogCommentFmt( |
|         L"element #%u, input1 = %u, input2 = %u, acc = %d \n" |
|         L"result = %d, result_expected = %d", |
|         static_cast<unsigned>(i), p->input1, p->input2, p->acc, p->result, |
|         expectedResult); |
|     VerifyOutputWithExpectedValueInt(p->result, expectedResult, 0); |
|   } |
| } |
| |
| // Table-driven check of the "Dot4AddU8PackedOp" shader op. Each row supplies |
| // two 32-bit packed inputs (Validation.Input1/Input2) and an unsigned 32-bit |
| // accumulator (Validation.Input3); the read-back result must equal |
| // Validation.Expected1 exactly (tolerance 0). |
| TEST_F(ExecutionTest, Dot4AddU8PackedTest) { |
|   WEX::TestExecution::SetVerifyOutput verifySettings( |
|       WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
|   CComPtr<IStream> pStream; |
|   ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
|   // A shader model 6.4 device is requested; nothing runs if creation fails. |
|   CComPtr<ID3D12Device> pDevice; |
|   if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { |
|     return; |
|   } |
| |
|   int tableSize = sizeof(Dot4AddU8PackedOpParameters) / sizeof(TableParameter); |
|   TableParameterHandler handler(Dot4AddU8PackedOpParameters, tableSize); |
| |
|   CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
|   CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
|   std::vector<uint32_t> *validation_input1 = &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
|   std::vector<uint32_t> *validation_input2 = &handler.GetTableParamByName(L"Validation.Input2")->m_uint32Table; |
|   std::vector<uint32_t> *validation_acc = &handler.GetTableParamByName(L"Validation.Input3")->m_uint32Table; |
|   std::vector<uint32_t> *validation_result = &handler.GetTableParamByName(L"Validation.Expected1")->m_uint32Table; |
| |
|   size_t count = validation_input1->size(); |
| |
|   // Every table below is indexed directly by element number (no wrap-around), |
|   // so each must hold at least as many rows as Input1; fail up front instead |
|   // of reading past the end of a shorter table. |
|   VERIFY_IS_TRUE(validation_input2->size() >= count); |
|   VERIFY_IS_TRUE(validation_acc->size() >= count); |
|   VERIFY_IS_TRUE(validation_result->size() >= count); |
| |
|   std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
|       pDevice, m_support, pStream, "Dot4AddU8PackedOp", |
|       // this callback is called when the test |
|       // is creating the resource to run the test |
|       [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
|         VERIFY_IS_TRUE(0 == _stricmp(Name, "SDot4AddU8PackedOp")); |
|         size_t size = sizeof(SDot4AddU8PackedOp) * count; |
|         Data.resize(size); |
|         SDot4AddU8PackedOp *pPrimitives = (SDot4AddU8PackedOp*)Data.data(); |
|         for (size_t i = 0; i < count; ++i) { |
|           SDot4AddU8PackedOp *p = &pPrimitives[i]; |
|           p->input1 = (*validation_input1)[i]; |
|           p->input2 = (*validation_input2)[i]; |
|           p->acc = (*validation_acc)[i]; |
|         } |
|         // use shader from data table |
|         pShaderOp->Shaders.at(0).Target = Target.m_psz; |
|         pShaderOp->Shaders.at(0).Text = Text.m_psz; |
|       }); |
| |
|   MappedData data; |
|   test->Test->GetReadBackData("SDot4AddU8PackedOp", &data); |
| |
|   SDot4AddU8PackedOp *pPrimitives = (SDot4AddU8PackedOp*)data.data(); |
|   WEX::TestExecution::DisableVerifyExceptions dve; |
|   for (size_t i = 0; i < count; ++i) { |
|     SDot4AddU8PackedOp *p = &pPrimitives[i]; |
|     uint32_t expectedResult = (*validation_result)[i]; |
|     // %u expects an unsigned int; the size_t index is narrowed explicitly so |
|     // the variadic argument matches its specifier. |
|     LogCommentFmt( |
|         L"element #%u, input1 = %u, input2 = %u, acc = %u \n" |
|         L"result = %u, result_expected = %u, ", |
|         static_cast<unsigned>(i), p->input1, p->input2, p->acc, p->result, |
|         expectedResult); |
|     VerifyOutputWithExpectedValueUInt(p->result, expectedResult, 0); |
|   } |
| } |
| |
| TEST_F(ExecutionTest, Msad4Test) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| size_t tableSize = sizeof(Msad4OpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(Msad4OpParameters, tableSize); |
| |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| double tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| |
| std::vector<unsigned int> *Validation_Reference = |
| &handler.GetTableParamByName(L"Validation.Input1")->m_uint32Table; |
| std::vector<WEX::Common::String> *Validation_Source = |
| &handler.GetTableParamByName(L"Validation.Input2")->m_StringTable; |
| std::vector<WEX::Common::String> *Validation_Accum = |
| &handler.GetTableParamByName(L"Validation.Input3")->m_StringTable; |
| std::vector<WEX::Common::String> *Validation_Expected = |
| &handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable; |
| |
| size_t count = Validation_Expected->size(); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "Msad4", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SMsad4")); |
| size_t size = sizeof(SMsad4) * count; |
| Data.resize(size); |
| SMsad4 *pPrimitives = (SMsad4*)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SMsad4 *p = &pPrimitives[i]; |
| XMUINT2 src; |
| XMUINT4 accum; |
| VERIFY_SUCCEEDED(ParseDataToVectorUint((*Validation_Source)[i], (unsigned int*)&src, 2)); |
| VERIFY_SUCCEEDED(ParseDataToVectorUint((*Validation_Accum)[i], (unsigned int*)&accum, 4)); |
| p->ref = (*Validation_Reference)[i]; |
| p->src = src; |
| p->accum = accum; |
| } |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SMsad4", &data); |
| |
| SMsad4 *pPrimitives = (SMsad4*)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| for (size_t i = 0; i < count; ++i) { |
| SMsad4 *p = &pPrimitives[i]; |
| XMUINT4 result; |
| VERIFY_SUCCEEDED(ParseDataToVectorUint((*Validation_Expected)[i], |
| (unsigned int *)&result, 4)); |
| LogCommentFmt( |
| L"element #%u, ref = %u(0x%08x), src = %u(0x%08x), %u(0x%08x), " |
| L"accum = %u(0x%08x), %u(0x%08x), %u(0x%08x), %u(0x%08x),\n" |
| L"result = %u(0x%08x), %u(0x%08x), %u(0x%08x), %u(0x%08x),\n" |
| L"expected = %u(0x%08x), %u(0x%08x), %u(0x%08x), %u(0x%08x)", i, |
| p->ref, p->ref, p->src.x, p->src.x, p->src.y, p->src.y, p->accum.x, |
| p->accum.x, p->accum.y, p->accum.y, p->accum.z, p->accum.z, |
| p->accum.w, p->accum.w, p->result.x, p->result.x, p->result.y, |
| p->result.y, p->result.z, p->result.z, p->result.w, p->result.w, |
| result.x, result.x, result.y, result.y, result.z, result.z, |
| result.w, result.w); |
| |
| int toleranceInt = (int)tolerance; |
| VerifyOutputWithExpectedValueInt(p->result.x, result.x, toleranceInt); |
| VerifyOutputWithExpectedValueInt(p->result.y, result.y, toleranceInt); |
| VerifyOutputWithExpectedValueInt(p->result.z, result.z, toleranceInt); |
| VerifyOutputWithExpectedValueInt(p->result.w, result.w, toleranceInt); |
| } |
| } |
| |
| TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
| return; |
| } |
| |
| // Read data from the table |
| int tableSize = sizeof(DenormBinaryFPOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(DenormBinaryFPOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
| std::vector<WEX::Common::String> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_StringTable); |
| std::vector<WEX::Common::String> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_StringTable); |
| |
| std::vector<WEX::Common::String> *Validation_Expected1 = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable); |
| // two expected outputs for any mode |
| std::vector<WEX::Common::String> *Validation_Expected2 = |
| &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable); |
| |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| using namespace hlsl::DXIL; |
| Float32DenormMode mode = Float32DenormMode::Any; |
| if (strcmp(Arguments.m_psz, "-denorm preserve") == 0) { |
| mode = Float32DenormMode::Preserve; |
| } |
| else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) { |
| mode = Float32DenormMode::FTZ; |
| } |
| if (mode == Float32DenormMode::Any) { |
| DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(), |
| "must have same number of expected values"); |
| } |
| |
| #if defined(_M_ARM64) || defined(_M_ARM64EC) |
| if ((GetTestParamUseWARP(UseWarpByDefault()) || IsDeviceBasicAdapter(pDevice)) && mode == Float32DenormMode::Preserve) { |
| WEX::Logging::Log::Comment(L"WARP has an issue with DenormBinaryFloatOpTest with '-denorm preserve' on ARM64."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| #endif // defined(_M_ARM64) || defined(_M_ARM64EC) |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "BinaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SBinaryFPOp")); |
| size_t size = sizeof(SBinaryFPOp) * count; |
| Data.resize(size); |
| SBinaryFPOp *pPrimitives = (SBinaryFPOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| SBinaryFPOp *p = &pPrimitives[i]; |
| PCWSTR str1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| PCWSTR str2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| float val1, val2; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2)); |
| p->input1 = val1; |
| p->input2 = val2; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("SBinaryFPOp", &data); |
| |
| SBinaryFPOp *pPrimitives = (SBinaryFPOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| for (unsigned i = 0; i < count; ++i) { |
| SBinaryFPOp *p = &pPrimitives[i]; |
| if (mode == Float32DenormMode::Any) { |
| LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
| float val1; |
| float val2; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2)); |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = " |
| L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)", |
| i, p->input1, p->input2, p->output1, val1, *(int *)&val1, val2, *(int *)&val2); |
| VERIFY_IS_TRUE( |
| CompareOutputWithExpectedValueFloat( |
| p->output1, val1, Validation_Type, Validation_Tolerance, mode) || |
| CompareOutputWithExpectedValueFloat( |
| p->output1, val2, Validation_Type, Validation_Tolerance, mode)); |
| } |
| else { |
| LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| float val1; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, output = " |
| L"%6.8f, expected = %6.8f(%a)", |
| i, p->input1, p->input2, p->output1, val1, *(int *)&val1); |
| VerifyOutputWithExpectedValueFloat(p->output1, val1, Validation_Type, |
| Validation_Tolerance, mode); |
| } |
| } |
| } |
| |
| TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { |
| return; |
| } |
| |
| // Read data from the table |
| int tableSize = sizeof(DenormTertiaryFPOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(DenormTertiaryFPOpParameters, tableSize); |
| |
| CW2A Target(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A Arguments(handler.GetTableParamByName(L"ShaderOp.Arguments")->m_str); |
| |
| std::vector<WEX::Common::String> *Validation_Input1 = |
| &(handler.GetTableParamByName(L"Validation.Input1")->m_StringTable); |
| std::vector<WEX::Common::String> *Validation_Input2 = |
| &(handler.GetTableParamByName(L"Validation.Input2")->m_StringTable); |
| std::vector<WEX::Common::String> *Validation_Input3 = |
| &(handler.GetTableParamByName(L"Validation.Input3")->m_StringTable); |
| |
| std::vector<WEX::Common::String> *Validation_Expected1 = |
| &(handler.GetTableParamByName(L"Validation.Expected1")->m_StringTable); |
| |
| // two expected outputs for any mode |
| std::vector<WEX::Common::String> *Validation_Expected2 = |
| &(handler.GetTableParamByName(L"Validation.Expected2")->m_StringTable); |
| LPCWSTR Validation_Type = handler.GetTableParamByName(L"Validation.Type")->m_str; |
| double Validation_Tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_double; |
| size_t count = Validation_Input1->size(); |
| |
| using namespace hlsl::DXIL; |
| Float32DenormMode mode = Float32DenormMode::Any; |
| if (strcmp(Arguments.m_psz, "-denorm preserve") == 0) { |
| mode = Float32DenormMode::Preserve; |
| } |
| else if (strcmp(Arguments.m_psz, "-denorm ftz") == 0) { |
| mode = Float32DenormMode::FTZ; |
| } |
| if (mode == Float32DenormMode::Any) { |
| DXASSERT(Validation_Expected2->size() == Validation_Expected1->size(), |
| "must have same number of expected values"); |
| } |
| |
| #if defined(_M_ARM64) || defined(_M_ARM64EC) |
| if ((GetTestParamUseWARP(UseWarpByDefault()) || IsDeviceBasicAdapter(pDevice)) && mode == Float32DenormMode::Preserve) { |
| WEX::Logging::Log::Comment(L"WARP has an issue with DenormTertiaryFloatOpTest with '-denorm preserve' on ARM64."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| #endif // defined(_M_ARM64) || defined(_M_ARM64EC) |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "TertiaryFPOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "STertiaryFPOp")); |
| size_t size = sizeof(STertiaryFPOp) * count; |
| Data.resize(size); |
| STertiaryFPOp *pPrimitives = (STertiaryFPOp *)Data.data(); |
| for (size_t i = 0; i < count; ++i) { |
| STertiaryFPOp *p = &pPrimitives[i]; |
| PCWSTR str1 = (*Validation_Input1)[i % Validation_Input1->size()]; |
| PCWSTR str2 = (*Validation_Input2)[i % Validation_Input2->size()]; |
| PCWSTR str3 = (*Validation_Input3)[i % Validation_Input3->size()]; |
| float val1, val2, val3; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2)); |
| VERIFY_SUCCEEDED(ParseDataToFloat(str3, val3)); |
| p->input1 = val1; |
| p->input2 = val2; |
| p->input3 = val3; |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = Target.m_psz; |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = Arguments.m_psz; |
| }); |
| |
| MappedData data; |
| test->Test->GetReadBackData("STertiaryFPOp", &data); |
| |
| STertiaryFPOp *pPrimitives = (STertiaryFPOp *)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| for (unsigned i = 0; i < count; ++i) { |
| STertiaryFPOp *p = &pPrimitives[i]; |
| if (mode == Float32DenormMode::Any) { |
| LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| LPCWSTR str2 = (*Validation_Expected2)[i % Validation_Expected2->size()]; |
| float val1; |
| float val2; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| VERIFY_SUCCEEDED(ParseDataToFloat(str2, val2)); |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = " |
| L"%6.8f, expected = %6.8f(%x) or %6.8f(%x)", |
| i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1, val2, *(int *)&val2); |
| VERIFY_IS_TRUE( |
| CompareOutputWithExpectedValueFloat( |
| p->output, val1, Validation_Type, Validation_Tolerance, mode) || |
| CompareOutputWithExpectedValueFloat( |
| p->output, val2, Validation_Type, Validation_Tolerance, mode)); |
| } |
| else { |
| LPCWSTR str1 = (*Validation_Expected1)[i % Validation_Expected1->size()]; |
| float val1; |
| VERIFY_SUCCEEDED(ParseDataToFloat(str1, val1)); |
| LogCommentFmt(L"element #%u, input1 = %6.8f, input2 = %6.8f, input3 = %6.8f, output = " |
| L"%6.8f, expected = %6.8f(%a)", |
| i, p->input1, p->input2, p->input3, p->output, val1, *(int *)&val1); |
| VerifyOutputWithExpectedValueFloat(p->output, val1, Validation_Type, |
| Validation_Tolerance, mode); |
| } |
| } |
| } |
| |
| // Setup for wave intrinsics tests |
// Reference computations available for the wave intrinsic tests. Several
// shader op names map to the same kind (see ShaderOpKindTable).
enum class ShaderOpKind {
  // Arithmetic reductions.
  WaveSum,
  WaveProduct,
  WaveActiveMax,
  WaveActiveMin,
  WaveCountBits,
  // Boolean reductions.
  WaveActiveAllEqual,
  WaveActiveAnyTrue,
  WaveActiveAllTrue,
  // Bitwise reductions.
  WaveActiveBitOr,
  WaveActiveBitAnd,
  WaveActiveBitXor,
  // Returned when a name does not match any known shader op.
  ShaderOpInvalid
};
| |
| struct ShaderOpKindPair { |
| LPCWSTR name; |
| ShaderOpKind kind; |
| }; |
| |
| static ShaderOpKindPair ShaderOpKindTable[] = { |
| { L"WaveActiveSum", ShaderOpKind::WaveSum }, |
| { L"WaveActiveUSum", ShaderOpKind::WaveSum }, |
| { L"WaveActiveProduct", ShaderOpKind::WaveProduct }, |
| { L"WaveActiveUProduct", ShaderOpKind::WaveProduct }, |
| { L"WaveActiveMax", ShaderOpKind::WaveActiveMax }, |
| { L"WaveActiveUMax", ShaderOpKind::WaveActiveMax }, |
| { L"WaveActiveMin", ShaderOpKind::WaveActiveMin }, |
| { L"WaveActiveUMin", ShaderOpKind::WaveActiveMin }, |
| { L"WaveActiveCountBits", ShaderOpKind::WaveCountBits }, |
| { L"WaveActiveAllEqual", ShaderOpKind::WaveActiveAllEqual }, |
| { L"WaveActiveAnyTrue", ShaderOpKind::WaveActiveAnyTrue }, |
| { L"WaveActiveAllTrue", ShaderOpKind::WaveActiveAllTrue }, |
| { L"WaveActiveBitOr", ShaderOpKind::WaveActiveBitOr }, |
| { L"WaveActiveBitAnd", ShaderOpKind::WaveActiveBitAnd }, |
| { L"WaveActiveBitXor", ShaderOpKind::WaveActiveBitXor }, |
| { L"WavePrefixSum", ShaderOpKind::WaveSum }, |
| { L"WavePrefixUSum", ShaderOpKind::WaveSum }, |
| { L"WavePrefixProduct", ShaderOpKind::WaveProduct }, |
| { L"WavePrefixUProduct", ShaderOpKind::WaveProduct }, |
| { L"WavePrefixMax", ShaderOpKind::WaveActiveMax }, |
| { L"WavePrefixUMax", ShaderOpKind::WaveActiveMax }, |
| { L"WavePrefixMin", ShaderOpKind::WaveActiveMin }, |
| { L"WavePrefixUMin", ShaderOpKind::WaveActiveMin }, |
| { L"WavePrefixCountBits", ShaderOpKind::WaveCountBits } |
| }; |
| |
| ShaderOpKind GetShaderOpKind(LPCWSTR str) { |
| for (size_t i = 0; i < sizeof(ShaderOpKindTable)/sizeof(ShaderOpKindPair); ++i) { |
| if (_wcsicmp(ShaderOpKindTable[i].name, str) == 0) { |
| return ShaderOpKindTable[i].kind; |
| } |
| } |
| DXASSERT_ARGS(false, "Invalid ShaderOp name: %s", str); |
| return ShaderOpKind::ShaderOpInvalid; |
| } |
| |
| template <typename InType, typename OutType, ShaderOpKind kind> |
| struct computeExpected { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| return 0; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveSum> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType sum = 0; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| sum += inputs.at(i); |
| } |
| } |
| return sum; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveProduct> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType prod = 1; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| prod *= inputs.at(i); |
| } |
| } |
| return prod; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveMax> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType maximum = std::numeric_limits<OutType>::min(); |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue && inputs.at(i) > maximum) |
| maximum = inputs.at(i); |
| } |
| return maximum; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveMin> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType minimum = std::numeric_limits<OutType>::max(); |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue && inputs.at(i) < minimum) |
| minimum = inputs.at(i); |
| } |
| return minimum; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveCountBits> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType count = 0; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue && inputs.at(i) > 3) { |
| count++; |
| } |
| } |
| return count; |
| } |
| }; |
| |
| // In HLSL, boolean is represented in a 4 byte (uint32) format, |
| // So we cannot use c++ bool type to represent bool in HLSL |
| // HLSL returns 0 for false and 1 for true |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveAnyTrue> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue && inputs.at(i) != 0) { |
| return 1; |
| } |
| } |
| return 0; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveAllTrue> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue && inputs.at(i) == 0) { |
| return 0; |
| } |
| } |
| return 1; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveAllEqual> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| const InType *val = nullptr; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| if (val && *val != inputs.at(i)) { |
| return 0; |
| } |
| val = &inputs.at(i); |
| } |
| } |
| return 1; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitOr> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType bits = 0x00000000; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| bits |= inputs.at(i); |
| } |
| } |
| return bits; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitAnd> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType bits = 0xffffffff; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| bits &= inputs.at(i); |
| } |
| } |
| return bits; |
| } |
| }; |
| |
| template <typename InType, typename OutType> |
| struct computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitXor> { |
| OutType operator()(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, int maskValue, |
| unsigned int index) { |
| OutType bits = 0x00000000; |
| for (size_t i = 0; i < index; ++i) { |
| if (masks.at(i) == maskValue) { |
| bits ^= inputs.at(i); |
| } |
| } |
| return bits; |
| } |
| }; |
| |
| // Mask functions used to control active lanes |
// Mask function that puts every thread in the same group: always returns 1.
// The thread index is irrelevant, so the parameter is left unnamed instead of
// being silenced through a platform-specific macro.
static int MaskAll(int /*i*/) {
  return 1;
}
| |
// Mask function that returns 1 for even thread indices and 0 for odd ones.
static int MaskEveryOther(int i) {
  if (i % 2 != 0) {
    return 0;
  }
  return 1;
}
| |
// Mask function that returns 1 for thread indices divisible by three and 0
// for all others.
static int MaskEveryThird(int i) {
  if (i % 3 != 0) {
    return 0;
  }
  return 1;
}
| |
| typedef int(*MaskFunction)(int); |
| static MaskFunction MaskFunctionTable[] = { |
| MaskAll, MaskEveryOther, MaskEveryThird |
| }; |
| |
| template <typename InType, typename OutType> |
| static OutType computeExpectedWithShaderOp(const std::vector<InType> &inputs, |
| const std::vector<int> &masks, |
| int maskValue, unsigned int index, |
| LPCWSTR str) { |
| ShaderOpKind kind = GetShaderOpKind(str); |
| switch (kind) { |
| case ShaderOpKind::WaveSum: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveSum>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveProduct: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveProduct>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveMax: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveMax>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveMin: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveMin>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveCountBits: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveCountBits>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveBitOr: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitOr>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveBitAnd: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitAnd>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveBitXor: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveBitXor>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveAnyTrue: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveAnyTrue>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveAllTrue: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveAllTrue>()(inputs, masks, maskValue, index); |
| case ShaderOpKind::WaveActiveAllEqual: |
| return computeExpected<InType, OutType, ShaderOpKind::WaveActiveAllEqual>()(inputs, masks, maskValue, index); |
| default: |
| DXASSERT_ARGS(false, "Invalid ShaderOp Name: %s", str); |
| return (OutType) 0; |
| } |
| }; |
| |
| // A framework for testing individual wave intrinsics tests. |
| // This test case is assuming that functions 1) WaveIsFirstLane and 2) WaveGetLaneIndex are correct for all lanes. |
| template <class T1, class T2> |
| void ExecutionTest::WaveIntrinsicsActivePrefixTest( |
| TableParameter *pParameterList, size_t numParameter, bool isPrefix) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| // Resource representation for compute shader |
| // firstLaneId is used to group different waves |
| // laneIndex is used to identify lane within the wave. |
| // Lane ids are not necessarily in same order as thread ids. |
| struct PerThreadData { |
| unsigned firstLaneId; |
| unsigned laneIndex; |
| int mask; |
| T1 input; |
| T2 output; |
| }; |
| |
| unsigned int NumThreadsX = 8; |
| unsigned int NumThreadsY = 12; |
| unsigned int NumThreadsZ = 1; |
| |
| static const unsigned int ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| static const unsigned int DispatchGroupCount = 1; |
| static const unsigned int ThreadCount = ThreadsPerGroup * DispatchGroupCount; |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| TableParameterHandler handler(pParameterList, numParameter); |
| |
| unsigned int numInputSet = handler.GetTableParamByName(L"Validation.NumInputSet")->m_uint; |
| |
| // Obtain the list of input lists |
| std::vector<std::vector<T1>*> InputDataList; |
| for (unsigned int i = 0; |
| i < numInputSet; ++i) { |
| std::wstring inputName = L"Validation.InputSet"; |
| inputName.append(std::to_wstring(i + 1)); |
| InputDataList.push_back(handler.GetDataArray<T1>(inputName.data())); |
| } |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.text")->m_str); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| // Running compute shader for each input set with different masks |
| for (size_t setIndex = 0; setIndex < numInputSet; ++setIndex) { |
| for (size_t maskIndex = 0; maskIndex < sizeof(MaskFunctionTable) / sizeof(MaskFunction); ++maskIndex) { |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse( |
| pDevice, m_support, "WaveIntrinsicsOp", |
| // this callbacked is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); |
| size_t size = sizeof(PerThreadData) * ThreadCount; |
| Data.resize(size); |
| PerThreadData *pPrimitives = (PerThreadData*)Data.data(); |
| // 4 different inputs for each operation test |
| size_t index = 0; |
| std::vector<T1> *IntList = InputDataList[setIndex]; |
| while (index < ThreadCount) { |
| PerThreadData *p = &pPrimitives[index]; |
| p->firstLaneId = 0xFFFFBFFF; |
| p->laneIndex = 0xFFFFBFFF; |
| p->mask = MaskFunctionTable[maskIndex]((int)index); |
| p->input = (*IntList)[index % IntList->size()]; |
| p->output = 0xFFFFBFFF; |
| index++; |
| } |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| }, ShaderOpSet); |
| |
| // Check the value |
| MappedData data; |
| test->Test->GetReadBackData("SWaveIntrinsicsOp", &data); |
| |
| PerThreadData *pPrimitives = (PerThreadData*)data.data(); |
| WEX::TestExecution::DisableVerifyExceptions dve; |
| |
| // Grouping data by waves |
| std::vector<int> firstLaneIds; |
| for (size_t i = 0; i < ThreadCount; ++i) { |
| PerThreadData *p = &pPrimitives[i]; |
| int firstLaneId = p->firstLaneId; |
| if (!contains(firstLaneIds, firstLaneId)) { |
| firstLaneIds.push_back(firstLaneId); |
| } |
| } |
| |
| std::map<int, std::unique_ptr<std::vector<PerThreadData *>>> waves; |
| for (size_t i = 0; i < firstLaneIds.size(); ++i) { |
| waves[firstLaneIds.at(i)] = std::make_unique<std::vector<PerThreadData*>>(); |
| } |
| |
| for (size_t i = 0; i < ThreadCount; ++i) { |
| PerThreadData *p = &pPrimitives[i]; |
| waves[p->firstLaneId].get()->push_back(p); |
| } |
| |
| // validate for each wave |
| for (size_t i = 0; i < firstLaneIds.size(); ++i) { |
| // collect inputs and masks for a given wave |
| std::vector<PerThreadData *> *waveData = waves[firstLaneIds.at(i)].get(); |
| std::vector<T1> inputList(waveData->size()); |
| std::vector<int> maskList(waveData->size(), -1); |
| std::vector<T2> outputList(waveData->size()); |
| // sort inputList and masklist by lane id. input for each lane can be computed for its group index |
| for (size_t j = 0, end = waveData->size(); j < end; ++j) { |
| unsigned laneID = waveData->at(j)->laneIndex; |
| // ensure that each lane ID is unique and within the range |
| VERIFY_IS_TRUE(0 <= laneID && laneID < waveData->size()); |
| VERIFY_IS_TRUE(maskList.at(laneID) == -1); |
| maskList.at(laneID) = waveData->at(j)->mask; |
| inputList.at(laneID) = waveData->at(j)->input; |
| outputList.at(laneID) = waveData->at(j)->output; |
| } |
| std::wstring inputStr = L"Wave Inputs: "; |
| std::wstring maskStr = L"Wave Masks: "; |
| std::wstring outputStr = L"Wave Outputs: "; |
| // append input string and mask string in lane id order |
| for (size_t j = 0, end = waveData->size(); j < end; ++j) { |
| maskStr.append(std::to_wstring(maskList.at(j))); |
| maskStr.append(L" "); |
| inputStr.append(std::to_wstring(inputList.at(j))); |
| inputStr.append(L" "); |
| outputStr.append(std::to_wstring(outputList.at(j))); |
| outputStr.append(L" "); |
| } |
| |
| LogCommentFmt(inputStr.data()); |
| LogCommentFmt(maskStr.data()); |
| LogCommentFmt(outputStr.data()); |
| LogCommentFmt(L"\n"); |
| // Compute expected output for a given inputs, masks, and index |
| for (size_t laneIndex = 0, laneEnd = inputList.size(); laneIndex < laneEnd; ++laneIndex) { |
| T2 expected; |
| // WaveActive is equivalent to WavePrefix lane # lane count |
| unsigned index = isPrefix ? (unsigned)laneIndex : (unsigned)inputList.size(); |
| if (maskList.at(laneIndex) == 1) { |
| expected = computeExpectedWithShaderOp<T1, T2>( |
| inputList, maskList, 1, index, |
| handler.GetTableParamByName(L"ShaderOp.Name")->m_str); |
| } |
| else { |
| expected = computeExpectedWithShaderOp<T1, T2>( |
| inputList, maskList, 0, index, |
| handler.GetTableParamByName(L"ShaderOp.Name")->m_str); |
| } |
| // TODO: use different comparison for floating point inputs |
| bool equal = outputList.at(laneIndex) == expected; |
| if (!equal) { |
| LogCommentFmt(L"lane%d: %4d, Expected : %4d", laneIndex, outputList.at(laneIndex), expected); |
| } |
| VERIFY_IS_TRUE(equal); |
| } |
| } |
| } |
| } |
| } |
| |
// Minimum WARP DLL version the wave intrinsic tests require; it is passed to
// IsValidWarpDllVersion before those tests run on WARP.
static const unsigned MinWarpVersionForWaveIntrinsics = 16202;
| |
| TEST_F(ExecutionTest, WaveIntrinsicsActiveIntTest) { |
| if (GetTestParamUseWARP(true) && |
| !IsValidWarpDllVersion(MinWarpVersionForWaveIntrinsics)) { |
| return; |
| } |
| WaveIntrinsicsActivePrefixTest<int, int>( |
| WaveIntrinsicsActiveIntParameters, |
| sizeof(WaveIntrinsicsActiveIntParameters) / sizeof(TableParameter), |
| /*isPrefix*/ false); |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsActiveUintTest) { |
| if (GetTestParamUseWARP(true) && |
| !IsValidWarpDllVersion(MinWarpVersionForWaveIntrinsics)) { |
| return; |
| } |
| WaveIntrinsicsActivePrefixTest<unsigned int, unsigned int>( |
| WaveIntrinsicsActiveUintParameters, |
| sizeof(WaveIntrinsicsActiveUintParameters) / sizeof(TableParameter), |
| /*isPrefix*/ false); |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsPrefixIntTest) { |
| if (GetTestParamUseWARP(true) && |
| !IsValidWarpDllVersion(MinWarpVersionForWaveIntrinsics)) { |
| return; |
| } |
| WaveIntrinsicsActivePrefixTest<int, int>( |
| WaveIntrinsicsPrefixIntParameters, |
| sizeof(WaveIntrinsicsPrefixIntParameters) / sizeof(TableParameter), |
| /*isPrefix*/ true); |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsPrefixUintTest) { |
| if (GetTestParamUseWARP(true) && |
| !IsValidWarpDllVersion(MinWarpVersionForWaveIntrinsics)) { |
| return; |
| } |
| WaveIntrinsicsActivePrefixTest<unsigned int, unsigned int>( |
| WaveIntrinsicsPrefixUintParameters, |
| sizeof(WaveIntrinsicsPrefixUintParameters) / sizeof(TableParameter), |
| /*isPrefix*/ true); |
| } |
| |
| template <typename T> |
| static T GetWaveMultiPrefixInitialAccumValue(LPCWSTR testName) { |
| if (_wcsicmp(testName, L"WaveMultiPrefixProduct") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUProduct") == 0) { |
| return static_cast<T>(1); |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixSum") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUSum") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixBitOr") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitOr") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixBitXor") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitXor") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixCountBits") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUCountBits") == 0) { |
| return static_cast<T>(0); |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixBitAnd") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitAnd") == 0) { |
| return static_cast<T>(-1); |
| } else { |
| return static_cast<T>(0); |
| } |
| } |
| |
| template <typename T> |
| std::function<T(T, T)> GetWaveMultiPrefixReferenceFunction(LPCWSTR testName) { |
| if (_wcsicmp(testName, L"WaveMultiPrefixProduct") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUProduct") == 0) { |
| return [] (T lhs, T rhs) -> T { return lhs * rhs; }; |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixSum") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUSum") == 0) { |
| return [] (T lhs, T rhs) -> T { return lhs + rhs; }; |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixBitAnd") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitAnd") == 0) { |
| return [] (T lhs, T rhs) -> T { return lhs & rhs; }; |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixBitOr") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitOr") == 0) { |
| return [] (T lhs, T rhs) -> T { return lhs | rhs; }; |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixBitXor") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUBitXor") == 0) { |
| return [] (T lhs, T rhs) -> T { return lhs ^ rhs; }; |
| } else if (_wcsicmp(testName, L"WaveMultiPrefixCountBits") == 0 || |
| _wcsicmp(testName, L"WaveMultiPrefixUCountBits") == 0) { |
| // For CountBits, each lane contributes a boolean value. The test input is |
| // a zero or non-zero integer. If the input is a non-zero value then the |
| // condition is true, thus we contribute one to the bit count. |
| return [] (T lhs, T rhs) -> T { return lhs + (rhs ? 1 : 0); }; |
| } else { |
| return [] (T lhs, T rhs) -> T { UNREFERENCED_PARAMETER(lhs); UNREFERENCED_PARAMETER(rhs); return 0; }; |
| } |
| } |
| |
| template <class T> |
| void |
| ExecutionTest::WaveIntrinsicsMultiPrefixOpTest(TableParameter *pParameterList, |
| size_t numParameters) { |
| WEX::TestExecution::SetVerifyOutput |
| verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| struct PerThreadData { |
| uint32_t key; |
| uint32_t firstLaneId; |
| uint32_t laneId; |
| uint32_t mask; |
| T value; |
| T result; |
| }; |
| |
| constexpr size_t NumThreadsX = 8; |
| constexpr size_t NumThreadsY = 12; |
| constexpr size_t NumThreadsZ = 1; |
| |
| constexpr size_t ThreadsPerGroup = NumThreadsX * NumThreadsY * NumThreadsZ; |
| constexpr size_t DispatchGroupSize = 1; |
| constexpr size_t ThreadCount = ThreadsPerGroup * DispatchGroupSize; |
| |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_5)) { |
| return; |
| } |
| |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> |
| ShaderOpSet = std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| TableParameterHandler handler(pParameterList, numParameters); |
| CW2A shaderSource(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| CW2A shaderProfile(handler.GetTableParamByName(L"ShaderOp.Target")->m_str); |
| auto testName = handler.GetTableParamByName(L"ShaderOp.Name")->m_str; |
| |
| std::vector<T> *keys = handler.GetDataArray<T>(L"Validation.Keys"); |
| std::vector<T> *values = handler.GetDataArray<T>(L"Validation.Values"); |
| |
| for (size_t maskIndex = 0; maskIndex < _countof(MaskFunctionTable); ++maskIndex) { |
| std::shared_ptr<ShaderOpTestResult> test = |
| RunShaderOpTestAfterParse(pDevice, m_support, "WaveIntrinsicsOp", |
| [&] (LPCSTR name, std::vector<BYTE> &data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(name); |
| |
| const size_t dataSize = sizeof(PerThreadData) * ThreadCount; |
| |
| data.resize(dataSize); |
| PerThreadData *pThreadData = reinterpret_cast<PerThreadData *>(data.data()); |
| |
| for (size_t i = 0; i != ThreadCount; ++i) { |
| pThreadData[i].key = keys->at(i % keys->size()); |
| pThreadData[i].value = values->at(i % values->size()); |
| pThreadData[i].firstLaneId = 0xdeadbeef; |
| pThreadData[i].laneId = 0xdeadbeef; |
| pThreadData[i].mask = MaskFunctionTable[maskIndex]((int)i); |
| pThreadData[i].result = 0xdeadbeef; |
| } |
| |
| pShaderOp->Shaders.at(0).Text = shaderSource; |
| pShaderOp->Shaders.at(0).Target = shaderProfile; |
| }, ShaderOpSet); |
| |
| MappedData mappedData; |
| test->Test->GetReadBackData("SWaveIntrinsicsOp", &mappedData); |
| PerThreadData *resultData = reinterpret_cast<PerThreadData *>(mappedData.data()); |
| |
| // Partition our data into waves |
| std::map<uint32_t, std::vector<PerThreadData *>> waves; |
| |
| for (size_t i = 0, e = ThreadCount; i != e; ++i) { |
| PerThreadData *elt = &resultData[i]; |
| |
| // Basic sanity checks |
| VERIFY_IS_TRUE(elt->firstLaneId != 0xdeadbeef); |
| VERIFY_IS_TRUE(elt->laneId != 0xdeadbeef); |
| |
| waves[elt->firstLaneId].push_back(elt); |
| } |
| |
| // Verify each wave |
| auto refFn = GetWaveMultiPrefixReferenceFunction<T>(testName); |
| |
| for (auto &w : waves) { |
| std::vector<PerThreadData *> &waveData = w.second; |
| |
| struct { |
| bool operator()(PerThreadData *a, PerThreadData *b) const { |
| return (a->laneId < b->laneId); |
| } |
| } compare; |
| // Need to sort based on the lane id |
| std::sort(waveData.begin(), waveData.end(), compare); |
| |
| LogCommentFmt(L"LaneId Mask Key Value Result Expected"); |
| LogCommentFmt(L"-------- -------- -------- -------- -------- --------"); |
| for (size_t i = 0, e = waveData.size(); i != e; ++i) { |
| PerThreadData *data = waveData[i]; |
| |
| // Compute prefix operation over each previous lane element that has the |
| // same key value, and is part of the same active thread group |
| T accum = GetWaveMultiPrefixInitialAccumValue<T>(testName); |
| for (unsigned j = 0; j < i; ++j) { |
| if (waveData[j]->key == data->key && waveData[j]->mask == data->mask) { |
| accum = refFn(accum, waveData[j]->value); |
| } |
| } |
| |
| LogCommentFmt(L"%08X %08X %08X %08X %08X %08X", data->laneId, data->mask, data->key, data->value, data->result, accum); |
| |
| VERIFY_IS_TRUE(accum == data->result); |
| } |
| LogCommentFmt(L"\n"); |
| } |
| } |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsSM65IntTest) { |
| WaveIntrinsicsMultiPrefixOpTest<int>(WaveIntrinsicsMultiPrefixIntParameters, |
| _countof(WaveIntrinsicsMultiPrefixIntParameters)); |
| } |
| |
| TEST_F(ExecutionTest, WaveIntrinsicsSM65UintTest) { |
| WaveIntrinsicsMultiPrefixOpTest<unsigned>(WaveIntrinsicsMultiPrefixUintParameters, |
| _countof(WaveIntrinsicsMultiPrefixUintParameters)); |
| } |
| |
| TEST_F(ExecutionTest, CBufferTestHalf) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| // Single operation test at the moment. |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_2)) |
| return; |
| |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| uint16_t InputData[] = { 0x3F80, 0x3F00, 0x3D80, 0x7BFF }; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "CBufferTestHalf", |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| UNREFERENCED_PARAMETER(pShaderOp); |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "CB0")); |
| // use shader from data table. |
| Data.resize(sizeof(InputData)); |
| uint16_t *pData = (uint16_t *)Data.data(); |
| for (size_t i = 0; i < 4; ++i, ++pData) { |
| *pData = InputData[i]; |
| } |
| }); |
| { |
| MappedData data; |
| test->Test->GetReadBackData("RTarget", &data); |
| const uint16_t *pPixels = (uint16_t *)data.data(); |
| |
| for (int i = 0; i < 4; ++i) { |
| uint16_t output = *(pPixels + i); |
| float outputFloat = ConvertFloat16ToFloat32(output); |
| float inputFloat = ConvertFloat16ToFloat32(InputData[i]); |
| LogCommentFmt(L"element #%u: input = %6.8f(0x%04x), output = %6.8f(0x%04x)", |
| i, inputFloat, InputData[i], outputFloat, output); |
| VERIFY_ARE_EQUAL(inputFloat, outputFloat); |
| } |
| } |
| } |
| |
| TEST_F(ExecutionTest, BarycentricsTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_1)) |
| return; |
| |
| if (!DoesDeviceSupportBarycentrics(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support barycentrics."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, "Barycentrics", nullptr); |
| MappedData data; |
| D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; |
| UINT width = (UINT)D.Width; |
| UINT height = D.Height; |
| UINT pixelSize = GetByteSizeForFormat(D.Format); |
| |
| test->Test->GetReadBackData("RTarget", &data); |
| //const uint8_t *pPixels = (uint8_t *)data.data(); |
| const float *pPixels = (float *)data.data(); |
| // Get the vertex of barycentric coordinate using VBuffer |
| MappedData triangleData; |
| test->Test->GetReadBackData("VBuffer", &triangleData); |
| const float *pTriangleData = (float*)triangleData.data(); |
| // get the size of the input data |
| unsigned triangleVertexSizeInFloat = 0; |
| for (auto element : test->ShaderOp->InputElements) |
| triangleVertexSizeInFloat += GetByteSizeForFormat(element.Format) / 4; |
| |
| XMFLOAT2 p0(pTriangleData[0], pTriangleData[1]); |
| XMFLOAT2 p1(pTriangleData[triangleVertexSizeInFloat], pTriangleData[triangleVertexSizeInFloat + 1]); |
| XMFLOAT2 p2(pTriangleData[triangleVertexSizeInFloat * 2], pTriangleData[triangleVertexSizeInFloat * 2 + 1]); |
| |
| XMFLOAT3 barycentricWeights[4] = { |
| XMFLOAT3(0.3333f, 0.3333f, 0.3333f), |
| XMFLOAT3(0.5f, 0.25f, 0.25f), |
| XMFLOAT3(0.25f, 0.5f, 0.25f), |
| XMFLOAT3(0.25f, 0.25f, 0.50f) |
| }; |
| |
| float tolerance = 0.001f; |
| for (unsigned i = 0; i < sizeof(barycentricWeights) / sizeof(XMFLOAT3); ++i) { |
| float w0 = barycentricWeights[i].x; |
| float w1 = barycentricWeights[i].y; |
| float w2 = barycentricWeights[i].z; |
| float x1 = w0 * p0.x + w1 * p1.x + w2 * p2.x; |
| float y1 = w0 * p0.y + w1 * p1.y + w2 * p2.y; |
| // map from x1 y1 to rtv pixels |
| int pixelX = (int)((x1 + 1) * (width - 1) / 2); |
| int pixelY = (int)((1 - y1) * (height - 1) / 2); |
| int offset = pixelSize * (pixelX + pixelY * width) / sizeof(pPixels[0]); |
| LogCommentFmt(L"location %u %u, value %f, %f, %f", pixelX, pixelY, pPixels[offset], pPixels[offset + 1], pPixels[offset + 2]); |
| VERIFY_IS_TRUE(CompareFloatEpsilon(pPixels[offset], w0, tolerance)); |
| VERIFY_IS_TRUE(CompareFloatEpsilon(pPixels[offset + 1], w1, tolerance)); |
| VERIFY_IS_TRUE(CompareFloatEpsilon(pPixels[offset + 2], w2, tolerance)); |
| } |
| //SavePixelsToFile(pPixels, DXGI_FORMAT_R32G32B32A32_FLOAT, width, height, L"barycentric.bmp"); |
| } |
| |
// HLSL declarations shared by the raw-buffer load/store shader templates:
// scalar/vector typedefs built from COMPONENT_TYPE, the TestData and UavData
// structs, and the buffer bindings t0-t3 and u0-u3. This text is substituted
// for the first %s of the compute and pixel shader templates.
| static const char RawBufferTestShaderDeclarations[] = |
| "// Note: COMPONENT_TYPE and COMPONENT_SIZE will be defined via compiler option -D\r\n" |
| "typedef COMPONENT_TYPE scalar; \r\n" |
| "typedef vector<COMPONENT_TYPE, 2> vector2; \r\n" |
| "typedef vector<COMPONENT_TYPE, 3> vector3; \r\n" |
| "typedef vector<COMPONENT_TYPE, 4> vector4; \r\n" |
| "\r\n" |
| "struct TestData { \r\n" |
| " scalar v1; \r\n" |
| " vector2 v2; \r\n" |
| " vector3 v3; \r\n" |
| " vector4 v4; \r\n" |
| "}; \r\n" |
| "\r\n" |
| "struct UavData {\r\n" |
| " TestData input; \r\n" |
| " TestData output; \r\n" |
| " TestData srvOut; \r\n" |
| "}; \r\n" |
| "\r\n" |
| "ByteAddressBuffer srv0 : register(t0); \r\n" |
| "StructuredBuffer<TestData> srv1 : register(t1); \r\n" |
| "ByteAddressBuffer srv2 : register(t2); \r\n" |
| "StructuredBuffer<TestData> srv3 : register(t3); \r\n" |
| "\r\n" |
| "RWByteAddressBuffer uav0 : register(u0); \r\n" |
| "RWStructuredBuffer<UavData> uav1 : register(u1); \r\n" |
| "RWByteAddressBuffer uav2 : register(u2); \r\n" |
| "RWStructuredBuffer<UavData> uav3 : register(u3); \r\n"; |
| |
// HLSL statements substituted for the second %s of the compute and pixel
// shader templates. For each of the four SRV/UAV pairs the text first copies
// the SRV's v1..v4 into the UAV's srvOut slot, then copies the UAV's own
// input slot into its output slot. The byte-address buffers (0 and 2) use
// explicit byte offsets derived from COMPONENT_SIZE; the structured buffers
// (1 and 3) use member access on element 0.
| static const char RawBufferTestShaderBody[] = |
| " // offset of 'out' in 'UavData'\r\n" |
| " const int out_offset = COMPONENT_SIZE * 10; \r\n" |
| "\r\n" |
| " // offset of 'srv_out' in 'UavData'\r\n" |
| " const int srv_out_offset = COMPONENT_SIZE * 10 * 2; \r\n" |
| "\r\n" |
| " // offsets within the 'Data' struct\r\n" |
| " const int v1_offset = 0; \r\n" |
| " const int v2_offset = COMPONENT_SIZE; \r\n" |
| " const int v3_offset = COMPONENT_SIZE * 3; \r\n" |
| " const int v4_offset = COMPONENT_SIZE * 6; \r\n" |
| "\r\n" |
| " uav0.Store(srv_out_offset + v1_offset, srv0.Load<scalar>(v1_offset)); \r\n" |
| " uav0.Store(srv_out_offset + v2_offset, srv0.Load<vector2>(v2_offset)); \r\n" |
| " uav0.Store(srv_out_offset + v3_offset, srv0.Load<vector3>(v3_offset)); \r\n" |
| " uav0.Store(srv_out_offset + v4_offset, srv0.Load<vector4>(v4_offset)); \r\n" |
| "\r\n" |
| " uav1[0].srvOut.v1 = srv1[0].v1; \r\n" |
| " uav1[0].srvOut.v2 = srv1[0].v2; \r\n" |
| " uav1[0].srvOut.v3 = srv1[0].v3; \r\n" |
| " uav1[0].srvOut.v4 = srv1[0].v4; \r\n" |
| "\r\n" |
| " uav2.Store(srv_out_offset + v1_offset, srv2.Load<scalar>(v1_offset)); \r\n" |
| " uav2.Store(srv_out_offset + v2_offset, srv2.Load<vector2>(v2_offset)); \r\n" |
| " uav2.Store(srv_out_offset + v3_offset, srv2.Load<vector3>(v3_offset)); \r\n" |
| " uav2.Store(srv_out_offset + v4_offset, srv2.Load<vector4>(v4_offset)); \r\n" |
| "\r\n" |
| " uav3[0].srvOut.v1 = srv3[0].v1; \r\n" |
| " uav3[0].srvOut.v2 = srv3[0].v2; \r\n" |
| " uav3[0].srvOut.v3 = srv3[0].v3; \r\n" |
| " uav3[0].srvOut.v4 = srv3[0].v4; \r\n" |
| "\r\n" |
| " uav0.Store(out_offset + v1_offset, uav0.Load<scalar>(v1_offset)); \r\n" |
| " uav0.Store(out_offset + v2_offset, uav0.Load<vector2>(v2_offset)); \r\n" |
| " uav0.Store(out_offset + v3_offset, uav0.Load<vector3>(v3_offset)); \r\n" |
| " uav0.Store(out_offset + v4_offset, uav0.Load<vector4>(v4_offset)); \r\n" |
| "\r\n" |
| " uav1[0].output.v1 = uav1[0].input.v1; \r\n" |
| " uav1[0].output.v2 = uav1[0].input.v2; \r\n" |
| " uav1[0].output.v3 = uav1[0].input.v3; \r\n" |
| " uav1[0].output.v4 = uav1[0].input.v4; \r\n" |
| "\r\n" |
| " uav2.Store(out_offset + v1_offset, uav2.Load<scalar>(v1_offset)); \r\n" |
| " uav2.Store(out_offset + v2_offset, uav2.Load<vector2>(v2_offset)); \r\n" |
| " uav2.Store(out_offset + v3_offset, uav2.Load<vector3>(v3_offset)); \r\n" |
| " uav2.Store(out_offset + v4_offset, uav2.Load<vector4>(v4_offset)); \r\n" |
| "\r\n" |
| " uav3[0].output.v1 = uav3[0].input.v1; \r\n" |
| " uav3[0].output.v2 = uav3[0].input.v2; \r\n" |
| " uav3[0].output.v3 = uav3[0].input.v3; \r\n" |
| " uav3[0].output.v4 = uav3[0].input.v4; \r\n"; |
| |
| |
// printf-style template for the compute shader used by
// RunComputeRawBufferLdStTest: a numthreads(1, 1, 1) main() whose body is the
// second %s, preceded by the declarations supplied as the first %s.
| static const char RawBufferTestComputeShaderTemplate[] = |
| "%s\r\n" // <- RawBufferTestShaderDeclarations |
| "[numthreads(1, 1, 1)]\r\n" |
| "void main(uint GI : SV_GroupIndex) {\r\n" |
| "%s\r\n" // <- RawBufferTestShaderBody |
| "};"; |
| |
// printf-style template for the pixel shader used by
// RunGraphicsRawBufferLdStTest. The declarations are the first %s. The body
// (second %s) is wrapped in a condition on input.pos.x + input.pos.y == 1.0f,
// and the shader always returns uint4(1, 2, 3, 4).
| static const char RawBufferTestGraphicsPixelShaderTemplate[] = |
| "%s\r\n" // <- RawBufferTestShaderDeclarations |
| "struct PSInput { \r\n" |
| " float4 pos : SV_POSITION; \r\n" |
| "}; \r\n" |
| "uint4 main(PSInput input) : SV_TARGET{ \r\n" |
| " if (input.pos.x + input.pos.y == 1.0f) { // pixel { 0.5, 0.5, 0 } \r\n" |
| "%s\r\n" // <- RawBufferTestShaderBody |
| " } \r\n" |
| " return uint4(1, 2, 3, 4); \r\n" |
| "};"; |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStI32) { |
| RawBufferLdStTestData<int32_t> data = { { 1 }, { 2, -1 }, { 256, -10517, 980 }, { 465, 13, -89, MAXUINT32 / 2 } }; |
| RunComputeRawBufferLdStTest<int32_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::I32, "ComputeRawBufferLdSt32Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStFloat) { |
| RawBufferLdStTestData<float> data = { { 3e-10f }, { 1.5f, -1.99988f }, { 256.0f, -105.17f, 980.0f }, { 465.1652f, -1.5694e2f, -0.8543e-2f, 1333.5f } }; |
| RunComputeRawBufferLdStTest<float>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::Float, "ComputeRawBufferLdSt32Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStI64) { |
| RawBufferLdStTestData<int64_t> data = { { 1 }, { 2, -1 }, { 256, -105171532, 980 }, { 465, 13, -89, MAXUINT64 / 2 } }; |
| RunComputeRawBufferLdStTest<int64_t>(D3D_SHADER_MODEL_6_3, RawBufferLdStType::I64, "ComputeRawBufferLdSt64Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStDouble) { |
| RawBufferLdStTestData<double> data = { { 3e-10 }, { 1.5, -1.99988 }, { 256.0, -105.17, 980.0 }, { 465.1652, -1.5694e2, -0.8543e-2, 1333.5 } }; |
| RunComputeRawBufferLdStTest<double>(D3D_SHADER_MODEL_6_3, RawBufferLdStType::I64, "ComputeRawBufferLdSt64Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStI16) { |
| RawBufferLdStTestData<int16_t> data = { { 1 }, { 2, -1 }, { 256, -10517, 980 }, { 465, 13, -89, MAXUINT16 / 2 } }; |
| RunComputeRawBufferLdStTest<int16_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::I16, "ComputeRawBufferLdSt16Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, ComputeRawBufferLdStHalf) { |
| RawBufferLdStTestData<float> floatData = { { 3e-10f }, { 1.5f, -1.99988f }, { 256.0f, 105.17f, 980.0f }, { 465.1652f, -1.5694e2f, -0.8543e-2f, 1333.5f } }; |
| RawBufferLdStTestData<uint16_t> halfData; |
| for (int i = 0; i < sizeof(floatData)/sizeof(float); i++) { |
| ((uint16_t*)&halfData)[i] = ConvertFloat32ToFloat16(((float*)&floatData)[i]); |
| } |
| RunComputeRawBufferLdStTest<uint16_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::Half, "ComputeRawBufferLdSt16Bit", halfData); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStI32) { |
| RawBufferLdStTestData<int32_t> data = { { 1 }, { 2, -1 }, { 256, -10517, 980 }, { 465, 13, -89, MAXUINT32 / 2 } }; |
| RunGraphicsRawBufferLdStTest<int32_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::I32, "GraphicsRawBufferLdSt32Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStFloat) { |
| RawBufferLdStTestData<float> data = { { 3e-10f }, { 1.5f, -1.99988f }, { 256.0f, -105.17f, 980.0f }, { 465.1652f, -1.5694e2f, -0.8543e-2f, 1333.5f } }; |
| RunGraphicsRawBufferLdStTest<float>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::Float, "GraphicsRawBufferLdSt32Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStI64) { |
| RawBufferLdStTestData<int64_t> data = { { 1 }, { 2, -1 }, { 256, -105171532, 980 }, { 465, 13, -89, MAXUINT64 / 2 } }; |
| RunGraphicsRawBufferLdStTest<int64_t>(D3D_SHADER_MODEL_6_3, RawBufferLdStType::I64, "GraphicsRawBufferLdSt64Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStDouble) { |
| RawBufferLdStTestData<double> data = { { 3e-10 }, { 1.5, -1.99988 }, { 256.0, -105.17, 980.0 }, { 465.1652, -1.5694e2, -0.8543e-2, 1333.5 } }; |
| RunGraphicsRawBufferLdStTest<double>(D3D_SHADER_MODEL_6_3, RawBufferLdStType::Double, "GraphicsRawBufferLdSt64Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStI16) { |
| RawBufferLdStTestData<int16_t> data = { { 1 }, { 2, -1 }, { 256, -10517, 980 }, { 465, 13, -89, MAXUINT16 / 2 } }; |
| RunGraphicsRawBufferLdStTest<int16_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::I16, "GraphicsRawBufferLdSt16Bit", data); |
| } |
| |
| TEST_F(ExecutionTest, GraphicsRawBufferLdStHalf) { |
| RawBufferLdStTestData<float> floatData = { { 3e-10f }, { 1.5f, -1.99988f }, { 256.0f, 105.17f, 0.0f }, { 465.1652f, -1.5694e2f, -0.8543e-2f, 1333.5f } }; |
| RawBufferLdStTestData<uint16_t> halfData; |
| for (int i = 0; i < sizeof(floatData) / sizeof(float); i++) { |
| ((uint16_t*)&halfData)[i] = ConvertFloat32ToFloat16(((float*)&floatData)[i]); |
| } |
| RunGraphicsRawBufferLdStTest<uint16_t>(D3D_SHADER_MODEL_6_2, RawBufferLdStType::Half, "GraphicsRawBufferLdSt16Bit", halfData); |
| } |
| |
| bool ExecutionTest::SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, |
| CComPtr<ID3D12Device> &pDevice, CComPtr<IStream> &pStream, |
| char *&sTy, char *&additionalOptions) { |
| if (!CreateDevice(&pDevice, shaderModel)) { |
| return false; |
| } |
| |
| additionalOptions = ""; |
| |
| switch (dataType) { |
| case RawBufferLdStType::I64: |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return false; |
| } |
| sTy = "int64_t"; |
| break; |
| case RawBufferLdStType::Double: |
| if (!DoesDeviceSupportDouble(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support double operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return false; |
| } |
| sTy = "double"; |
| break; |
| case RawBufferLdStType::I16: |
| case RawBufferLdStType::Half: |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return false; |
| } |
| additionalOptions = "-enable-16bit-types"; |
| sTy = (dataType == RawBufferLdStType::I16 ? "int16_t" : "half"); |
| break; |
| case RawBufferLdStType::I32: |
| sTy = "int32_t"; |
| break; |
| case RawBufferLdStType::Float: |
| sTy = "float"; |
| break; |
| default: |
| DXASSERT_NOMSG("Invalid RawBufferLdStType"); |
| } |
| |
| // read shader config |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| return true; |
| } |
| |
| template <class Ty> |
| void ExecutionTest::VerifyRawBufferLdStTestResults(const std::shared_ptr<st::ShaderOpTest> test, const RawBufferLdStTestData<Ty> &testData) { |
| // read buffers back & verify expected values |
| static const int UavBufferCount = 4; |
| char bufferName[11] = "UAVBufferX"; |
| |
| for (unsigned i = 0; i < UavBufferCount; i++) { |
| MappedData dataUav; |
| RawBufferLdStUavData<Ty> *pOutData; |
| |
| bufferName[sizeof(bufferName) - 2] = (char)(i + '0'); |
| |
| test->GetReadBackData(bufferName, &dataUav); |
| VERIFY_ARE_EQUAL(sizeof(RawBufferLdStUavData<Ty>), dataUav.size()); |
| pOutData = (RawBufferLdStUavData<Ty> *)dataUav.data(); |
| |
| LogCommentFmt(L"Verifying UAVBuffer%d Load -> UAVBuffer%d Store", i, i); |
| // scalar |
| VERIFY_ARE_EQUAL(pOutData->output.v1, testData.v1); |
| // vector 2 |
| VERIFY_ARE_EQUAL(pOutData->output.v2[0], testData.v2[0]); |
| VERIFY_ARE_EQUAL(pOutData->output.v2[1], testData.v2[1]); |
| // vector 3 |
| VERIFY_ARE_EQUAL(pOutData->output.v3[0], testData.v3[0]); |
| VERIFY_ARE_EQUAL(pOutData->output.v3[1], testData.v3[1]); |
| VERIFY_ARE_EQUAL(pOutData->output.v3[2], testData.v3[2]); |
| // vector 4 |
| VERIFY_ARE_EQUAL(pOutData->output.v4[0], testData.v4[0]); |
| VERIFY_ARE_EQUAL(pOutData->output.v4[1], testData.v4[1]); |
| VERIFY_ARE_EQUAL(pOutData->output.v4[2], testData.v4[2]); |
| VERIFY_ARE_EQUAL(pOutData->output.v4[3], testData.v4[3]); |
| |
| // verify SRV Store |
| LogCommentFmt(L"Verifying SRVBuffer%d Load -> UAVBuffer%d Store", i, i); |
| // scalar |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v1, testData.v1); |
| // vector 2 |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v2[0], testData.v2[0]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v2[1], testData.v2[1]); |
| // vector 3 |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v3[0], testData.v3[0]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v3[1], testData.v3[1]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v3[2], testData.v3[2]); |
| // vector 4 |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v4[0], testData.v4[0]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v4[1], testData.v4[1]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v4[2], testData.v4[2]); |
| VERIFY_ARE_EQUAL(pOutData->srvOut.v4[3], testData.v4[3]); |
| } |
| } |
| |
| template <class Ty> |
| void ExecutionTest::RunComputeRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, |
| const char *shaderOpName, const RawBufferLdStTestData<Ty> &testData) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| CComPtr<ID3D12Device> pDevice; |
| CComPtr<IStream> pStream; |
| char *sTy = nullptr, *additionalOptions = nullptr; |
| |
| if (!SetupRawBufferLdStTest(shaderModel, dataType, pDevice, pStream, sTy, additionalOptions)) { |
| return; |
| } |
| |
| // format shader source |
| char rawBufferTestShaderText[sizeof(RawBufferTestComputeShaderTemplate) + sizeof(RawBufferTestShaderDeclarations) + sizeof(RawBufferTestShaderBody)]; |
| VERIFY_IS_TRUE(sprintf_s(rawBufferTestShaderText, sizeof(rawBufferTestShaderText), |
| RawBufferTestComputeShaderTemplate, RawBufferTestShaderDeclarations, RawBufferTestShaderBody) != -1); |
| |
| // format compiler args |
| char compilerOptions[256]; |
| VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), "-D COMPONENT_TYPE=%s -D COMPONENT_SIZE=%d %s", sTy, (int)sizeof(Ty), additionalOptions) != -1); |
| |
| // run the shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, shaderOpName, |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || (0 == strncmp(Name, "UAVBuffer", 9))) && |
| (Name[9] >= '0' && Name[9] <= '3')); |
| pShaderOp->Shaders.at(0).Arguments = compilerOptions; |
| pShaderOp->Shaders.at(0).Text = rawBufferTestShaderText; |
| |
| VERIFY_IS_TRUE(sizeof(RawBufferLdStTestData<Ty>) <= Data.size()); |
| RawBufferLdStTestData<Ty> *pInData = (RawBufferLdStTestData<Ty>*)Data.data(); |
| memcpy(pInData, &testData, sizeof(RawBufferLdStTestData<Ty>)); |
| }); |
| |
| // verify expected values |
| VerifyRawBufferLdStTestResults<Ty>(test->Test, testData); |
| } |
| |
| template <class Ty> |
| void ExecutionTest::RunGraphicsRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, RawBufferLdStType dataType, |
| const char *shaderOpName, const RawBufferLdStTestData<Ty> &testData) { |
| |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| CComPtr<ID3D12Device> pDevice; |
| CComPtr<IStream> pStream; |
| char *sTy = nullptr, *additionalOptions = nullptr; |
| |
| if (!SetupRawBufferLdStTest(shaderModel, dataType, pDevice, pStream, sTy, additionalOptions)) { |
| return; |
| } |
| |
| // format shader source |
| char rawBufferTestPixelShaderText[sizeof(RawBufferTestGraphicsPixelShaderTemplate) + sizeof(RawBufferTestShaderDeclarations) + sizeof(RawBufferTestShaderBody)]; |
| VERIFY_IS_TRUE(sprintf_s(rawBufferTestPixelShaderText, sizeof(rawBufferTestPixelShaderText), |
| RawBufferTestGraphicsPixelShaderTemplate, RawBufferTestShaderDeclarations, RawBufferTestShaderBody) != -1); |
| |
| // format compiler args |
| char compilerOptions[256]; |
| VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), "-D COMPONENT_TYPE=%s -D COMPONENT_SIZE=%d %s", sTy, (int)sizeof(Ty), additionalOptions) != -1); |
| |
| // run the shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest(pDevice, m_support, pStream, shaderOpName, |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || (0 == strncmp(Name, "UAVBuffer", 9))) && |
| (Name[9] >= '0' && Name[9] <= '3')); |
| // pixel shader is at index 1, vertex shader at index 0 |
| pShaderOp->Shaders.at(1).Arguments = compilerOptions; |
| pShaderOp->Shaders.at(1).Text = rawBufferTestPixelShaderText; |
| |
| VERIFY_IS_TRUE(sizeof(RawBufferLdStTestData<Ty>) <= Data.size()); |
| RawBufferLdStTestData<Ty> *pInData = (RawBufferLdStTestData<Ty>*)Data.data(); |
| memcpy(pInData, &testData, sizeof(RawBufferLdStTestData<Ty>)); |
| }); |
| |
| // verify expected values |
| VerifyRawBufferLdStTestResults<Ty>(test->Test, testData); |
| } |
| |
// Packs the low 8 bits of each of the four elements into one 32-bit value.
// Element 0 lands in bits 0-7 and element 3 in bits 24-31; any higher bits of
// an element are discarded.
template<typename T>
uint32_t pack(std::array<T, 4> unpackedVals)
{
  uint32_t packed = 0;
  // Walk from the last element down: each step moves the bytes gathered so
  // far up by one position and drops the next byte in underneath.
  for (uint32_t idx = 4U; idx-- > 0U;)
  {
    const uint32_t lowByte = static_cast<uint32_t>(unpackedVals[idx]) & 0xFFU;
    packed = (packed << 8) | lowByte;
  }

  return packed;
}
| |
// Clamps each of the four elements to the uint8_t range [0, 255] and packs
// the results into one 32-bit value. Element 0 lands in bits 0-7 and element
// 3 in bits 24-31. Each element is converted to int32_t before clamping.
template <typename T>
uint32_t pack_clamp_u8(std::array<T, 4> unpackedVals)
{
  const int32_t clamp_min = std::numeric_limits<uint8_t>::min();
  const int32_t clamp_max = std::numeric_limits<uint8_t>::max();

  uint32_t dst = 0;
  for (uint32_t i = 0U; i < 4U; ++i)
  {
    const int32_t clamped = std::min(std::max(static_cast<int32_t>(unpackedVals[i]), clamp_min), clamp_max);
    // Widen to uint32_t before shifting. A bare uint8_t is promoted to signed
    // int, and shifting a byte >= 0x80 left by 24 would then push a one into
    // the sign bit.
    dst |= static_cast<uint32_t>(static_cast<uint8_t>(clamped)) << (i * 8);
  }

  return dst;
}
| |
// Clamps each of the four elements to the int8_t range [-128, 127] and packs
// the low byte of each result into one 32-bit value. Element 0 lands in bits
// 0-7 and element 3 in bits 24-31. Each element is converted to int32_t
// before clamping.
template <typename T>
uint32_t pack_clamp_s8(std::array<T, 4> unpackedVals)
{
  const int32_t clamp_min = std::numeric_limits<int8_t>::min();
  const int32_t clamp_max = std::numeric_limits<int8_t>::max();

  uint32_t dst = 0;
  for (uint32_t i = 0U; i < 4U; ++i)
  {
    const int32_t clamped = std::min(std::max(static_cast<int32_t>(unpackedVals[i]), clamp_min), clamp_max);
    // Widen to uint32_t before shifting. A bare uint8_t is promoted to signed
    // int, and a negative clamped value (byte >= 0x80) shifted left by 24
    // would then push a one into the sign bit.
    dst |= static_cast<uint32_t>(static_cast<uint8_t>(clamped)) << (i * 8);
  }

  return dst;
}
| |
// Splits a 32-bit value into its four bytes, treating each byte as unsigned.
// Element 0 comes from bits 0-7 and element 3 from bits 24-31.
template<typename T>
std::array<T, 4> unpack_u(uint32_t packedVal)
{
  std::array<T, 4> lanes;
  for (uint32_t byteIdx = 0U; byteIdx < 4U; ++byteIdx)
  {
    // Go through uint8_t so the value is zero-extended when converted to T.
    lanes[byteIdx] = static_cast<uint8_t>((packedVal >> (byteIdx * 8)) & 0xFFU);
  }

  return lanes;
}
| |
// Splits a 32-bit value into its four bytes, treating each byte as signed.
// Element 0 comes from bits 0-7 and element 3 from bits 24-31.
template<typename T>
std::array<T, 4> unpack_s(uint32_t packedVal)
{
  std::array<T, 4> lanes;
  for (uint32_t byteIdx = 0U; byteIdx < 4U; ++byteIdx)
  {
    // Go through int8_t so the value is sign-extended when converted to T.
    lanes[byteIdx] = static_cast<int8_t>((packedVal >> (byteIdx * 8)) & 0xFFU);
  }

  return lanes;
}
| |
| |
| TEST_F(ExecutionTest, PackUnpackTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| |
| #ifdef PACKUNPACK_PLACEHOLDER |
| string args = "-enable-16bit-types -DPACKUNPACK_PLACEHOLDER"; |
| string target = "cs_6_2"; |
| |
| if (!CreateDevice(&pDevice)) { |
| return; |
| } |
| #else |
| string args = "-enable-16bit-types"; |
| string target = "cs_6_6"; |
| |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { |
| return; |
| } |
| #endif |
| |
| if (!DoesDeviceSupportNative16bitOps(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support native 16-bit operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| int tableSize = sizeof(PackUnpackOpParameters) / sizeof(TableParameter); |
| TableParameterHandler handler(PackUnpackOpParameters, tableSize); |
| |
| CW2A Text(handler.GetTableParamByName(L"ShaderOp.Text")->m_str); |
| |
| std::vector<uint32_t> *validation_input = &handler.GetTableParamByName(L"Validation.Input")->m_uint32Table; |
| uint32_t validation_tolerance = handler.GetTableParamByName(L"Validation.Tolerance")->m_uint; |
| |
| size_t count = validation_input->size(); |
| std::vector<SPackUnpackOpOutPacked> expectedPacked(count / 4); |
| std::vector<SPackUnpackOpOutUnpacked> expectedUnpacked(count / 4); |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTest( |
| pDevice, m_support, pStream, "PackUnpackOp", |
| // this callback is called when the test |
| // is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| |
| if (0 == _stricmp(Name, "g_bufIn")) |
| { |
| size_t size = sizeof(uint32_t) * 4 * count; |
| Data.resize(size); |
| uint32_t *pPrimitives = (uint32_t*)Data.data(); |
| |
| for (size_t i = 0; i < count / 4; ++i) { |
| uint32_t *p = &pPrimitives[i * 4]; |
| uint32_t x = (*validation_input)[i * 4 + 0]; |
| uint32_t y = (*validation_input)[i * 4 + 1]; |
| uint32_t z = (*validation_input)[i * 4 + 2]; |
| uint32_t w = (*validation_input)[i * 4 + 3]; |
| |
| p[0] = x; |
| p[1] = y; |
| p[2] = z; |
| p[3] = w; |
| |
| std::array<uint32_t, 4> inputUint32 = { x, y, z, w }; |
| std::array<int32_t, 4> inputInt32 = { (int32_t)x, (int32_t)y, (int32_t)z, (int32_t)w }; |
| std::array<uint16_t, 4> inputUint16 = { (uint16_t)x, (uint16_t)y, (uint16_t)z, (uint16_t)w }; |
| std::array<int16_t, 4> inputInt16 = { (int16_t)x, (int16_t)y, (int16_t)z, (int16_t)w }; |
| |
| // Pack unclamped |
| expectedPacked[i].packedUint32 = pack(inputUint32); |
| expectedPacked[i].packedInt32 = pack(inputInt32); |
| expectedPacked[i].packedUint16 = pack(inputUint16); |
| expectedPacked[i].packedInt16 = pack(inputInt16); |
| // pack clamped |
| expectedPacked[i].packedClampedUint32 = pack_clamp_u8(inputInt32); |
| expectedPacked[i].packedClampedInt32 = pack_clamp_s8(inputInt32); |
| expectedPacked[i].packedClampedUint16 = pack_clamp_u8(inputInt16); |
| expectedPacked[i].packedClampedInt16 = pack_clamp_s8(inputInt16); |
| |
| // unpack |
| expectedUnpacked[i].outputUint32 = unpack_u<uint32_t>(expectedPacked[i].packedUint32); |
| expectedUnpacked[i].outputInt32 = unpack_s<int32_t >(expectedPacked[i].packedInt32 ); |
| expectedUnpacked[i].outputUint16 = unpack_u<uint16_t>(expectedPacked[i].packedUint16); |
| expectedUnpacked[i].outputInt16 = unpack_s<int16_t >(expectedPacked[i].packedInt16 ); |
| expectedUnpacked[i].outputClampedUint32 = unpack_u<uint32_t>(expectedPacked[i].packedClampedUint32); |
| expectedUnpacked[i].outputClampedInt32 = unpack_s<int32_t >(expectedPacked[i].packedClampedInt32 ); |
| expectedUnpacked[i].outputClampedUint16 = unpack_u<uint16_t>(expectedPacked[i].packedClampedUint16); |
| expectedUnpacked[i].outputClampedInt16 = unpack_s<int16_t >(expectedPacked[i].packedClampedInt16 ); |
| } |
| } |
| else |
| { |
| std::fill(Data.begin(), Data.end(), (BYTE)0); |
| } |
| |
| // use shader from data table |
| pShaderOp->Shaders.at(0).Target = target.c_str(); |
| pShaderOp->Shaders.at(0).Text = Text.m_psz; |
| pShaderOp->Shaders.at(0).Arguments = args.c_str(); |
| }); |
| |
| MappedData packedData; |
| test->Test->GetReadBackData("g_bufOutPacked", &packedData); |
| SPackUnpackOpOutPacked *readBackPacked = (SPackUnpackOpOutPacked *)packedData.data(); |
| |
| MappedData unpackedData; |
| test->Test->GetReadBackData("g_bufOutPackedUnpacked", &unpackedData); |
| SPackUnpackOpOutUnpacked *readBackUnpacked = (SPackUnpackOpOutUnpacked *)unpackedData.data(); |
| |
| for (size_t i = 0; i < count / 4; ++i) |
| { |
| VerifyOutputWithExpectedValueUInt(readBackPacked[i].packedUint32, expectedPacked[i].packedUint32, validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackPacked[i].packedInt32 , expectedPacked[i].packedInt32 , validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackPacked[i].packedUint16, expectedPacked[i].packedUint16, validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackPacked[i].packedInt16 , expectedPacked[i].packedInt16 , validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackPacked[i].packedClampedUint32, expectedPacked[i].packedClampedUint32, validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackPacked[i].packedClampedInt32 , expectedPacked[i].packedClampedInt32 , validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackPacked[i].packedClampedUint16, expectedPacked[i].packedClampedUint16, validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackPacked[i].packedClampedInt16 , expectedPacked[i].packedClampedInt16 , validation_tolerance); |
| |
| for (uint32_t j = 0; j < 4; ++j) |
| { |
| VerifyOutputWithExpectedValueUInt(readBackUnpacked[i].outputUint32[j], expectedUnpacked[i].outputUint32[j], validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackUnpacked[i].outputInt32 [j], expectedUnpacked[i].outputInt32 [j], validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackUnpacked[i].outputUint16[j], expectedUnpacked[i].outputUint16[j], validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackUnpacked[i].outputInt16 [j], expectedUnpacked[i].outputInt16 [j], validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackUnpacked[i].outputClampedUint32[j], expectedUnpacked[i].outputClampedUint32[j], validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackUnpacked[i].outputClampedInt32 [j], expectedUnpacked[i].outputClampedInt32 [j], validation_tolerance); |
| VerifyOutputWithExpectedValueUInt(readBackUnpacked[i].outputClampedUint16[j], expectedUnpacked[i].outputClampedUint16[j], validation_tolerance); |
| VerifyOutputWithExpectedValueInt (readBackUnpacked[i].outputClampedInt16 [j], expectedUnpacked[i].outputClampedInt16 [j], validation_tolerance); |
| } |
| } |
| } |
| |
| |
| // This test expects a <pShader> that retrieves a signal value from each of a few |
| // resources that are initialized here. <isDynamic> determines if it uses the |
| // 6.6 Dynamic Resources feature. |
| // Values are read back from the result UAV and compared to the expected signals |
| void ExecutionTest::RunResourceTest(ID3D12Device *pDevice, const char *pShader, |
| const wchar_t *sm, bool isDynamic) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| const int NumSRVs = 3; |
| const int NumUAVs = 4; |
| const int NumResources = NumSRVs + NumUAVs; |
| const int NumSamplers = 2; |
| const int valueSize = 16; |
| |
| static const int DispatchGroupX = 1; |
| static const int DispatchGroupY = 1; |
| static const int DispatchGroupZ = 1; |
| |
| CComPtr<ID3D12GraphicsCommandList> pCommandList; |
| CComPtr<ID3D12CommandQueue> pCommandQueue; |
| CComPtr<ID3D12CommandAllocator> pCommandAllocator; |
| FenceObj FO; |
| |
| UINT valueSizeInBytes = valueSize * sizeof(float); |
| CreateComputeCommandQueue(pDevice, L"DynamicResourcesTest Command Queue", &pCommandQueue); |
| InitFenceObj(pDevice, &FO); |
| |
| // Create root signature. |
| CComPtr<ID3D12RootSignature> pRootSignature; |
| if (!isDynamic) { |
| // Not dynamic, create a range for each resource and from them, the root signature |
| CD3DX12_DESCRIPTOR_RANGE ranges[NumResources]; |
| CD3DX12_DESCRIPTOR_RANGE srange[NumSamplers]; |
| for (int i = 0; i < NumSRVs; i++) |
| ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, i, 0); |
| |
| for (int i = NumSRVs; i < NumResources; i++) |
| ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, i - NumSRVs, 0); |
| |
| for (int i = 0; i < NumSamplers; i++) |
| srange[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, i, 0); |
| |
| CreateRootSignatureFromRanges(pDevice, &pRootSignature, ranges, NumResources, srange, NumSamplers); |
| } else { |
| // Dynamic just requires the flags indicating that the builtin arrays should be accessible |
| #if !defined(D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) |
| #define D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED (D3D12_ROOT_SIGNATURE_FLAGS)0x400 |
| #define D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED (D3D12_ROOT_SIGNATURE_FLAGS)0x800 |
| #endif |
| CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; |
| rootSignatureDesc.Init(0, nullptr, 0, nullptr, |
| D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | |
| D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED); |
| CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSignature); |
| } |
| |
| // Create pipeline state object. |
| CComPtr<ID3D12PipelineState> pComputeState; |
| CreateComputePSO(pDevice, pRootSignature, pShader, sm, &pComputeState); |
| |
| // Create a command allocator and list for compute. |
| VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); |
| VERIFY_SUCCEEDED(pDevice->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, IID_PPV_ARGS(&pCommandList))); |
| |
| // Set up SRV resources |
| CComPtr<ID3D12Resource> pSRVResources[NumSRVs]; |
| CComPtr<ID3D12Resource> pUAVResources[NumUAVs]; |
| CComPtr<ID3D12Resource> pUploadResources[NumResources]; |
| { |
| D3D12_RESOURCE_DESC bufDesc = CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes); |
| float values[valueSize]; |
| for (int i = 0; i < NumSRVs - 1; i++) { |
| for (int j = 0; j < valueSize; j++) |
| values[j] = 10.0f + i; |
| CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, bufDesc, |
| &pSRVResources[i], &pUploadResources[i]); |
| } |
| D3D12_RESOURCE_DESC tex2dDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 4, 4); |
| for (int j = 0; j < valueSize; j++) |
| values[j] = 10.0 + (NumSRVs - 1); |
| CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, tex2dDesc, |
| &pSRVResources[NumSRVs - 1], &pUploadResources[NumSRVs - 1]); |
| } |
| |
| // Set up UAV resources |
| CComPtr<ID3D12Resource> pReadBuffer; |
| float values[valueSize]; |
| for (int i = 0; i < NumUAVs - 2; i++) { |
| for (int j = 0; j < valueSize; j++) |
| values[j] = 20.0f + i; |
| CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, |
| &pUAVResources[i], &pUploadResources[NumSRVs + i]); |
| } |
| for (int j = 0; j < valueSize; j++) |
| values[j] = 20.0 + (NumUAVs - 1); |
| CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, |
| &pUAVResources[NumUAVs - 2], &pUploadResources[NumResources - 2], &pReadBuffer); |
| |
| for (int j = 0; j < valueSize; j++) |
| values[j] = 20.0 + (NumUAVs - 2); |
| D3D12_RESOURCE_DESC tex1dDesc = CD3DX12_RESOURCE_DESC::Tex1D(DXGI_FORMAT_R32_FLOAT, valueSize, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); |
| CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, tex1dDesc, |
| &pUAVResources[NumUAVs - 1], &pUploadResources[NumResources - 1]); |
| |
| // Close the command list and execute it to perform the GPU setup. |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| VERIFY_SUCCEEDED(pCommandAllocator->Reset()); |
| VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); |
| |
| CComPtr<ID3D12DescriptorHeap> pResHeap; |
| CComPtr<ID3D12DescriptorHeap> pSampHeap; |
| CreateDefaultDescHeaps(pDevice, NumSRVs + NumUAVs, NumSamplers, &pResHeap, &pSampHeap); |
| |
| // Create Rootsignature and descriptor tables |
| { |
| ID3D12DescriptorHeap *descHeaps[2] = {pResHeap, pSampHeap}; |
| pCommandList->SetDescriptorHeaps(2, descHeaps); |
| pCommandList->SetComputeRootSignature(pRootSignature); |
| |
| if (!isDynamic) { |
| // Only non-dynamic resources require descriptortables |
| pCommandList->SetComputeRootDescriptorTable(0, pResHeap->GetGPUDescriptorHandleForHeapStart()); |
| pCommandList->SetComputeRootDescriptorTable(1, pSampHeap->GetGPUDescriptorHandleForHeapStart()); |
| } |
| } |
| CD3DX12_CPU_DESCRIPTOR_HANDLE baseHandle(pResHeap->GetCPUDescriptorHandleForHeapStart()); |
| // Create SRVs |
| CreateRawSRV(pDevice, baseHandle, valueSize, pSRVResources[0]); |
| CreateStructSRV(pDevice, baseHandle, valueSize, sizeof(float), pSRVResources[1]); |
| CreateTex2DSRV(pDevice, baseHandle, DXGI_FORMAT_R32_FLOAT, pSRVResources[2]); |
| // Create UAVs |
| CreateRawUAV(pDevice, baseHandle, valueSize, pUAVResources[0]); |
| CreateStructUAV(pDevice, baseHandle, valueSize, sizeof(float), pUAVResources[1]); |
| CreateTypedUAV(pDevice, baseHandle, valueSize, DXGI_FORMAT_R32_FLOAT, pUAVResources[2]); |
| CreateTex1DUAV(pDevice, baseHandle, DXGI_FORMAT_R32_FLOAT, pUAVResources[3]); |
| |
| D3D12_FILTER filters[] = {D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, D3D12_FILTER_COMPARISON_MIN_MAG_LINEAR_MIP_POINT}; |
| float perSampleBorderColors[] = {30.0, 31.0}; |
| CreateDefaultSamplers(pDevice, pSampHeap->GetCPUDescriptorHandleForHeapStart(), |
| filters, perSampleBorderColors, NumSamplers); |
| |
| // Run the compute shader and copy the results back to readable memory. |
| pCommandList->Dispatch(DispatchGroupX, DispatchGroupY, DispatchGroupZ); |
| |
| RecordTransitionBarrier(pCommandList, pUAVResources[NumUAVs - 2], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); |
| pCommandList->CopyResource(pReadBuffer, pUAVResources[NumUAVs - 2]); |
| |
| pCommandList->Close(); |
| ExecuteCommandList(pCommandQueue, pCommandList); |
| WaitForSignal(pCommandQueue, FO); |
| |
| MappedData data(pReadBuffer, valueSize*sizeof(float)); |
| const float *pData = (float*)data.data(); |
| LogCommentFmt(L"Verify bound resources are properly selected"); |
| VERIFY_ARE_EQUAL(pData[0], 10); |
| VERIFY_ARE_EQUAL(pData[1], 11); |
| VERIFY_ARE_EQUAL(pData[2], 12); |
| |
| VERIFY_ARE_EQUAL(pData[3], 20); |
| VERIFY_ARE_EQUAL(pData[4], 21); |
| VERIFY_ARE_EQUAL(pData[5], 22); |
| VERIFY_ARE_EQUAL(pData[6], 30); |
| VERIFY_ARE_EQUAL(pData[7], 1); // samplecmp 1 means it matched 31 |
| } |
| |
| TEST_F(ExecutionTest, SignatureResourcesTest) { |
| std::string pShader = |
| "ByteAddressBuffer g_rawBuf : register(t0);\n" |
| "StructuredBuffer<float> g_structBuf : register(t1);\n" |
| "Texture2D<float> g_tex : register(t2);\n" |
| "RWByteAddressBuffer g_rwRawBuf : register(u0);\n" |
| "RWStructuredBuffer<float> g_rwStructBuf : register(u1);\n" |
| "RWBuffer<float> g_result : register(u2);\n" |
| "RWTexture1D<float> g_rwTex : register(u3);\n" |
| "SamplerState g_samp : register(s0);\n" |
| "SamplerComparisonState g_sampCmp : register(s1);\n" |
| "[NumThreads(1, 1, 1)]\n" |
| "void main(uint ix : SV_GroupIndex) {\n" |
| " g_result[0] = g_rawBuf.Load<float>(0);\n" |
| " g_result[1] = g_structBuf.Load(0);\n" |
| " g_result[2] = g_tex.Load(0);\n" |
| " g_result[3] = g_rwRawBuf.Load<float>(0);\n" |
| " g_result[4] = g_rwStructBuf.Load(0);\n" |
| " g_result[5] = g_rwTex.Load(0);\n" |
| " g_result[6] = g_tex.SampleLevel(g_samp, -0.5, 0);\n" |
| " g_result[7] = g_tex.SampleCmpLevelZero(g_sampCmp, -0.5, 31.0);\n" |
| "}\n"; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| RunResourceTest(pDevice, pShader.c_str(), L"cs_6_6", /*isDynamic*/false); |
| } |
| |
| TEST_F(ExecutionTest, DynamicResourcesTest) { |
| static const char pShader[] = |
| "static ByteAddressBuffer g_rawBuf = ResourceDescriptorHeap[0];\n" |
| "static StructuredBuffer<float> g_structBuf = ResourceDescriptorHeap[1];\n" |
| "static Texture2D<float> g_tex = ResourceDescriptorHeap[2];\n" |
| "static RWByteAddressBuffer g_rwRawBuf = ResourceDescriptorHeap[3];\n" |
| "static RWStructuredBuffer<float> g_rwStructBuf = ResourceDescriptorHeap[4];\n" |
| "static RWBuffer<float> g_result = ResourceDescriptorHeap[5];\n" |
| "static RWTexture1D<float> g_rwTex = ResourceDescriptorHeap[6];\n" |
| "static SamplerState g_samp = SamplerDescriptorHeap[0];\n" |
| "static SamplerComparisonState g_sampCmp = SamplerDescriptorHeap[1];\n" |
| "[NumThreads(1, 1, 1)]\n" |
| "void main(uint ix : SV_GroupIndex) {\n" |
| " g_result[0] = g_rawBuf.Load<float>(0);\n" |
| " g_result[1] = g_structBuf.Load(0);\n" |
| " g_result[2] = g_tex.Load(0);\n" |
| " g_result[3] = g_rwRawBuf.Load<float>(0);\n" |
| " g_result[4] = g_rwStructBuf.Load(0);\n" |
| " g_result[5] = g_rwTex.Load(0);\n" |
| " g_result[6] = g_tex.SampleLevel(g_samp, -0.5, 0);\n" |
| " g_result[7] = g_tex.SampleCmpLevelZero(g_sampCmp, -0.5, 31.0);\n" |
| "}\n"; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| // ResourceDescriptorHeap/SamplerDescriptorHeap requires Resource Binding Tier 3 |
| D3D12_FEATURE_DATA_D3D12_OPTIONS devOptions; |
| VERIFY_SUCCEEDED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS, &devOptions, sizeof(devOptions))); |
| if (devOptions.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) { |
| WEX::Logging::Log::Comment(L"Device does not support Resource Binding Tier 3"); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| RunResourceTest(pDevice, pShader, L"cs_6_6", /*isDynamic*/true); |
| } |
| |
| //void ExecutionTest::TestComputeShaderDynamicResourcesUniformIndexing() |
| |
| void EnableShaderBasedValidation() { |
| CComPtr<ID3D12Debug> spDebugController0; |
| CComPtr<ID3D12Debug1> spDebugController1; |
| VERIFY_SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&spDebugController0))); |
| VERIFY_SUCCEEDED( |
| spDebugController0->QueryInterface(IID_PPV_ARGS(&spDebugController1))); |
| spDebugController1->SetEnableGPUBasedValidation(true); |
| } |
| |
| void VerifyFloatArraysAreEqual(const float* resultFloats, float *expectedResults, int expectedResultsSize) |
| { |
| for (int j = 0; j < expectedResultsSize; j++) |
| { |
| VERIFY_ARE_EQUAL(resultFloats[j], expectedResults[j]); |
| } |
| } |
| |
| TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { |
| //EnableShaderBasedValidation(); |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| st::ShaderOp *pShaderOp = |
| ShaderOpSet->GetShaderOp("DynamicResourcesDynamicIndexing"); |
| vector<st::ShaderOpRootValue> fallbackRootValues = pShaderOp->RootValues; |
| |
| bool Skipped = true; |
| |
| //D3D_SHADER_MODEL TestShaderModels[] = {D3D_SHADER_MODEL_6_0}; // FALLBACK |
| D3D_SHADER_MODEL TestShaderModels[] = {D3D_SHADER_MODEL_6_6, D3D_SHADER_MODEL_6_0}; |
| |
| const int expectedResultsSize = 16; |
| float expectedResultsUniform[expectedResultsSize] = { |
| 10.0, 10.0, |
| 12.0, 12.0, |
| 14.0, 14.0, |
| 20.0, 20.0, |
| 22.0, 22.0, |
| 24.0, 24.0, |
| 30.0, 30.0, |
| 32.0, 32.0}; |
| |
| float expectedResultsNonUniform[expectedResultsSize] = { |
| 10.0, 11.0, |
| 12.0, 13.0, |
| 14.0, 15.0, |
| 20.0, 21.0, |
| 22.0, 23.0, |
| 24.0, 25.0, |
| 30.0, 31.0, |
| 32.0, 33.0}; |
| |
| // TestShaderModels will be an array, where the first x models are "non-fallback", and the rest of the models |
| // are "fallback". If TestShaderModels has length y, and a test loops through all shader models, a convention |
| // to test based on whether fallback is enabled or not is to limit the loop like this: |
| // unsigned num_models_to_test = ExecutionTest::IsFallbackPathEnabled() ? y : x; |
| unsigned num_models_to_test = ExecutionTest::IsFallbackPathEnabled() ? 2 : 1; |
| for (unsigned i = 0; i < num_models_to_test; i++) { |
| D3D_SHADER_MODEL sm = TestShaderModels[i]; |
| LogCommentFmt(L"\r\nVerifying Dynamic Resources Dynamic Indexing in shader " |
| L"model 6.%1u", |
| ((UINT)sm & 0x0f)); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { |
| continue; |
| } |
| D3D12_FEATURE_DATA_D3D12_OPTIONS devOptions; |
| VERIFY_SUCCEEDED( |
| pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS, |
| &devOptions, sizeof(devOptions))); |
| if (devOptions.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) { |
| WEX::Logging::Log::Comment( |
| L"Device does not support Resource Binding Tier 3"); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| for (unsigned int non_uniform_bit = 0; non_uniform_bit < 2; non_uniform_bit++) { |
| float *expectedResults = non_uniform_bit ? expectedResultsNonUniform : expectedResultsUniform; |
| |
| LogCommentFmt(L"Testing %s Resource Indexing.", non_uniform_bit ? L"NonUniform" : L"Uniform"); |
| |
| // Add compile options |
| std::string compilerOptions = ""; |
| if (sm==D3D_SHADER_MODEL_6_0) |
| compilerOptions += " -D FALLBACK=1"; |
| if (non_uniform_bit) |
| compilerOptions += " -D NON_UNIFORM=1"; |
| |
| // by default a root value is added. |
| // remove the root value if this is the non-fallback path |
| if (sm==D3D_SHADER_MODEL_6_6) |
| { |
| pShaderOp->RootValues.clear(); |
| } |
| else |
| { |
| pShaderOp->RootValues = fallbackRootValues; |
| } |
| |
| // Update shader target in xml. |
| for (st::ShaderOpShader &S : pShaderOp->Shaders){ |
| S.Arguments = NULL; |
| if (!compilerOptions.empty()){ |
| S.Arguments = pShaderOp->GetString(compilerOptions.c_str()); |
| } |
| // Set the target correctly. Setting here permanently overwrites |
| // the Target string even in future iterations. |
| if (sm==D3D_SHADER_MODEL_6_0){ |
| std::string Target(S.Target); |
| Target[Target.length() - 1] = '0'; |
| S.Target = pShaderOp->GetString(Target.c_str()); |
| } |
| else if (sm==D3D_SHADER_MODEL_6_6){ |
| std::string Target(S.Target); |
| Target[Target.length() - 1] = '6'; |
| S.Target = pShaderOp->GetString(Target.c_str()); |
| } |
| } |
| |
| // Test Compute shader |
| { |
| pShaderOp->CS = pShaderOp->GetString("CS66"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse( |
| pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, |
| ShaderOpSet); |
| |
| MappedData resultData; |
| test->Test->GetReadBackData("g_result", &resultData); |
| const float *resultCSFloats = (float *)resultData.data(); |
| |
| VerifyFloatArraysAreEqual(resultCSFloats, expectedResults, expectedResultsSize); |
| } |
| |
| // Test Vertex + Pixel shader |
| { |
| pShaderOp->CS = nullptr; |
| pShaderOp->VS = pShaderOp->GetString("VS66"); |
| pShaderOp->PS = pShaderOp->GetString("PS66"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse( |
| pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, |
| ShaderOpSet); |
| |
| MappedData resultVSData; |
| MappedData resultPSData; |
| test->Test->GetReadBackData("g_resultVS", &resultVSData); |
| test->Test->GetReadBackData("g_resultPS", &resultPSData); |
| const float *resultVSFloats = (float *)resultVSData.data(); |
| const float *resultPSFloats = (float *)resultPSData.data(); |
| D3D12_QUERY_DATA_PIPELINE_STATISTICS Stats; |
| test->Test->GetPipelineStats(&Stats); |
| |
| |
| // VS |
| VerifyFloatArraysAreEqual(resultVSFloats, expectedResults, expectedResultsSize); |
| |
| // PS |
| VerifyFloatArraysAreEqual(resultPSFloats, expectedResults, expectedResultsSize); |
| } |
| Skipped = false; |
| } |
| } |
| |
| if (Skipped) { |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| } |
| |
| #define MAX_WAVESIZE 128 |
| |
| #define strinfigy2(arg) #arg |
| #define strinfigy(arg) strinfigy2(arg) |
| |
| void ExecutionTest::WaveSizeTest() { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { |
| return; |
| } |
| |
| // Check Wave support |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| // Optional feature, so it's correct to not support it if declared as such. |
| WEX::Logging::Log::Comment(L"Device does not support wave operations."); |
| return; |
| } |
| |
| // Get supported wave sizes |
| D3D12_FEATURE_DATA_D3D12_OPTIONS1 waveOpts; |
| VERIFY_SUCCEEDED(pDevice->CheckFeatureSupport((D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS1, &waveOpts, sizeof(waveOpts))); |
| UINT minWaveSize = waveOpts.WaveLaneCountMin; |
| UINT maxWaveSize = waveOpts.WaveLaneCountMax; |
| |
| DXASSERT_NOMSG(minWaveSize <= maxWaveSize); |
| DXASSERT((minWaveSize & (minWaveSize - 1)) == 0, "must be a power of 2"); |
| DXASSERT((maxWaveSize & (maxWaveSize - 1)) == 0, "must be a power of 2"); |
| |
| // read shader config |
| CComPtr<IStream> pStream; |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>(); |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| // format shader source |
| const char waveSizeTestShader[] = |
| "struct TestData { \r\n" |
| " uint count; \r\n" |
| "}; \r\n" |
| "RWStructuredBuffer<TestData> data : register(u0); \r\n" |
| "\r\n" |
| "// Note: WAVESIZE will be defined via compiler option -D\r\n" |
| "[wavesize(WAVESIZE)]\r\n" |
| "[numthreads(" strinfigy(MAX_WAVESIZE) "*2,1,1)]\r\n" |
| "void main(uint3 tid : SV_DispatchThreadID ) { \r\n" |
| " data[tid.x].count = WaveActiveSum(1); \r\n" |
| "}\r\n"; |
| |
| struct WaveSizeTestData { |
| uint32_t count; |
| }; |
| |
| for (UINT waveSize = minWaveSize; waveSize <= maxWaveSize; waveSize *= 2) { |
| // format compiler args |
| char compilerOptions[32]; |
| VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), "-D WAVESIZE=%d", waveSize) != -1); |
| |
| // run the shader |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "WaveSizeTest", |
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pShaderOp) { |
| VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); |
| pShaderOp->Shaders.at(0).Arguments = compilerOptions; |
| pShaderOp->Shaders.at(0).Text = waveSizeTestShader; |
| |
| VERIFY_IS_TRUE(sizeof(WaveSizeTestData)*MAX_WAVESIZE <= Data.size()); |
| WaveSizeTestData *pInData = (WaveSizeTestData *)Data.data(); |
| memset(&pInData, sizeof(WaveSizeTestData)*MAX_WAVESIZE, 0); |
| }, ShaderOpSet); |
| |
| // verify expected values |
| MappedData dataUav; |
| WaveSizeTestData *pOutData; |
| |
| test->Test->GetReadBackData("UAVBuffer0", &dataUav); |
| VERIFY_ARE_EQUAL(sizeof(WaveSizeTestData)*MAX_WAVESIZE, dataUav.size()); |
| pOutData = (WaveSizeTestData*)dataUav.data(); |
| |
| LogCommentFmt(L"Verifying test result for wave size %d", waveSize); |
| |
| for (unsigned i = 0; i < MAX_WAVESIZE; i++) { |
| if (!VERIFY_ARE_EQUAL(pOutData[i].count, waveSize)) |
| break; |
| } |
| } |
| } |
| |
| |
| // Atomic operation testing |
| |
| // Atomic tests take a single integer index as input and contort it into some |
| // kind of interesting contributor to the operation in question. |
| // So each vertex, pixel, thread, or other will have a unique index that produces |
| // a contributing value to the calculation which is stored in a small resource |
| |
| // For arithmetic or bitwise operations, each contributor accumulates to the same |
| // location in the resource indexed by the operation type. Addition is in index 0 |
| // umin/umax are in 1 and 2 and so on. |
| |
| // To make sure that the most significant bits are involved in the calculation, |
| // particularly in the case of 64-bit values, each contributing value is duplicated |
| // to the lower and upper halves of the value. There is an exception to this when |
| // addition exceeds the available size and also for compare and exchange explained below. |
| |
| // For compare and exchange operations, 64 output locations are shared by the various lanes. |
| // Each lane attempts to write to a location that is shared with several others. |
| // The first one to write to it determines its contents, which will be the lane index <ix> |
| // in the upper bits and the output location index in the lower bits. |
| // This ensures that the compare operations consider the upper bits in the comparison. |
| // The initial compare store is followed by a compare exchange that compares for the |
| // value the current lane would have assigned there. Finally, the output of the cmpxchg |
| // is used to determine if the current lane should perform the final unconditional exchange. |
| // The values are verified by checking the lower bits for the matching location index |
| // and ensuring that the upper bits undergoing the same transformation result in the location index. |
| // For lane index <ix> the location is calculated and final result assigned as if by this code: |
| // g_outputBuf[(ix/3)%64] = (ix << shBits) | ((ix/3)%64); |
| |
| bool AtomicResultMatches(const BYTE *uResults, uint64_t gold, size_t size) { |
| if (memcmp(uResults, &gold, size)) { |
| if (size == 4) |
| LogCommentFmt(L" value %d is not %d", ((uint32_t*)uResults)[0], (uint32_t)gold); |
| else |
| LogCommentFmt(L" value %lld is not %lld", ((uint64_t*)uResults)[0], gold); |
| return false; |
| } |
| return true; |
| } |
| |
// SHIFT(val, bits) builds the "duplicated" form of an expected atomic result:
// the low <bits> bits of <val> are kept in place and <val>, converted to
// uint64_t, is additionally shifted left by <bits> and ORed in. Many tests
// store their value in both halves of the integer so that the verification
// proves the full width took part in the operation.
// Note that <val> is evaluated twice.
#define SHIFT(val, bits) \
  ((static_cast<uint64_t>(val) << (bits)) | ((val) & ((1ULL << (bits)) - 1ULL)))

// Slot of each operation within the unsigned results
#define ADD_IDX  0
#define UMIN_IDX 1
#define UMAX_IDX 2
#define AND_IDX  3
#define OR_IDX   4
#define XOR_IDX  5

// Slot of each operation within the signed results
#define SMIN_IDX 0
#define SMAX_IDX 1
| |
| // Verify results for atomic operations. <uResults> and <sResults> are pointers to |
| // the readback resource sections containing unsigned and signed integers respectively. |
| // <pXchg> is a pointer to the readback resource containing the results of the compare |
| // and exchange operations tests. <stride> is the number of bytes between results for |
| // all of the results pointers. <maxIdx> is the number of indices that went into the results |
| // which is used to determine what the results should be. <bitSize> is the size in bits of |
| // the produced results, either 32 or 64. |
| void VerifyAtomicResults(const BYTE *uResults, const BYTE *sResults, |
| const BYTE *pXchg, size_t stride, uint64_t maxIdx, size_t bitSize) { |
| // Each atomic test performs the test on the value in the lower half |
| // and also duplicated in the upper half of the value. The SHIFT macros account for this. |
| // This is to verify that the upper bits are considered |
| uint64_t shBits = bitSize/2; |
| size_t byteSize = bitSize/8; |
| |
| // Test ADD Operation |
| // ADD just sums all the indices. The result should the sum of the highest and lowest indices |
| // multiplied by half the number of sums. |
| uint64_t addResult = (maxIdx)*(maxIdx-1)/2; |
| LogCommentFmt(L"Verifying %d-bit integer atomic add", bitSize); |
| // For 32-bit values, the sum exceeds the 16 bit limit, so we can't duplicate |
| // That's fine, the duplication is really for 64-bit values. |
| if (bitSize < 64) |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*ADD_IDX, addResult, byteSize)); |
| else |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*ADD_IDX, SHIFT(addResult, shBits), byteSize)); |
| |
| // Test MIN and MAX Operations |
| |
| // The result of a simple min and max of any sequence of indices would be fairly uninteresting |
| // and certain erroneous behavior might mistakenly produce the correct results. |
| |
| // To make it interesting, the contributing values will change depending on the evenness of the index. |
| // On an even index, min and max operate on the bitflipped index. For signed compares, this is |
| // interpretted as a negative value and for unsigned, a very high value. |
| |
| // For unsigned min/max, index 0 will be bitflipped to ~0, which is interpretted as the maximum |
| // Because zero is manipulated, this leaves 1 as the lowest value. |
| LogCommentFmt(L"Verifying %d-bit integer atomic umin", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*UMIN_IDX, SHIFT(1ULL, shBits), byteSize)); // UMin |
| LogCommentFmt(L"Verifying %d-bit integer atomic umax", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*UMAX_IDX, ~0ULL, byteSize)); // UMax |
| |
| // For signed min/max, the index just before the last will be bitflipped (maxIndex is always even). |
| // This is interpretted as -(maxIndex-1) and will be the lowest |
| // The maxIndex will be unaltered and interpretted as the highest. |
| LogCommentFmt(L"Verifying %d-bit integer atomic smin", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(sResults + stride*SMIN_IDX, SHIFT(-((int64_t)maxIdx-1), shBits), byteSize)); // SMin |
| LogCommentFmt(L"Verifying %d-bit integer atomic smax", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(sResults + stride*SMAX_IDX, SHIFT(maxIdx-1, shBits), byteSize)); // SMax |
| |
| // Test AND and OR operations. |
| |
| // For AND operations, all indices are bitflipped and ANDed to the previous result. |
| // This means that the highest bits, which are never set by the contributing indices will be set |
| // for all the indices, so they will be set in the final result. |
| |
| // For OR operations, the indices are ORed to the previous result unaltered |
| // This means that any bit that is set in any index will be set in the final OR result. |
| |
| // In practice, this means that the cumulative result of the AND and OR operations |
| // are bitflipped versions of each other. |
| // Finding the most significant set bit by the max index or next power of two (pot) |
| // gives us the pivot point for these results |
| uint64_t nextPot = 1ULL << (bitSize - 1); |
| for (;nextPot && !((maxIdx-1) & (nextPot)); nextPot >>= 1) {} |
| nextPot <<= 1; |
| LogCommentFmt(L"Verifying %d-bit integer atomic and", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*AND_IDX, ~SHIFT(nextPot-1, shBits), byteSize)); // And |
| LogCommentFmt(L"Verifying %d-bit integer atomic or", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*OR_IDX, SHIFT(nextPot-1, shBits), byteSize)); // Or |
| |
| // Test XOR operation |
| |
| // For XOR operations, a 1 is shifted by the number of spaces equal to the index and XORed |
| // to the previous result. Because this would rapidely shift off the end of the value, |
| // giving undefined and uninteresting results, the index is moduloed to a value that will |
| // fit within the type size. |
| |
| // Because many of the tests use total numbers of lanes that can be evenly divisible by 32 or 64, |
| // these values aren't used for the modulo since the expected result might be zero, |
| // which could be encountered through erroneous behavior. |
| |
| // Instead, one less than the type size in bits is used for the modulo. |
| // Even though we don't know the actual order these operations are performed, |
| // indices that make up a contiguous sequence of 31 or 63 values can be thought of as one of a series of "passes". |
| // Each "pass" sets or clears the bits depending on what's already there. |
| // if the number of the pass is odd, the bits are being unset and all above the mod position should be set. |
| // If even, the bits are in the process of being set and bits below the mod position should be set. |
| uint64_t xorResult = ((1ULL<<((maxIdx)%(bitSize-1))) -1); |
| |
| if (((maxIdx/(bitSize-1))&1)) { |
| xorResult ^= ~0ULL; |
| // The XOR above may set uninvolved upper bits, messing up the compare. So AND off the uninvolved bits. |
| xorResult &= ((1ULL<<(bitSize-1)) - 1); |
| } |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic xor", bitSize); |
| VERIFY_IS_TRUE(AtomicResultMatches(uResults + stride*XOR_IDX, xorResult, byteSize)); |
| |
| // Test CMP/XCHG Operations |
| // This tests CompareStore, CompareExchange, and Exchange operations. |
| |
| // Unlike above, every lane isn't contributing to the same resource location |
| // Instead, every lane competes with a few others to update the same resource location. |
| // The first lane to find the contents of their location uninitialized will |
| // update it. To verify that upper bits are considered in the comparison and |
| // in the assignment, the value stored in the lowest bits is the location index. |
| // This ensures that part will be the same for each of the competing lanes. |
| // The uppermost bits are updated with the index of the lane that got there first. |
| // Subsequent calls to CompareExchange will verify this value matches and alter |
| // the content slightly. Finally, a simple check of the output value to what |
| // the current lane would expect and a call to exchange will update the value once more |
| |
| // To verify this has gone through properly, the upper portion is converted as |
| // if to calculate the location index and compared with the location index. |
| // It could be the index of any of several lanes that assign to that location, |
| // but this ensures that it is not any lane outside of that group. |
| // The lower bits are compared to the location index as well. |
| LogCommentFmt(L"Verifying %d-bit integer atomic cmp/xchg results", bitSize); |
| for (size_t i = 0; i < 64; i++) { |
| uint64_t val = *((uint64_t*)(pXchg + i*stride)); |
| // Verify lower bits match location index exactly |
| VERIFY_ARE_EQUAL(i, val & ((1ULL << shBits) - 1ULL)); |
| // Verify that upper bits contain original index that transforms to location index |
| VERIFY_ARE_EQUAL(((val >> shBits)/3)%64, i); |
| } |
| } |
| |
| void VerifyAtomicsRawTest(std::shared_ptr<ShaderOpTestResult> test, |
| uint64_t maxIdx, size_t bitSize) { |
| |
| size_t stride = 8; |
| // struct mirroring that in the shader |
| struct AtomicStuff { |
| float prepad[2][3]; |
| UINT uintEl[4]; |
| int sintEl[4]; |
| struct useless { |
| uint32_t unused[3]; |
| } postpad; |
| float last; |
| }; |
| |
| MappedData uintData, xchgData; |
| |
| test->Test->GetReadBackData("U0", &uintData); |
| test->Test->GetReadBackData("U1", &xchgData); |
| |
| const AtomicStuff *pStruct = (AtomicStuff *)uintData.data(); |
| const AtomicStuff *pStrXchg = (AtomicStuff *)xchgData.data(); |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic operations on RWStructuredBuffer resource", bitSize); |
| |
| VerifyAtomicResults((const BYTE*)&(pStruct[0].uintEl[2]), (const BYTE*)&(pStruct[1].sintEl[2]), |
| (const BYTE*)&(pStrXchg[0].uintEl[2]), sizeof(AtomicStuff), maxIdx, bitSize); |
| |
| const BYTE *pUint = nullptr; |
| const BYTE *pXchg = nullptr; |
| |
| test->Test->GetReadBackData("U2", &uintData); |
| test->Test->GetReadBackData("U3", &xchgData); |
| |
| pUint = (BYTE *)uintData.data(); |
| pXchg = (BYTE *)xchgData.data(); |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic operations on RWByteAddressBuffer resource", bitSize); |
| |
| VerifyAtomicResults(pUint, pUint + stride*6, |
| pXchg, stride, maxIdx, bitSize); |
| |
| } |
| |
| void VerifyAtomicsTypedTest(std::shared_ptr<ShaderOpTestResult> test, |
| uint64_t maxIdx, size_t bitSize) { |
| |
| |
| size_t stride = 8; |
| MappedData uintData, sintData, xchgData; |
| const BYTE *pUint = nullptr; |
| const BYTE *pSint = nullptr; |
| const BYTE *pXchg = nullptr; |
| |
| // Typed resources can't share between 32 and 64 bits |
| if (bitSize == 32) { |
| test->Test->GetReadBackData("U6", &uintData); |
| test->Test->GetReadBackData("U7", &sintData); |
| test->Test->GetReadBackData("U8", &xchgData); |
| } else { |
| test->Test->GetReadBackData("U12", &uintData); |
| test->Test->GetReadBackData("U13", &sintData); |
| test->Test->GetReadBackData("U14", &xchgData); |
| } |
| |
| pUint = (BYTE *)uintData.data(); |
| pSint = (BYTE *)sintData.data(); |
| pXchg = (BYTE *)xchgData.data(); |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic operations on RWBuffer resource", bitSize); |
| |
| VerifyAtomicResults(pUint, pSint + stride, pXchg, stride, maxIdx, bitSize); |
| |
| // Typed resources can't share between 32 and 64 bits |
| if (bitSize == 32) { |
| test->Test->GetReadBackData("U9", &uintData); |
| test->Test->GetReadBackData("U10", &sintData); |
| test->Test->GetReadBackData("U11", &xchgData); |
| } else { |
| test->Test->GetReadBackData("U15", &uintData); |
| test->Test->GetReadBackData("U16", &sintData); |
| test->Test->GetReadBackData("U17", &xchgData); |
| } |
| |
| pUint = (BYTE *)uintData.data(); |
| pSint = (BYTE *)sintData.data(); |
| pXchg = (BYTE *)xchgData.data(); |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic operations on RWTexture resource", bitSize); |
| |
| VerifyAtomicResults(pUint, pSint + stride, pXchg, stride, maxIdx, bitSize); |
| |
| } |
| |
| void VerifyAtomicsSharedTest(std::shared_ptr<ShaderOpTestResult> test, |
| uint64_t maxIdx, size_t bitSize) { |
| |
| size_t stride = 8; |
| MappedData uintData, xchgData; |
| const BYTE *pUint = nullptr; |
| const BYTE *pXchg = nullptr; |
| |
| test->Test->GetReadBackData("U4", &uintData); |
| test->Test->GetReadBackData("U5", &xchgData); |
| |
| pUint = (BYTE *)uintData.data(); |
| pXchg = (BYTE *)xchgData.data(); |
| |
| LogCommentFmt(L"Verifying %d-bit integer atomic operations on groupshared variables", bitSize); |
| VerifyAtomicResults(pUint, pUint + stride*6, |
| pXchg, stride, maxIdx, bitSize); |
| } |
| |
| void VerifyAtomicsTest(std::shared_ptr<ShaderOpTestResult> test, |
| uint64_t maxIdx, size_t bitSize) { |
| VerifyAtomicsRawTest(test, maxIdx, bitSize); |
| VerifyAtomicsTypedTest(test, maxIdx, bitSize); |
| } |
| |
| TEST_F(ExecutionTest, AtomicsTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("AtomicsHeap"); |
| |
| // Test compute shader |
| LogCommentFmt(L"Verifying 32-bit integer atomic operations in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| |
| VerifyAtomicsTest(test, 32*32, 32); |
| VerifyAtomicsSharedTest(test, 32*32, 32); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying 32-bit integer atomic operations in amp/mesh/pixel shaders"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsTest(test, 8*8*2 + 8*8*2 + 64*64, 32); |
| VerifyAtomicsSharedTest(test, 8*8*2 + 8*8*2, 32); |
| } |
| |
| // Test Vertex + Pixel shader |
| pShaderOp->MS = nullptr; |
| LogCommentFmt(L"Verifying 32-bit integer atomic operations in vert/pixel shaders"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsTest(test, 64*64+6, 32); |
| } |
| |
| TEST_F(ExecutionTest, Atomics64Test) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("AtomicsRoot"); |
| |
| // Reassign shader stages to 64-bit versions |
| // Collect 64-bit shaders |
| pShaderOp->CS = pShaderOp->GetString("CS"); |
| pShaderOp->VS = pShaderOp->GetString("VS"); |
| pShaderOp->PS = pShaderOp->GetString("PS"); |
| pShaderOp->AS = pShaderOp->GetString("AS"); |
| pShaderOp->MS = pShaderOp->GetString("MS"); |
| |
| // Test compute shader |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 32*32, 64); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in amp/mesh/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 8*8*2 + 8*8*2 + 64*64, 64); |
| } |
| |
| // Test Vertex + Pixel shader |
| pShaderOp->MS = nullptr; |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in vert/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 64*64+6, 64); |
| } |
| |
| TEST_F(ExecutionTest, AtomicsRawHeap64Test) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportHeap64Atomics(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support 64-bit atomic operations on heap resources."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("AtomicsHeap"); |
| |
| // Reassign shader stages to 64-bit versions |
| // Collect 64-bit shaders |
| pShaderOp->CS = pShaderOp->GetString("CS64"); |
| pShaderOp->VS = pShaderOp->GetString("VS64"); |
| pShaderOp->PS = pShaderOp->GetString("PS64"); |
| pShaderOp->AS = pShaderOp->GetString("AS64"); |
| pShaderOp->MS = pShaderOp->GetString("MS64"); |
| |
| // Test compute shader |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw buffers in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 32*32, 64); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw buffers in amp/mesh/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 8*8*2 + 8*8*2 + 64*64, 64); |
| } |
| |
| // Test Vertex + Pixel shader |
| pShaderOp->MS = nullptr; |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw buffers in vert/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsRawTest(test, 64*64+6, 64); |
| } |
| |
| TEST_F(ExecutionTest, AtomicsTyped64Test) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportTyped64Atomics(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 atomic operations on typed resources."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("AtomicsHeap"); |
| |
| // Reassign shader stages to 64-bit versions |
| // Collect 64-bit shaders |
| pShaderOp->CS = pShaderOp->GetString("CSTY64"); |
| pShaderOp->VS = pShaderOp->GetString("VSTY64"); |
| pShaderOp->PS = pShaderOp->GetString("PSTY64"); |
| pShaderOp->AS = pShaderOp->GetString("ASTY64"); |
| pShaderOp->MS = pShaderOp->GetString("MSTY64"); |
| |
| // Test compute shader |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed resources in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsTypedTest(test, 32*32, 64); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed resources in amp/mesh/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsTypedTest(test, 8*8*2 + 8*8*2 + 64*64, 64); |
| } |
| |
| // Test Vertex + Pixel shader |
| pShaderOp->MS = nullptr; |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed resources in vert/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); |
| VerifyAtomicsTypedTest(test, 64*64+6, 64); |
| } |
| |
| TEST_F(ExecutionTest, AtomicsShared64Test) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) |
| return; |
| |
| if (!DoesDeviceSupportInt64(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 operations."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportShared64Atomics(pDevice)) { |
| WEX::Logging::Log::Comment(L"Device does not support int64 atomic operations on groupshared variables."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("AtomicsRoot"); |
| |
| // Reassign shader stages to 64-bit versions |
| // Collect 64-bit shaders |
| pShaderOp->CS = pShaderOp->GetString("CSSH64"); |
| pShaderOp->AS = pShaderOp->GetString("ASSH64"); |
| pShaderOp->MS = pShaderOp->GetString("MSSH64"); |
| |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared variables in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); |
| VerifyAtomicsSharedTest(test, 32*32, 64); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared variables in amp/mesh/pixel shader"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); |
| VerifyAtomicsSharedTest(test, 8*8*2 + 8*8*2, 64); |
| } |
| } |
| |
| |
| // Float Atomics |
| |
| // These operations are almost the same as for the 32-bit and 64-bit integer tests |
| // The difference is that there is no need to verify the upper bits. |
| // So there is no storing of different parts in upper and lower halves. |
| // Additionally, the only operations that are supported on floats |
| // are compare and exchange operations. So that's all that is tested here. |
| // Just as above, a number of lanes are assigned the same output value. |
| // Unlike above, one location is needed for the result of the special NaN test |
| // For this reason, the conversion is reduced by one and shifted by one to leave |
| // the zero-indexed location available. |
| |
| // Verify results for a particular set of atomics results |
| void VerifyAtomicFloatResults(const float *results) { |
| // The first entry is for NaN to ensure that compares between NaNs succeed |
| // The sentinal value is 0.123, for which this compare is sufficient. |
| VERIFY_IS_TRUE(results[0] >= 0.120 && results[0] < 0.125); |
| // Start at 1 because 0 is just for NaN tests |
| for (int i = 1; i < 64; i++) { |
| VERIFY_ARE_EQUAL((int(results[i])/3)%63 + 1, i); |
| } |
| } |
| |
| void VerifyAtomicsFloatSharedTest(std::shared_ptr<ShaderOpTestResult> test) { |
| MappedData Data; |
| const float *pData = nullptr; |
| |
| test->Test->GetReadBackData("U4", &Data); |
| pData = (float *)Data.data(); |
| |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations on groupshared variables"); |
| VerifyAtomicFloatResults(pData); |
| } |
| |
| void VerifyAtomicsFloatTest(std::shared_ptr<ShaderOpTestResult> test) { |
| |
| // struct mirroring that in the shader |
| struct AtomicStuff { |
| float prepad[2][3]; |
| float fltEl[2]; |
| struct useless { |
| uint32_t unused[3]; |
| } postpad; |
| }; |
| |
| // Test Compute Shader |
| MappedData Data; |
| const float *pData = nullptr; |
| |
| test->Test->GetReadBackData("U0", &Data); |
| const AtomicStuff *pStructData = (AtomicStuff *)Data.data(); |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations on RWStructuredBuffer resources"); |
| VERIFY_IS_TRUE(pStructData[0].fltEl[1] >= 0.120 && pStructData[0].fltEl[1] < 0.125); |
| for (int i = 1; i < 64; i++) { |
| VERIFY_ARE_EQUAL((int(pStructData[i].fltEl[1])/3)%63 + 1, i); |
| } |
| |
| test->Test->GetReadBackData("U1", &Data); |
| pData = (float *)Data.data(); |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations on RWByteAddressBuffer resources"); |
| VerifyAtomicFloatResults(pData); |
| |
| test->Test->GetReadBackData("U2", &Data); |
| pData = (float *)Data.data(); |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations on RWBuffer resources"); |
| VerifyAtomicFloatResults(pData); |
| |
| test->Test->GetReadBackData("U3", &Data); |
| pData = (float *)Data.data(); |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations on RWTexture resources"); |
| VerifyAtomicFloatResults(pData); |
| |
| } |
| |
| TEST_F(ExecutionTest, AtomicsFloatTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice)) |
| return; |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("FloatAtomics"); |
| |
| // Test compute shader |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations in compute shader"); |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", nullptr, ShaderOpSet); |
| VerifyAtomicsFloatTest(test); |
| VerifyAtomicsFloatSharedTest(test); |
| |
| // Test mesh shader if available |
| pShaderOp->CS = nullptr; |
| if (DoesDeviceSupportMeshShaders(pDevice)) { |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations in amp/mesh/pixel shaders"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", nullptr, ShaderOpSet); |
| VerifyAtomicsFloatTest(test); |
| VerifyAtomicsFloatSharedTest(test); |
| } |
| |
| // Test Vertex + Pixel shader |
| pShaderOp->MS = nullptr; |
| LogCommentFmt(L"Verifying float cmp/xchg atomic operations in vert/pixel shaders"); |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", nullptr, ShaderOpSet); |
| VerifyAtomicsFloatTest(test); |
| } |
| |
| // The IsHelperLane test renders 3-pixel triangle into 16x16 render target restricted |
| // to 2x2 viewport alligned at (0,0) which guarantees it will run in a single quad. |
| // |
| // Pixels to be rendered* |
| // (0,0)* (0,1)* |
| // (1,0) (1,1)* |
| // |
| // Pixel (1,0) is not rendered and is in helper lane. |
| // |
| // Each thread will use ddx_fine and ddy_fine to read the IsHelperLane() values from other threads. |
| // The bottom right pixel will write the results into the UAV buffer. |
| // |
| // Then the top level pixel (0,0) is discarded and the process above is repeated. |
| // |
| // Runs with shader models 6.0 and 6.6 to test both the HLSL built-in IsHelperLane fallback |
| // function (sm <= 6.5) and the IsHelperLane intrisics (sm >= 6.6). |
| // |
| TEST_F(ExecutionTest, HelperLaneTest) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| |
| #ifdef ISHELPERLANE_PLACEHOLDER |
| string args = "-DISHELPERLANE_PLACEHOLDER"; |
| #else |
| string args = ""; |
| #endif |
| |
| D3D_SHADER_MODEL TestShaderModels[] = { D3D_SHADER_MODEL_6_0, D3D_SHADER_MODEL_6_6 }; |
| for (unsigned i = 0; i < _countof(TestShaderModels); i++) { |
| D3D_SHADER_MODEL sm = TestShaderModels[i]; |
| LogCommentFmt(L"Verifying IsHelperLane in shader model 6.%1u", ((UINT)sm & 0x0f)); |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) |
| continue; |
| |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestNoWave", |
| // this callbacked is called when the test is creating the resource to run the test |
| [&](LPCSTR Name, std::vector<BYTE>& Data, st::ShaderOp* pShaderOp) { |
| VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); |
| std::fill(Data.begin(), Data.end(), (BYTE)0xCC); |
| pShaderOp->Shaders.at(0).Arguments = args.c_str(); |
| pShaderOp->Shaders.at(1).Arguments = args.c_str(); |
| }, ShaderOpSet); |
| |
| struct HelperLaneTestResult { |
| int32_t is_helper_00; |
| int32_t is_helper_10; |
| int32_t is_helper_01; |
| int32_t is_helper_11; |
| }; |
| |
| MappedData uavData; |
| test->Test->GetReadBackData("UAVBuffer0", &uavData); |
| HelperLaneTestResult* pTestResults = (HelperLaneTestResult*)uavData.data(); |
| |
| MappedData renderData; |
| test->Test->GetReadBackData("RTarget", &renderData); |
| const uint32_t* pPixels = (uint32_t*)renderData.data(); |
| |
| // before discard |
| VERIFY_ARE_EQUAL(pTestResults[0].is_helper_00, 0); |
| VERIFY_ARE_EQUAL(pTestResults[0].is_helper_10, 0); |
| VERIFY_ARE_EQUAL(pTestResults[0].is_helper_01, 1); |
| VERIFY_ARE_EQUAL(pTestResults[0].is_helper_11, 0); |
| |
| // after discard |
| VERIFY_ARE_EQUAL(pTestResults[1].is_helper_00, 1); |
| VERIFY_ARE_EQUAL(pTestResults[1].is_helper_10, 0); |
| VERIFY_ARE_EQUAL(pTestResults[1].is_helper_01, 1); |
| VERIFY_ARE_EQUAL(pTestResults[1].is_helper_11, 0); |
| |
| UNREFERENCED_PARAMETER(pPixels); |
| } |
| } |
| |
| struct HelperLaneWaveTestResult60 { |
| // 6.0 wave ops |
| int32_t anyTrue; |
| int32_t allTrue; |
| XMUINT4 ballot; |
| int32_t waterfallLoopCount; |
| int32_t allEqual; |
| int32_t countBits; |
| int32_t sum; |
| int32_t product; |
| int32_t bitAnd; |
| int32_t bitOr; |
| int32_t bitXor; |
| int32_t min; |
| int32_t max; |
| int32_t prefixCountBits; |
| int32_t prefixProduct; |
| int32_t prefixSum; |
| }; |
| |
| struct HelperLaneQuadTestResult { |
| int32_t is_helper_this; |
| int32_t is_helper_across_X; |
| int32_t is_helper_across_Y; |
| int32_t is_helper_across_Diag; |
| }; |
| |
| struct HelperLaneWaveTestResult65 { |
| // 6.5 wave ops |
| XMUINT4 match; |
| int32_t mpCountBits; |
| int32_t mpSum; |
| int32_t mpProduct; |
| int32_t mpBitAnd; |
| int32_t mpBitOr; |
| int32_t mpBitXor; |
| }; |
| |
| struct HelperLaneWaveTestResult { |
| HelperLaneWaveTestResult60 sm60; |
| HelperLaneQuadTestResult sm60_quad; |
| HelperLaneWaveTestResult65 sm65; |
| }; |
| |
| struct foo { int32_t a; int32_t b; int32_t c; }; |
| struct bar { foo f; int32_t d; XMUINT4 g; }; |
| foo f = {1, 2, 3}; |
| bar b = { { 1, 2, 3 }, 0, { 1, 2, 3, 4 } }; |
| |
| HelperLaneWaveTestResult HelperLane_CS_ExpectedResults = { |
| // HelperLaneWaveTestResult60 |
| { 0, 1, { 0x7, 0, 0, 0 }, 3, 1, 3, 12, 64, 1, 0, 0, 10, 1, 2, 16, 4 }, |
| // HelperLaneQuadTestResult |
| { 0, 0, 0, 0 }, |
| // HelperLaneWaveTestResult65 |
| { {0x7, 0, 0, 0}, 2, 4, 16, 1, 0, 0 } |
| }; |
| |
| HelperLaneWaveTestResult HelperLane_VS_ExpectedResults = HelperLane_CS_ExpectedResults; |
| |
| HelperLaneWaveTestResult HelperLane_PS_ExpectedResults = { |
| // HelperLaneWaveTestResult60 |
| { 0, 1, { 0xB, 0, 0, 0 }, 3, 1, 3, 12, 64, 1, 0, 0, 10, 1, 2, 16, 4 }, |
| // HelperLaneQuadTestResult |
| { 0, 1, 0, 0 }, |
| // HelperLaneWaveTestResult65 |
| { {0xB, 0, 0, 0}, 2, 4, 16, 1, 0, 0 } |
| }; |
| |
| HelperLaneWaveTestResult HelperLane_PSAfterDiscard_ExpectedResults = { |
| // HelperLaneWaveTestResult60 |
| { 0, 1, { 0xA, 0, 0, 0 }, 2, 1, 2, 8, 16, 1, 0, 0, 10, 1, 1, 4, 2 }, |
| // HelperLaneQuadTestResult |
| { 0, 1, 0, 1 }, |
| // HelperLaneWaveTestResult65 |
| { {0xA, 0, 0, 0}, 1, 2, 4, 1, 0, 0 } |
| }; |
| |
| HelperLaneWaveTestResult IncludesHelperLane_PS_ExpectedResults = { |
| // HelperLaneWaveTestResult60 |
| { 1, 0, { 0xF, 0, 0, 0 }, 4, 0, 4, 16, 256, 0, 1, 1, 1, 10, 3, 64, 6 }, |
| // HelperLaneQuadTestResult |
| { 0, 1, 0, 0 }, |
| // HelperLaneWaveTestResult65 |
| { {0xF, 0, 0, 0}, 3, 6, 64, 0, 1, 1 } |
| }; |
| |
| HelperLaneWaveTestResult IncludesHelperLane_PSAfterDiscard_ExpectedResults = { |
| // HelperLaneWaveTestResult60 |
| { 1, 0, { 0xF, 0, 0, 0 }, 4, 0, 4, 16, 256, 0, 1, 0, 1, 10, 3, 64, 6 }, |
| // HelperLaneQuadTestResult |
| { 0, 1, 0, 1 }, |
| // HelperLaneWaveTestResult65 |
| { {0xF, 0, 0, 0}, 3, 6, 64, 0, 1, 0 } |
| }; |
| |
| bool HelperLaneResultLogAndVerify(const wchar_t* testDesc, uint32_t expectedValue, uint32_t actualValue) { |
| bool matches = (expectedValue == actualValue); |
| LogCommentFmt(L"%s%s, expected = %u, actual = %u", matches ? L" - " : L"FAILED: ", testDesc, expectedValue, actualValue); |
| return matches; |
| } |
| |
| bool HelperLaneResultLogAndVerify(const wchar_t* testDesc, XMUINT4 expectedValue, XMUINT4 actualValue) { |
| bool matches = (expectedValue.x == actualValue.x && expectedValue.y == actualValue.y && |
| expectedValue.z == actualValue.z && expectedValue.w == actualValue.w); |
| LogCommentFmt(L"%s%s, expected = (0x%X,0x%X,0x%X,0x%X), actual = (0x%X,0x%X,0x%X,0x%X)", matches ? L" - " : L"FAILED: ", testDesc, |
| expectedValue.x, expectedValue.y, expectedValue.z, expectedValue.w, actualValue.x, actualValue.y, actualValue.z, actualValue.w); |
| return matches; |
| } |
| |
| |
| bool VerifyHelperLaneWaveResults(ExecutionTest::D3D_SHADER_MODEL sm, HelperLaneWaveTestResult& testResults, HelperLaneWaveTestResult& expectedResults, bool verifyQuads) { |
| bool passed = true; |
| { |
| HelperLaneWaveTestResult60& tr60 = testResults.sm60; |
| HelperLaneWaveTestResult60& tr60exp = expectedResults.sm60; |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveAnyTrue(IsHelperLane())", tr60exp.anyTrue, tr60.anyTrue); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveAllTrue(!IsHelperLane())", tr60exp.allTrue, tr60.allTrue); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBallot(true) has exactly 3 bits set", tr60exp.ballot, tr60.ballot); |
| |
| passed &= HelperLaneResultLogAndVerify(L"!WaveReadLaneFirst(IsHelperLane()) && WaveIsFirstLane() in a waterfall loop", tr60exp.waterfallLoopCount, tr60.waterfallLoopCount); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveAllEqual(IsHelperLane())", tr60exp.allEqual, tr60.allEqual); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveCountBits(true)", tr60exp.countBits, tr60.countBits); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveSum(4)", tr60exp.sum, tr60.sum); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveProduct(4)", tr60exp.product, tr60.product); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitAnd(!IsHelperLane())", tr60exp.bitAnd, tr60.bitAnd); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitOr(IsHelperLane())", tr60exp.bitOr, tr60.bitOr); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitXor(IsHelperLane())", tr60exp.bitXor, tr60.bitXor); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveMin(IsHelperLane() ? 1 : 10)", tr60exp.min, tr60.min); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveMax(IsHelperLane() ? 10 : 1)", tr60exp.max, tr60.max); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixCountBits(1)", tr60exp.prefixCountBits, tr60.prefixCountBits); |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixProduct(4)", tr60exp.prefixProduct, tr60.prefixProduct); |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixSum(2)", tr60exp.prefixSum, tr60.prefixSum); |
| } |
| |
| if (verifyQuads) { |
| HelperLaneQuadTestResult& quad_tr = testResults.sm60_quad; |
| HelperLaneQuadTestResult& quad_tr_exp = expectedResults.sm60_quad; |
| passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 3 / pixel (1,1) - IsHelperLane()", quad_tr_exp.is_helper_this, quad_tr.is_helper_this); |
| passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 2 / pixel (0,1) - IsHelperLane()", quad_tr_exp.is_helper_across_X, quad_tr.is_helper_across_X); |
| passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 1 / pixel (1,0) - IsHelperLane()", quad_tr_exp.is_helper_across_Y, quad_tr.is_helper_across_Y); |
| passed &= HelperLaneResultLogAndVerify(L"QuadReadAcross* - lane 0 / pixel (0,0) - IsHelperLane()", quad_tr_exp.is_helper_across_Diag, quad_tr.is_helper_across_Diag); |
| } |
| |
| if (sm >= ExecutionTest::D3D_SHADER_MODEL_6_5) { |
| HelperLaneWaveTestResult65& tr65 = testResults.sm65; |
| HelperLaneWaveTestResult65& tr65exp = expectedResults.sm65; |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveMatch(true) has exactly 3 bits set", tr65exp.match, tr65.match); |
| passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixCountBits(1, no_masked_bits)", tr65exp.mpCountBits, tr65.mpCountBits); |
| passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixSum(2, no_masked_bits)", tr65exp.mpSum, tr65.mpSum); |
| passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixProduct(4, no_masked_bits)", tr65exp.mpProduct, tr65.mpProduct); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixAnd(IsHelperLane() ? 0 : 1, no_masked_bits)", tr65exp.mpBitAnd, tr65.mpBitAnd); |
| passed &= HelperLaneResultLogAndVerify(L"WaveMultiPrefixOr(IsHelperLane() ? 1 : 0, no_masked_bits)", tr65exp.mpBitOr, tr65.mpBitOr); |
| passed &= HelperLaneResultLogAndVerify(L"verify WaveMultiPrefixXor(IsHelperLane() ? 1 : 0, no_masked_bits)", tr65exp.mpBitXor, tr65.mpBitXor); |
| } |
| return passed; |
| } |
| // Contrary to compute or pixel shaders the layout of lanes in vertex shaders is |
| // not specified. A conforming implementation could, in the extreme case, decide |
| // to dispatch three waves that each process only a single vertex. |
| // So instead of compare with fixed expected result, calculate the correct |
| // result from ballot. |
| bool VerifyHelperLaneWaveResultsForVS(ExecutionTest::D3D_SHADER_MODEL sm, |
| HelperLaneWaveTestResult &testResults) { |
| bool passed = true; |
| XMUINT4 mask = testResults.sm60.ballot; |
| unsigned countBits = 0; |
| std::bitset<32> x(mask.x); |
| std::bitset<32> y(mask.y); |
| std::bitset<32> z(mask.z); |
| std::bitset<32> w(mask.w); |
| countBits += (unsigned)x.count(); |
| countBits += (unsigned)y.count(); |
| countBits += (unsigned)z.count(); |
| countBits += (unsigned)w.count(); |
| |
| { |
| // For VS, IsHelperLane always return false. |
| HelperLaneWaveTestResult60 &tr60 = testResults.sm60; |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveAnyTrue(IsHelperLane())", |
| 0, tr60.anyTrue); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveActiveAllTrue(!IsHelperLane())", 1, tr60.allTrue); |
| bool ballotMatch = 1 <= countBits && countBits <= 3; |
| |
| LogCommentFmt(L"%sWaveActiveBallot(true) expected 1~3 bits set, actual = %u", |
| ballotMatch ? L" - " : L"FAILED: ", tr60.ballot); |
| |
| |
| passed &= HelperLaneResultLogAndVerify( |
| L"!WaveReadLaneFirst(IsHelperLane()) && WaveIsFirstLane() in a " |
| L"waterfall loop", |
| countBits, tr60.waterfallLoopCount); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveActiveAllEqual(IsHelperLane())", 1, tr60.allEqual); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveCountBits(true)", |
| countBits, tr60.countBits); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveSum(4)", 4 * countBits, |
| tr60.sum); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveProduct(4)", (unsigned)std::pow(4, countBits), |
| tr60.product); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitAnd(!IsHelperLane())", |
| 1, tr60.bitAnd); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitOr(IsHelperLane())", |
| 0, tr60.bitOr); |
| passed &= HelperLaneResultLogAndVerify(L"WaveActiveBitXor(IsHelperLane())", |
| 0, tr60.bitXor); |
| |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveActiveMin(IsHelperLane() ? 1 : 10)", 10, tr60.min); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveActiveMax(IsHelperLane() ? 10 : 1)", 1, tr60.max); |
| |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixCountBits(1)", |
| countBits-1, |
| tr60.prefixCountBits); |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixProduct(4)", |
| (unsigned)std::pow(4, countBits - 1), |
| tr60.prefixProduct); |
| passed &= HelperLaneResultLogAndVerify(L"WavePrefixSum(2)", |
| 2 * (countBits-1), tr60.prefixSum); |
| } |
| |
| if (sm >= ExecutionTest::D3D_SHADER_MODEL_6_5) { |
| HelperLaneWaveTestResult65 &tr65 = testResults.sm65; |
| |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMatch(true) has exactly 3 bits set", mask, tr65.match); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMultiPrefixCountBits(1, no_masked_bits)", countBits-1, |
| tr65.mpCountBits); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMultiPrefixSum(2, no_masked_bits)", 2*(countBits-1), tr65.mpSum); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMultiPrefixProduct(4, no_masked_bits)", |
| (unsigned)std::pow(4, countBits - 1), |
| tr65.mpProduct); |
| |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMultiPrefixAnd(IsHelperLane() ? 0 : 1, no_masked_bits)", |
| 1, tr65.mpBitAnd); |
| passed &= HelperLaneResultLogAndVerify( |
| L"WaveMultiPrefixOr(IsHelperLane() ? 1 : 0, no_masked_bits)", |
| 0, tr65.mpBitOr); |
| passed &= HelperLaneResultLogAndVerify( |
| L"verify WaveMultiPrefixXor(IsHelperLane() ? 1 : 0, no_masked_bits)", |
| 0, tr65.mpBitXor); |
| } |
| return passed; |
| } |
| |
| void CleanUAVBuffer0Buffer(LPCSTR BufferName, std::vector<BYTE>& Data, st::ShaderOp* pShaderOp) { |
| UNREFERENCED_PARAMETER(pShaderOp); |
| VERIFY_IS_TRUE(0 == _stricmp(BufferName, "UAVBuffer0")); |
| std::fill(Data.begin(), Data.end(), (BYTE)0xCC); |
| } |
| |
| // |
| // The IsHelperLane test that use Wave intrinsics to verify IsHelperLane() and Wave operations on active lanes. |
| // |
| // Runs with shader models 6.0, 6.5 and 6.6 to test both the HLSL built-in IsHelperLane fallback |
| // function (sm <= 6.5) and the IsHelperLane intrisics (sm >= 6.6) and the shader model 6.5 wave intrinsics (sm >= 6.5). |
| // |
| // For compute and vertex shaders IsHelperLane() always returns false and might be optimized away in the front end. |
| // However it can be exposed to the driver in CS/VS through an exported function in a library so drivers need |
| // to be prepared to handle it. For this reason the test is compiled with disabled optimizations (/Od). |
| // The tests are also validating that wave intrinsics operate correctly with 3 threads in a CS or 3 vertices |
| // in a VS where the rest of the lanes in the wave are not active (dead lanes). |
| // |
| TEST_F(ExecutionTest, HelperLaneTestWave) { |
| WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| st::ShaderOp* pShaderOp = ShaderOpSet->GetShaderOp("HelperLaneTestWave"); |
| |
| #ifdef ISHELPERLANE_PLACEHOLDER |
| LPCSTR args = "/Od -DISHELPERLANE_PLACEHOLDER"; |
| #else |
| LPCSTR args = "/Od"; |
| #endif |
| |
| if (args[0]) { |
| for (st::ShaderOpShader& S : pShaderOp->Shaders) |
| S.Arguments = args; |
| } |
| |
| bool testPassed = true; |
| |
| D3D_SHADER_MODEL TestShaderModels[] = { D3D_SHADER_MODEL_6_0, D3D_SHADER_MODEL_6_5, D3D_SHADER_MODEL_6_6, D3D_SHADER_MODEL_6_7 }; |
| for (unsigned i = 0; i < _countof(TestShaderModels); i++) { |
| D3D_SHADER_MODEL sm = TestShaderModels[i]; |
| LogCommentFmt(L"\r\nVerifying IsHelperLane using Wave intrinsics in shader model 6.%1u", ((UINT)sm & 0x0f)); |
| |
| bool smPassed = true; |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { |
| continue; |
| } |
| |
| if (GetTestParamUseWARP(UseWarpByDefault()) || IsDeviceBasicAdapter(pDevice)) { |
| WEX::Logging::Log::Comment(L"WARP has a known issue with HelperLaneTestWave."); |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| return; |
| } |
| |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| LogCommentFmt(L"Device does not support wave operations in shader model 6.%1u", ((UINT)sm & 0x0f)); |
| continue; |
| } |
| |
| if (sm == D3D_SHADER_MODEL_6_5) { |
| // Reassign shader stages to 6.5 versions |
| pShaderOp->CS = pShaderOp->GetString("CS65"); |
| pShaderOp->VS = pShaderOp->GetString("VS65"); |
| pShaderOp->PS = pShaderOp->GetString("PS65"); |
| } else if (sm == D3D_SHADER_MODEL_6_6) { |
| // Reassign shader stages to 6.6 versions |
| pShaderOp->CS = pShaderOp->GetString("CS66"); |
| pShaderOp->VS = pShaderOp->GetString("VS66"); |
| pShaderOp->PS = pShaderOp->GetString("PS66"); |
| } else if (sm == D3D_SHADER_MODEL_6_7) { |
| // Reassign shader stages to 6.7 versions |
| pShaderOp->CS = pShaderOp->GetString("CS66"); |
| pShaderOp->VS = pShaderOp->GetString("VS66"); |
| // Only PS has SM 6.7 version to test new [WaveOpsIncludeHelperLanes] attribute |
| pShaderOp->PS = pShaderOp->GetString("PS67"); |
| } |
| |
| const unsigned CS_INDEX = 0, VS_INDEX = 0, PS_INDEX = 1, PS_INDEX_AFTER_DISCARD = 2; |
| |
| // Test Compute shader |
| { |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", |
| CleanUAVBuffer0Buffer, ShaderOpSet); |
| |
| MappedData uavData; |
| test->Test->GetReadBackData("UAVBuffer0", &uavData); |
| HelperLaneWaveTestResult* pTestResults = (HelperLaneWaveTestResult*)uavData.data(); |
| LogCommentFmt(L"\r\nCompute shader"); |
| smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[CS_INDEX], HelperLane_CS_ExpectedResults, true); |
| } |
| |
| HelperLaneWaveTestResult &PS_ExpectedResults = |
| (sm >= D3D_SHADER_MODEL_6_7) ? IncludesHelperLane_PS_ExpectedResults |
| : HelperLane_PS_ExpectedResults; |
| HelperLaneWaveTestResult &PSAfterDiscard_ExpectedResults = |
| (sm >= D3D_SHADER_MODEL_6_7) |
| ? IncludesHelperLane_PSAfterDiscard_ExpectedResults |
| : HelperLane_PSAfterDiscard_ExpectedResults; |
| |
| // Test Vertex + Pixel shader |
| { |
| pShaderOp->CS = nullptr; |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", CleanUAVBuffer0Buffer, ShaderOpSet); |
| |
| MappedData uavData; |
| test->Test->GetReadBackData("UAVBuffer0", &uavData); |
| HelperLaneWaveTestResult* pTestResults = (HelperLaneWaveTestResult*)uavData.data(); |
| LogCommentFmt(L"\r\nVertex shader"); |
| smPassed &= VerifyHelperLaneWaveResultsForVS(sm, pTestResults[VS_INDEX]); |
| LogCommentFmt(L"\r\nPixel shader"); |
| smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[PS_INDEX], PS_ExpectedResults, true); |
| LogCommentFmt(L"\r\nPixel shader with discarded pixel"); |
| smPassed &= VerifyHelperLaneWaveResults(sm, pTestResults[PS_INDEX_AFTER_DISCARD], PSAfterDiscard_ExpectedResults, true); |
| |
| MappedData renderData; |
| test->Test->GetReadBackData("RTarget", &renderData); |
| const uint32_t* pPixels = (uint32_t*)renderData.data(); |
| |
| UNREFERENCED_PARAMETER(pPixels); |
| } |
| testPassed &= smPassed; |
| } |
| VERIFY_ARE_EQUAL(testPassed, true); |
| } |
| |
// Two-component signed integer; one result entry of the QuadAnyAll shaders.
struct int2 {
  int x;
  int y;
};

// Checks the 64 result entries written by a QuadAnyAll shader run.
// Expected pattern:
//   entries  0..3  : x == 2, y == 4
//   entries  4..59 : x == 1, y == 4
//   entries 60..63 : x == 1, y == 3
//   Res - pointer to at least 64 consecutive entries.
// Returns true when every entry matches the pattern, false at the first
// entry that does not.
bool VerifyQuadAnyAllResults(int2 *Res) {
  const int EntryCount = 64;
  for (int Idx = 0; Idx < EntryCount; ++Idx) {
    const int WantX = (Idx < 4) ? 2 : 1;
    const int WantY = (Idx < 60) ? 4 : 3;
    const int2 &Entry = Res[Idx];
    if (Entry.x != WantX || Entry.y != WantY)
      return false;
  }
  return true;
}
| |
| TEST_F(ExecutionTest, QuadAnyAll) { |
| WEX::TestExecution::SetVerifyOutput verifySettings( |
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); |
| CComPtr<IStream> pStream; |
| ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); |
| |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("QuadAnyAll"); |
| |
| LPCSTR args = "/Od"; |
| |
| if (args[0]) { |
| for (st::ShaderOpShader &S : pShaderOp->Shaders) |
| S.Arguments = args; |
| } |
| |
| bool Skipped = true; |
| D3D_SHADER_MODEL TestShaderModels[] = {D3D_SHADER_MODEL_6_0, |
| D3D_SHADER_MODEL_6_5, |
| D3D_SHADER_MODEL_6_7}; |
| for (unsigned i = 0; i < _countof(TestShaderModels); i++) { |
| D3D_SHADER_MODEL sm = TestShaderModels[i]; |
| LogCommentFmt(L"\r\nVerifying QuadAny/QuadAll using Wave intrinsics in " |
| L"shader model 6.%1u", |
| ((UINT)sm & 0x0f)); |
| |
| if (sm == D3D_SHADER_MODEL_6_5) { |
| pShaderOp->MS = pShaderOp->GetString("MS"); |
| pShaderOp->AS = pShaderOp->GetString("AS"); |
| } else if (sm == D3D_SHADER_MODEL_6_7) { |
| pShaderOp->AS = pShaderOp->GetString("AS67"); |
| pShaderOp->MS = pShaderOp->GetString("MS67"); |
| pShaderOp->CS = pShaderOp->GetString("CS67"); |
| } |
| |
| CComPtr<ID3D12Device> pDevice; |
| if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { |
| continue; |
| } |
| |
| if (IsDeviceBasicAdapter(pDevice)) { |
| WEX::Logging::Log::Comment(L"QuadAny/All fails on basic render driver."); |
| continue; |
| } |
| |
| if (!DoesDeviceSupportWaveOps(pDevice)) { |
| LogCommentFmt( |
| L"Device does not support wave operations in shader model 6.%1u", |
| ((UINT)sm & 0x0f)); |
| continue; |
| } |
| Skipped = false; |
| |
| // test compute |
| std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse( |
| pDevice, m_support, "QuadAnyAll", CleanUAVBuffer0Buffer, ShaderOpSet); |
| |
| MappedData uavData; |
| test->Test->GetReadBackData("UAVBuffer0", &uavData); |
| bool Result = VerifyQuadAnyAllResults((int2 *)uavData.data()); |
| VERIFY_IS_TRUE(Result); |
| |
| if (sm < D3D_SHADER_MODEL_6_5 || !DoesDeviceSupportMeshShaders(pDevice)) |
| continue; |
| |
| pShaderOp->CS = nullptr; |
| // test AS/MS |
| test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", |
| CleanUAVBuffer0Buffer, ShaderOpSet); |
| |
| test->Test->GetReadBackData("UAVBuffer0", &uavData); |
| Result = VerifyQuadAnyAllResults((int2 *)uavData.data()); |
| VERIFY_IS_TRUE(Result); |
| Result = VerifyQuadAnyAllResults(&((int2 *)uavData.data())[64]); |
| VERIFY_IS_TRUE(Result); |
| } |
| if (Skipped) |
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); |
| } |
| |
| #ifndef _HLK_CONF |
| static void WriteReadBackDump(st::ShaderOp *pShaderOp, st::ShaderOpTest *pTest, |
| char **pReadBackDump) { |
| std::stringstream str; |
| |
| unsigned count = 0; |
| for (auto &R : pShaderOp->Resources) { |
| if (!R.ReadBack) |
| continue; |
| ++count; |
| str << "Resource: " << R.Name << "\r\n"; |
| // Find a descriptor that can tell us how to dump this resource. |
| bool found = false; |
| for (auto &Heaps : pShaderOp->DescriptorHeaps) { |
| for (auto &D : Heaps.Descriptors) { |
| if (_stricmp(D.ResName, R.Name) != 0) { |
| continue; |
| } |
| found = true; |
| if (_stricmp(D.Kind, "UAV") != 0) { |
| str << "Resource dump for kind " << D.Kind << " not implemented yet.\r\n"; |
| break; |
| } |
| if (D.UavDesc.ViewDimension != D3D12_UAV_DIMENSION_BUFFER) { |
| str << "Resource dump for this kind of view dimension not implemented yet.\r\n"; |
| break; |
| } |
| // We can map back to the structure if a structured buffer via the shader, but |
| // we'll keep this simple and simply dump out 32-bit uint/float representations. |
| MappedData data; |
| pTest->GetReadBackData(R.Name, &data); |
| uint32_t *pData = (uint32_t *)data.data(); |
| size_t u32_count = ((size_t)R.Desc.Width) / sizeof(uint32_t); |
| for (size_t i = 0; i < u32_count; ++i) { |
| float f = *(float *)pData; |
| str << i << ": 0n" << *pData << " 0x" << std::hex << *pData |
| << std::dec << " " << f << "\r\n"; |
| ++pData; |
| } |
| break; |
| } |
| if (found) break; |
| } |
| if (!found) { |
| str << "Unable to find a view for the resource.\r\n"; |
| } |
| } |
| |
| str << "Resources read back: " << count << "\r\n"; |
| |
| std::string s(str.str()); |
| CComHeapPtr<char> pDump; |
| if (!pDump.Allocate(s.size() + 1)) |
| throw std::bad_alloc(); |
| memcpy(pDump.m_pData, s.data(), s.size()); |
| pDump.m_pData[s.size()] = '\0'; |
| *pReadBackDump = pDump.Detach(); |
| } |
| |
| // This is the interface exported for use by HLSLHost.exe. |
| // It is mutually exclusive with the use of the DLL as a TAEF target. |
| extern "C" { |
| __declspec(dllexport) HRESULT WINAPI InitializeOpTests(void *pStrCtx, st::OutputStringFn pOutputStrFn) { |
| HRESULT hr = ExecutionTest::EnableExperimentalShaderModels(); |
| if (FAILED(hr)) { |
| pOutputStrFn(pStrCtx, L"Unable to enable experimental shader models.\r\n."); |
| } |
| return S_OK; |
| } |
| |
| __declspec(dllexport) HRESULT WINAPI |
| RunOpTest(void *pStrCtx, st::OutputStringFn pOutputStrFn, LPCSTR pText, |
| ID3D12Device *pDevice, ID3D12CommandQueue *pCommandQueue, |
| ID3D12Resource *pRenderTarget, char **pReadBackDump) { |
| |
| HRESULT hr; |
| if (pReadBackDump) *pReadBackDump = nullptr; |
| st::SetOutputFn(pStrCtx, pOutputStrFn); |
| CComPtr<ID3D12InfoQueue> pInfoQueue; |
| CComHeapPtr<char> pDump; |
| bool FilterCreation = false; |
| if (SUCCEEDED(pDevice->QueryInterface(&pInfoQueue))) { |
| // Creation is largely driven by inputs, so don't log create/destroy messages. |
| pInfoQueue->PushEmptyStorageFilter(); |
| pInfoQueue->PushEmptyRetrievalFilter(); |
| if (FilterCreation) { |
| D3D12_INFO_QUEUE_FILTER filter; |
| D3D12_MESSAGE_CATEGORY denyCategories[] = { D3D12_MESSAGE_CATEGORY_STATE_CREATION }; |
| ZeroMemory(&filter, sizeof(filter)); |
| filter.DenyList.NumCategories = _countof(denyCategories); |
| filter.DenyList.pCategoryList = denyCategories; |
| pInfoQueue->PushStorageFilter(&filter); |
| } |
| } |
| else { |
| pOutputStrFn(pStrCtx, L"Unable to enable info queue for D3D.\r\n."); |
| } |
| try { |
| dxc::DxcDllSupport m_support; |
| m_support.Initialize(); |
| |
| const char *pName = nullptr; |
| CComPtr<IStream> pStream = SHCreateMemStream((BYTE *)pText, (UINT)strlen(pText)); |
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = |
| std::make_shared<st::ShaderOpSet>(); |
| st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); |
| st::ShaderOp *pShaderOp; |
| if (pName == nullptr) { |
| if (ShaderOpSet->ShaderOps.size() != 1) { |
| pOutputStrFn(pStrCtx, L"Expected a single shader operation.\r\n"); |
| return E_FAIL; |
| } |
| pShaderOp = ShaderOpSet->ShaderOps[0].get(); |
| } |
| else { |
| pShaderOp = ShaderOpSet->GetShaderOp(pName); |
| } |
| if (pShaderOp == nullptr) { |
| std::string msg = "Unable to find shader op "; |
| msg += pName; |
| msg += "; available ops"; |
| const char sep = ':'; |
| for (auto &pAvailOp : ShaderOpSet->ShaderOps) { |
| msg += sep; |
| msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; |
| } |
| CA2W msgWide(msg.c_str()); |
| pOutputStrFn(pStrCtx, msgWide); |
| return E_FAIL; |
| } |
| |
| std::shared_ptr<st::ShaderOpTest> test = std::make_shared<st::ShaderOpTest>(); |
| test->SetupRenderTarget(pShaderOp, pDevice, pCommandQueue, pRenderTarget); |
| test->SetDxcSupport(&m_support); |
| test->RunShaderOp(pShaderOp); |
| test->PresentRenderTarget(pShaderOp, pCommandQueue, pRenderTarget); |
| |
| pOutputStrFn(pStrCtx, L"Rendering complete.\r\n"); |
| |
| if (!pShaderOp->IsCompute()) { |
| D3D12_QUERY_DATA_PIPELINE_STATISTICS stats; |
| test->GetPipelineStats(&stats); |
| wchar_t statsText[400]; |
| StringCchPrintfW(statsText, _countof(statsText), |
| L"Vertices/primitives read by input assembler: %I64u/%I64u\r\n" |
| L"Vertex shader invocations: %I64u\r\n" |
| L"Geometry shader invocations/output primitive: %I64u/%I64u\r\n" |
| L"Primitives sent to rasterizer/rendered: %I64u/%I64u\r\n" |
| L"PS/HS/DS/CS invocations: %I64u/%I64u/%I64u/%I64u\r\n", |
| stats.IAVertices, stats.IAPrimitives, stats.VSInvocations, |
| stats.GSInvocations, stats.GSPrimitives, stats.CInvocations, |
| stats.CPrimitives, stats.PSInvocations, stats.HSInvocations, |
| stats.DSInvocations, stats.CSInvocations); |
| pOutputStrFn(pStrCtx, statsText); |
| } |
| |
| if (pReadBackDump) { |
| WriteReadBackDump(pShaderOp, test.get(), &pDump); |
| } |
| |
| hr = S_OK; |
| } |
| catch (const CAtlException &E) |
| { |
| hr = E.m_hr; |
| } |
| catch (const std::bad_alloc &) |
| { |
| hr = E_OUTOFMEMORY; |
| } |
| catch (const std::exception &) |
| { |
| hr = E_FAIL; |
| } |
| |
| // Drain the device message queue if available. |
| if (pInfoQueue != nullptr) { |
| wchar_t buf[200]; |
| StringCchPrintfW(buf, _countof(buf), |
| L"NumStoredMessages=%u limit/discarded by limit=%u/%u " |
| L"allowed/denied by storage filter=%u/%u " |
| L"NumStoredMessagesAllowedByRetrievalFilter=%u\r\n", |
| (unsigned)pInfoQueue->GetNumStoredMessages(), |
| (unsigned)pInfoQueue->GetMessageCountLimit(), |
| (unsigned)pInfoQueue->GetNumMessagesDiscardedByMessageCountLimit(), |
| (unsigned)pInfoQueue->GetNumMessagesAllowedByStorageFilter(), |
| (unsigned)pInfoQueue->GetNumMessagesDeniedByStorageFilter(), |
| (unsigned)pInfoQueue->GetNumStoredMessagesAllowedByRetrievalFilter()); |
| pOutputStrFn(pStrCtx, buf); |
| |
| WriteInfoQueueMessages(pStrCtx, pOutputStrFn, pInfoQueue); |
| |
| pInfoQueue->ClearStoredMessages(); |
| pInfoQueue->PopRetrievalFilter(); |
| pInfoQueue->PopStorageFilter(); |
| if (FilterCreation) { |
| pInfoQueue->PopStorageFilter(); |
| } |
| } |
| |
| if (pReadBackDump) *pReadBackDump = pDump.Detach(); |
| |
| return hr; |
| } |
| } |
| #endif |
| // MARKER: ExecutionTest/DxilConf Shared Implementation End |
| // Do not remove the line above - it is used by TranslateExecutionTest.py |