From d328a1933c499a1d75b69dc3a0bd33a878b67152 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 7 Mar 2026 13:51:55 -0500 Subject: [PATCH 1/8] Add DirectX 12 GPU backend for automated unit testing on Windows Introduce a DirectX 12 / HLSL rendering backend alongside the existing OpenGL / GLSL and Metal / MSL backends, enabling the GPU unit test suite to run natively on Windows without requiring an OpenGL context. Key changes: GraphicalApp abstract interface (graphicalapp.h/cpp) Backend-agnostic base class extracted from OglApp. OglApp and MetalApp now inherit from it. DxApp (dxapp.h/cpp) -- DirectX 12 backend Off-screen RGBA32F render target, full-screen triangle via SV_VertexID, staging readback, SM 6.0 DXC shader compilation. HLSLBuilder (hlsl.h/cpp) -- HLSL shader generation Translates GpuShaderDesc into HLSL pixel shaders with 1D and 3D LUT texture uploads in RGBA32F format. CMake integration OCIO_DIRECTX_ENABLED option, FetchContent for DirectX-Headers, auto-copy of DXC runtime DLLs to the test output directory. Test tolerance adjustments Minor epsilon increases for 4 tests due to DX12/SM6.0 FMA and pow() precision differences. All 263 GPU tests pass on the DirectX 12 backend. Build and run: # Configure (OCIO_DIRECTX_ENABLED defaults to ON on Windows) cmake -S . -B build -DCMAKE_BUILD_TYPE=Release # Build the GPU test binary cmake --build build --target test_gpu_exec --config Release # Run GPU tests with the DX12 backend ctest --test-dir build -C Release -R test_dx Signed-off-by: Eric Renaud-Houde --- CMakeLists.txt | 8 + .../install/InstallDirectXHeaders.cmake | 19 + src/apps/ociochecklut/main.cpp | 12 +- src/apps/ocioconvert/main.cpp | 16 +- src/apps/ociodisplay/main.cpp | 14 +- src/libutils/oglapphelpers/CMakeLists.txt | 131 ++- src/libutils/oglapphelpers/dxapp.cpp | 1038 +++++++++++++++++ src/libutils/oglapphelpers/dxapp.h | 105 ++ src/libutils/oglapphelpers/dxutils.h | 37 + src/libutils/oglapphelpers/graphicalapp.cpp | 15 + src/libutils/oglapphelpers/graphicalapp.h | 81 ++ src/libutils/oglapphelpers/hlsl.cpp | 498 ++++++++ src/libutils/oglapphelpers/hlsl.h | 95 ++ src/libutils/oglapphelpers/metalapp.h | 4 +- src/libutils/oglapphelpers/metalapp.mm | 6 +- src/libutils/oglapphelpers/oglapp.cpp | 63 +- src/libutils/oglapphelpers/oglapp.h | 102 +- src/libutils/oglapphelpers/vulkanapp.cpp | 6 +- src/libutils/oglapphelpers/vulkanapp.h | 38 +- tests/gpu/CMakeLists.txt | 58 +- tests/gpu/FixedFunctionOp_test.cpp | 9 +- tests/gpu/GPUUnitTest.cpp | 239 ++-- tests/gpu/MatrixOp_test.cpp | 3 +- 23 files changed, 2247 insertions(+), 350 deletions(-) create mode 100644 share/cmake/modules/install/InstallDirectXHeaders.cmake create mode 100644 src/libutils/oglapphelpers/dxapp.cpp create mode 100644 src/libutils/oglapphelpers/dxapp.h create mode 100644 src/libutils/oglapphelpers/dxutils.h create mode 100644 src/libutils/oglapphelpers/graphicalapp.cpp create mode 100644 src/libutils/oglapphelpers/graphicalapp.h create mode 100644 src/libutils/oglapphelpers/hlsl.cpp create mode 100644 src/libutils/oglapphelpers/hlsl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b1fce3398..a57649ac9d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -287,6 +287,14 @@ message(STATUS "") message(STATUS "Checking for GPU configuration...") include(CheckSupportGL) +# DirectX 12 is only available on Windows. +if(WIN32) + option(OCIO_DIRECTX_ENABLED "Enable DirectX 12 GPU rendering support" ON) +else() + set(OCIO_DIRECTX_ENABLED OFF CACHE BOOL "Enable DirectX 12 GPU rendering support" FORCE) +endif() +mark_as_advanced(OCIO_DIRECTX_ENABLED) + ############################################################################### # Check for ARM neon here because we need to know if ARM NEON is supported diff --git a/share/cmake/modules/install/InstallDirectXHeaders.cmake b/share/cmake/modules/install/InstallDirectXHeaders.cmake new file mode 100644 index 0000000000..7d2fe91d55 --- /dev/null +++ b/share/cmake/modules/install/InstallDirectXHeaders.cmake @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright Contributors to the OpenColorIO Project. +# +# Install DirectX-Headers (header-only, Windows only) +# https://github.com/microsoft/DirectX-Headers +# +############################################################################### + +include(FetchContent) + +set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/ext/build/DirectX-Headers") +set(DIRECTX_HEADERS_BUILD_TEST OFF CACHE BOOL "" FORCE) + +FetchContent_Declare(DirectX-Headers + GIT_REPOSITORY https://github.com/microsoft/DirectX-Headers.git + GIT_TAG v1.619.1 +) + +FetchContent_MakeAvailable(DirectX-Headers) diff --git a/src/apps/ociochecklut/main.cpp b/src/apps/ociochecklut/main.cpp index 9469090a15..593d676236 100644 --- a/src/apps/ociochecklut/main.cpp +++ b/src/apps/ociochecklut/main.cpp @@ -52,18 +52,18 @@ class ProcessorWrapper m_gpu = gpu; if (!m_oglApp) { - m_oglApp = OCIO::OglApp::CreateOglApp("ociochecklut", 256, 20); + m_oglApp = OCIO::GraphicalApp::CreateApp("ociochecklut", 256, 20); if (m_verbose) { - m_oglApp->printGLInfo(); + m_oglApp->printGraphicsInfo(); } } - m_oglApp->setPrintShader(m_verbose); + m_oglApp->setShaderVerbose(m_verbose); float image[4]{ 0.f, 0.f, 0.f, 0.f }; - m_oglApp->initImage(1, 1, OCIO::OglApp::COMPONENTS_RGBA, image); - m_oglApp->createGLBuffers(); + m_oglApp->initImage(1, 1, OCIO::GraphicalApp::COMPONENTS_RGBA, image); + m_oglApp->createBuffers(); OCIO::GpuShaderDescRcPtr shaderDesc = OCIO::GpuShaderDesc::CreateShaderDesc(); shaderDesc->setLanguage(OCIO::GPU_LANGUAGE_GLSL_1_2); m_gpu->extractGpuShaderInfo(shaderDesc); @@ -98,7 +98,7 @@ class ProcessorWrapper m_oglApp->redisplay(); m_oglApp->readImage(pixel.data()); } - OCIO::OglAppRcPtr m_oglApp; + OCIO::GraphicalAppRcPtr m_oglApp; #else void applyGPU(std::vector &) { diff --git a/src/apps/ocioconvert/main.cpp b/src/apps/ocioconvert/main.cpp index 31a5ed3542..165aefc4a8 100644 --- a/src/apps/ocioconvert/main.cpp +++ b/src/apps/ocioconvert/main.cpp @@ -361,18 +361,18 @@ int main(int argc, const char **argv) #ifdef OCIO_GPU_ENABLED // Initialize GPU. - OCIO::OglAppRcPtr oglApp; + OCIO::GraphicalAppRcPtr oglApp; if (usegpu || usegpuLegacy) { - OCIO::OglApp::Components comp = OCIO::OglApp::COMPONENTS_RGBA; + OCIO::GraphicalApp::Components comp = OCIO::GraphicalApp::COMPONENTS_RGBA; if (imgInput.getNumChannels() == 4) { - comp = OCIO::OglApp::COMPONENTS_RGBA; + comp = OCIO::GraphicalApp::COMPONENTS_RGBA; } else if (imgInput.getNumChannels() == 3) { - comp = OCIO::OglApp::COMPONENTS_RGB; + comp = OCIO::GraphicalApp::COMPONENTS_RGB; } else { @@ -383,7 +383,7 @@ int main(int argc, const char **argv) try { - oglApp = OCIO::OglApp::CreateOglApp("ocioconvert", 256, 20); + oglApp = OCIO::GraphicalApp::CreateApp("ocioconvert", 256, 20); } catch (const OCIO::Exception & e) { @@ -393,14 +393,14 @@ int main(int argc, const char **argv) if (verbose) { - oglApp->printGLInfo(); + oglApp->printGraphicsInfo(); } - oglApp->setPrintShader(outputgpuInfo); + oglApp->setShaderVerbose(outputgpuInfo); oglApp->initImage(imgInput.getWidth(), imgInput.getHeight(), comp, (float *)imgInput.getData()); - oglApp->createGLBuffers(); + oglApp->createBuffers(); } #endif // OCIO_GPU_ENABLED diff --git a/src/apps/ociodisplay/main.cpp b/src/apps/ociodisplay/main.cpp index ed7f09759a..6eebc3b6a3 100644 --- a/src/apps/ociodisplay/main.cpp +++ b/src/apps/ociodisplay/main.cpp @@ -64,7 +64,7 @@ float g_display_gamma{1.0f}; int g_channelHot[4]{1, 1, 1, 1}; // show rgb int g_viewsMenuID; -OCIO::OglAppRcPtr g_oglApp; +OCIO::GraphicalAppRcPtr g_oglApp; void UpdateOCIOGLState(); @@ -115,14 +115,14 @@ static void InitImageTexture(const char * filename) } } - OCIO::OglApp::Components comp = OCIO::OglApp::COMPONENTS_RGBA; + OCIO::GraphicalApp::Components comp = OCIO::GraphicalApp::COMPONENTS_RGBA; if (img.getNumChannels() == 4) { - comp = OCIO::OglApp::COMPONENTS_RGBA; + comp = OCIO::GraphicalApp::COMPONENTS_RGBA; } else if (img.getNumChannels() == 3) { - comp = OCIO::OglApp::COMPONENTS_RGB; + comp = OCIO::GraphicalApp::COMPONENTS_RGB; } else { @@ -658,7 +658,7 @@ int main(int argc, char **argv) else #endif { - g_oglApp = std::make_shared("ociodisplay", 512, 512); + g_oglApp = std::make_shared("ociodisplay", 512, 512); } } catch (const OCIO::Exception &e) @@ -669,11 +669,11 @@ int main(int argc, char **argv) if (g_verbose) { - g_oglApp->printGLInfo(); + g_oglApp->printGraphicsInfo(); } g_oglApp->setYMirror(); - g_oglApp->setPrintShader(g_gpuinfo); + g_oglApp->setShaderVerbose(g_gpuinfo); glutReshapeFunc(Reshape); glutKeyboardFunc(Key); diff --git a/src/libutils/oglapphelpers/CMakeLists.txt b/src/libutils/oglapphelpers/CMakeLists.txt index 207caf8497..0979891c81 100644 --- a/src/libutils/oglapphelpers/CMakeLists.txt +++ b/src/libutils/oglapphelpers/CMakeLists.txt @@ -1,20 +1,41 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright Contributors to the OpenColorIO Project. -if(NOT OCIO_GL_ENABLED) - message(WARNING "GL component missing. Skipping oglapphelpers.") +if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED) AND NOT OCIO_VULKAN_ENABLED) + message(WARNING "GL component missing, DirectX disabled, and Vulkan disabled. Skipping oglapphelpers.") return() endif() set(SOURCES - glsl.cpp - oglapp.cpp + graphicalapp.cpp ) set(INCLUDES - glsl.h - oglapp.h + graphicalapp.h ) +if(OCIO_GL_ENABLED) + list(APPEND SOURCES + glsl.cpp + oglapp.cpp + ) + list(APPEND INCLUDES + glsl.h + oglapp.h + ) +endif() + +if(WIN32 AND OCIO_DIRECTX_ENABLED) + list(APPEND SOURCES + dxapp.cpp + hlsl.cpp + ) + list(APPEND INCLUDES + dxapp.h + dxutils.h + hlsl.h + ) +endif() + if(APPLE) list(APPEND SOURCES @@ -46,7 +67,7 @@ if(OCIO_VULKAN_ENABLED) endif() -add_library(oglapphelpers STATIC ${SOURCES}) +add_library(oglapphelpers STATIC ${INCLUDES} ${SOURCES}) set_target_properties(oglapphelpers PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(oglapphelpers PROPERTIES OUTPUT_NAME OpenColorIOoglapphelpers) @@ -71,47 +92,55 @@ set_target_properties(oglapphelpers PROPERTIES target_include_directories(oglapphelpers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE - ${OPENGL_INCLUDE_DIR} - ${GLEW_INCLUDE_DIRS} - ${GLUT_INCLUDE_DIR} ) +if(OCIO_GL_ENABLED) + target_include_directories(oglapphelpers + PRIVATE + ${OPENGL_INCLUDE_DIR} + ${GLEW_INCLUDE_DIRS} + ${GLUT_INCLUDE_DIR} + ) +endif() -if(${OCIO_USE_GLVND}) - if(${OCIO_EGL_HEADLESS}) - target_include_directories(oglapphelpers - PRIVATE - ${OPENGL_EGL_INCLUDE_DIRS} - ) - target_link_libraries(oglapphelpers - PRIVATE - OpenColorIO - OpenGL::OpenGL - OpenGL::GLU - ${GLEW_LIBRARIES} - ${GLUT_LIBRARIES} - OpenGL::EGL +target_link_libraries(oglapphelpers + PRIVATE + OpenColorIO +) + +if(OCIO_GL_ENABLED) + if(${OCIO_USE_GLVND}) + if(${OCIO_EGL_HEADLESS}) + target_include_directories(oglapphelpers + PRIVATE + ${OPENGL_EGL_INCLUDE_DIRS} ) + target_link_libraries(oglapphelpers + PRIVATE + OpenGL::OpenGL + OpenGL::GLU + ${GLEW_LIBRARIES} + ${GLUT_LIBRARIES} + OpenGL::EGL + ) + else() + target_link_libraries(oglapphelpers + PRIVATE + OpenGL::OpenGL + OpenGL::GLU + ${GLEW_LIBRARIES} + ${GLUT_LIBRARIES} + ) + endif() else() + # if OCIO_USE_GLVND is OFF, OCIO_EGL_HEADLESS is also OFF target_link_libraries(oglapphelpers PRIVATE - OpenColorIO - OpenGL::OpenGL - OpenGL::GLU + ${OPENGL_LIBRARIES} ${GLEW_LIBRARIES} ${GLUT_LIBRARIES} - ) - endif() -else() - # if OCIO_USE_GLVND is OFF, OCIO_EGL_HEADLESS is also OFF - target_link_libraries(oglapphelpers - PRIVATE - OpenColorIO - ${OPENGL_LIBRARIES} - ${GLEW_LIBRARIES} - ${GLUT_LIBRARIES} - ) + ) + endif() endif() if(APPLE) @@ -126,6 +155,30 @@ if(APPLE) ) endif() +if(OCIO_GL_ENABLED) + target_compile_definitions(oglapphelpers + PUBLIC + OCIO_GL_ENABLED + ) +endif() + +if(WIN32 AND OCIO_DIRECTX_ENABLED) + include(InstallDirectXHeaders) + target_compile_definitions(oglapphelpers + PUBLIC + OCIO_DIRECTX_ENABLED + ) + target_link_libraries(oglapphelpers + PUBLIC + Microsoft::DirectX-Headers + PRIVATE + d3d12 + dxgi + dxcompiler + dxguid + ) +endif() + if(OCIO_VULKAN_ENABLED) target_include_directories(oglapphelpers PUBLIC diff --git a/src/libutils/oglapphelpers/dxapp.cpp b/src/libutils/oglapphelpers/dxapp.cpp new file mode 100644 index 0000000000..4859ba600d --- /dev/null +++ b/src/libutils/oglapphelpers/dxapp.cpp @@ -0,0 +1,1038 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#include +#include +#include + +#include + +#include "dxapp.h" +#include "dxutils.h" + +#include + +namespace OCIO_NAMESPACE +{ + namespace + { + LRESULT CALLBACK WindowProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) + { + switch (message) + { + case WM_DESTROY: + PostQuitMessage(0); + return 0; + } + + return DefWindowProc(hWnd, message, wParam, lParam); + } + + void GetHardwareAdapter( + IDXGIFactory1* pFactory, + IDXGIAdapter1** ppAdapter, + bool requestHighPerformanceAdapter = true) + { + *ppAdapter = nullptr; + + ComPtr adapter; + + ComPtr factory6; + if (SUCCEEDED(pFactory->QueryInterface(IID_PPV_ARGS(&factory6)))) + { + for ( + UINT adapterIndex = 0; + SUCCEEDED(factory6->EnumAdapterByGpuPreference( + adapterIndex, + requestHighPerformanceAdapter ? DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED, + IID_PPV_ARGS(&adapter))); + ++adapterIndex) + { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + { + continue; + } + + if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) + { + break; + } + } + } + + if (adapter.Get() == nullptr) + { + for (UINT adapterIndex = 0; SUCCEEDED(pFactory->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) + { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + { + continue; + } + + if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) + { + break; + } + } + } + + *ppAdapter = adapter.Detach(); + } + } + +DxApp::DxApp(const char* winTitle, int winWidth, int winHeight) + : m_viewportWidth{ winWidth } + , m_viewportHeight{ winHeight } +{ + // Initialize the window class. + WNDCLASSEXA windowClass = { 0 }; + windowClass.cbSize = sizeof(WNDCLASSEXA); + windowClass.style = CS_HREDRAW | CS_VREDRAW; + windowClass.lpfnWndProc = WindowProc; + windowClass.hInstance = NULL; + windowClass.hCursor = LoadCursor(NULL, IDC_ARROW); + windowClass.lpszClassName = winTitle; + RegisterClassExA(&windowClass); + m_windowClassName = winTitle; + + RECT windowRect = { 0, 0, static_cast(m_viewportWidth), static_cast(m_viewportHeight) }; + AdjustWindowRect(&windowRect, WS_OVERLAPPEDWINDOW, FALSE); + + // Create the window and store a handle to it. + m_hwnd = CreateWindowA( + windowClass.lpszClassName, + winTitle, + WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, + CW_USEDEFAULT, + windowRect.right - windowRect.left, + windowRect.bottom - windowRect.top, + NULL, // We have no parent window. + NULL, // We aren't using menus. + NULL, + NULL); + + ShowWindow(m_hwnd, SW_RESTORE); + + UINT dxgiFactoryFlags = 0; +#if defined(_DEBUG) + { + ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) + { + debugController->EnableDebugLayer(); + + // Enable additional debug layers. + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + } + } +#endif + + ComPtr factory; + ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory))); + + ComPtr hardwareAdapter; + GetHardwareAdapter(factory.Get(), &hardwareAdapter); + + ThrowIfFailed(D3D12CreateDevice( + hardwareAdapter.Get(), + D3D_FEATURE_LEVEL_11_0, // Standard minimum for D3D12, maximize compatibility + IID_PPV_ARGS(&m_device) + )); + + // Describe and create the command queue. + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + ThrowIfFailed(m_device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&m_commandQueue))); + + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.BufferCount = FrameCount; + swapChainDesc.Width = m_viewportWidth; + swapChainDesc.Height = m_viewportHeight; + swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + swapChainDesc.SampleDesc.Count = 1; + + ComPtr swapChain; + ThrowIfFailed(factory->CreateSwapChainForHwnd( + m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it. + m_hwnd, + &swapChainDesc, + nullptr, + nullptr, + &swapChain + )); + + // This sample does not support fullscreen transitions. + ThrowIfFailed(factory->MakeWindowAssociation(m_hwnd, DXGI_MWA_NO_ALT_ENTER)); + + ThrowIfFailed(swapChain.As(&m_swapChain)); + + // Create descriptor heaps. + { + // Describe and create a render target view (RTV) descriptor heap. + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.NumDescriptors = FrameCount; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&m_rtvHeap))); + + m_rtvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + m_cbvSrvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } + + // Create frame resources. + { + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Create a RTV for each frame. + for (UINT n = 0; n < FrameCount; n++) + { + ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); + m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); + rtvHandle.Offset(1, m_rtvDescriptorSize); + } + } + + ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator))); + // Create the command list. + ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), m_pipelineState.Get(), IID_PPV_ARGS(&m_commandList))); + // Close the command list and execute it to begin the initial GPU setup. + ThrowIfFailed(m_commandList->Close()); + + // Create fence + ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence))); + m_fenceValue = 1; + m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (!m_fenceEvent) + { + throw Exception("DxApp: CreateEvent failed."); + } +} + +DxApp::~DxApp() +{ + // Ensure that the GPU is no longer referencing resources that are about to be + // cleaned up by the destructor. + waitForPreviousFrame(); + + CloseHandle(m_fenceEvent); + + if (m_hwnd) + { + DestroyWindow(m_hwnd); + } + if (!m_windowClassName.empty()) + { + UnregisterClassA(m_windowClassName.c_str(), NULL); + } +} + +void DxApp::initImage(int imageWidth, int imageHeight, Components comp, const float* imageBuffer) +{ + if (comp != COMPONENTS_RGBA) + { + throw Exception("DxApp: COMPONENTS_RGB is unused and not currently implemented."); + } + + m_imageWidth = imageWidth; + m_imageHeight = imageHeight; + m_comp = comp; + + // Create CBV/SRV heap if not already created (room for image + LUT textures) + if (!m_cbvSrvHeap) + { + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.NumDescriptors = 16; // Slot 0 for image, remaining for LUT textures + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(&m_cbvSrvHeap))); + } + + // Create the image texture in default heap + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = m_imageWidth; + textureDesc.Height = m_imageHeight; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_imageTexture))); + + // Create the upload buffer with row-pitch aligned size + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(m_imageWidth, pixelSize); + const UINT64 uploadBufferSize = rowPitch * m_imageHeight; + + CD3DX12_HEAP_PROPERTIES uploadHeapProps(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&m_imageUploadBuffer))); + + // Perform the initial upload + // Map the upload buffer and copy image data + const UINT srcRowPitch = m_imageWidth * pixelSize; // Source data is tightly packed + BYTE* pData = nullptr; + ThrowIfFailed(m_imageUploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + for (int y = 0; y < m_imageHeight; ++y) + { + const BYTE* srcRow = reinterpret_cast(imageBuffer) + y * srcRowPitch; + BYTE* dstRow = pData + y * rowPitch; + memcpy(dstRow, srcRow, srcRowPitch); + } + + m_imageUploadBuffer->Unmap(0, nullptr); + + // Record commands to copy from upload buffer to the image texture + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = m_imageWidth; + footprint.Footprint.Height = m_imageHeight; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_imageUploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_imageTexture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to PIXEL_SHADER_RESOURCE + auto barrierInit = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_commandList->ResourceBarrier(1, &barrierInit); + + ThrowIfFailed(m_commandList->Close()); + + // Execute the command list + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Wait for GPU to finish + waitForPreviousFrame(); + + // Create SRV for the image texture in slot 0 + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle(m_cbvSrvHeap->GetCPUDescriptorHandleForHeapStart()); + m_device->CreateShaderResourceView(m_imageTexture.Get(), &srvDesc, srvHandle); +} + +void DxApp::updateImage(const float* imageBuffer) +{ + // Map the upload buffer and copy image data row-by-row with proper pitch alignment + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + const UINT srcRowPitch = m_imageWidth * pixelSize; + const UINT dstRowPitch = AlignRowPitch(m_imageWidth, pixelSize); + + BYTE* pData = nullptr; + ThrowIfFailed(m_imageUploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + for (int y = 0; y < m_imageHeight; ++y) + { + const BYTE* srcRow = reinterpret_cast(imageBuffer) + y * srcRowPitch; + BYTE* dstRow = pData + y * dstRowPitch; + memcpy(dstRow, srcRow, srcRowPitch); + } + + m_imageUploadBuffer->Unmap(0, nullptr); + + // Record commands to copy from upload buffer to the image texture + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Transition texture from PIXEL_SHADER_RESOURCE to COPY_DEST for the update + auto barrierUpdate1 = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST); + m_commandList->ResourceBarrier(1, &barrierUpdate1); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = m_imageWidth; + footprint.Footprint.Height = m_imageHeight; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = dstRowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_imageUploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_imageTexture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to PIXEL_SHADER_RESOURCE + auto barrierUpdate2 = CD3DX12_RESOURCE_BARRIER::Transition( + m_imageTexture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_commandList->ResourceBarrier(1, &barrierUpdate2); + + ThrowIfFailed(m_commandList->Close()); + + // Execute the command list + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + // Wait for GPU to finish + waitForPreviousFrame(); +} + +void DxApp::createBuffers() +{ + // Create readback buffer for copying GPU results back to CPU + const UINT pixelSize = 4 * sizeof(float); // RGBA32F + m_readbackRowPitch = AlignRowPitch(m_viewportWidth, pixelSize); + const UINT64 readbackBufferSize = m_readbackRowPitch * m_viewportHeight; + + CD3DX12_HEAP_PROPERTIES readbackHeapProps(D3D12_HEAP_TYPE_READBACK); + CD3DX12_RESOURCE_DESC readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(readbackBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &readbackHeapProps, + D3D12_HEAP_FLAG_NONE, + &readbackBufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readbackBuffer))); +} + +void DxApp::setShader(GpuShaderDescRcPtr& shaderDesc) +{ + // Reset command list for LUT texture uploads + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Store shader desc so redisplay() can update uniform values each frame. + m_currentShaderDesc = shaderDesc; + + // Create HLSLBuilder to allocate all LUT textures. + // Each texture is placed at the descriptor heap slot matching its HLSL register + // (derived from shaderDesc->get3DTextureShaderBindingIndex / getTextureShaderBindingIndex). + m_hlslBuilder = HLSLBuilder::Create(shaderDesc, m_device.Get(), m_commandList.Get(), + m_cbvSrvHeap.Get()); + + // Execute command list to upload LUT textures and wait for completion + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + waitForPreviousFrame(); + + // Create constant buffer for OCIO uniform variables (dynamic properties). + // D3D12 constant buffers must be 256-byte aligned. We always create one so the + // root signature can unconditionally declare a CBV at b0. + { + m_constantBuffer.Reset(); + m_cbMappedData = nullptr; + + const UINT rawSize = static_cast(shaderDesc->getUniformBufferSize()); + m_cbufferAlignedSize = ((rawSize + 255u) & ~255u); + if (m_cbufferAlignedSize == 0) + m_cbufferAlignedSize = 256u; + + auto heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + auto cbDesc = CD3DX12_RESOURCE_DESC::Buffer(m_cbufferAlignedSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &heapProps, D3D12_HEAP_FLAG_NONE, &cbDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&m_constantBuffer))); + + CD3DX12_RANGE readRange(0, 0); + ThrowIfFailed(m_constantBuffer->Map(0, &readRange, + reinterpret_cast(&m_cbMappedData))); + memset(m_cbMappedData, 0, m_cbufferAlignedSize); + } + + // Get the OCIO HLSL shader text + std::string ocioShader = m_hlslBuilder->getShaderText(); + + // Build the full HLSL shader source with a full-screen triangle vertex shader + // and pixel shader that applies OCIO color transform + std::ostringstream hlslSource; + + // Add the OCIO shader functions first + hlslSource << ocioShader << "\n\n"; + + // Declare the image texture and its sampler. + // OCIO's generated shader always starts LUT texture bindings at t1+ (textureBindingStart=1), + // so t0/s0 are always free for the input image. + hlslSource << "Texture2D img : register(t0);\n"; + hlslSource << "SamplerState linearSampler : register(s0);\n\n"; + + // Vertex shader: full-screen triangle using SV_VertexID (no vertex buffer needed) + hlslSource << "struct VSOutput {\n"; + hlslSource << " float4 position : SV_Position;\n"; + hlslSource << " float2 texcoord : TEXCOORD0;\n"; + hlslSource << "};\n\n"; + + hlslSource << "VSOutput VSMain(uint vertexID : SV_VertexID) {\n"; + hlslSource << " VSOutput output;\n"; + hlslSource << " // Full-screen triangle: vertices at (-1,-1), (3,-1), (-1,3)\n"; + hlslSource << " float2 texcoord = float2((vertexID << 1) & 2, vertexID & 2);\n"; + hlslSource << " output.position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1);\n"; + hlslSource << " output.texcoord = texcoord;\n"; + hlslSource << " return output;\n"; + hlslSource << "}\n\n"; + + // Pixel shader: sample image and apply OCIO color transform + hlslSource << "float4 PSMain(VSOutput input) : SV_Target {\n"; + hlslSource << " float4 col = img.Sample(linearSampler, input.texcoord);\n"; + hlslSource << " return " << shaderDesc->getFunctionName() << "(col);\n"; + hlslSource << "}\n"; + + std::string fullShader = hlslSource.str(); + + if (isShaderVerbose()) + { + std::cout << std::endl; + std::cout << "GPU Shader Program:" << std::endl; + std::cout << std::endl; + std::cout << fullShader << std::endl; + std::cout << std::endl; + } + + // Compile shaders with DXC (DirectX Shader Compiler) for SM 6.0 + ComPtr dxcUtils; + ComPtr dxcCompiler; + ThrowIfFailed(DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils))); + ThrowIfFailed(DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler))); + + // Create a source blob from the shader string + ComPtr sourceBlob; + ThrowIfFailed(dxcUtils->CreateBlobFromPinned( + fullShader.c_str(), static_cast(fullShader.size()), + DXC_CP_UTF8, &sourceBlob)); + + DxcBuffer sourceBuffer; + sourceBuffer.Ptr = sourceBlob->GetBufferPointer(); + sourceBuffer.Size = sourceBlob->GetBufferSize(); + sourceBuffer.Encoding = DXC_CP_ACP; + + // Compile vertex shader (vs_6_0) + LPCWSTR vsArgs[] = { L"-T", L"vs_6_0", L"-E", L"VSMain" }; + ComPtr vsResult; + ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, vsArgs, _countof(vsArgs), + nullptr, IID_PPV_ARGS(&vsResult))); + HRESULT vsHr; + vsResult->GetStatus(&vsHr); + if (FAILED(vsHr)) + { + ComPtr errors; + vsResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + std::ostringstream oss; + oss << "Vertex shader compilation failed (" << HrToString(vsHr) << ")"; + if (errors && errors->GetStringLength()) + oss << ":\n" << errors->GetStringPointer(); + std::cerr << oss.str() << std::endl; + throw Exception(oss.str().c_str()); + } + ComPtr vertexShaderBlob; + ThrowIfFailed(vsResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&vertexShaderBlob), nullptr)); + + // Compile pixel shader (ps_6_0). + LPCWSTR psArgs[] = { L"-T", L"ps_6_0", L"-E", L"PSMain" }; + ComPtr psResult; + ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, psArgs, _countof(psArgs), + nullptr, IID_PPV_ARGS(&psResult))); + HRESULT psHr; + psResult->GetStatus(&psHr); + if (FAILED(psHr)) + { + ComPtr errors; + psResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + std::ostringstream oss; + oss << "Pixel shader compilation failed (" << HrToString(psHr) << ")"; + if (errors && errors->GetStringLength()) + oss << ":\n" << errors->GetStringPointer(); + std::cerr << oss.str() << std::endl; + throw Exception(oss.str().c_str()); + } + ComPtr pixelShaderBlob; + ThrowIfFailed(psResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&pixelShaderBlob), nullptr)); + + // Build root signature with a descriptor table for all SRVs and a static sampler. + // Total SRVs = 1 (image at t0) + total LUT count (at t1..tN). + // We use the total texture count from shaderDesc to cover all possible binding slots, + // since LUT heap slots are derived from binding indices and may not be sequential. + const UINT numLUTs = shaderDesc->getNumTextures() + shaderDesc->getNum3DTextures(); + UINT totalSRVs = 1 + numLUTs; + + CD3DX12_DESCRIPTOR_RANGE1 srvRange; + srvRange.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, totalSRVs, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC); + + // Root parameter 0: descriptor table for all SRVs (image + LUTs). + // Root parameter 1: root CBV at b0 for OCIO uniform variables. + CD3DX12_ROOT_PARAMETER1 rootParameters[2]; + rootParameters[0].InitAsDescriptorTable(1, &srvRange, D3D12_SHADER_VISIBILITY_PIXEL); + rootParameters[1].InitAsConstantBufferView(0, 0, + D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, + D3D12_SHADER_VISIBILITY_PIXEL); + + // Build static samplers: s0 for the input image, s1..sN for each OCIO LUT texture. + std::vector staticSamplers(1 + numLUTs); + for (UINT i = 0; i <= numLUTs; ++i) + { + staticSamplers[i] = {}; + staticSamplers[i].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + staticSamplers[i].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + staticSamplers[i].MipLODBias = 0; + staticSamplers[i].MaxAnisotropy = 0; + staticSamplers[i].ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + staticSamplers[i].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + staticSamplers[i].MinLOD = 0.0f; + staticSamplers[i].MaxLOD = D3D12_FLOAT32_MAX; + staticSamplers[i].ShaderRegister = i; // s0 = image, s1..sN = OCIO LUT samplers + staticSamplers[i].RegisterSpace = 0; + staticSamplers[i].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; + } + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init_1_1(_countof(rootParameters), rootParameters, + static_cast(staticSamplers.size()), staticSamplers.data(), + D3D12_ROOT_SIGNATURE_FLAG_NONE); + + ComPtr signature; + ComPtr sigErrorBlob; + HRESULT hr = D3DX12SerializeVersionedRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, + &signature, &sigErrorBlob); + if (FAILED(hr)) + { + if (sigErrorBlob) + { + std::cerr << "Root signature serialization error:\n" + << static_cast(sigErrorBlob->GetBufferPointer()) << std::endl; + } + ThrowIfFailed(hr); + } + + // Release previous root signature if it exists + m_rootSignature.Reset(); + + hr = m_device->CreateRootSignature(0, signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(&m_rootSignature)); + if (FAILED(hr)) + { + throw Exception(("CreateRootSignature failed: " + HrToString(hr)).c_str()); + } + + // Create the pipeline state object (PSO) + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.InputLayout = { nullptr, 0 }; // No vertex input layout (using SV_VertexID) + psoDesc.pRootSignature = m_rootSignature.Get(); + psoDesc.VS = { vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize() }; + psoDesc.PS = { pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize() }; + psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.SampleMask = UINT_MAX; + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc.NumRenderTargets = 1; + psoDesc.RTVFormats[0] = DXGI_FORMAT_R32G32B32A32_FLOAT; + psoDesc.SampleDesc.Count = 1; + + // Release previous pipeline state if it exists + m_pipelineState.Reset(); + + hr = m_device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&m_pipelineState)); + if (FAILED(hr)) + { + std::cerr << "CreateGraphicsPipelineState failed (" << HrToString(hr) << ")\n" + << "Full HLSL shader:\n" << fullShader << std::endl; + throw Exception(("CreateGraphicsPipelineState failed: " + HrToString(hr)).c_str()); + } +} + +void DxApp::reshape(int width, int height) +{ + // Skip if nothing changed and resources are already initialized + if (m_viewportWidth == width && m_viewportHeight == height && m_floatRenderTarget) + return; + + // Wait for any in-flight GPU work before resizing resources + waitForPreviousFrame(); + + m_viewportWidth = width; + m_viewportHeight = height; + + // Release swap chain render target references before ResizeBuffers + for (UINT n = 0; n < FrameCount; n++) + m_renderTargets[n].Reset(); + + // Resize swap chain back buffers to the new dimensions + ThrowIfFailed(m_swapChain->ResizeBuffers( + FrameCount, + static_cast(width), + static_cast(height), + DXGI_FORMAT_R8G8B8A8_UNORM, + 0)); + + // Recreate RTVs for the resized swap chain back buffers + { + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); + for (UINT n = 0; n < FrameCount; n++) + { + ThrowIfFailed(m_swapChain->GetBuffer(n, IID_PPV_ARGS(&m_renderTargets[n]))); + m_device->CreateRenderTargetView(m_renderTargets[n].Get(), nullptr, rtvHandle); + rtvHandle.Offset(1, m_rtvDescriptorSize); + } + } + + // Create (or recreate) the off-screen R32G32B32A32_FLOAT render target for OCIO rendering. + // This avoids 8-bit UNORM quantization errors when comparing GPU vs CPU results. + m_floatRenderTarget.Reset(); + + D3D12_RESOURCE_DESC floatRtDesc = {}; + floatRtDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + floatRtDesc.Width = static_cast(width); + floatRtDesc.Height = static_cast(height); + floatRtDesc.DepthOrArraySize = 1; + floatRtDesc.MipLevels = 1; + floatRtDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + floatRtDesc.SampleDesc.Count = 1; + floatRtDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + D3D12_CLEAR_VALUE floatClearValue = {}; + floatClearValue.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(m_device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &floatRtDesc, + D3D12_RESOURCE_STATE_RENDER_TARGET, + &floatClearValue, + IID_PPV_ARGS(&m_floatRenderTarget))); + + // Create a single-slot RTV heap for the float render target (created once) + if (!m_floatRtvHeap) + { + D3D12_DESCRIPTOR_HEAP_DESC floatRtvHeapDesc = {}; + floatRtvHeapDesc.NumDescriptors = 1; + floatRtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + floatRtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + ThrowIfFailed(m_device->CreateDescriptorHeap(&floatRtvHeapDesc, IID_PPV_ARGS(&m_floatRtvHeap))); + } + + // Create the float RTV + D3D12_RENDER_TARGET_VIEW_DESC floatRtvDesc = {}; + floatRtvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + floatRtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + m_device->CreateRenderTargetView(m_floatRenderTarget.Get(), &floatRtvDesc, + m_floatRtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Recreate readback buffer with updated dimensions (if already allocated by createBuffers) + if (m_readbackBuffer) + { + m_readbackBuffer.Reset(); + const UINT pixelSize = 4 * sizeof(float); // R32G32B32A32_FLOAT = 16 bytes per pixel + m_readbackRowPitch = AlignRowPitch(static_cast(width), pixelSize); + const UINT64 readbackBufferSize = m_readbackRowPitch * static_cast(height); + + CD3DX12_HEAP_PROPERTIES readbackHeapProps(D3D12_HEAP_TYPE_READBACK); + CD3DX12_RESOURCE_DESC readbackBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(readbackBufferSize); + ThrowIfFailed(m_device->CreateCommittedResource( + &readbackHeapProps, + D3D12_HEAP_FLAG_NONE, + &readbackBufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&m_readbackBuffer))); + } +} + +void DxApp::redisplay() +{ + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), m_pipelineState.Get())); + + UINT frameIndex = m_swapChain->GetCurrentBackBufferIndex(); + + // Transition swap chain back buffer to render target for clearing + auto barrierPresentToRt = CD3DX12_RESOURCE_BARRIER::Transition( + m_renderTargets[frameIndex].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_commandList->ResourceBarrier(1, &barrierPresentToRt); + + CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle( + m_rtvHeap->GetCPUDescriptorHandleForHeapStart(), frameIndex, m_rtvDescriptorSize); + + // Clear the swap chain back buffer (used for display only) + const float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + m_commandList->ClearRenderTargetView(rtvHandle, clearColor, 0, nullptr); + + // If pipeline state is ready, render the OCIO transform to the off-screen float render target. + // The float RT preserves full precision for accurate GPU vs CPU comparison in tests. + if (m_pipelineState && m_floatRenderTarget) + { + CD3DX12_CPU_DESCRIPTOR_HANDLE floatRtvHandle( + m_floatRtvHeap->GetCPUDescriptorHandleForHeapStart()); + + // Clear the float render target + m_commandList->ClearRenderTargetView(floatRtvHandle, clearColor, 0, nullptr); + + // Set descriptor heap for shader resources + ID3D12DescriptorHeap* descriptorHeaps[] = { m_cbvSrvHeap.Get() }; + m_commandList->SetDescriptorHeaps(_countof(descriptorHeaps), descriptorHeaps); + + // Set the root signature and pipeline state + m_commandList->SetGraphicsRootSignature(m_rootSignature.Get()); + m_commandList->SetPipelineState(m_pipelineState.Get()); + + // Set the root descriptor table to the start of the SRV heap (image + LUTs) + CD3DX12_GPU_DESCRIPTOR_HANDLE srvHandle(m_cbvSrvHeap->GetGPUDescriptorHandleForHeapStart()); + m_commandList->SetGraphicsRootDescriptorTable(0, srvHandle); + + // Update and bind the constant buffer (root parameter 1, b0). + // This fills in any OCIO uniform variables (dynamic properties like exposure, curves). + if (m_constantBuffer && m_cbMappedData && m_currentShaderDesc) + { + memset(m_cbMappedData, 0, m_cbufferAlignedSize); + + const unsigned numUniforms = m_currentShaderDesc->getNumUniforms(); + for (unsigned i = 0; i < numUniforms; ++i) + { + GpuShaderDesc::UniformData data; + m_currentShaderDesc->getUniform(i, data); + UINT8* dst = m_cbMappedData + data.m_bufferOffset; + + switch (data.m_type) + { + case UNIFORM_DOUBLE: + if (data.m_getDouble) + { + const float val = static_cast(data.m_getDouble()); + memcpy(dst, &val, sizeof(float)); + } + break; + case UNIFORM_BOOL: + if (data.m_getBool) + { + const int val = data.m_getBool() ? 1 : 0; + memcpy(dst, &val, sizeof(int)); + } + break; + case UNIFORM_FLOAT3: + if (data.m_getFloat3) + { + const Float3& f3 = data.m_getFloat3(); + memcpy(dst, f3.data(), 3 * sizeof(float)); + } + break; + case UNIFORM_VECTOR_FLOAT: + if (data.m_vectorFloat.m_getSize && data.m_vectorFloat.m_getVector) + { + const int sz = data.m_vectorFloat.m_getSize(); + const float* vals = data.m_vectorFloat.m_getVector(); + for (int j = 0; j < sz; ++j) + memcpy(dst + j * 16, &vals[j], sizeof(float)); + } + break; + case UNIFORM_VECTOR_INT: + if (data.m_vectorInt.m_getSize && data.m_vectorInt.m_getVector) + { + const int sz = data.m_vectorInt.m_getSize(); + const int* vals = data.m_vectorInt.m_getVector(); + for (int j = 0; j < sz; ++j) + memcpy(dst + j * 16, &vals[j], sizeof(int)); + } + break; + default: + break; + } + } + + m_commandList->SetGraphicsRootConstantBufferView( + 1, m_constantBuffer->GetGPUVirtualAddress()); + } + + // Set viewport and scissor rect + D3D12_VIEWPORT viewport = {}; + viewport.TopLeftX = 0.0f; + viewport.TopLeftY = 0.0f; + viewport.Width = static_cast(m_viewportWidth); + viewport.Height = static_cast(m_viewportHeight); + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + m_commandList->RSSetViewports(1, &viewport); + + D3D12_RECT scissorRect = { 0, 0, m_viewportWidth, m_viewportHeight }; + m_commandList->RSSetScissorRects(1, &scissorRect); + + // Render to the off-screen float render target + m_commandList->OMSetRenderTargets(1, &floatRtvHandle, FALSE, nullptr); + m_commandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + m_commandList->DrawInstanced(3, 1, 0, 0); + } + + // Transition swap chain back buffer back to PRESENT state + auto barrierRtToPresent = CD3DX12_RESOURCE_BARRIER::Transition( + m_renderTargets[frameIndex].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); + m_commandList->ResourceBarrier(1, &barrierRtToPresent); + + ThrowIfFailed(m_commandList->Close()); + + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + ThrowIfFailed(m_swapChain->Present(1, 0)); + + waitForPreviousFrame(); +} + +void DxApp::readImage(float* imageBuffer) +{ + ThrowIfFailed(m_commandAllocator->Reset()); + ThrowIfFailed(m_commandList->Reset(m_commandAllocator.Get(), nullptr)); + + // Transition the float render target from RENDER_TARGET to COPY_SOURCE for readback + auto barrierReadback1 = CD3DX12_RESOURCE_BARRIER::Transition( + m_floatRenderTarget.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); + m_commandList->ResourceBarrier(1, &barrierReadback1); + + // Copy from float render target to readback buffer + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Offset = 0; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = static_cast(m_viewportWidth); + footprint.Footprint.Height = static_cast(m_viewportHeight); + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = m_readbackRowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = m_floatRenderTarget.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLocation.SubresourceIndex = 0; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = m_readbackBuffer.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLocation.PlacedFootprint = footprint; + + m_commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition the float render target back to RENDER_TARGET for the next frame + auto barrierReadback2 = CD3DX12_RESOURCE_BARRIER::Transition( + m_floatRenderTarget.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_commandList->ResourceBarrier(1, &barrierReadback2); + + ThrowIfFailed(m_commandList->Close()); + + ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; + m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); + + waitForPreviousFrame(); + + // Map the readback buffer and copy float data row-by-row to caller's buffer, + // stripping D3D12 row-pitch alignment padding + BYTE* pData = nullptr; + ThrowIfFailed(m_readbackBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + const UINT dstRowPitch = static_cast(m_viewportWidth) * 4 * sizeof(float); + for (int y = 0; y < m_viewportHeight; ++y) + { + const BYTE* srcRow = pData + y * m_readbackRowPitch; + BYTE* dstRow = reinterpret_cast(imageBuffer) + y * dstRowPitch; + memcpy(dstRow, srcRow, dstRowPitch); + } + + m_readbackBuffer->Unmap(0, nullptr); +} + +void DxApp::printGraphicsInfo() const noexcept +{ + try + { + // Query IDXGIDevice from the D3D12 device + ComPtr dxgiDevice; + if (FAILED(m_device.As(&dxgiDevice))) + { + return; // Silently ignore failure + } + + // Get the adapter from the DXGI device + ComPtr adapter; + if (FAILED(dxgiDevice->GetAdapter(&adapter))) + { + return; // Silently ignore failure + } + + // Get adapter description + DXGI_ADAPTER_DESC desc; + if (FAILED(adapter->GetDesc(&desc))) + { + return; // Silently ignore failure + } + + // Convert wide string to narrow string for Description + char narrowDesc[128]; + WideCharToMultiByte(CP_UTF8, 0, desc.Description, -1, narrowDesc, sizeof(narrowDesc), nullptr, nullptr); + + // Print adapter name and dedicated video memory + std::cout << "Adapter: " << narrowDesc << std::endl; + std::cout << "Dedicated Video Memory: " << (desc.DedicatedVideoMemory / (1024 * 1024)) << " MB" << std::endl; + } + catch (...) + { + // Silently ignore any exceptions + } +} + +void DxApp::waitForPreviousFrame() +{ + // Signal and increment the fence value. + const UINT64 fence = m_fenceValue; + ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), fence)); + m_fenceValue++; + + // Wait until the previous frame is finished. + if (m_fence->GetCompletedValue() < fence) + { + ThrowIfFailed(m_fence->SetEventOnCompletion(fence, m_fenceEvent)); + WaitForSingleObject(m_fenceEvent, INFINITE); + } +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/dxapp.h b/src/libutils/oglapphelpers/dxapp.h new file mode 100644 index 0000000000..26f3beb4c4 --- /dev/null +++ b/src/libutils/oglapphelpers/dxapp.h @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + + +#pragma once +#include +#include + +#include "graphicalapp.h" +#include "hlsl.h" + +#include + +using Microsoft::WRL::ComPtr; + +namespace OCIO_NAMESPACE +{ + +class DxApp : public GraphicalApp +{ +public: + DxApp() = delete; + DxApp(const DxApp&) = delete; + DxApp& operator=(const DxApp&) = delete; + + DxApp(const char* winTitle, int winWidth, int winHeight); + ~DxApp(); + + virtual void initImage(int imageWidth, int imageHeight, + Components comp, const float* imageBuffer) override; + + virtual void updateImage(const float* imageBuffer) override; + + virtual void createBuffers() override; + + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) override; + + virtual void reshape(int width, int height) override; + + virtual void redisplay() override; + + virtual void readImage(float* imageBuffer) override; + + virtual void printGraphicsInfo() const noexcept override; + +private: + void waitForPreviousFrame(); + + static const UINT FrameCount = 2; + + int m_viewportWidth{ 0 }; + int m_viewportHeight{ 0 }; + + int m_imageWidth{ 0 }; + int m_imageHeight{ 0 }; + Components m_comp{ COMPONENTS_RGBA }; + + ComPtr m_swapChain; + ComPtr m_device; + ComPtr m_renderTargets[FrameCount]; + ComPtr m_commandAllocator; + ComPtr m_commandQueue; + ComPtr m_rootSignature; + ComPtr m_rtvHeap; + ComPtr m_cbvSrvHeap; + ComPtr m_pipelineState; + ComPtr m_commandList; + UINT m_rtvDescriptorSize; + UINT m_cbvSrvDescriptorSize; + + // Synchronization objects. + HANDLE m_fenceEvent; + ComPtr m_fence; + UINT64 m_fenceValue; + + // Image texture and upload resources. + ComPtr m_imageTexture; + ComPtr m_imageUploadBuffer; + ComPtr m_readbackBuffer; + UINT m_readbackRowPitch{ 0 }; + + // Off-screen float render target (R32G32B32A32_FLOAT) for OCIO rendering and readback. + // The swap chain back buffers (UNORM) are required for windowing but unused in tests. + ComPtr m_floatRenderTarget; + ComPtr m_floatRtvHeap; + + // HLSL shader builder + HLSLBuilderRcPtr m_hlslBuilder; + + // Constant buffer for OCIO uniform variables (dynamic properties like exposure, curves). + // Always allocated; root signature always includes a CBV at b0. + ComPtr m_constantBuffer; + UINT8* m_cbMappedData{ nullptr }; + UINT m_cbufferAlignedSize{ 0 }; + + // Current shader description, retained so redisplay() can update uniform values. + GpuShaderDescRcPtr m_currentShaderDesc; + + // Window handle and class name for cleanup. + HWND m_hwnd{ nullptr }; + std::string m_windowClassName; +}; + +} + diff --git a/src/libutils/oglapphelpers/dxutils.h b/src/libutils/oglapphelpers/dxutils.h new file mode 100644 index 0000000000..f7e4ec84a6 --- /dev/null +++ b/src/libutils/oglapphelpers/dxutils.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#pragma once + +#include + +#include + +#include + +namespace OCIO_NAMESPACE +{ + +inline std::string HrToString(HRESULT hr) +{ + char s_str[64] = {}; + sprintf_s(s_str, "HRESULT of 0x%08X", static_cast(hr)); + return std::string(s_str); +} + +inline void ThrowIfFailed(HRESULT hr) +{ + if (FAILED(hr)) + { + throw Exception(HrToString(hr).c_str()); + } +} + +// Align a row pitch to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT (256 bytes). +inline UINT AlignRowPitch(UINT width, UINT pixelSize) +{ + return (width * pixelSize + D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1) + & ~(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1); +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/graphicalapp.cpp b/src/libutils/oglapphelpers/graphicalapp.cpp new file mode 100644 index 0000000000..3baefdf36c --- /dev/null +++ b/src/libutils/oglapphelpers/graphicalapp.cpp @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#include "graphicalapp.h" +#include "oglapp.h" + +namespace OCIO_NAMESPACE +{ + +GraphicalAppRcPtr GraphicalApp::CreateApp(const char * winTitle, int winWidth, int winHeight) +{ + return OglApp::CreateApp(winTitle, winWidth, winHeight); +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/graphicalapp.h b/src/libutils/oglapphelpers/graphicalapp.h new file mode 100644 index 0000000000..a1b55c4aac --- /dev/null +++ b/src/libutils/oglapphelpers/graphicalapp.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#pragma once + +#include + +namespace OCIO_NAMESPACE +{ +// Forward declaration of GraphicalApp. +class GraphicalApp; +typedef OCIO_SHARED_PTR GraphicalAppRcPtr; + +class GraphicalApp +{ +public: + virtual ~GraphicalApp() = default; + enum Components + { + COMPONENTS_RGB = 0, + COMPONENTS_RGBA + }; + + // Shader code will be printed when generated. + void setShaderVerbose(bool print) + { + m_verboseShader = print; + } + + bool isShaderVerbose() const { return m_verboseShader; } + + // When displaying the processed image in a window, enable Y-axis mirroring. + void setYMirror() + { + m_yMirror = true; + } + + bool isYMirror() const { return m_yMirror; } + + // Initialize the image. + virtual void initImage(int imageWidth, int imageHeight, + Components comp, const float* imageBuffer) = 0; + + // Update the image if it changes. + virtual void updateImage(const float* imageBuffer) = 0; + + // Create frame and rendering buffers. Needed if readImage will be used. + virtual void createBuffers() = 0; + + // Set the shader code. + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) = 0; + + // Update the size of the buffer of the viewport that will be used to process the image + // (it does not modify the UI). To be called at least one time. Use image size if we want to + // read back the processed image. To process another image with the same size or using a + // different shader, reshape does not need to be called again. In case of an interactive + // application it should be called by the glutReshapeFunc callback using the windows size. + virtual void reshape(int width, int height) = 0; + + // Process the image. + virtual void redisplay() = 0; + + // Read the image from the rendering buffer. It is not meant to be used by interactive + // applications used to display the image. + virtual void readImage(float* imageBuffer) = 0; + + // Helper to print graphics info. + void virtual printGraphicsInfo() const noexcept = 0; + + // Factory: returns a platform-appropriate GraphicalApp (OGL or DX). + static GraphicalAppRcPtr CreateApp(const char * winTitle, int winWidth, int winHeight); + +private: + // Will shader code be outputed when setShader is called. + bool m_verboseShader{ false }; + // For interactive applications displaying the processed image. + bool m_yMirror{ false }; +}; + +} // namespace OCIO_NAMESPACE + diff --git a/src/libutils/oglapphelpers/hlsl.cpp b/src/libutils/oglapphelpers/hlsl.cpp new file mode 100644 index 0000000000..e165e420a5 --- /dev/null +++ b/src/libutils/oglapphelpers/hlsl.cpp @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#include +#include + +#include + +#include "hlsl.h" +#include "dxutils.h" + +namespace OCIO_NAMESPACE +{ + +namespace +{ + +// Upload a TEXTURE_1D LUT as a real D3D12 1D texture with RGBA32F format. +// RGBA32F is used instead of RGB32F because DXGI_FORMAT_R32G32B32_FLOAT has +// limited hardware support in D3D12 (optional for most resource types). +void AllocateTexture1D(ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + UINT width, + UINT numChannels, + const float* values, + Microsoft::WRL::ComPtr& texture, + Microsoft::WRL::ComPtr& uploadBuffer) +{ + if (!values) + { + throw Exception("Missing 1D LUT texture data"); + } + + // Create a 1D texture resource in default heap. + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = width; + textureDesc.Height = 1; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE1D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(device->CreateCommittedResource( + &defaultHeapProps, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + // Create upload buffer with row-pitch alignment + const UINT dstPixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(width, dstPixelSize); + const UINT uploadBufferSize = rowPitch; // Only 1 row + + CD3DX12_HEAP_PROPERTIES uploadHeapProps(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(device->CreateCommittedResource( + &uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadBuffer))); + + // Map and expand source data (numChannels floats/pixel) to RGBA32F (4 floats/pixel) + BYTE* pData = nullptr; + ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + float* dst = reinterpret_cast(pData); + for (UINT i = 0; i < width; ++i) + { + dst[i * 4 + 0] = (numChannels > 0) ? values[i * numChannels + 0] : 0.0f; + dst[i * 4 + 1] = (numChannels > 1) ? values[i * numChannels + 1] : 0.0f; + dst[i * 4 + 2] = (numChannels > 2) ? values[i * numChannels + 2] : 0.0f; + dst[i * 4 + 3] = 1.0f; + } + uploadBuffer->Unmap(0, nullptr); + + // Copy from upload buffer to texture + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = width; + footprint.Footprint.Height = 1; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = uploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = texture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to shader resource state + auto barrier1D = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier1D); +} + +// Upload a TEXTURE_2D (folded) 1D LUT as a 2D texture of width x height with RGBA32F format. +void AllocateTexture2D( ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + UINT width, + UINT height, + UINT numChannels, + const float* values, + Microsoft::WRL::ComPtr& texture, + Microsoft::WRL::ComPtr& uploadBuffer) +{ + if (!values) + { + throw Exception("Missing 2D LUT texture data"); + } + + // Create a 2D texture resource in default heap. + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = width; + textureDesc.Height = height; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps2D(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(device->CreateCommittedResource( + &defaultHeapProps2D, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + // Create upload buffer: each row is padded to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT + const UINT dstPixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(width, dstPixelSize); + const UINT uploadBufferSize = rowPitch * height; + + CD3DX12_HEAP_PROPERTIES uploadHeapProps2D(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc2D = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(device->CreateCommittedResource( + &uploadHeapProps2D, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc2D, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadBuffer))); + + // Map and expand all rows: source is row-major (width * numChannels floats per row) + BYTE* pData = nullptr; + ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + for (UINT y = 0; y < height; ++y) + { + const float* srcRow = values + y * width * numChannels; + float* dstRow = reinterpret_cast(pData + y * rowPitch); + for (UINT x = 0; x < width; ++x) + { + dstRow[x * 4 + 0] = (numChannels > 0) ? srcRow[x * numChannels + 0] : 0.0f; + dstRow[x * 4 + 1] = (numChannels > 1) ? srcRow[x * numChannels + 1] : 0.0f; + dstRow[x * 4 + 2] = (numChannels > 2) ? srcRow[x * numChannels + 2] : 0.0f; + dstRow[x * 4 + 3] = 1.0f; + } + } + uploadBuffer->Unmap(0, nullptr); + + // Copy from upload buffer to texture + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = width; + footprint.Footprint.Height = height; + footprint.Footprint.Depth = 1; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = uploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = texture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to shader resource state + auto barrier2D = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier2D); +} + +// Upload a 3D LUT as a 3D texture with RGBA32F format. +void AllocateTexture3D(ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + UINT edgelen, + const float* values, + Microsoft::WRL::ComPtr& texture, + Microsoft::WRL::ComPtr& uploadBuffer) +{ + if (!values) + { + throw Exception("Missing 3D LUT texture data"); + } + + // Create 3D texture in default heap + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + textureDesc.Width = edgelen; + textureDesc.Height = edgelen; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = edgelen; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + + CD3DX12_HEAP_PROPERTIES defaultHeapProps3D(D3D12_HEAP_TYPE_DEFAULT); + ThrowIfFailed(device->CreateCommittedResource( + &defaultHeapProps3D, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&texture))); + + // Create upload buffer with row-pitch alignment + // Source data is always RGB (3 floats/voxel) for OCIO 3D LUTs. + const UINT srcNumChannels = 3; // OCIO 3D LUTs always provide RGB data + const UINT dstPixelSize = 4 * sizeof(float); // RGBA32F + const UINT rowPitch = AlignRowPitch(edgelen, dstPixelSize); + const UINT slicePitch = rowPitch * edgelen; + const UINT uploadBufferSize = slicePitch * edgelen; + + CD3DX12_HEAP_PROPERTIES uploadHeapProps3D(D3D12_HEAP_TYPE_UPLOAD); + CD3DX12_RESOURCE_DESC uploadBufferDesc3D = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + ThrowIfFailed(device->CreateCommittedResource( + &uploadHeapProps3D, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc3D, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadBuffer))); + + // Map RGB source data and expand to RGBA32F + BYTE* pData = nullptr; + ThrowIfFailed(uploadBuffer->Map(0, nullptr, reinterpret_cast(&pData))); + + for (UINT z = 0; z < edgelen; ++z) + { + for (UINT y = 0; y < edgelen; ++y) + { + const float* srcRow = values + (z * edgelen * edgelen + y * edgelen) * srcNumChannels; + float* dstRow = reinterpret_cast(pData + z * slicePitch + y * rowPitch); + for (UINT x = 0; x < edgelen; ++x) + { + dstRow[x * 4 + 0] = srcRow[x * srcNumChannels + 0]; // R + dstRow[x * 4 + 1] = srcRow[x * srcNumChannels + 1]; // G + dstRow[x * 4 + 2] = srcRow[x * srcNumChannels + 2]; // B + dstRow[x * 4 + 3] = 1.0f; // A + } + } + } + + uploadBuffer->Unmap(0, nullptr); + + // Copy from upload buffer to texture + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Footprint.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + footprint.Footprint.Width = edgelen; + footprint.Footprint.Height = edgelen; + footprint.Footprint.Depth = edgelen; + footprint.Footprint.RowPitch = rowPitch; + + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = uploadBuffer.Get(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = texture.Get(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + commandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition to shader resource state + auto barrier3D = CD3DX12_RESOURCE_BARRIER::Transition( + texture.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier3D); +} + +} // anonymous namespace + + +////////////////////////////////////////////////////////// + +HLSLBuilderRcPtr HLSLBuilder::Create(const GpuShaderDescRcPtr& shaderDesc, + ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + ID3D12DescriptorHeap* cbvSrvHeap) +{ + if (!shaderDesc) + { + throw Exception("HLSLBuilder: shaderDesc is null"); + } + if (!device) + { + throw Exception("HLSLBuilder: device is null"); + } + if (!commandList) + { + throw Exception("HLSLBuilder: commandList is null"); + } + if (!cbvSrvHeap) + { + throw Exception("HLSLBuilder: cbvSrvHeap is null"); + } + + HLSLBuilderRcPtr builder(new HLSLBuilder(shaderDesc, device, commandList, cbvSrvHeap)); + builder->allocateAllTextures(); + + // Close the command list; the caller executes and fence-waits. + ThrowIfFailed(commandList->Close()); + + return builder; +} + +HLSLBuilder::HLSLBuilder(const GpuShaderDescRcPtr& shaderDesc, + ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + ID3D12DescriptorHeap* cbvSrvHeap) + : m_shaderDesc(shaderDesc) + , m_device(device) + , m_commandList(commandList) + , m_cbvSrvHeap(cbvSrvHeap) + , m_srvDescriptorSize(0) + , m_verbose(false) +{ + m_srvDescriptorSize = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); +} + +HLSLBuilder::~HLSLBuilder() +{ + deleteAllTextures(); +} + +void HLSLBuilder::allocateAllTextures() +{ + deleteAllTextures(); + + // Process 3D LUTs + const unsigned maxTexture3D = m_shaderDesc->getNum3DTextures(); + for (unsigned idx = 0; idx < maxTexture3D; ++idx) + { + // Get 3D texture information + const char* textureName = nullptr; + const char* samplerName = nullptr; + unsigned edgelen = 0; + Interpolation interpolation = INTERP_LINEAR; + m_shaderDesc->get3DTexture(idx, textureName, samplerName, edgelen, interpolation); + + if (!textureName || !*textureName || !samplerName || !*samplerName || edgelen == 0) + { + throw Exception("HLSLBuilder: 3D texture data is corrupted"); + } + + const float* values = nullptr; + m_shaderDesc->get3DTextureValues(idx, values); + if (!values) + { + throw Exception("HLSLBuilder: 3D texture values are missing"); + } + + // Allocate the 3D texture + TextureResource texRes(textureName, samplerName); + AllocateTexture3D(m_device, m_commandList, edgelen, values, texRes.m_texture, texRes.m_uploadBuffer); + + // Create SRV at the slot matching the HLSL register assignment + const UINT heapSlot = m_shaderDesc->get3DTextureShaderBindingIndex(idx); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srvDesc.Texture3D.MipLevels = 1; + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle( + m_cbvSrvHeap->GetCPUDescriptorHandleForHeapStart(), + heapSlot, + m_srvDescriptorSize); + + m_device->CreateShaderResourceView(texRes.m_texture.Get(), &srvDesc, srvHandle); + + m_textures.push_back(texRes); + } + + // Process 1D LUTs (stored as 2D textures with height=1) + const unsigned maxTexture1D = m_shaderDesc->getNumTextures(); + for (unsigned idx = 0; idx < maxTexture1D; ++idx) + { + // Get 1D texture information + const char* textureName = nullptr; + const char* samplerName = nullptr; + unsigned width = 0; + unsigned height = 0; + GpuShaderDesc::TextureType channel = GpuShaderDesc::TEXTURE_RGB_CHANNEL; + Interpolation interpolation = INTERP_LINEAR; + GpuShaderDesc::TextureDimensions dimensions = GpuShaderDesc::TEXTURE_1D; + m_shaderDesc->getTexture(idx, textureName, samplerName, width, height, channel, dimensions, interpolation); + + if (!textureName || !*textureName || !samplerName || !*samplerName || width == 0) + { + throw Exception("HLSLBuilder: 1D texture data is corrupted"); + } + + const float* values = nullptr; + m_shaderDesc->getTextureValues(idx, values); + if (!values) + { + throw Exception("HLSLBuilder: 1D texture values are missing"); + } + + // Determine source channel count: RED=1, RGB=3 + const UINT numChannels = (channel == GpuShaderDesc::TEXTURE_RED_CHANNEL) ? 1u : 3u; + TextureResource texRes(textureName, samplerName); + + // Create SRV at the slot matching the HLSL register assignment + const UINT heapSlot = m_shaderDesc->getTextureShaderBindingIndex(idx); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + + if (dimensions == GpuShaderDesc::TEXTURE_1D) + { + AllocateTexture1D(m_device, m_commandList, width, numChannels, values, + texRes.m_texture, texRes.m_uploadBuffer); + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srvDesc.Texture1D.MipLevels = 1; + } + else + { + AllocateTexture2D(m_device, m_commandList, width, height, numChannels, values, + texRes.m_texture, texRes.m_uploadBuffer); + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + } + + CD3DX12_CPU_DESCRIPTOR_HANDLE srvHandle( + m_cbvSrvHeap->GetCPUDescriptorHandleForHeapStart(), + heapSlot, + m_srvDescriptorSize); + + m_device->CreateShaderResourceView(texRes.m_texture.Get(), &srvDesc, srvHandle); + + m_textures.push_back(texRes); + } +} + +void HLSLBuilder::deleteAllTextures() +{ + m_textures.clear(); +} + +std::string HLSLBuilder::getShaderText() const +{ + if (!m_shaderDesc) + { + return ""; + } + + const char* shaderText = m_shaderDesc->getShaderText(); + + if (m_verbose && shaderText && *shaderText) + { + std::cout << "\nOCIO HLSL Shader:\n\n" << shaderText << std::endl; + } + + return shaderText ? std::string(shaderText) : std::string(); +} + +UINT HLSLBuilder::getNumSRVs() const +{ + return static_cast(m_textures.size()); +} + +} // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/hlsl.h b/src/libutils/oglapphelpers/hlsl.h new file mode 100644 index 0000000000..a0338de3f9 --- /dev/null +++ b/src/libutils/oglapphelpers/hlsl.h @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright Contributors to the OpenColorIO Project. + +#ifndef INCLUDED_OCIO_HLSL_H +#define INCLUDED_OCIO_HLSL_H + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include + +#include +#include + +#include + +namespace OCIO_NAMESPACE +{ + +class HLSLBuilder; +typedef OCIO_SHARED_PTR HLSLBuilderRcPtr; + +// This is a DirectX implementation showing how to do texture upload & allocation +// for HLSL shaders, mirroring the role of OpenGLBuilder. + +class HLSLBuilder +{ +public: + // Create an HLSL builder using GPU shader information from a specific processor. + // Allocates D3D12 textures for all LUTs using binding indices from the shaderDesc. + // All GPU uploads are fence-synchronized before returning. + static HLSLBuilderRcPtr Create(const GpuShaderDescRcPtr& shaderDesc, + ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + ID3D12DescriptorHeap* cbvSrvHeap); + + ~HLSLBuilder(); + + inline void setVerbose(bool verbose) { m_verbose = verbose; } + inline bool isVerbose() const { return m_verbose; } + + // Get the OCIO-generated HLSL shader text. + std::string getShaderText() const; + + // Get the number of SRVs (texture slots) allocated for LUTs. + // This does not include the image texture at slot 0. + UINT getNumSRVs() const; + +protected: + HLSLBuilder(const GpuShaderDescRcPtr& shaderDesc, + ID3D12Device* device, + ID3D12GraphicsCommandList* commandList, + ID3D12DescriptorHeap* cbvSrvHeap); + + void allocateAllTextures(); + void deleteAllTextures(); + +private: + HLSLBuilder() = delete; + HLSLBuilder(const HLSLBuilder&) = delete; + HLSLBuilder& operator=(const HLSLBuilder&) = delete; + + struct TextureResource + { + Microsoft::WRL::ComPtr m_texture; + Microsoft::WRL::ComPtr m_uploadBuffer; + std::string m_textureName; + std::string m_samplerName; + + TextureResource(const std::string& textureName, + const std::string& samplerName) + : m_textureName(textureName) + , m_samplerName(samplerName) + {} + }; + + typedef std::vector TextureResources; + + const GpuShaderDescRcPtr m_shaderDesc; // Description of the shader + ID3D12Device* m_device; // D3D12 device (not owned) + ID3D12GraphicsCommandList* m_commandList; // Command list for uploads (not owned) + ID3D12DescriptorHeap* m_cbvSrvHeap; // Descriptor heap for SRVs (not owned) + UINT m_srvDescriptorSize; // Size of one SRV descriptor + TextureResources m_textures; // All allocated texture resources + bool m_verbose; // Print shader text for debugging +}; + +} // namespace OCIO_NAMESPACE + +#endif // INCLUDED_OCIO_HLSL_H diff --git a/src/libutils/oglapphelpers/metalapp.h b/src/libutils/oglapphelpers/metalapp.h index 37fba14018..cb8c7acc42 100644 --- a/src/libutils/oglapphelpers/metalapp.h +++ b/src/libutils/oglapphelpers/metalapp.h @@ -24,7 +24,7 @@ typedef OCIO_SHARED_PTR MetalAppRcPtr; class MtlTexture; typedef OCIO_SHARED_PTR MtlTextureRcPtr; -class MetalApp : public ScreenApp +class MetalApp : public ScreenOglApp { public: MetalApp() = delete; @@ -53,7 +53,7 @@ class MetalApp : public ScreenApp // Process the image. void redisplay() override; - // Return a pointer of either ScreenApp or HeadlessApp depending on the + // Return a pointer of either ScreenOglApp or HeadlessOglApp depending on the // OCIO_HEADLESS_ENABLED preprocessor. static MetalAppRcPtr CreateMetalGlApp(const char * winTitle, int winWidth, int winHeight); diff --git a/src/libutils/oglapphelpers/metalapp.mm b/src/libutils/oglapphelpers/metalapp.mm index be46d4e1b6..4114108848 100644 --- a/src/libutils/oglapphelpers/metalapp.mm +++ b/src/libutils/oglapphelpers/metalapp.mm @@ -43,7 +43,7 @@ }; MetalApp::MetalApp(const char * winTitle, int winWidth, int winHeight) - : ScreenApp(winTitle, winWidth, winHeight) + : ScreenOglApp(winTitle, winWidth, winHeight) { initContext(); } @@ -346,7 +346,7 @@ vertex VertexOut ColorCorrectionVS(unsigned int vId [[ vertex_id ]]) throw Exception("Metal renderer can only consume MSL shaders"); } - if(printShader()) + if(isShaderVerbose()) { std::cout << std::endl; std::cout << "GPU Shader Program:" << std::endl; @@ -380,7 +380,7 @@ vertex VertexOut ColorCorrectionVS(unsigned int vId [[ vertex_id ]]) prepareAndBindOpenGLState(); } - ScreenApp::redisplay(); + ScreenOglApp::redisplay(); } MetalAppRcPtr MetalApp::CreateMetalGlApp(const char * winTitle, int winWidth, int winHeight) diff --git a/src/libutils/oglapphelpers/oglapp.cpp b/src/libutils/oglapphelpers/oglapp.cpp index 32528a55cc..7205cc2b75 100644 --- a/src/libutils/oglapphelpers/oglapp.cpp +++ b/src/libutils/oglapphelpers/oglapp.cpp @@ -12,10 +12,17 @@ #include #elif _WIN32 - #include -#include - +#ifndef NDEBUG + // freeglut's header uses a #pragma comment(lib) that links freeglutd.lib in + // debug builds, which we don't ship. Temporarily define NDEBUG so the header + // selects freeglut.lib instead, then restore the original state. + #define NDEBUG + #include + #undef NDEBUG +#else + #include +#endif // !NDEBUG #else #include @@ -33,6 +40,15 @@ namespace OCIO_NAMESPACE { +GraphicalAppRcPtr OglApp::CreateApp(const char* winTitle, int winWidth, int winHeight) +{ +#ifdef OCIO_HEADLESS_ENABLED + return std::make_shared(winTitle, winWidth, winHeight); +#else + return std::make_shared(winTitle, winWidth, winHeight); +#endif +} + OglApp::OglApp(int winWidth, int winHeight) : m_viewportWidth(winWidth) , m_viewportHeight(winHeight) @@ -113,7 +129,7 @@ void OglApp::redisplay() pts[3] = (float)m_viewportHeight * 0.5f + imgHeightScreenSpace * 0.5f; } - if (m_yMirror) + if (isYMirror()) { std::swap(pts[1], pts[3]); } @@ -162,7 +178,7 @@ void OglApp::reshape(int width, int height) glLoadIdentity(); } -void OglApp::createGLBuffers() +void OglApp::createBuffers() { // Create a framebuffer object, you need to delete them when program exits. GLuint fboId; @@ -197,7 +213,7 @@ void OglApp::setShader(GpuShaderDescRcPtr & shaderDesc) { // Create oglBuilder using the shaderDesc. m_oglBuilder = OpenGLBuilder::Create(shaderDesc); - m_oglBuilder->setVerbose(m_printShader); + m_oglBuilder->setVerbose(isShaderVerbose()); // Allocate & upload all the LUTs in a dedicated GPU texture. // Note: The start index for the texture indices is 1 as one texture @@ -227,7 +243,7 @@ void OglApp::setShader(GpuShaderDescRcPtr & shaderDesc) m_oglBuilder->useAllUniforms(); } -void OglApp::printGLInfo() const noexcept +void OglApp::printGraphicsInfo() const noexcept { std::cout << std::endl << "GL Vendor: " << glGetString(GL_VENDOR) << std::endl @@ -262,16 +278,7 @@ void OglApp::setupCommon() glEnable(GL_TEXTURE_2D); } -OglAppRcPtr OglApp::CreateOglApp(const char * winTitle, int winWidth, int winHeight) -{ -#ifdef OCIO_HEADLESS_ENABLED - return std::make_shared(winTitle, winWidth, winHeight); -#else - return std::make_shared(winTitle, winWidth, winHeight); -#endif -} - -ScreenApp::ScreenApp(const char * winTitle, int winWidth, int winHeight): +ScreenOglApp::ScreenOglApp(const char * winTitle, int winWidth, int winHeight): OglApp(winWidth, winHeight) { int argc = 2; @@ -288,25 +295,25 @@ ScreenApp::ScreenApp(const char * winTitle, int winWidth, int winHeight): setupCommon(); } -ScreenApp::~ScreenApp() +ScreenOglApp::~ScreenOglApp() { glutDestroyWindow(m_mainWin); } -void ScreenApp::redisplay() +void ScreenOglApp::redisplay() { OglApp::redisplay(); glutSwapBuffers(); } -void ScreenApp::printGLInfo() const noexcept +void ScreenOglApp::printGraphicsInfo() const noexcept { - OglApp::printGLInfo(); + OglApp::printGraphicsInfo(); } #ifdef OCIO_HEADLESS_ENABLED -HeadlessApp::HeadlessApp(const char * /* winTitle */, int bufWidth, int bufHeight) +HeadlessOglApp::HeadlessOglApp(const char * /* winTitle */, int bufWidth, int bufHeight) : OglApp(bufWidth, bufHeight) , m_pixBufferWidth(bufWidth) , m_pixBufferHeight(bufHeight) @@ -361,27 +368,27 @@ HeadlessApp::HeadlessApp(const char * /* winTitle */, int bufWidth, int bufHeigh setupCommon(); } -HeadlessApp::~HeadlessApp() +HeadlessOglApp::~HeadlessOglApp() { eglTerminate(m_eglDisplay); } -void HeadlessApp::printGLInfo() const noexcept +void HeadlessOglApp::printGraphicsInfo() const noexcept { - OglApp::printGLInfo(); + GraphicalApp::printGraphicsInfo(); printEGLInfo(); } -void HeadlessApp::printEGLInfo() const noexcept +void HeadlessOglApp::printEGLInfo() const noexcept { std::cout << std::endl << "EGL Vendor: " << eglQueryString(m_eglDisplay, EGL_VENDOR) << std::endl << "EGL Version: " << eglQueryString(m_eglDisplay, EGL_VERSION) << std::endl; } -void HeadlessApp::redisplay() +void HeadlessOglApp::redisplay() { - OglApp::redisplay(); + GraphicalApp::redisplay(); eglSwapBuffers(m_eglDisplay, m_eglSurface); } diff --git a/src/libutils/oglapphelpers/oglapp.h b/src/libutils/oglapphelpers/oglapp.h index 34b2f7d071..51de4c2281 100644 --- a/src/libutils/oglapphelpers/oglapp.h +++ b/src/libutils/oglapphelpers/oglapp.h @@ -9,29 +9,30 @@ #include #include "glsl.h" +#include "graphicalapp.h" namespace OCIO_NAMESPACE { // Here is some sample code to demonstrate how to use this in a simple app that wants to process // using the GPU and OpenGL. Processed image is expected to have the same size as the input image. -// For an interactive application, OglApp can be used, but other OGL code is required. +// For an interactive application, GraphicalApp can be used, but other OGL code is required. // // See ociodisplay for an example of an interactive app that displays an image in the UI and // ocioconvert and ociochecklut for examples of non-interactive apps that just process values with // the GPU. /* -// Create and initialize OglAppRcPtr by creating a shared pointer to ScreenApp. You have to -// specify the name of the window and its size. OglAppRcPtr that points to HeadlessApp object +// Create and initialize OglAppRcPtr by creating a shared pointer to ScreenOglApp. You have to +// specify the name of the window and its size. OglAppRcPtr that points to HeadlessOglApp object // can be created and used in the same way. -OglAppRcPtr scrApp = std::make_shared("Window Name", windowWidth, windowHeight); +OglAppRcPtr scrApp = std::make_shared("Window Name", windowWidth, windowHeight); float * imageBuffer = GetImageBuffer(); int imageWidth = GetImageWidth(); int imageHeight = GetImageHeight(); -scrApp->initImage(imagewidth, imageheight, OglApp::COMPONENTS_RGB, imageBuffer); -scrApp->createGLBuffers(); +scrApp->initImage(imagewidth, imageheight, GraphicalApp::COMPONENTS_RGB, imageBuffer); +scrApp->createBuffers(); // Set (or change) shader. GpuShaderDescRcPtr shader = GpuShaderDesc::CreateShaderDesc(); @@ -51,73 +52,49 @@ scrApp->readImage(imageBufferOut.data()); */ -// Forward declaration of OglApp. -class OglApp; -typedef OCIO_SHARED_PTR OglAppRcPtr; - -class OglApp +class OglApp : public GraphicalApp { public: OglApp() = delete; - OglApp(const OglApp &) = delete; - OglApp & operator=(const OglApp &) = delete; + OglApp(const OglApp&) = delete; + OglApp& operator=(const OglApp&) = delete; // Initialize the app with given window name & client rect size. OglApp(int winWidth, int winHeight); virtual ~OglApp(); - // When displaying the processed image in a window this needs to be done. - // In that case, when image is read, the result will be mirrored on Y. - void setYMirror() - { - m_yMirror = true; - } - - // Shader code will be printed when generated. - void setPrintShader(bool print) - { - m_printShader = print; - } - - enum Components - { - COMPONENTS_RGB = 0, - COMPONENTS_RGBA - }; - - // Initialize the image. virtual void initImage(int imageWidth, int imageHeight, - Components comp, const float * imageBuffer); - // Update the image if it changes. - virtual void updateImage(const float * imageBuffer); + Components comp, const float* imageBuffer) override; + + virtual void updateImage(const float* imageBuffer) override; - // Create GL frame and rendering buffers. Needed if readImage will be used. - void createGLBuffers(); + // Create frame and rendering buffers. Needed if readImage will be used. + void createBuffers() override; // Set the shader code. - virtual void setShader(GpuShaderDescRcPtr & shaderDesc); + virtual void setShader(GpuShaderDescRcPtr& shaderDesc) override; // Update the size of the buffer of the OpenGL viewport that will be used to process the image // (it does not modify the UI). To be called at least one time. Use image size if we want to // read back the processed image. To process another image with the same size or using a // different shader, reshape does not need to be called again. In case of an interactive // application it should be called by the glutReshapeFunc callback using the windows size. - void reshape(int width, int height); + void reshape(int width, int height) override; // Process the image. - void virtual redisplay(); + void virtual redisplay() override; // Read the image from the rendering buffer. It is not meant to be used by interactive // applications used to display the image. - virtual void readImage(float * imageBuffer); + virtual void readImage(float* imageBuffer) override; - // Helper to print GL info. - void virtual printGLInfo() const noexcept; + // Helper to print graphics info. + void virtual printGraphicsInfo() const noexcept override; - // Return a pointer of either ScreenApp or HeadlessApp depending on the + // Return a pointer of either ScreenOglApp or HeadlessOglApp depending on the // OCIO_HEADLESS_ENABLED preprocessor. - static OglAppRcPtr CreateOglApp(const char * winTitle, int winWidth, int winHeight); + static GraphicalAppRcPtr CreateApp(const char* winTitle, int winWidth, int winHeight); protected: // Window or output image size (set using reshape). @@ -133,8 +110,6 @@ class OglApp void setImageDimensions(int imgWidth, int imgHeight, Components comp); Components getImageComponents() const { return m_components; } - - bool printShader() const { return m_printShader; } OpenGLBuilderRcPtr m_oglBuilder; @@ -142,32 +117,27 @@ class OglApp // Keep track of the original image ratio. float m_imageAspect{ 1.0f }; - // For interactive application displaying the processed image, this needs to be true. - bool m_yMirror{ false }; - - // Will shader code be outputed when setShader is called. - bool m_printShader{ false }; - // Image information. int m_imageWidth{ 0 }; int m_imageHeight{ 0 }; Components m_components{ COMPONENTS_RGBA }; + unsigned int m_imageTexID; }; -class ScreenApp: public OglApp +class ScreenOglApp: public OglApp { public: - ScreenApp() = delete; - ScreenApp(const ScreenApp &) = delete; - ScreenApp & operator=(const ScreenApp &) = delete; + ScreenOglApp() = delete; + ScreenOglApp(const ScreenOglApp &) = delete; + ScreenOglApp & operator=(const ScreenOglApp &) = delete; - ScreenApp(const char * winTitle, int winWidth, int winHeight); + ScreenOglApp(const char * winTitle, int winWidth, int winHeight); - ~ScreenApp(); + ~ScreenOglApp(); void redisplay() override; - void printGLInfo() const noexcept override; + void printGraphicsInfo() const noexcept override; private: // Window identifier returned by glutCreateWindow. @@ -178,16 +148,16 @@ class ScreenApp: public OglApp #include -class HeadlessApp: public OglApp +class HeadlessOglApp: public OglApp { public: - HeadlessApp() = delete; + HeadlessOglApp() = delete; - HeadlessApp(const char * winTitle, int bufWidth, int bufHeight); + HeadlessOglApp(const char * winTitle, int bufWidth, int bufHeight); - ~HeadlessApp(); + ~HeadlessOglApp(); - void printGLInfo() const noexcept override; + void printGraphicsInfo() const noexcept override; void redisplay() override; protected: diff --git a/src/libutils/oglapphelpers/vulkanapp.cpp b/src/libutils/oglapphelpers/vulkanapp.cpp index ef4c4230a7..91372c828c 100644 --- a/src/libutils/oglapphelpers/vulkanapp.cpp +++ b/src/libutils/oglapphelpers/vulkanapp.cpp @@ -344,11 +344,11 @@ void VulkanApp::initImage(int imageWidth, int imageHeight, Components comp, cons m_imageHeight = imageHeight; m_components = comp; - createBuffers(); + createVulkanBuffers(); updateImage(imageBuffer); } -void VulkanApp::createBuffers() +void VulkanApp::createVulkanBuffers() { const int numComponents = (m_components == COMPONENTS_RGB) ? 3 : 4; const VkDeviceSize bufferSize = m_imageWidth * m_imageHeight * numComponents * sizeof(float); @@ -399,7 +399,7 @@ void VulkanApp::setShader(GpuShaderDescRcPtr & shaderDesc) m_vulkanBuilder->allocateAllTextures(shaderDesc); m_vulkanBuilder->buildShader(shaderDesc); - if (m_printShader) + if (isShaderVerbose()) { std::cout << "Vulkan Compute Shader:\n" << m_vulkanBuilder->getShaderSource() << std::endl; } diff --git a/src/libutils/oglapphelpers/vulkanapp.h b/src/libutils/oglapphelpers/vulkanapp.h index 07689a951a..caa879d92e 100644 --- a/src/libutils/oglapphelpers/vulkanapp.h +++ b/src/libutils/oglapphelpers/vulkanapp.h @@ -15,6 +15,8 @@ #include +#include "graphicalapp.h" + namespace OCIO_NAMESPACE { @@ -26,7 +28,7 @@ typedef OCIO_SHARED_PTR VulkanAppRcPtr; // VulkanApp provides headless Vulkan rendering for GPU unit testing. // This class is designed to process images using OCIO GPU shaders via Vulkan compute pipelines. -class VulkanApp +class VulkanApp : public GraphicalApp { public: VulkanApp() = delete; @@ -38,39 +40,36 @@ class VulkanApp virtual ~VulkanApp(); - enum Components - { - COMPONENTS_RGB = 0, - COMPONENTS_RGBA - }; - // Initialize the image buffer. - void initImage(int imageWidth, int imageHeight, Components comp, const float * imageBuffer); + void initImage(int imageWidth, int imageHeight, Components comp, const float * imageBuffer) override; // Update the image if it changes. - void updateImage(const float * imageBuffer); + void updateImage(const float * imageBuffer) override; + + // No-op: Vulkan buffers are created during initImage(). + void createBuffers() override {} // Set the shader code from OCIO GpuShaderDesc. - void setShader(GpuShaderDescRcPtr & shaderDesc); + void setShader(GpuShaderDescRcPtr & shaderDesc) override; // Update the size of the buffer used to process the image. - void reshape(int width, int height); + void reshape(int width, int height) override; // Process the image using the Vulkan compute pipeline. - void redisplay(); + void redisplay() override; // Read the processed image from the GPU buffer. - void readImage(float * imageBuffer); + void readImage(float * imageBuffer) override; // Print Vulkan device and instance info. void printVulkanInfo() const noexcept; + // Implements GraphicalApp::printGraphicsInfo(). + void printGraphicsInfo() const noexcept override { printVulkanInfo(); } + // Factory method to create a VulkanApp instance. static VulkanAppRcPtr CreateVulkanApp(int bufWidth, int bufHeight); - // Shader code will be printed when generated. - void setPrintShader(bool print) { m_printShader = print; } - protected: // Initialize Vulkan instance, device, and queues. void initVulkan(); @@ -78,8 +77,8 @@ class VulkanApp // Create Vulkan compute pipeline for shader processing. void createComputePipeline(); - // Create buffers for image data. - void createBuffers(); + // Create Vulkan buffers for image data (called internally from initImage). + void createVulkanBuffers(); // Clean up Vulkan resources. void cleanup(); @@ -130,13 +129,12 @@ class VulkanApp int m_imageHeight{ 0 }; int m_bufferWidth{ 0 }; int m_bufferHeight{ 0 }; - Components m_components{ COMPONENTS_RGBA }; + Components m_components{ GraphicalApp::COMPONENTS_RGBA }; // Shader builder VulkanBuilderRcPtr m_vulkanBuilder; // Debug and configuration - bool m_printShader{ false }; bool m_initialized{ false }; // Validation layers (debug builds) diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 5f1c0379cf..b39a1d87aa 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -1,8 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright Contributors to the OpenColorIO Project. -if(NOT OCIO_GL_ENABLED) - message(WARNING "GL component missing. Skipping the GPU unit tests.") +if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED)) + message(WARNING "GL or DirectX component missing. Skipping the GPU unit tests.") return() endif() @@ -41,10 +41,55 @@ target_link_libraries(test_gpu_exec testutils ) -add_test(NAME test_gpu COMMAND test_gpu_exec) +if(OCIO_GL_ENABLED) + add_test(NAME test_gpu COMMAND test_gpu_exec) +endif() if(APPLE) add_test(NAME test_metal COMMAND test_gpu_exec -metal) endif() +if(WIN32 AND OCIO_DIRECTX_ENABLED) + add_test(NAME test_dx COMMAND test_gpu_exec --dx) +endif() + +# Copy dxcompiler.dll and dxil.dll to the test output directory. +# These are required at runtime when DXC (IDxcCompiler3) is used for SM6.0 shader compilation. +# The Redist/D3D path is the stable, version-independent redistribution location. +if(WIN32 AND OCIO_DIRECTX_ENABLED) + find_file(DXCOMPILER_DLL + NAMES dxcompiler.dll + PATHS + # Note: x64 hardcoded; update if ARM64 Windows support is needed. + "$ENV{WindowsSdkDir}Redist/D3D/x64" + "C:/Program Files (x86)/Windows Kits/10/Redist/D3D/x64" + NO_DEFAULT_PATH + DOC "Path to dxcompiler.dll from Windows SDK" + ) + if(DXCOMPILER_DLL) + get_filename_component(_dxc_dll_dir "${DXCOMPILER_DLL}" DIRECTORY) + find_file(DXIL_DLL + NAMES dxil.dll + HINTS "${_dxc_dll_dir}" + NO_DEFAULT_PATH + ) + add_custom_command(TARGET test_gpu_exec POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXCOMPILER_DLL}" "$" + COMMENT "Copying dxcompiler.dll to test output directory" + ) + if(DXIL_DLL) + add_custom_command(TARGET test_gpu_exec POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL}" "$" + COMMENT "Copying dxil.dll to test output directory" + ) + endif() + else() + message(STATUS + "dxcompiler.dll not found in Windows SDK Redist/D3D. " + "Add its directory to PATH before running test_dx." + ) + endif() +endif() # Note: To avoid changing PATH from outside the cmake files. if(MSVC AND BUILD_SHARED_LIBS) @@ -58,6 +103,11 @@ if(MSVC AND BUILD_SHARED_LIBS) set(NEW_PATH "${NEW_PATH}\\\;${GLUT_INCLUDE_DIR}/../bin") set(NEW_PATH "${NEW_PATH}\\\;${GLEW_INCLUDE_DIRS}/../bin") - set_tests_properties(test_gpu PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + if(OCIO_GL_ENABLED) + set_tests_properties(test_gpu PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + endif() + if(WIN32 AND OCIO_DIRECTX_ENABLED) + set_tests_properties(test_dx PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + endif() endif() diff --git a/tests/gpu/FixedFunctionOp_test.cpp b/tests/gpu/FixedFunctionOp_test.cpp index 1daca3a77a..ce42bd51ee 100644 --- a/tests/gpu/FixedFunctionOp_test.cpp +++ b/tests/gpu/FixedFunctionOp_test.cpp @@ -303,6 +303,7 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces_gamutcomp13_inv) }; test.setCustomValues(values); + // 3e-5 accommodates GPU pow() precision at large output values (~3.08) on DirectX. test.setErrorThreshold(3e-5f); } @@ -618,8 +619,8 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_rndtrip) // TODO: Investigate why this is not closer. // Setting the CPUProcessor to OPTIMIZATION_NONE helps slightly, but is not the main - // cause of the error. - test.setErrorThreshold(0.012f); + // cause of the error. 0.014 accommodates DirectX floating point differences. + test.setErrorThreshold(0.014f); } OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_1000nit_p3_inv) @@ -692,8 +693,8 @@ OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_rndtrip) GenerateIdentityLut3D(values, lut_size, lum_scale); test.setCustomValues(values); - // TODO: Investigate why this is not closer. - test.setErrorThreshold(0.032f); + // TODO: Investigate why this is not closer. 0.034 accommodates DirectX floating point differences. + test.setErrorThreshold(0.034f); } OCIO_ADD_GPU_TEST(FixedFunction, style_aces2_4000nit_rec2020_inv) diff --git a/tests/gpu/GPUUnitTest.cpp b/tests/gpu/GPUUnitTest.cpp index 252ba43b55..451cecd12a 100644 --- a/tests/gpu/GPUUnitTest.cpp +++ b/tests/gpu/GPUUnitTest.cpp @@ -15,7 +15,16 @@ #include "apputils/argparse.h" #include "utils/StringUtils.h" +#include "graphicalapp.h" + +#ifdef OCIO_GL_ENABLED #include "oglapp.h" +#endif + +#ifdef OCIO_DIRECTX_ENABLED +#include "dxapp.h" +#endif + #if __APPLE__ #include "metalapp.h" #endif @@ -207,23 +216,13 @@ namespace constexpr unsigned g_winHeight = 256; constexpr unsigned g_components = 4; - void AllocateImageTexture(OCIO::OglAppRcPtr & app) - { - const unsigned numEntries = g_winWidth * g_winHeight * g_components; - OCIOGPUTest::CustomValues::Values image(numEntries, 0.0f); - - app->initImage(g_winWidth, g_winHeight, OCIO::OglApp::COMPONENTS_RGBA, &image[0]); - } - -#ifdef OCIO_VULKAN_ENABLED - void AllocateImageTexture(OCIO::VulkanAppRcPtr & app) + void AllocateImageTexture(OCIO::GraphicalAppRcPtr& app) { const unsigned numEntries = g_winWidth * g_winHeight * g_components; OCIOGPUTest::CustomValues::Values image(numEntries, 0.0f); - app->initImage(g_winWidth, g_winHeight, OCIO::VulkanApp::COMPONENTS_RGBA, &image[0]); + app->initImage(g_winWidth, g_winHeight, OCIO::GraphicalApp::COMPONENTS_RGBA, &image[0]); } -#endif void SetTestValue(float * image, float val, unsigned numComponents) { @@ -342,7 +341,7 @@ namespace return &values.m_inputValues[0]; } - void UpdateImageTexture(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) + void UpdateImageTexture(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { #if __APPLE__ && __aarch64__ // The Apple M1 chip handles differently the Nan and Inf processing introducing @@ -358,27 +357,9 @@ namespace app->updateImage(inputValues); } -#ifdef OCIO_VULKAN_ENABLED - void UpdateImageTexture(OCIO::VulkanAppRcPtr & app, OCIOGPUTestRcPtr & test) + void UpdateOCIOGPUState(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { -#if __APPLE__ && __aarch64__ - // The Apple M1 chip handles differently the Nan and Inf processing introducing - // differences with CPU processing. - const bool testNaN = false; - const bool testInfinity = false; -#else - const bool testNaN = test->getTestNaN(); - const bool testInfinity = test->getTestInfinity(); -#endif - - const float * inputValues = PrepareInputValues(test, testNaN, testInfinity); - app->updateImage(inputValues); - } -#endif - - void UpdateOCIOGLState(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) - { - app->setPrintShader(test->isVerbose()); + app->setShaderVerbose(test->isVerbose()); OCIO::ConstProcessorRcPtr & processor = test->getProcessor(); OCIO::GpuShaderDescRcPtr & shaderDesc = test->getShaderDesc(); @@ -400,32 +381,6 @@ namespace app->setShader(shaderDesc); } -#ifdef OCIO_VULKAN_ENABLED - void UpdateOCIOVulkanState(OCIO::VulkanAppRcPtr & app, OCIOGPUTestRcPtr & test) - { - app->setPrintShader(test->isVerbose()); - - OCIO::ConstProcessorRcPtr & processor = test->getProcessor(); - OCIO::GpuShaderDescRcPtr & shaderDesc = test->getShaderDesc(); - - OCIO::ConstGPUProcessorRcPtr gpu; - if (test->isLegacyShader()) - { - gpu = processor->getOptimizedLegacyGPUProcessor(OCIO::OPTIMIZATION_DEFAULT, - test->getLegacyShaderLutEdge()); - } - else - { - gpu = processor->getDefaultGPUProcessor(); - } - - // Collect the shader program information for a specific processor. - gpu->extractGpuShaderInfo(shaderDesc); - - app->setShader(shaderDesc); - } -#endif - void DiffComponent(const std::vector & cpuImage, const std::vector & gpuImage, size_t idx, bool relativeTest, float expectMin, @@ -563,10 +518,8 @@ namespace } } - // Shared helper to validate GPU processing against CPU. - // Template function to work with both OglApp and VulkanApp. - template - void ValidateImageTextureImpl(AppType & app, OCIOGPUTestRcPtr & test) + // Validate the GPU processing against the CPU one. + void ValidateImageTexture(OCIO::GraphicalAppRcPtr & app, OCIOGPUTestRcPtr & test) { // Each retest is rebuilding a cpu proc. OCIO::ConstCPUProcessorRcPtr processor = test->getProcessor()->getDefaultCPUProcessor(); @@ -604,20 +557,6 @@ namespace // Step 3: Compare the two results. ValidateResults(test, cpuImage, gpuImage, width, height); } - - // Validate the GPU processing against the CPU one. - void ValidateImageTexture(OCIO::OglAppRcPtr & app, OCIOGPUTestRcPtr & test) - { - ValidateImageTextureImpl(app, test); - } - -#ifdef OCIO_VULKAN_ENABLED - // Validate the GPU processing against the CPU one for Vulkan. - void ValidateImageTexture(OCIO::VulkanAppRcPtr & app, OCIOGPUTestRcPtr & test) - { - ValidateImageTextureImpl(app, test); - } -#endif }; int main(int argc, const char ** argv) @@ -631,6 +570,7 @@ int main(int argc, const char ** argv) bool printHelp = false; bool useMetalRenderer = false; bool useVulkanRenderer = false; + bool useDxRenderer = false; bool verbose = false; bool stopOnFirstError = false; @@ -641,7 +581,12 @@ int main(int argc, const char ** argv) ap.options("\nCommand line arguments:\n", "--help", &printHelp, "Print help message", "--metal", &useMetalRenderer, "Run the GPU unit test with Metal", +#ifdef OCIO_DIRECTX_ENABLED + "--dx", &useDxRenderer, "Run the GPU unit test with DirectX 12", +#endif +#ifdef OCIO_VULKAN_ENABLED "--vulkan", &useVulkanRenderer, "Run the GPU unit test with Vulkan", +#endif "-v", &verbose, "Output the GPU shader program", "--stop_on_error", &stopOnFirstError, "Stop on the first error", "--run_only %s", &filter, "Run only some unit tests\n" @@ -684,11 +629,8 @@ int main(int argc, const char ** argv) } // Step 1: Initialize the graphic library engines. - OCIO::OglAppRcPtr app; -#ifdef OCIO_VULKAN_ENABLED - OCIO::VulkanAppRcPtr vulkanApp; -#endif - + OCIO::GraphicalAppRcPtr app; + try { if(useMetalRenderer) @@ -700,19 +642,26 @@ int main(int argc, const char ** argv) return 1; #endif } - else if(useVulkanRenderer) +#ifdef OCIO_DIRECTX_ENABLED + else if(useDxRenderer) { -#ifdef OCIO_VULKAN_ENABLED - vulkanApp = OCIO::VulkanApp::CreateVulkanApp(g_winWidth, g_winHeight); - vulkanApp->printVulkanInfo(); -#else - std::cerr << std::endl << "'GPU tests - Vulkan' is not supported (OCIO_VULKAN_ENABLED not defined)" << std::endl; - return 1; + app = std::make_shared("GPU tests - DirectX 12", 10, 10); + } #endif +#ifdef OCIO_VULKAN_ENABLED + else if(useVulkanRenderer) + { + app = OCIO::VulkanApp::CreateVulkanApp(g_winWidth, g_winHeight); } +#endif else { - app = OCIO::OglApp::CreateOglApp("GPU tests", 10, 10); +#ifdef OCIO_GL_ENABLED + app = OCIO::OglApp::CreateApp("GPU tests - OpenGL", 10, 10); +#else + std::cerr << std::endl << "No GPU backend available." << std::endl; + return 1; +#endif } } catch (const OCIO::Exception & e) @@ -726,28 +675,15 @@ int main(int argc, const char ** argv) return 1; } - if (!useVulkanRenderer) - { - app->printGLInfo(); - } + app->printGraphicsInfo(); // Step 2: Allocate the texture that holds the image. -#ifdef OCIO_VULKAN_ENABLED - if (useVulkanRenderer) - { - AllocateImageTexture(vulkanApp); - vulkanApp->reshape(g_winWidth, g_winHeight); - } - else -#endif - { - AllocateImageTexture(app); + AllocateImageTexture(app); - // Step 3: Create the frame buffer and render buffer. - app->createGLBuffers(); + // Step 3: Create the frame buffer and render buffer. + app->createBuffers(); - app->reshape(g_winWidth, g_winHeight); - } + app->reshape(g_winWidth, g_winHeight); // Step 4: Execute all the unit tests. @@ -788,18 +724,34 @@ int main(int argc, const char ** argv) // Prepare the unit test. test->setVerbose(verbose); - OCIO::GpuLanguage gpuLang = OCIO::GPU_LANGUAGE_GLSL_1_2; + + // Select the appropriate shading language based on the renderer + OCIO::GpuLanguage shadingLanguage = OCIO::GPU_LANGUAGE_GLSL_1_2; #if __APPLE__ if (useMetalRenderer) { - gpuLang = OCIO::GPU_LANGUAGE_MSL_2_0; + shadingLanguage = OCIO::GPU_LANGUAGE_MSL_2_0; } + else +#endif +#ifdef OCIO_DIRECTX_ENABLED + if (useDxRenderer) + { + shadingLanguage = OCIO::GPU_LANGUAGE_HLSL_SM_5_0; + } + else #endif +#ifdef OCIO_VULKAN_ENABLED if (useVulkanRenderer) { - gpuLang = OCIO::GPU_LANGUAGE_GLSL_VK_4_6; + shadingLanguage = OCIO::GPU_LANGUAGE_GLSL_VK_4_6; } - test->setShadingLanguage(gpuLang); + else +#endif + { + shadingLanguage = OCIO::GPU_LANGUAGE_GLSL_1_2; + } + test->setShadingLanguage(shadingLanguage); bool enabledTest = true; try @@ -826,59 +778,28 @@ int main(int argc, const char ** argv) if(test->isValid() && enabledTest) { -#ifdef OCIO_VULKAN_ENABLED - if (useVulkanRenderer) - { - // Initialize the texture with the RGBA values to be processed. - UpdateImageTexture(vulkanApp, test); + // Initialize the texture with the RGBA values to be processed. + UpdateImageTexture(app, test); - // Update the GPU shader program. - UpdateOCIOVulkanState(vulkanApp, test); + // Update the GPU shader program. + UpdateOCIOGPUState(app, test); - const size_t numRetest = test->getNumRetests(); - // Need to run once and for each retest. - for (size_t idxRetest = 0; idxRetest <= numRetest; ++idxRetest) + const size_t numRetest = test->getNumRetests(); + // Need to run once and for each retest. + for (size_t idxRetest = 0; idxRetest <= numRetest; ++idxRetest) + { + if (idxRetest != 0) // Skip first run. { - if (idxRetest != 0) // Skip first run. - { - // Call the retest callback. - test->retestSetup(idxRetest - 1); - } - - // Process the image texture into the rendering buffer. - vulkanApp->redisplay(); - - // Compute the expected values using the CPU and compare - // against the GPU values. - ValidateImageTexture(vulkanApp, test); + // Call the retest callback. + test->retestSetup(idxRetest - 1); } - } - else -#endif - { - // Initialize the texture with the RGBA values to be processed. - UpdateImageTexture(app, test); - // Update the GPU shader program. - UpdateOCIOGLState(app, test); + // Process the image texture into the rendering buffer. + app->redisplay(); - const size_t numRetest = test->getNumRetests(); - // Need to run once and for each retest. - for (size_t idxRetest = 0; idxRetest <= numRetest; ++idxRetest) - { - if (idxRetest != 0) // Skip first run. - { - // Call the retest callback. - test->retestSetup(idxRetest - 1); - } - - // Process the image texture into the rendering buffer. - app->redisplay(); - - // Compute the expected values using the CPU and compare - // against the GPU values. - ValidateImageTexture(app, test); - } + // Compute the expected values using the CPU and compare + // against the GPU values. + ValidateImageTexture(app, test); } } } diff --git a/tests/gpu/MatrixOp_test.cpp b/tests/gpu/MatrixOp_test.cpp index 6f47362d8e..6da6ea8a64 100644 --- a/tests/gpu/MatrixOp_test.cpp +++ b/tests/gpu/MatrixOp_test.cpp @@ -9,7 +9,8 @@ namespace OCIO = OCIO_NAMESPACE; -const float g_epsilon = 5e-7f; +// 1e-6 accommodates 1-2 ULP FMA rounding with DirectX tests (matrix output values ~7). +const float g_epsilon = 1e-6f; // Helper method to build unit tests From c8deabb5a48e1017d6c34c59c234c78f9dc96020 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Mon, 6 Apr 2026 12:45:14 -0400 Subject: [PATCH 2/8] Fix post-rebase issues found in code review - HeadlessOglApp::printGraphicsInfo() was calling pure virtual base (crash on headless EGL) - graphicalapp.cpp included oglapp.h unconditionally; guard under OCIO_GL_ENABLED - tests/gpu/CMakeLists.txt early-return guard excluded Vulkan-only builds - Add missing test_vulkan ctest entry Signed-off-by: Eric Renaud-Houde --- src/libutils/oglapphelpers/graphicalapp.cpp | 14 ++++++++++++++ src/libutils/oglapphelpers/oglapp.cpp | 2 +- tests/gpu/CMakeLists.txt | 7 +++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/libutils/oglapphelpers/graphicalapp.cpp b/src/libutils/oglapphelpers/graphicalapp.cpp index 3baefdf36c..3c8da83e23 100644 --- a/src/libutils/oglapphelpers/graphicalapp.cpp +++ b/src/libutils/oglapphelpers/graphicalapp.cpp @@ -2,14 +2,28 @@ // Copyright Contributors to the OpenColorIO Project. #include "graphicalapp.h" + +#ifdef OCIO_GL_ENABLED #include "oglapp.h" +#endif + +#ifdef OCIO_DIRECTX_ENABLED +#include "dxapp.h" +#endif namespace OCIO_NAMESPACE { GraphicalAppRcPtr GraphicalApp::CreateApp(const char * winTitle, int winWidth, int winHeight) { +#ifdef OCIO_GL_ENABLED return OglApp::CreateApp(winTitle, winWidth, winHeight); +#elif defined(OCIO_DIRECTX_ENABLED) + return std::make_shared(winTitle, winWidth, winHeight); +#else + (void)winTitle; (void)winWidth; (void)winHeight; + throw Exception("No suitable GPU backend available for GraphicalApp::CreateApp"); +#endif } } // namespace OCIO_NAMESPACE diff --git a/src/libutils/oglapphelpers/oglapp.cpp b/src/libutils/oglapphelpers/oglapp.cpp index 7205cc2b75..7a45fea780 100644 --- a/src/libutils/oglapphelpers/oglapp.cpp +++ b/src/libutils/oglapphelpers/oglapp.cpp @@ -375,7 +375,7 @@ HeadlessOglApp::~HeadlessOglApp() void HeadlessOglApp::printGraphicsInfo() const noexcept { - GraphicalApp::printGraphicsInfo(); + OglApp::printGraphicsInfo(); printEGLInfo(); } diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index b39a1d87aa..2618bf4cd0 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -1,8 +1,8 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright Contributors to the OpenColorIO Project. -if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED)) - message(WARNING "GL or DirectX component missing. Skipping the GPU unit tests.") +if(NOT OCIO_GL_ENABLED AND NOT (WIN32 AND OCIO_DIRECTX_ENABLED) AND NOT OCIO_VULKAN_ENABLED) + message(WARNING "GL, DirectX, and Vulkan components all missing. Skipping the GPU unit tests.") return() endif() @@ -50,6 +50,9 @@ endif() if(WIN32 AND OCIO_DIRECTX_ENABLED) add_test(NAME test_dx COMMAND test_gpu_exec --dx) endif() +if(OCIO_VULKAN_ENABLED) + add_test(NAME test_vulkan COMMAND test_gpu_exec --vulkan) +endif() # Copy dxcompiler.dll and dxil.dll to the test output directory. # These are required at runtime when DXC (IDxcCompiler3) is used for SM6.0 shader compilation. From 015da536da47cff94eab697bf2f6eda20c0a7aab Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Mon, 6 Apr 2026 12:52:34 -0400 Subject: [PATCH 3/8] Minor additional comments, formatting and fixes. Signed-off-by: Eric Renaud-Houde --- src/libutils/oglapphelpers/graphicalapp.cpp | 3 +++ src/libutils/oglapphelpers/graphicalapp.h | 4 ++-- src/libutils/oglapphelpers/oglapp.cpp | 2 +- tests/gpu/GPUUnitTest.cpp | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/libutils/oglapphelpers/graphicalapp.cpp b/src/libutils/oglapphelpers/graphicalapp.cpp index 3c8da83e23..75fdb5c9a3 100644 --- a/src/libutils/oglapphelpers/graphicalapp.cpp +++ b/src/libutils/oglapphelpers/graphicalapp.cpp @@ -14,6 +14,9 @@ namespace OCIO_NAMESPACE { +// Factory for windowed backends (OGL, DX). For headless Vulkan use +// VulkanApp::CreateVulkanApp(). Metal is always paired with OGL (MetalApp +// inherits ScreenOglApp) and is covered by the OCIO_GL_ENABLED branch. GraphicalAppRcPtr GraphicalApp::CreateApp(const char * winTitle, int winWidth, int winHeight) { #ifdef OCIO_GL_ENABLED diff --git a/src/libutils/oglapphelpers/graphicalapp.h b/src/libutils/oglapphelpers/graphicalapp.h index a1b55c4aac..a17730070a 100644 --- a/src/libutils/oglapphelpers/graphicalapp.h +++ b/src/libutils/oglapphelpers/graphicalapp.h @@ -7,7 +7,7 @@ namespace OCIO_NAMESPACE { -// Forward declaration of GraphicalApp. +// Forward declaration of GraphicalApp. class GraphicalApp; typedef OCIO_SHARED_PTR GraphicalAppRcPtr; @@ -65,7 +65,7 @@ class GraphicalApp virtual void readImage(float* imageBuffer) = 0; // Helper to print graphics info. - void virtual printGraphicsInfo() const noexcept = 0; + virtual void printGraphicsInfo() const noexcept = 0; // Factory: returns a platform-appropriate GraphicalApp (OGL or DX). static GraphicalAppRcPtr CreateApp(const char * winTitle, int winWidth, int winHeight); diff --git a/src/libutils/oglapphelpers/oglapp.cpp b/src/libutils/oglapphelpers/oglapp.cpp index 7a45fea780..0b8e49fab6 100644 --- a/src/libutils/oglapphelpers/oglapp.cpp +++ b/src/libutils/oglapphelpers/oglapp.cpp @@ -388,7 +388,7 @@ void HeadlessOglApp::printEGLInfo() const noexcept void HeadlessOglApp::redisplay() { - GraphicalApp::redisplay(); + OglApp::redisplay(); eglSwapBuffers(m_eglDisplay, m_eglSurface); } diff --git a/tests/gpu/GPUUnitTest.cpp b/tests/gpu/GPUUnitTest.cpp index 451cecd12a..d225809b7f 100644 --- a/tests/gpu/GPUUnitTest.cpp +++ b/tests/gpu/GPUUnitTest.cpp @@ -737,6 +737,8 @@ int main(int argc, const char ** argv) #ifdef OCIO_DIRECTX_ENABLED if (useDxRenderer) { + // SM_5_0 controls OCIO shader code generation syntax; DxApp always + // compiles to SM 6.0 via DXC regardless of this enum value. shadingLanguage = OCIO::GPU_LANGUAGE_HLSL_SM_5_0; } else From 85d007ff60661de8ff2851f8e493c8389a2a581a Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 18 Apr 2026 21:55:07 -0400 Subject: [PATCH 4/8] Speed up DX12 GPU test backend (~19%) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DX12 test suite was noticeably slower than the OpenGL and Vulkan backends. Profiling the run showed the gap was almost entirely in DXC shader compilation, not in Present, fence waits, or DxcCreateInstance as initially suspected. Three low-risk changes: - Cache IDxcUtils and IDxcCompiler3 as DxApp members instead of recreating them on every setShader() call. The COM instances are thread-safe and perfectly reusable; recreating them per test added no value. - Compile the full-screen-triangle vertex shader exactly once and reuse the bytecode across all tests. The VSMain HLSL is a hard-coded SV_VertexID-driven triangle with no test-specific state — the bytecode is identical every time. Extracted into a new ensureVertexShaderCompiled() helper. This alone eliminated the biggest redundancy (263 duplicate VS compiles). - Present(1, 0) → Present(0, 0). VSync is meaningless for an off-screen test harness that reads back from a float render target. Locally the win shows up mostly in waitForPreviousFrame, which was being throttled by the swap-chain pipeline even on an invisible window. All 263/263 tests still pass; no tolerance changes, no DXIL codegen changes (except for a UTF8 fix), no precision risk. Signed-off-by: Eric Renaud-Houde --- src/libutils/oglapphelpers/dxapp.cpp | 95 +++++++++++++++++++--------- src/libutils/oglapphelpers/dxapp.h | 16 +++++ 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/src/libutils/oglapphelpers/dxapp.cpp b/src/libutils/oglapphelpers/dxapp.cpp index 4859ba600d..5b70c9b410 100644 --- a/src/libutils/oglapphelpers/dxapp.cpp +++ b/src/libutils/oglapphelpers/dxapp.cpp @@ -531,48 +531,25 @@ void DxApp::setShader(GpuShaderDescRcPtr& shaderDesc) std::cout << std::endl; } - // Compile shaders with DXC (DirectX Shader Compiler) for SM 6.0 - ComPtr dxcUtils; - ComPtr dxcCompiler; - ThrowIfFailed(DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&dxcUtils))); - ThrowIfFailed(DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler))); + // The DXC compiler instances and the full-screen-triangle VS bytecode are + // cached across tests — both are invariant. + ensureVertexShaderCompiled(); // Create a source blob from the shader string ComPtr sourceBlob; - ThrowIfFailed(dxcUtils->CreateBlobFromPinned( + ThrowIfFailed(m_dxcUtils->CreateBlobFromPinned( fullShader.c_str(), static_cast(fullShader.size()), DXC_CP_UTF8, &sourceBlob)); DxcBuffer sourceBuffer; sourceBuffer.Ptr = sourceBlob->GetBufferPointer(); sourceBuffer.Size = sourceBlob->GetBufferSize(); - sourceBuffer.Encoding = DXC_CP_ACP; - - // Compile vertex shader (vs_6_0) - LPCWSTR vsArgs[] = { L"-T", L"vs_6_0", L"-E", L"VSMain" }; - ComPtr vsResult; - ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, vsArgs, _countof(vsArgs), - nullptr, IID_PPV_ARGS(&vsResult))); - HRESULT vsHr; - vsResult->GetStatus(&vsHr); - if (FAILED(vsHr)) - { - ComPtr errors; - vsResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); - std::ostringstream oss; - oss << "Vertex shader compilation failed (" << HrToString(vsHr) << ")"; - if (errors && errors->GetStringLength()) - oss << ":\n" << errors->GetStringPointer(); - std::cerr << oss.str() << std::endl; - throw Exception(oss.str().c_str()); - } - ComPtr vertexShaderBlob; - ThrowIfFailed(vsResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&vertexShaderBlob), nullptr)); + sourceBuffer.Encoding = DXC_CP_UTF8; // Compile pixel shader (ps_6_0). LPCWSTR psArgs[] = { L"-T", L"ps_6_0", L"-E", L"PSMain" }; ComPtr psResult; - ThrowIfFailed(dxcCompiler->Compile(&sourceBuffer, psArgs, _countof(psArgs), + ThrowIfFailed(m_dxcCompiler->Compile(&sourceBuffer, psArgs, _countof(psArgs), nullptr, IID_PPV_ARGS(&psResult))); HRESULT psHr; psResult->GetStatus(&psHr); @@ -662,7 +639,7 @@ void DxApp::setShader(GpuShaderDescRcPtr& shaderDesc) D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; psoDesc.InputLayout = { nullptr, 0 }; // No vertex input layout (using SV_VertexID) psoDesc.pRootSignature = m_rootSignature.Get(); - psoDesc.VS = { vertexShaderBlob->GetBufferPointer(), vertexShaderBlob->GetBufferSize() }; + psoDesc.VS = { m_vertexShaderBlob->GetBufferPointer(), m_vertexShaderBlob->GetBufferSize() }; psoDesc.PS = { pixelShaderBlob->GetBufferPointer(), pixelShaderBlob->GetBufferSize() }; psoDesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); psoDesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); @@ -917,7 +894,10 @@ void DxApp::redisplay() ID3D12CommandList* ppCommandLists[] = { m_commandList.Get() }; m_commandQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists); - ThrowIfFailed(m_swapChain->Present(1, 0)); + // SyncInterval = 0: no VSync. Tests render to an off-screen float RT and + // read back from it; the swap chain back buffer is never used as a source + // of truth, so there is no reason to wait for a vblank. + ThrowIfFailed(m_swapChain->Present(0, 0)); waitForPreviousFrame(); } @@ -1020,6 +1000,59 @@ void DxApp::printGraphicsInfo() const noexcept } } +void DxApp::ensureVertexShaderCompiled() +{ + if (m_vertexShaderBlob) + return; + + ThrowIfFailed(DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&m_dxcUtils))); + ThrowIfFailed(DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&m_dxcCompiler))); + + // Full-screen triangle using SV_VertexID — no vertex buffer, no bindings, + // identical across every test. Kept inline here so the VS source is + // self-contained and does not depend on the OCIO-generated fragment. + static const char * kVsSource = + "struct VSOutput {\n" + " float4 position : SV_Position;\n" + " float2 texcoord : TEXCOORD0;\n" + "};\n" + "VSOutput VSMain(uint vertexID : SV_VertexID) {\n" + " VSOutput output;\n" + " float2 texcoord = float2((vertexID << 1) & 2, vertexID & 2);\n" + " output.position = float4(texcoord * float2(2, -2) + float2(-1, 1), 0, 1);\n" + " output.texcoord = texcoord;\n" + " return output;\n" + "}\n"; + + const UINT32 vsLen = static_cast(strlen(kVsSource)); + ComPtr vsSourceBlob; + ThrowIfFailed(m_dxcUtils->CreateBlobFromPinned(kVsSource, vsLen, DXC_CP_UTF8, &vsSourceBlob)); + + DxcBuffer vsBuffer; + vsBuffer.Ptr = vsSourceBlob->GetBufferPointer(); + vsBuffer.Size = vsSourceBlob->GetBufferSize(); + vsBuffer.Encoding = DXC_CP_UTF8; + + LPCWSTR vsArgs[] = { L"-T", L"vs_6_0", L"-E", L"VSMain" }; + ComPtr vsResult; + ThrowIfFailed(m_dxcCompiler->Compile(&vsBuffer, vsArgs, _countof(vsArgs), + nullptr, IID_PPV_ARGS(&vsResult))); + HRESULT vsHr; + vsResult->GetStatus(&vsHr); + if (FAILED(vsHr)) + { + ComPtr errors; + vsResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); + std::ostringstream oss; + oss << "Vertex shader compilation failed (" << HrToString(vsHr) << ")"; + if (errors && errors->GetStringLength()) + oss << ":\n" << errors->GetStringPointer(); + std::cerr << oss.str() << std::endl; + throw Exception(oss.str().c_str()); + } + ThrowIfFailed(vsResult->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&m_vertexShaderBlob), nullptr)); +} + void DxApp::waitForPreviousFrame() { // Signal and increment the fence value. diff --git a/src/libutils/oglapphelpers/dxapp.h b/src/libutils/oglapphelpers/dxapp.h index 26f3beb4c4..fc15bac158 100644 --- a/src/libutils/oglapphelpers/dxapp.h +++ b/src/libutils/oglapphelpers/dxapp.h @@ -11,6 +11,10 @@ #include +struct IDxcUtils; +struct IDxcCompiler3; +struct IDxcBlob; + using Microsoft::WRL::ComPtr; namespace OCIO_NAMESPACE @@ -46,6 +50,10 @@ class DxApp : public GraphicalApp private: void waitForPreviousFrame(); + // Compile the (constant) full-screen-triangle vertex shader once and cache + // the blob. Called lazily from setShader(). + void ensureVertexShaderCompiled(); + static const UINT FrameCount = 2; int m_viewportWidth{ 0 }; @@ -99,6 +107,14 @@ class DxApp : public GraphicalApp // Window handle and class name for cleanup. HWND m_hwnd{ nullptr }; std::string m_windowClassName; + + // DXC compiler — created once and reused across every setShader() call. + ComPtr m_dxcUtils; + ComPtr m_dxcCompiler; + + // Full-screen-triangle vertex shader blob — the VS source is identical for + // every test, so compile it once and reuse the bytecode. + ComPtr m_vertexShaderBlob; }; } From 6419c941f4ad323b5fb1d624b668e1e8900e63c2 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 18 Apr 2026 22:20:39 -0400 Subject: [PATCH 5/8] Several small fixes tidying up the recently-added GPU test infrastructure. - Fix unused-variable warnings (fatal on macOS with warnings-as-errors): guard useDxRenderer and useVulkanRenderer declarations with the same ifdefs as their usage sites. useMetalRenderer stays unconditional because it's referenced on all platforms. - Propagate the MSVC+shared-libs PATH workaround to test_vulkan so it can find OpenColorIO_*.dll at runtime, matching what's already done for test_dx. - Upgrade the dxcompiler.dll detection message from STATUS to WARNING and rewrite it to name OCIO_DIRECTX_ENABLED and offer concrete recovery paths. The previous STATUS message was easy to miss, leaving users with a silent degradation until test_dx failed at runtime. - Rename the OpenGL ctest from test_gpu to test_opengl now that sibling backend-specific tests (test_dx, test_vulkan, test_metal) exist. The test_gpu_exec binary keeps its name since it's backend-agnostic and selects via CLI flags. - Declare OCIO_VULKAN_ENABLED as a first-class CMake option with mark_as_advanced, matching the existing OCIO_DIRECTX_ENABLED. It was previously used in conditionals without ever being declared, so it never appeared as a toggle in ccmake/cmake-gui. - Document both OCIO_DIRECTX_ENABLED and OCIO_VULKAN_ENABLED in docs/quick_start/installation.rst, noting that Vulkan requires an external SDK. Signed-off-by: Eric Renaud-Houde --- CMakeLists.txt | 5 +++++ docs/quick_start/installation.rst | 2 ++ tests/gpu/CMakeLists.txt | 16 +++++++++++----- tests/gpu/GPUUnitTest.cpp | 4 ++++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a57649ac9d..e77f9d5165 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -295,6 +295,11 @@ else() endif() mark_as_advanced(OCIO_DIRECTX_ENABLED) +# Vulkan is cross-platform but requires an external SDK, so it is off by +# default; enable explicitly with -DOCIO_VULKAN_ENABLED=ON. +option(OCIO_VULKAN_ENABLED "Enable Vulkan GPU rendering support" OFF) +mark_as_advanced(OCIO_VULKAN_ENABLED) + ############################################################################### # Check for ARM neon here because we need to know if ARM NEON is supported diff --git a/docs/quick_start/installation.rst b/docs/quick_start/installation.rst index d8d208e05c..97e222db4d 100644 --- a/docs/quick_start/installation.rst +++ b/docs/quick_start/installation.rst @@ -287,6 +287,8 @@ Here are the most common OCIO-specific CMake options (the default values are sho - ``-DOCIO_BUILD_TESTS=ON`` (Set to OFF to not build the unit tests) - ``-DOCIO_BUILD_GPU_TESTS=ON`` (Set to OFF to not build the GPU unit tests) - ``-DOCIO_USE_HEADLESS=OFF`` (Set to ON to do headless GPU rendering) +- ``-DOCIO_DIRECTX_ENABLED=ON`` (Windows only; set to OFF to disable the DirectX 12 GPU rendering backend used by ``test_dx``. Forced OFF on non-Windows platforms.) +- ``-DOCIO_VULKAN_ENABLED=OFF`` (Set to ON to enable the Vulkan GPU rendering backend used by ``test_vulkan``. Requires the Vulkan SDK to be installed and findable by CMake.) - ``-DOCIO_WARNING_AS_ERROR=ON`` (Set to OFF to turn off warnings as errors) - ``-DOCIO_BUILD_DOCS=OFF`` (Set to ON to build the documentation) diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 2618bf4cd0..245184a00d 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -42,7 +42,7 @@ target_link_libraries(test_gpu_exec ) if(OCIO_GL_ENABLED) - add_test(NAME test_gpu COMMAND test_gpu_exec) + add_test(NAME test_opengl COMMAND test_gpu_exec) endif() if(APPLE) add_test(NAME test_metal COMMAND test_gpu_exec -metal) @@ -87,9 +87,12 @@ if(WIN32 AND OCIO_DIRECTX_ENABLED) ) endif() else() - message(STATUS - "dxcompiler.dll not found in Windows SDK Redist/D3D. " - "Add its directory to PATH before running test_dx." + message(WARNING + "OCIO_DIRECTX_ENABLED is ON but dxcompiler.dll was not found in the " + "Windows SDK Redist/D3D path. test_dx will fail at runtime unless " + "dxcompiler.dll and dxil.dll are on PATH. Install the Windows SDK " + "redistributable components, or set -DOCIO_DIRECTX_ENABLED=OFF to " + "disable the DirectX 12 backend." ) endif() endif() @@ -107,10 +110,13 @@ if(MSVC AND BUILD_SHARED_LIBS) set(NEW_PATH "${NEW_PATH}\\\;${GLEW_INCLUDE_DIRS}/../bin") if(OCIO_GL_ENABLED) - set_tests_properties(test_gpu PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + set_tests_properties(test_opengl PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) endif() if(WIN32 AND OCIO_DIRECTX_ENABLED) set_tests_properties(test_dx PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) endif() + if(OCIO_VULKAN_ENABLED) + set_tests_properties(test_vulkan PROPERTIES ENVIRONMENT PATH=${NEW_PATH}) + endif() endif() diff --git a/tests/gpu/GPUUnitTest.cpp b/tests/gpu/GPUUnitTest.cpp index d225809b7f..a94ae1a794 100644 --- a/tests/gpu/GPUUnitTest.cpp +++ b/tests/gpu/GPUUnitTest.cpp @@ -569,8 +569,12 @@ int main(int argc, const char ** argv) bool printHelp = false; bool useMetalRenderer = false; +#ifdef OCIO_VULKAN_ENABLED bool useVulkanRenderer = false; +#endif +#ifdef OCIO_DIRECTX_ENABLED bool useDxRenderer = false; +#endif bool verbose = false; bool stopOnFirstError = false; From fb53430e927d0f0cc7dae43f07c342a022045245 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 18 Apr 2026 22:48:38 -0400 Subject: [PATCH 6/8] Integrate DirectX-Headers with OCIO's external-package pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously InstallDirectXHeaders.cmake was included unconditionally from oglapphelpers/CMakeLists.txt, so DirectX-Headers was always fetched from GitHub regardless of whether the user had a local copy installed. There was no way to use a system install, a vendored copy, or an air-gapped build, and the dep didn't respect OCIO_INSTALL_EXT_PACKAGES. DirectX-Headers is now a first-class OCIO dependency, handled the same way as Imath, ZLIB, yaml-cpp, etc.: try find_package first, fall back to FetchContent only if not found and OCIO_INSTALL_EXT_PACKAGES allows it. Changes: - New share/cmake/modules/FindDirectX-Headers.cmake, modeled on FindImath.cmake. - InstallDirectXHeaders.cmake → InstallDirectX-Headers.cmake (the hyphen matches OCIO's Install convention). - oglapphelpers/CMakeLists.txt now calls ocio_handle_dependency(DirectX-Headers ...) with MIN_VERSION 1.606.0 (Windows SDK 22H2 era — old enough to cover most installed copies) and RECOMMENDED_VERSION 1.619.1 (the version OCIO pins and validates). For users: a local DirectX-Headers install can now be supplied via any of the standard CMake mechanisms — -DDirectX-Headers_DIR, -DDirectX-Headers_ROOT, -DDirectX-Headers_INCLUDE_DIR, or globally with -DOCIO_INSTALL_EXT_PACKAGES=NONE to forbid any network fetch. Signed-off-by: Eric Renaud-Houde --- share/cmake/modules/FindDirectX-Headers.cmake | 76 +++++++++++++++++++ ...ers.cmake => InstallDirectX-Headers.cmake} | 8 ++ src/libutils/oglapphelpers/CMakeLists.txt | 6 +- 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 share/cmake/modules/FindDirectX-Headers.cmake rename share/cmake/modules/install/{InstallDirectXHeaders.cmake => InstallDirectX-Headers.cmake} (63%) diff --git a/share/cmake/modules/FindDirectX-Headers.cmake b/share/cmake/modules/FindDirectX-Headers.cmake new file mode 100644 index 0000000000..99f2788d4e --- /dev/null +++ b/share/cmake/modules/FindDirectX-Headers.cmake @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright Contributors to the OpenColorIO Project. +# +# Locate DirectX-Headers (header-only, Windows only) +# +# Variables defined by this module: +# DirectX-Headers_FOUND - Indicate whether the package was found or not +# DirectX-Headers_INCLUDE_DIR - Location of the header files +# DirectX-Headers_VERSION - Package version +# +# Global targets defined by this module: +# Microsoft::DirectX-Headers +# +# DirectX-Headers can be supplied by the caller through any of the standard +# CMake mechanisms: +# -- Set -DDirectX-Headers_DIR to the directory containing directx-headers-config.cmake +# -- Set -DDirectX-Headers_ROOT to the install prefix (with include/directx/ underneath) +# -- Set -DDirectX-Headers_INCLUDE_DIR to the directory containing directx/d3d12.h +# +# When OCIO_INSTALL_EXT_PACKAGES is not ALL, this module first tries to locate +# an existing install via the upstream CMake config, then falls back to a +# manual header search. If still not found and OCIO_INSTALL_EXT_PACKAGES is +# MISSING (the default), OCIO's ocio_install_dependency() pathway will invoke +# InstallDirectX-Headers.cmake to build it via FetchContent. +# + +if(NOT OCIO_INSTALL_EXT_PACKAGES STREQUAL ALL) + # Prefer the upstream CMake config (installed as lower-case). + find_package(directx-headers ${DirectX-Headers_FIND_VERSION} CONFIG QUIET) + + if(directx-headers_FOUND) + set(DirectX-Headers_FOUND TRUE) + if(directx-headers_VERSION) + set(DirectX-Headers_VERSION ${directx-headers_VERSION}) + endif() + else() + # Fall back to locating the public header directly (e.g. when the + # headers were installed without the CMake config, or are provided + # by a vendored copy). + find_path(DirectX-Headers_INCLUDE_DIR + NAMES + directx/d3d12.h + HINTS + ${DirectX-Headers_ROOT} + PATH_SUFFIXES + include + ) + endif() + + # If OCIO can install the package itself, demote REQUIRED so a missing + # dependency here does not abort configuration before the install step. + if(OCIO_INSTALL_EXT_PACKAGES STREQUAL MISSING) + set(DirectX-Headers_FIND_REQUIRED FALSE) + endif() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(DirectX-Headers + REQUIRED_VARS + DirectX-Headers_INCLUDE_DIR + VERSION_VAR + DirectX-Headers_VERSION + ) +endif() + +############################################################################### +### Create target (only needed for the manual-header-search fallback; the +### upstream CMake config already defines Microsoft::DirectX-Headers). + +if(DirectX-Headers_FOUND AND NOT TARGET Microsoft::DirectX-Headers AND DirectX-Headers_INCLUDE_DIR) + add_library(Microsoft::DirectX-Headers INTERFACE IMPORTED GLOBAL) + set_target_properties(Microsoft::DirectX-Headers PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${DirectX-Headers_INCLUDE_DIR}" + ) + + mark_as_advanced(DirectX-Headers_INCLUDE_DIR DirectX-Headers_VERSION) +endif() diff --git a/share/cmake/modules/install/InstallDirectXHeaders.cmake b/share/cmake/modules/install/InstallDirectX-Headers.cmake similarity index 63% rename from share/cmake/modules/install/InstallDirectXHeaders.cmake rename to share/cmake/modules/install/InstallDirectX-Headers.cmake index 7d2fe91d55..af3d25a5f6 100644 --- a/share/cmake/modules/install/InstallDirectXHeaders.cmake +++ b/share/cmake/modules/install/InstallDirectX-Headers.cmake @@ -17,3 +17,11 @@ FetchContent_Declare(DirectX-Headers ) FetchContent_MakeAvailable(DirectX-Headers) + +# Signal success to ocio_install_dependency so ocio_handle_dependency does not +# abort at the next required-check. FetchContent_MakeAvailable has just created +# the Microsoft::DirectX-Headers target via the upstream CMakeLists. +if(TARGET Microsoft::DirectX-Headers) + set(DirectX-Headers_FOUND TRUE) + set(DirectX-Headers_VERSION "1.619.1") +endif() diff --git a/src/libutils/oglapphelpers/CMakeLists.txt b/src/libutils/oglapphelpers/CMakeLists.txt index 0979891c81..88e53c317b 100644 --- a/src/libutils/oglapphelpers/CMakeLists.txt +++ b/src/libutils/oglapphelpers/CMakeLists.txt @@ -163,7 +163,11 @@ if(OCIO_GL_ENABLED) endif() if(WIN32 AND OCIO_DIRECTX_ENABLED) - include(InstallDirectXHeaders) + include(ocio_handle_dependency) + ocio_handle_dependency(DirectX-Headers REQUIRED ALLOW_INSTALL + MIN_VERSION 1.606.0 + RECOMMENDED_VERSION 1.619.1 + RECOMMENDED_VERSION_REASON "Latest version tested with OCIO") target_compile_definitions(oglapphelpers PUBLIC OCIO_DIRECTX_ENABLED From a9e069a3c0e5ee77f151e77ca54358c4ff362129 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sat, 18 Apr 2026 23:07:19 -0400 Subject: [PATCH 7/8] Improve dxcompiler.dll diagnostics and allow overriding its path Addresses test crashes seen on stuck Windows 10 hosts caused by an old dxcompiler.dll shipped in that host's Windows SDK Redist. - Print the version of the found dxcompiler.dll at configure time so crash reports identify the exact DXC build without follow-up diagnostics. - Emit a standing hint pointing at the DirectX Shader Compiler releases page, which is the documented workaround. - New -DOCIO_DXCOMPILER_DLL= overrides the Windows SDK Redist search, letting users supply a newer DLL pre-build instead of copying it by hand after. - Extracted the DXC-runtime logic into share/cmake/utils/LocateDXCompilerRuntime.cmake so tests/gpu/CMakeLists.txt stays focused on the test target. Signed-off-by: Eric Renaud-Houde --- .../cmake/utils/LocateDXCompilerRuntime.cmake | 79 +++++++++++++++++++ tests/gpu/CMakeLists.txt | 25 +----- 2 files changed, 80 insertions(+), 24 deletions(-) create mode 100644 share/cmake/utils/LocateDXCompilerRuntime.cmake diff --git a/share/cmake/utils/LocateDXCompilerRuntime.cmake b/share/cmake/utils/LocateDXCompilerRuntime.cmake new file mode 100644 index 0000000000..996caa73d8 --- /dev/null +++ b/share/cmake/utils/LocateDXCompilerRuntime.cmake @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright Contributors to the OpenColorIO Project. +# +# Locate the dxcompiler.dll + dxil.dll runtime pair needed to run D3D12 shader +# compilation at test time, and surface their file version. +# +# Inputs: +# OCIO_DXCOMPILER_DLL - Optional user-supplied path to dxcompiler.dll. +# When set, overrides the Windows SDK Redist/D3D +# search. Useful when the SDK-bundled DLL is too old +# (a known cause of crashes on stuck Windows 10 hosts). +# +# Outputs: +# DXCOMPILER_DLL - Path to dxcompiler.dll (cache variable). +# DXIL_DLL - Path to the adjacent dxil.dll (cache variable, may +# be unset if not found next to dxcompiler.dll). + +set(OCIO_DXCOMPILER_DLL "" CACHE FILEPATH + "Optional explicit path to dxcompiler.dll (e.g. from a newer DirectX Shader Compiler release). \ +Overrides the automatic Windows SDK Redist/D3D search." +) + +if(OCIO_DXCOMPILER_DLL) + if(NOT EXISTS "${OCIO_DXCOMPILER_DLL}") + message(FATAL_ERROR "OCIO_DXCOMPILER_DLL=${OCIO_DXCOMPILER_DLL} does not exist.") + endif() + set(DXCOMPILER_DLL "${OCIO_DXCOMPILER_DLL}" CACHE FILEPATH + "Path to dxcompiler.dll (user-supplied via OCIO_DXCOMPILER_DLL)" FORCE) +else() + find_file(DXCOMPILER_DLL + NAMES dxcompiler.dll + PATHS + # Note: x64 hardcoded; update if ARM64 Windows support is needed. + "$ENV{WindowsSdkDir}Redist/D3D/x64" + "C:/Program Files (x86)/Windows Kits/10/Redist/D3D/x64" + NO_DEFAULT_PATH + DOC "Path to dxcompiler.dll from Windows SDK" + ) +endif() + +if(DXCOMPILER_DLL) + get_filename_component(_dxc_dll_dir "${DXCOMPILER_DLL}" DIRECTORY) + find_file(DXIL_DLL + NAMES dxil.dll + HINTS "${_dxc_dll_dir}" + NO_DEFAULT_PATH + ) + + # Report the found dxcompiler.dll version so crash reports can identify + # mismatched or outdated DXC builds without re-running diagnostics. + string(REPLACE "'" "''" _dxc_dll_ps "${DXCOMPILER_DLL}") + execute_process( + COMMAND powershell -NoProfile -Command + "(Get-Item -LiteralPath '${_dxc_dll_ps}').VersionInfo.FileVersion" + OUTPUT_VARIABLE _dxc_version + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + if(_dxc_version) + message(STATUS "Found dxcompiler.dll (version ${_dxc_version}): ${DXCOMPILER_DLL}") + else() + message(STATUS "Found dxcompiler.dll (version unknown): ${DXCOMPILER_DLL}") + endif() + message(STATUS + "If test_dx crashes during shader compilation, the Windows SDK's dxcompiler.dll " + "may be too old to produce signed DXIL on this system. Replace it with a newer " + "build from https://github.com/microsoft/DirectXShaderCompiler/releases, or set " + "-DOCIO_DXCOMPILER_DLL= to point at a specific dxcompiler.dll." + ) +else() + message(WARNING + "OCIO_DIRECTX_ENABLED is ON but dxcompiler.dll was not found in the " + "Windows SDK Redist/D3D path. test_dx will fail at runtime unless " + "dxcompiler.dll and dxil.dll are on PATH. Install the Windows SDK " + "redistributable components, set -DOCIO_DXCOMPILER_DLL= to supply " + "a specific dxcompiler.dll, or set -DOCIO_DIRECTX_ENABLED=OFF to " + "disable the DirectX 12 backend." + ) +endif() diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index 245184a00d..b61b0a863f 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -56,24 +56,9 @@ endif() # Copy dxcompiler.dll and dxil.dll to the test output directory. # These are required at runtime when DXC (IDxcCompiler3) is used for SM6.0 shader compilation. -# The Redist/D3D path is the stable, version-independent redistribution location. if(WIN32 AND OCIO_DIRECTX_ENABLED) - find_file(DXCOMPILER_DLL - NAMES dxcompiler.dll - PATHS - # Note: x64 hardcoded; update if ARM64 Windows support is needed. - "$ENV{WindowsSdkDir}Redist/D3D/x64" - "C:/Program Files (x86)/Windows Kits/10/Redist/D3D/x64" - NO_DEFAULT_PATH - DOC "Path to dxcompiler.dll from Windows SDK" - ) + include(LocateDXCompilerRuntime) if(DXCOMPILER_DLL) - get_filename_component(_dxc_dll_dir "${DXCOMPILER_DLL}" DIRECTORY) - find_file(DXIL_DLL - NAMES dxil.dll - HINTS "${_dxc_dll_dir}" - NO_DEFAULT_PATH - ) add_custom_command(TARGET test_gpu_exec POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${DXCOMPILER_DLL}" "$" @@ -86,14 +71,6 @@ if(WIN32 AND OCIO_DIRECTX_ENABLED) COMMENT "Copying dxil.dll to test output directory" ) endif() - else() - message(WARNING - "OCIO_DIRECTX_ENABLED is ON but dxcompiler.dll was not found in the " - "Windows SDK Redist/D3D path. test_dx will fail at runtime unless " - "dxcompiler.dll and dxil.dll are on PATH. Install the Windows SDK " - "redistributable components, or set -DOCIO_DIRECTX_ENABLED=OFF to " - "disable the DirectX 12 backend." - ) endif() endif() From c1aaf9c3e3796ae3368e4badfe3d093e5576c4c4 Mon Sep 17 00:00:00 2001 From: Eric Renaud-Houde Date: Sun, 19 Apr 2026 11:58:05 -0400 Subject: [PATCH 8/8] Minor comment tweaks in LocateDXCompilerRuntime.cmake. Signed-off-by: Eric Renaud-Houde --- share/cmake/utils/LocateDXCompilerRuntime.cmake | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/share/cmake/utils/LocateDXCompilerRuntime.cmake b/share/cmake/utils/LocateDXCompilerRuntime.cmake index 996caa73d8..f8d3aacecf 100644 --- a/share/cmake/utils/LocateDXCompilerRuntime.cmake +++ b/share/cmake/utils/LocateDXCompilerRuntime.cmake @@ -7,13 +7,12 @@ # Inputs: # OCIO_DXCOMPILER_DLL - Optional user-supplied path to dxcompiler.dll. # When set, overrides the Windows SDK Redist/D3D -# search. Useful when the SDK-bundled DLL is too old -# (a known cause of crashes on stuck Windows 10 hosts). +# search. Useful when the SDK-bundled DLL is too old. # # Outputs: # DXCOMPILER_DLL - Path to dxcompiler.dll (cache variable). # DXIL_DLL - Path to the adjacent dxil.dll (cache variable, may -# be unset if not found next to dxcompiler.dll). +# be left unset if not found next to dxcompiler.dll). set(OCIO_DXCOMPILER_DLL "" CACHE FILEPATH "Optional explicit path to dxcompiler.dll (e.g. from a newer DirectX Shader Compiler release). \