diff --git a/meson.build b/meson.build index 13b1edd8..60afa50d 100644 --- a/meson.build +++ b/meson.build @@ -108,7 +108,7 @@ else endif dxvk_version = vcs_tag( - command: ['git', 'describe', '--dirty=+'], + command: ['git', 'describe', '--dirty=-async'], input: 'version.h.in', output: 'version.h', ) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index ac85428b..7dc80740 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -4138,7 +4138,7 @@ namespace dxvk { // Retrieve and bind actual Vulkan pipeline handle m_gpActivePipeline = m_state.gp.pipeline->getPipelineHandle( - m_state.gp.state, m_state.om.framebufferInfo.renderPass()); + m_state.gp.state, m_state.om.framebufferInfo.renderPass(), this->checkAsyncCompilationCompat()); if (unlikely(!m_gpActivePipeline)) return false; @@ -4379,7 +4379,7 @@ namespace dxvk { } - void DxvkContext::updateFramebuffer() { + void DxvkContext::updateFramebuffer(bool isDraw) { if (m_flags.test(DxvkContextFlag::GpDirtyFramebuffer)) { m_flags.clr(DxvkContextFlag::GpDirtyFramebuffer); @@ -4401,6 +4401,11 @@ namespace dxvk { m_state.gp.state.omSwizzle[i] = DxvkOmAttachmentSwizzle(mapping); } + if (isDraw) { + for (uint32_t i = 0; i < fbInfo.numAttachments(); i++) + fbInfo.getAttachment(i).view->setRtBindingFrameId(m_device->getCurrentFrameId()); + } + m_flags.set(DxvkContextFlag::GpDirtyPipelineState); } } @@ -4804,7 +4809,7 @@ namespace dxvk { } if (m_flags.test(DxvkContextFlag::GpDirtyFramebuffer)) - this->updateFramebuffer(); + this->updateFramebuffer(true); if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) this->startRenderPass(); @@ -5218,6 +5223,14 @@ namespace dxvk { return true; } + bool DxvkContext::checkAsyncCompilationCompat() { + bool fbCompat = true; + for (uint32_t i = 0; fbCompat && i < m_state.om.framebufferInfo.numAttachments(); i++) { + const auto& attachment = m_state.om.framebufferInfo.getAttachment(i); + fbCompat &= attachment.view->getRtBindingAsyncCompilationCompat(); + } + return fbCompat; + } DxvkGraphicsPipeline* DxvkContext::lookupGraphicsPipeline( const DxvkGraphicsPipelineShaders& shaders) { diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 39c5b989..78ef3b02 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -1227,7 +1227,7 @@ namespace dxvk { DxvkFramebufferInfo makeFramebufferInfo( const DxvkRenderTargets& renderTargets); - void updateFramebuffer(); + void updateFramebuffer(bool isDraw = false); void applyRenderTargetLoadLayouts(); @@ -1307,6 +1307,8 @@ namespace dxvk { const Rc& buffer, VkDeviceSize copySize); + bool checkAsyncCompilationCompat(); + DxvkGraphicsPipeline* lookupGraphicsPipeline( const DxvkGraphicsPipelineShaders& shaders); diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index 66c4131e..a820502e 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -62,7 +62,8 @@ namespace dxvk { VkPipeline DxvkGraphicsPipeline::getPipelineHandle( const DxvkGraphicsPipelineStateInfo& state, - const DxvkRenderPass* renderPass) { + const DxvkRenderPass* renderPass, + bool async) { DxvkGraphicsPipelineInstance* instance = this->findInstance(state, renderPass); if (unlikely(!instance)) { @@ -71,34 +72,41 @@ namespace dxvk { return VK_NULL_HANDLE; // Prevent other threads from adding new instances and check again - std::lock_guard lock(m_mutex); + //std::lock_guard lock(m_mutex); instance = this->findInstance(state, renderPass); if (!instance) { // Keep pipeline object locked, at worst we're going to stall // a state cache worker and the current thread needs priority. - instance = this->createInstance(state, renderPass); - this->writePipelineStateToCache(state, renderPass->format()); + if (async && m_pipeMgr->m_compiler != nullptr) + m_pipeMgr->m_compiler->queueCompilation(this, state, renderPass); + else { + instance = this->createInstance(state, renderPass); + this->writePipelineStateToCache(state, renderPass->format()); + } } } + if (!instance) + return VK_NULL_HANDLE; + return instance->pipeline(); } - void DxvkGraphicsPipeline::compilePipeline( + bool DxvkGraphicsPipeline::compilePipeline( const DxvkGraphicsPipelineStateInfo& state, const DxvkRenderPass* renderPass) { // Exit early if the state vector is invalid if (!this->validatePipelineState(state, false)) - return; + return false; // Keep the object locked while compiling a pipeline since compiling // similar pipelines concurrently is fragile on some drivers std::lock_guard lock(m_mutex); - if (!this->findInstance(state, renderPass)) - this->createInstance(state, renderPass); + return (this->findInstance(state, renderPass) == nullptr) && + (this->createInstance(state, renderPass) != nullptr); } @@ -107,6 +115,7 @@ namespace dxvk { const DxvkRenderPass* renderPass) { VkPipeline pipeline = this->createPipeline(state, renderPass); + std::lock_guard lock(m_mutex2); m_pipeMgr->m_numGraphicsPipelines += 1; return &(*m_pipelines.emplace(state, renderPass, pipeline)); } @@ -115,6 +124,7 @@ namespace dxvk { DxvkGraphicsPipelineInstance* DxvkGraphicsPipeline::findInstance( const DxvkGraphicsPipelineStateInfo& state, const DxvkRenderPass* renderPass) { + std::lock_guard lock(m_mutex2); for (auto& instance : m_pipelines) { if (instance.isCompatible(state, renderPass)) return &instance; diff --git a/src/dxvk/dxvk_graphics.h b/src/dxvk/dxvk_graphics.h index 50d16b1e..21274fd6 100644 --- a/src/dxvk/dxvk_graphics.h +++ b/src/dxvk/dxvk_graphics.h @@ -199,11 +199,13 @@ namespace dxvk { * state. If necessary, a new pipeline will be created. * \param [in] state Pipeline state vector * \param [in] renderPass The render pass + * \param [in] async Compile asynchronously * \returns Pipeline handle */ VkPipeline getPipelineHandle( const DxvkGraphicsPipelineStateInfo& state, - const DxvkRenderPass* renderPass); + const DxvkRenderPass* renderPass, + bool async); /** * \brief Compiles a pipeline @@ -212,11 +214,16 @@ namespace dxvk { * and stores the result for future use. * \param [in] state Pipeline state vector * \param [in] renderPass The render pass + * \returns \c true if compile succeeded */ - void compilePipeline( + bool compilePipeline( const DxvkGraphicsPipelineStateInfo& state, const DxvkRenderPass* renderPass); + void writePipelineStateToCache( + const DxvkGraphicsPipelineStateInfo& state, + const DxvkRenderPassFormat& format) const; + private: Rc m_vkd; @@ -236,6 +243,8 @@ namespace dxvk { // List of pipeline instances, shared between threads alignas(CACHE_LINE_SIZE) dxvk::mutex m_mutex; + alignas(CACHE_LINE_SIZE) + dxvk::mutex m_mutex2; sync::List m_pipelines; DxvkGraphicsPipelineInstance* createInstance( @@ -264,10 +273,6 @@ namespace dxvk { const DxvkGraphicsPipelineStateInfo& state, bool trusted) const; - void writePipelineStateToCache( - const DxvkGraphicsPipelineStateInfo& state, - const DxvkRenderPassFormat& format) const; - void logPipelineState( LogLevel level, const DxvkGraphicsPipelineStateInfo& state) const; diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h index b3b8a58e..922e1f21 100644 --- a/src/dxvk/dxvk_image.h +++ b/src/dxvk/dxvk_image.h @@ -552,6 +552,37 @@ namespace dxvk { view->imageSubresources()); } + /** + * \brief Sets render target usage frame number + * + * The image view will track internally when + * it was last used as a render target. This + * info is used for async shader compilation. + * \param [in] frameId Frame number + */ + void setRtBindingFrameId(uint32_t frameId) { + if (frameId != m_rtBindingFrameId) { + if (frameId == m_rtBindingFrameId + 1) + m_rtBindingFrameCount += 1; + else + m_rtBindingFrameCount = 0; + + m_rtBindingFrameId = frameId; + } + } + + /** + * \brief Checks for async pipeline compatibility + * + * Asynchronous pipeline compilation may be enabled if the + * render target has been drawn to in the previous frames. + * \param [in] frameId Current frame ID + * \returns \c true if async compilation is supported + */ + bool getRtBindingAsyncCompilationCompat() const { + return m_rtBindingFrameCount >= 5; + } + private: Rc m_vkd; @@ -564,6 +595,9 @@ namespace dxvk { static std::atomic s_cookie; + uint32_t m_rtBindingFrameId = 0; + uint32_t m_rtBindingFrameCount = 0; + void createView(VkImageViewType type, uint32_t numLayers); }; diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index ea82eedc..46f9ff48 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -9,6 +9,8 @@ namespace dxvk { useRawSsbo = config.getOption("dxvk.useRawSsbo", Tristate::Auto); shrinkNvidiaHvvHeap = config.getOption("dxvk.shrinkNvidiaHvvHeap", Tristate::Auto); hud = config.getOption("dxvk.hud", ""); + enableAsync = config.getOption ("dxvk.enableAsync", false); + numAsyncThreads = config.getOption ("dxvk.numAsyncThreads", 0); } } diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index 992ec764..6398e9d1 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -18,6 +18,13 @@ namespace dxvk { /// when using the state cache int32_t numCompilerThreads; + // Enable async pipelines + bool enableAsync; + + /// Number of compiler threads + /// when using async pipelines + int32_t numAsyncThreads; + /// Shader-related options Tristate useRawSsbo; diff --git a/src/dxvk/dxvk_pipecompiler.cpp b/src/dxvk/dxvk_pipecompiler.cpp new file mode 100644 index 00000000..40218acd --- /dev/null +++ b/src/dxvk/dxvk_pipecompiler.cpp @@ -0,0 +1,76 @@ +#include "dxvk_device.h" +#include "dxvk_graphics.h" +#include "dxvk_pipecompiler.h" + +namespace dxvk { + + DxvkPipelineCompiler::DxvkPipelineCompiler(const DxvkDevice* device) { + uint32_t numCpuCores = dxvk::thread::hardware_concurrency(); + uint32_t numWorkers = ((std::max(1u, numCpuCores) - 1) * 5) / 7; + + if (numWorkers < 1) numWorkers = 1; + if (numWorkers > 32) numWorkers = 32; + + if (device->config().numAsyncThreads > 0) + numWorkers = device->config().numAsyncThreads; + + Logger::info(str::format("DXVK: Using ", numWorkers, " async compiler threads")); + + // Start the compiler threads + m_compilerThreads.resize(numWorkers); + + for (uint32_t i = 0; i < numWorkers; i++) { + m_compilerThreads.at(i) = dxvk::thread( + [this] { this->runCompilerThread(); }); + } + } + + + DxvkPipelineCompiler::~DxvkPipelineCompiler() { + { std::lock_guard lock(m_compilerLock); + m_compilerStop.store(true); + } + + m_compilerCond.notify_all(); + for (auto& thread : m_compilerThreads) + thread.join(); + } + + + void DxvkPipelineCompiler::queueCompilation( + DxvkGraphicsPipeline* pipeline, + const DxvkGraphicsPipelineStateInfo& state, + const DxvkRenderPass* renderPass) { + std::lock_guard lock(m_compilerLock); + m_compilerQueue.push({ pipeline, state, renderPass }); + m_compilerCond.notify_one(); + } + + + void DxvkPipelineCompiler::runCompilerThread() { + env::setThreadName("dxvk-pcompiler"); + + while (!m_compilerStop.load()) { + PipelineEntry entry; + + { std::unique_lock lock(m_compilerLock); + + m_compilerCond.wait(lock, [this] { + return m_compilerStop.load() + || m_compilerQueue.size() != 0; + }); + + if (m_compilerQueue.size() != 0) { + entry = std::move(m_compilerQueue.front()); + m_compilerQueue.pop(); + } + } + + if (entry.pipeline != nullptr && entry.renderPass != nullptr && + entry.pipeline->compilePipeline(entry.state, entry.renderPass)) { + entry.pipeline->writePipelineStateToCache(entry.state, entry.renderPass->format()); + } + } + } + +} diff --git a/src/dxvk/dxvk_pipecompiler.h b/src/dxvk/dxvk_pipecompiler.h new file mode 100644 index 00000000..d7fcc2cf --- /dev/null +++ b/src/dxvk/dxvk_pipecompiler.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include +#include + +#include "../util/thread.h" +#include "dxvk_include.h" + +namespace dxvk { + + class DxvkDevice; + class DxvkGraphicsPipeline; + class DxvkGraphicsPipelineStateInfo; + + /** + * \brief Pipeline compiler + * + * Asynchronous pipeline compiler + */ + class DxvkPipelineCompiler : public RcObject { + + public: + + DxvkPipelineCompiler(const DxvkDevice* device); + ~DxvkPipelineCompiler(); + + /** + * \brief Compiles a pipeline asynchronously + * + * This should be used to compile graphics + * pipeline instances asynchronously. + * \param [in] pipeline The pipeline object + * \param [in] state The pipeline state info object + * \param [in] renderPass + */ + void queueCompilation( + DxvkGraphicsPipeline* pipeline, + const DxvkGraphicsPipelineStateInfo& state, + const DxvkRenderPass* renderPass); + + private: + + struct PipelineEntry { + DxvkGraphicsPipeline* pipeline = nullptr; + DxvkGraphicsPipelineStateInfo state; + const DxvkRenderPass* renderPass = nullptr; + }; + + std::atomic m_compilerStop = { false }; + std::mutex m_compilerLock; + std::condition_variable m_compilerCond; + std::queue m_compilerQueue; + std::vector m_compilerThreads; + + void runCompilerThread(); + + }; + +} diff --git a/src/dxvk/dxvk_pipemanager.cpp b/src/dxvk/dxvk_pipemanager.cpp index 0e112e51..afc930ef 100644 --- a/src/dxvk/dxvk_pipemanager.cpp +++ b/src/dxvk/dxvk_pipemanager.cpp @@ -9,7 +9,11 @@ namespace dxvk { DxvkRenderPassPool* passManager) : m_device (device), m_cache (new DxvkPipelineCache(device->vkd())) { + std::string useAsync = env::getEnvVar("DXVK_ASYNC"); std::string useStateCache = env::getEnvVar("DXVK_STATE_CACHE"); + + if (useAsync == "1" || device->config().enableAsync) + m_compiler = new DxvkPipelineCompiler(device); if (useStateCache != "0" && device->config().enableStateCache) m_stateCache = new DxvkStateCache(device, this, passManager); diff --git a/src/dxvk/dxvk_pipemanager.h b/src/dxvk/dxvk_pipemanager.h index f32a8913..533042ca 100644 --- a/src/dxvk/dxvk_pipemanager.h +++ b/src/dxvk/dxvk_pipemanager.h @@ -6,6 +6,7 @@ #include "dxvk_compute.h" #include "dxvk_graphics.h" +#include "dxvk_pipecompiler.h" namespace dxvk { @@ -100,6 +101,7 @@ namespace dxvk { const DxvkDevice* m_device; Rc m_cache; Rc m_stateCache; + Rc m_compiler; std::atomic m_numComputePipelines = { 0 }; std::atomic m_numGraphicsPipelines = { 0 }; diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 3153c8cf..9cfd681b 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -90,6 +90,7 @@ dxvk_src = files([ 'dxvk_openxr.cpp', 'dxvk_options.cpp', 'dxvk_pipecache.cpp', + 'dxvk_pipecompiler.cpp', 'dxvk_pipelayout.cpp', 'dxvk_pipemanager.cpp', 'dxvk_queue.cpp',