From 1d43e752289fbd8166c733cec49e16d6e36b10e4 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Wed, 26 Jan 2022 08:31:33 -0800 Subject: [PATCH] d3d12: Add UAV barriers for UAVs that are being used by compute transforms If an indirect arg buffer is being produced by a compute shader, then when we go to consume it as an SSBO in a compute transform pass, we need to insert a UAV barrier to prevent the two dispatches from overlapping. For app dispatches, this is the app's responsibility via explicit barrier APIs, and if they don't, then they're allowed to overlap. Reviewed-by: Sil Vilerino Part-of: --- src/gallium/drivers/d3d12/d3d12_blit.cpp | 6 +++--- .../drivers/d3d12/d3d12_compute_transforms.cpp | 1 + src/gallium/drivers/d3d12/d3d12_context.cpp | 10 +++++----- src/gallium/drivers/d3d12/d3d12_context.h | 2 +- src/gallium/drivers/d3d12/d3d12_draw.cpp | 4 ++-- src/gallium/drivers/d3d12/d3d12_query.cpp | 6 +++--- src/gallium/drivers/d3d12/d3d12_resource.cpp | 4 ++-- .../D3D12ResourceState.cpp | 17 +++++++++++++---- .../resource_state_manager/D3D12ResourceState.h | 5 +++-- 9 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_blit.cpp b/src/gallium/drivers/d3d12/d3d12_blit.cpp index 49e9b9e974e..c54ea547f97 100644 --- a/src/gallium/drivers/d3d12/d3d12_blit.cpp +++ b/src/gallium/drivers/d3d12/d3d12_blit.cpp @@ -123,7 +123,7 @@ blit_resolve(struct d3d12_context *ctx, const struct pipe_blit_info *info) D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); d3d12_batch_reference_resource(batch, src, false); d3d12_batch_reference_resource(batch, dst, true); @@ -432,7 +432,7 @@ d3d12_direct_copy(struct d3d12_context *ctx, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); d3d12_batch_reference_resource(batch, src, false); d3d12_batch_reference_resource(batch, dst, true); @@ -826,7 +826,7 @@ blit_resolve_stencil(struct d3d12_context *ctx, 0, 1, 0, 1, 1, 1, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); struct d3d12_batch *batch = d3d12_current_batch(ctx); d3d12_batch_reference_resource(batch, d3d12_resource(tmp), false); diff --git a/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp b/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp index 19a97bcbf2b..d93e7207dab 100644 --- a/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compute_transforms.cpp @@ -141,6 +141,7 @@ d3d12_get_compute_transform(struct d3d12_context *ctx, const d3d12_compute_trans return NULL; } + data->shader->is_variant = true; entry = _mesa_hash_table_insert(ctx->compute_transform_cache, &data->key, data); assert(entry); } diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp index 20e7c629c31..72e685a9228 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.cpp +++ b/src/gallium/drivers/d3d12/d3d12_context.cpp @@ -1906,9 +1906,9 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, } void -d3d12_apply_resource_states(struct d3d12_context *ctx) +d3d12_apply_resource_states(struct d3d12_context *ctx, bool is_implicit_dispatch) { - ctx->resource_state_manager->ApplyAllResourceTransitions(ctx->cmdlist, ctx->fence_value); + ctx->resource_state_manager->ApplyAllResourceTransitions(ctx->cmdlist, ctx->fence_value, is_implicit_dispatch); } static void @@ -1929,7 +1929,7 @@ d3d12_clear_render_target(struct pipe_context *pctx, d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); enum pipe_format format = psurf->texture->format; float clear_color[4]; @@ -1988,7 +1988,7 @@ d3d12_clear_depth_stencil(struct pipe_context *pctx, d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); D3D12_RECT rect = { (int)dstx, (int)dsty, (int)dstx + (int)width, @@ -2057,7 +2057,7 @@ d3d12_flush_resource(struct pipe_context *pctx, d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COMMON, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); } static void diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h index 0b94a863ef2..4913e05c471 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.h +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -325,7 +325,7 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx, d3d12_bind_invalidate_option bind_invalidate); void -d3d12_apply_resource_states(struct d3d12_context* ctx); +d3d12_apply_resource_states(struct d3d12_context* ctx, bool is_implicit_dispatch); void d3d12_draw_vbo(struct pipe_context *pctx, diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index e07ea2ee42e..78f8fab7fa7 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -1174,7 +1174,7 @@ d3d12_draw_vbo(struct pipe_context *pctx, assert(!indirect->count_from_stream_output); } - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); for (unsigned i = 0; i < num_root_descriptors; ++i) ctx->cmdlist->SetGraphicsRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]); @@ -1278,7 +1278,7 @@ d3d12_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) int root_desc_indices[MAX_DESCRIPTOR_TABLES]; unsigned num_root_descriptors = update_compute_root_parameters(ctx, info, root_desc_tables, root_desc_indices); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, ctx->compute_state->is_variant); for (unsigned i = 0; i < num_root_descriptors; ++i) ctx->cmdlist->SetComputeRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]); diff --git a/src/gallium/drivers/d3d12/d3d12_query.cpp b/src/gallium/drivers/d3d12/d3d12_query.cpp index 714efeba1b2..ecebde61d85 100644 --- a/src/gallium/drivers/d3d12/d3d12_query.cpp +++ b/src/gallium/drivers/d3d12/d3d12_query.cpp @@ -377,7 +377,7 @@ end_query(struct d3d12_context *ctx, struct d3d12_query *q) offset += q->buffer_offset + resolve_index * q->query_size; ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index); d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index, resolve_count, d3d12_res, offset); @@ -494,13 +494,13 @@ d3d12_render_condition(struct pipe_context *pctx, struct d3d12_resource *res = (struct d3d12_resource *)query->buffer; d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL); d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_NONE); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); ctx->cmdlist->CopyBufferRegion(d3d12_resource_resource(query->predicate), 0, d3d12_resource_resource(res), 0, sizeof(uint64_t)); d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_BIND_INVALIDATE_NONE); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); ctx->current_predication = query->predicate; ctx->predication_condition = condition; diff --git a/src/gallium/drivers/d3d12/d3d12_resource.cpp b/src/gallium/drivers/d3d12/d3d12_resource.cpp index b067d11fd11..ed6e55491f9 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource.cpp +++ b/src/gallium/drivers/d3d12/d3d12_resource.cpp @@ -699,7 +699,7 @@ copy_texture_region(struct d3d12_context *ctx, d3d12_batch_reference_resource(batch, info.dst, true); d3d12_transition_resource_state(ctx, info.src, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL); d3d12_transition_resource_state(ctx, info.dst, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); ctx->cmdlist->CopyTextureRegion(&info.dst_loc, info.dst_x, info.dst_y, info.dst_z, &info.src_loc, info.src_box); } @@ -890,7 +890,7 @@ transfer_buf_to_buf(struct d3d12_context *ctx, assert(src_d3d12 != dst_d3d12); d3d12_transition_resource_state(ctx, src, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_BIND_INVALIDATE_FULL); d3d12_transition_resource_state(ctx, dst, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_BIND_INVALIDATE_FULL); - d3d12_apply_resource_states(ctx); + d3d12_apply_resource_states(ctx, false); ctx->cmdlist->CopyBufferRegion(dst_d3d12, dst_offset, src_d3d12, src_offset, width); diff --git a/src/microsoft/resource_state_manager/D3D12ResourceState.cpp b/src/microsoft/resource_state_manager/D3D12ResourceState.cpp index 5acb948c9bb..5ad24c00965 100644 --- a/src/microsoft/resource_state_manager/D3D12ResourceState.cpp +++ b/src/microsoft/resource_state_manager/D3D12ResourceState.cpp @@ -191,8 +191,9 @@ void ResourceStateManager::TransitionSubresource(TransitionableResourceState& Re } //---------------------------------------------------------------------------------------------------------------------------------- -void ResourceStateManager::ApplyResourceTransitionsPreamble() +void ResourceStateManager::ApplyResourceTransitionsPreamble(bool IsImplicitDispatch) { + m_IsImplicitDispatch = IsImplicitDispatch; m_vResourceBarriers.clear(); } @@ -335,7 +336,15 @@ void ResourceStateManager::ProcessTransitioningSubresourceExplicit( if ( D3D12_RESOURCE_STATE_COMMON == StateIfPromoted ) { - if (TransitionRequired(CurrentLogicalState.State, /*inout*/ after)) + if (CurrentLogicalState.State == D3D12_RESOURCE_STATE_UNORDERED_ACCESS && + after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS && + m_IsImplicitDispatch) + { + D3D12_RESOURCE_BARRIER UAVBarrier = { D3D12_RESOURCE_BARRIER_TYPE_UAV }; + UAVBarrier.UAV.pResource = TransitionDesc.Transition.pResource; + m_vResourceBarriers.push_back(UAVBarrier); + } + else if (TransitionRequired(CurrentLogicalState.State, /*inout*/ after)) { // Insert a single concrete barrier (for non-simultaneous access resources). TransitionDesc.Transition.StateBefore = D3D12_RESOURCE_STATES(CurrentLogicalState.State); @@ -388,9 +397,9 @@ void ResourceStateManager::TransitionSubresource(TransitionableResourceState* pR } //---------------------------------------------------------------------------------------------------------------------------------- -void ResourceStateManager::ApplyAllResourceTransitions(ID3D12GraphicsCommandList *pCommandList, UINT64 ExecutionId) +void ResourceStateManager::ApplyAllResourceTransitions(ID3D12GraphicsCommandList *pCommandList, UINT64 ExecutionId, bool IsImplicitDispatch) { - ApplyResourceTransitionsPreamble(); + ApplyResourceTransitionsPreamble(IsImplicitDispatch); ForEachTransitioningResource([=](TransitionableResourceState& ResourceBase) { diff --git a/src/microsoft/resource_state_manager/D3D12ResourceState.h b/src/microsoft/resource_state_manager/D3D12ResourceState.h index 0c3b49c7cf7..76657b14562 100644 --- a/src/microsoft/resource_state_manager/D3D12ResourceState.h +++ b/src/microsoft/resource_state_manager/D3D12ResourceState.h @@ -211,6 +211,7 @@ protected: struct list_head m_TransitionListHead; std::vector m_vResourceBarriers; + bool m_IsImplicitDispatch; public: ResourceStateManager(); @@ -234,7 +235,7 @@ public: D3D12_RESOURCE_STATES State); // Submit all barriers and queue sync. - void ApplyAllResourceTransitions(ID3D12GraphicsCommandList *pCommandList, UINT64 ExecutionId); + void ApplyAllResourceTransitions(ID3D12GraphicsCommandList *pCommandList, UINT64 ExecutionId, bool IsImplicitDispatch); private: // These methods set the destination state of the resource/subresources and ensure it's in the transition list. @@ -245,7 +246,7 @@ private: D3D12_RESOURCE_STATES State); // Clear out any state from previous iterations. - void ApplyResourceTransitionsPreamble(); + void ApplyResourceTransitionsPreamble(bool IsImplicitDispatch); // What to do with the resource, in the context of the transition list, after processing it. enum class TransitionResult