d3d12: Change memory barrier implementation

Normally, we insert barriers automatically based on bind points. If
a resource is bound as both a shader image/SSBO (UAV) and other read-only
bind points, we'll choose the UAV state. This behavior is modified by
the memory barrier API - if the memory barrier indicates that a read
is desired, previously, we would've just not put any barriers to UAV
states. Now, we just use that state to disambiguate between read states
vs write states for a resource that's bound as both.

This turns out to be problematic in some circumstances, e.g.:
* PBO download, which does a memory barrier afterwards, since it uses
  shader images to write, and those might be accessed immediately as
  a texture afterwards.
* A user-issued draw that writes to a SSBO. This should indicate in the
  state tracker that the SSBO is in the UAV state.
* Some other op, like a copy, that writes to the SSBO.

Before, the "pending memory barrier" state would cause the user draw to
not be put in the UAV state, which means that the copy wouldn't issue a
barrier *out* of the UAV state, resulting in the copy being unsynchronized.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26104>
This commit is contained in:
Jesse Natalie
2023-11-07 13:08:04 -08:00
committed by Marge Bot
parent 3e36be7afe
commit 56589cb02c
4 changed files with 48 additions and 34 deletions

View File

@@ -333,6 +333,7 @@ enum d3d12_transition_flags {
D3D12_TRANSITION_FLAG_NONE = 0,
D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1,
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2,
D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER = 4,
};
void

View File

@@ -326,23 +326,23 @@ fill_image_descriptors(struct d3d12_context *ctx,
unreachable("Unexpected image view dimension");
}
if (!batch->pending_memory_barrier) {
if (res->base.b.target == PIPE_BUFFER) {
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
} else {
unsigned transition_first_layer = view->u.tex.first_layer;
unsigned transition_array_size = array_size;
if (res->base.b.target == PIPE_TEXTURE_3D) {
transition_first_layer = 0;
transition_array_size = 0;
}
d3d12_transition_subresources_state(ctx, res,
view->u.tex.level, 1,
transition_first_layer, transition_array_size,
0, 1,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
d3d12_transition_flags transition_flags = (d3d12_transition_flags)(D3D12_TRANSITION_FLAG_ACCUMULATE_STATE |
(batch->pending_memory_barrier ? D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER : 0));
if (res->base.b.target == PIPE_BUFFER) {
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, transition_flags);
} else {
unsigned transition_first_layer = view->u.tex.first_layer;
unsigned transition_array_size = array_size;
if (res->base.b.target == PIPE_TEXTURE_3D) {
transition_first_layer = 0;
transition_array_size = 0;
}
d3d12_transition_subresources_state(ctx, res,
view->u.tex.level, 1,
transition_first_layer, transition_array_size,
0, 1,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
transition_flags);
}
d3d12_batch_reference_resource(batch, res, true);

View File

@@ -70,17 +70,20 @@ update_subresource_state(D3D12_RESOURCE_STATES *existing_state, D3D12_RESOURCE_S
}
static void
set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state)
set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state, bool pending_memory_barrier)
{
state_obj->homogenous = true;
state_obj->pending_memory_barrier |= pending_memory_barrier;
update_subresource_state(&state_obj->subresource_states[0], state);
}
static void
set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
uint32_t subresource,
D3D12_RESOURCE_STATES state)
uint32_t subresource,
D3D12_RESOURCE_STATES state,
bool pending_memory_barrier)
{
state_obj->pending_memory_barrier |= pending_memory_barrier;
if (state_obj->homogenous && state_obj->num_subresources > 1) {
for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
state_obj->subresource_states[i] = state_obj->subresource_states[0];
@@ -94,7 +97,8 @@ set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
static void
reset_desired_resource_state(d3d12_desired_resource_state *state_obj)
{
set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE);
set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE, false);
state_obj->pending_memory_barrier = false;
}
bool
@@ -389,7 +393,8 @@ append_barrier(struct d3d12_context *ctx,
d3d12_context_state_table_entry *state_entry,
D3D12_RESOURCE_STATES after,
UINT subresource,
bool is_implicit_dispatch)
bool is_implicit_dispatch,
bool pending_memory_barrier)
{
uint64_t offset;
ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res;
@@ -400,13 +405,18 @@ append_barrier(struct d3d12_context *ctx,
transition_desc.Transition.Subresource = subresource;
// This is a transition into a state that is both write and non-write.
// This is invalid according to D3D12. We're venturing into undefined behavior
// land, but let's just pick the write state.
// This is invalid according to D3D12. If there's a pending memory barrier that
// indicates we'll be reading from such a resource, then let the read state win.
// Otherwise, pick the write state.
if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) != 0) {
after &= RESOURCE_STATE_ALL_WRITE_BITS;
if (pending_memory_barrier) {
after &= ~RESOURCE_STATE_ALL_WRITE_BITS;
} else {
after &= RESOURCE_STATE_ALL_WRITE_BITS;
// For now, this is the only way I've seen where this can happen.
assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
// For now, this is the only way I've seen where this can happen.
assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
}
assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && current_state->homogenous) ||
@@ -466,8 +476,9 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
d3d12_invalidate_context_bindings(ctx, res);
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
set_desired_resource_state(&state_entry->desired, state);
set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier);
if (ctx->id != D3D12_CONTEXT_NO_ID) {
if ((res->bo->local_needs_resolve_state & (1 << ctx->id)) == 0) {
@@ -479,10 +490,10 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
} else if (state_entry->batch_end.homogenous) {
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
} else {
for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
append_barrier(ctx, res->bo, state_entry, state, i, false);
append_barrier(ctx, res->bo, state_entry, state, i, false, pending_memory_barrier);
}
}
}
@@ -502,11 +513,12 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
if (is_whole_resource && is_accumulate) {
set_desired_resource_state(&state_entry->desired, state);
set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier);
} else if (is_whole_resource && state_entry->batch_end.homogenous) {
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
} else {
for (uint32_t l = 0; l < num_levels; l++) {
const uint32_t level = start_level + l;
@@ -518,9 +530,9 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size);
assert(subres_id < state_entry->desired.num_subresources);
if (is_accumulate)
set_desired_subresource_state(&state_entry->desired, subres_id, state);
set_desired_subresource_state(&state_entry->desired, subres_id, state, pending_memory_barrier);
else
append_barrier(ctx, res->bo, state_entry, state, subres_id, false);
append_barrier(ctx, res->bo, state_entry, state, subres_id, false, pending_memory_barrier);
}
}
}
@@ -558,7 +570,7 @@ static void apply_resource_state(struct d3d12_context *ctx, bool is_implicit_dis
continue;
}
append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch);
append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch, state_entry->desired.pending_memory_barrier);
}
// Update destination states.

View File

@@ -72,6 +72,7 @@ struct d3d12_resource_state
struct d3d12_desired_resource_state
{
bool homogenous;
bool pending_memory_barrier;
uint32_t num_subresources;
D3D12_RESOURCE_STATES* subresource_states;
};