d3d12: Change memory barrier implementation
Normally, we insert barriers automatically based on bind points. If a resource is bound as both a shader image/SSBO (UAV) and other read-only bind points, we'll choose the UAV state. This behavior is modified by the memory barrier API - if the memory barrier indicates that a read is desired, previously, we would've just not put any barriers to UAV states. Now, we just use that state to disambiguate between read states vs write states for a resource that's bound as both. This turns out to be problematic in some circumstances, e.g.: * PBO download, which does a memory barrier afterwards, since it uses shader images to write, and those might be accessed immediately as a texture afterwards. * A user-issued draw that writes to a SSBO. This should indicate in the state tracker that the SSBO is in the UAV state. * Some other op, like a copy, that writes to the SSBO. Before, the "pending memory barrier" state would cause the user draw to not be put in the UAV state, which means that the copy wouldn't issue a barrier *out* of the UAV state, resulting in the copy being unsynchronized. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26104>
This commit is contained in:
@@ -333,6 +333,7 @@ enum d3d12_transition_flags {
|
|||||||
D3D12_TRANSITION_FLAG_NONE = 0,
|
D3D12_TRANSITION_FLAG_NONE = 0,
|
||||||
D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1,
|
D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS = 1,
|
||||||
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2,
|
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE = 2,
|
||||||
|
D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -326,23 +326,23 @@ fill_image_descriptors(struct d3d12_context *ctx,
|
|||||||
unreachable("Unexpected image view dimension");
|
unreachable("Unexpected image view dimension");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!batch->pending_memory_barrier) {
|
d3d12_transition_flags transition_flags = (d3d12_transition_flags)(D3D12_TRANSITION_FLAG_ACCUMULATE_STATE |
|
||||||
if (res->base.b.target == PIPE_BUFFER) {
|
(batch->pending_memory_barrier ? D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER : 0));
|
||||||
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
|
if (res->base.b.target == PIPE_BUFFER) {
|
||||||
} else {
|
d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, transition_flags);
|
||||||
unsigned transition_first_layer = view->u.tex.first_layer;
|
} else {
|
||||||
unsigned transition_array_size = array_size;
|
unsigned transition_first_layer = view->u.tex.first_layer;
|
||||||
if (res->base.b.target == PIPE_TEXTURE_3D) {
|
unsigned transition_array_size = array_size;
|
||||||
transition_first_layer = 0;
|
if (res->base.b.target == PIPE_TEXTURE_3D) {
|
||||||
transition_array_size = 0;
|
transition_first_layer = 0;
|
||||||
}
|
transition_array_size = 0;
|
||||||
d3d12_transition_subresources_state(ctx, res,
|
|
||||||
view->u.tex.level, 1,
|
|
||||||
transition_first_layer, transition_array_size,
|
|
||||||
0, 1,
|
|
||||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
|
||||||
D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
|
|
||||||
}
|
}
|
||||||
|
d3d12_transition_subresources_state(ctx, res,
|
||||||
|
view->u.tex.level, 1,
|
||||||
|
transition_first_layer, transition_array_size,
|
||||||
|
0, 1,
|
||||||
|
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||||
|
transition_flags);
|
||||||
}
|
}
|
||||||
d3d12_batch_reference_resource(batch, res, true);
|
d3d12_batch_reference_resource(batch, res, true);
|
||||||
|
|
||||||
|
@@ -70,17 +70,20 @@ update_subresource_state(D3D12_RESOURCE_STATES *existing_state, D3D12_RESOURCE_S
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state)
|
set_desired_resource_state(d3d12_desired_resource_state *state_obj, D3D12_RESOURCE_STATES state, bool pending_memory_barrier)
|
||||||
{
|
{
|
||||||
state_obj->homogenous = true;
|
state_obj->homogenous = true;
|
||||||
|
state_obj->pending_memory_barrier |= pending_memory_barrier;
|
||||||
update_subresource_state(&state_obj->subresource_states[0], state);
|
update_subresource_state(&state_obj->subresource_states[0], state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
|
set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
|
||||||
uint32_t subresource,
|
uint32_t subresource,
|
||||||
D3D12_RESOURCE_STATES state)
|
D3D12_RESOURCE_STATES state,
|
||||||
|
bool pending_memory_barrier)
|
||||||
{
|
{
|
||||||
|
state_obj->pending_memory_barrier |= pending_memory_barrier;
|
||||||
if (state_obj->homogenous && state_obj->num_subresources > 1) {
|
if (state_obj->homogenous && state_obj->num_subresources > 1) {
|
||||||
for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
|
for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
|
||||||
state_obj->subresource_states[i] = state_obj->subresource_states[0];
|
state_obj->subresource_states[i] = state_obj->subresource_states[0];
|
||||||
@@ -94,7 +97,8 @@ set_desired_subresource_state(d3d12_desired_resource_state *state_obj,
|
|||||||
static void
|
static void
|
||||||
reset_desired_resource_state(d3d12_desired_resource_state *state_obj)
|
reset_desired_resource_state(d3d12_desired_resource_state *state_obj)
|
||||||
{
|
{
|
||||||
set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE);
|
set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE, false);
|
||||||
|
state_obj->pending_memory_barrier = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@@ -389,7 +393,8 @@ append_barrier(struct d3d12_context *ctx,
|
|||||||
d3d12_context_state_table_entry *state_entry,
|
d3d12_context_state_table_entry *state_entry,
|
||||||
D3D12_RESOURCE_STATES after,
|
D3D12_RESOURCE_STATES after,
|
||||||
UINT subresource,
|
UINT subresource,
|
||||||
bool is_implicit_dispatch)
|
bool is_implicit_dispatch,
|
||||||
|
bool pending_memory_barrier)
|
||||||
{
|
{
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res;
|
ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res;
|
||||||
@@ -400,13 +405,18 @@ append_barrier(struct d3d12_context *ctx,
|
|||||||
transition_desc.Transition.Subresource = subresource;
|
transition_desc.Transition.Subresource = subresource;
|
||||||
|
|
||||||
// This is a transition into a state that is both write and non-write.
|
// This is a transition into a state that is both write and non-write.
|
||||||
// This is invalid according to D3D12. We're venturing into undefined behavior
|
// This is invalid according to D3D12. If there's a pending memory barrier that
|
||||||
// land, but let's just pick the write state.
|
// indicates we'll be reading from such a resource, then let the read state win.
|
||||||
|
// Otherwise, pick the write state.
|
||||||
if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) != 0) {
|
if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) != 0) {
|
||||||
after &= RESOURCE_STATE_ALL_WRITE_BITS;
|
if (pending_memory_barrier) {
|
||||||
|
after &= ~RESOURCE_STATE_ALL_WRITE_BITS;
|
||||||
|
} else {
|
||||||
|
after &= RESOURCE_STATE_ALL_WRITE_BITS;
|
||||||
|
|
||||||
// For now, this is the only way I've seen where this can happen.
|
// For now, this is the only way I've seen where this can happen.
|
||||||
assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && current_state->homogenous) ||
|
assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && current_state->homogenous) ||
|
||||||
@@ -466,8 +476,9 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
|
|||||||
d3d12_invalidate_context_bindings(ctx, res);
|
d3d12_invalidate_context_bindings(ctx, res);
|
||||||
|
|
||||||
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
|
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
|
||||||
|
bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
|
||||||
if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
|
if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
|
||||||
set_desired_resource_state(&state_entry->desired, state);
|
set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier);
|
||||||
|
|
||||||
if (ctx->id != D3D12_CONTEXT_NO_ID) {
|
if (ctx->id != D3D12_CONTEXT_NO_ID) {
|
||||||
if ((res->bo->local_needs_resolve_state & (1 << ctx->id)) == 0) {
|
if ((res->bo->local_needs_resolve_state & (1 << ctx->id)) == 0) {
|
||||||
@@ -479,10 +490,10 @@ d3d12_transition_resource_state(struct d3d12_context *ctx,
|
|||||||
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
|
_mesa_set_add(ctx->pending_barriers_bos, res->bo);
|
||||||
|
|
||||||
} else if (state_entry->batch_end.homogenous) {
|
} else if (state_entry->batch_end.homogenous) {
|
||||||
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
|
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
|
||||||
} else {
|
} else {
|
||||||
for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
|
for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
|
||||||
append_barrier(ctx, res->bo, state_entry, state, i, false);
|
append_barrier(ctx, res->bo, state_entry, state, i, false, pending_memory_barrier);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -502,11 +513,12 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
|
|||||||
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
|
d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx, res->bo);
|
||||||
bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
|
bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
|
||||||
bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
|
bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
|
||||||
|
bool pending_memory_barrier = (flags & D3D12_TRANSITION_FLAG_PENDING_MEMORY_BARRIER) != 0;
|
||||||
|
|
||||||
if (is_whole_resource && is_accumulate) {
|
if (is_whole_resource && is_accumulate) {
|
||||||
set_desired_resource_state(&state_entry->desired, state);
|
set_desired_resource_state(&state_entry->desired, state, pending_memory_barrier);
|
||||||
} else if (is_whole_resource && state_entry->batch_end.homogenous) {
|
} else if (is_whole_resource && state_entry->batch_end.homogenous) {
|
||||||
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
|
append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false, pending_memory_barrier);
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t l = 0; l < num_levels; l++) {
|
for (uint32_t l = 0; l < num_levels; l++) {
|
||||||
const uint32_t level = start_level + l;
|
const uint32_t level = start_level + l;
|
||||||
@@ -518,9 +530,9 @@ d3d12_transition_subresources_state(struct d3d12_context *ctx,
|
|||||||
level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size);
|
level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size);
|
||||||
assert(subres_id < state_entry->desired.num_subresources);
|
assert(subres_id < state_entry->desired.num_subresources);
|
||||||
if (is_accumulate)
|
if (is_accumulate)
|
||||||
set_desired_subresource_state(&state_entry->desired, subres_id, state);
|
set_desired_subresource_state(&state_entry->desired, subres_id, state, pending_memory_barrier);
|
||||||
else
|
else
|
||||||
append_barrier(ctx, res->bo, state_entry, state, subres_id, false);
|
append_barrier(ctx, res->bo, state_entry, state, subres_id, false, pending_memory_barrier);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -558,7 +570,7 @@ static void apply_resource_state(struct d3d12_context *ctx, bool is_implicit_dis
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch);
|
append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch, state_entry->desired.pending_memory_barrier);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update destination states.
|
// Update destination states.
|
||||||
|
@@ -72,6 +72,7 @@ struct d3d12_resource_state
|
|||||||
struct d3d12_desired_resource_state
|
struct d3d12_desired_resource_state
|
||||||
{
|
{
|
||||||
bool homogenous;
|
bool homogenous;
|
||||||
|
bool pending_memory_barrier;
|
||||||
uint32_t num_subresources;
|
uint32_t num_subresources;
|
||||||
D3D12_RESOURCE_STATES* subresource_states;
|
D3D12_RESOURCE_STATES* subresource_states;
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user