mesa: Make atomic lowering put atomics above SSBOs.

Gallium arbitrarily (it seems) put atomics below SSBOs, resulting in a
bunch of extra index management, and surprising shader code when you would
see your SSBOs up at index 16.  It makes a lot more sense to see atomics
converted to SSBOs appear as magic high numbers.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3240>
This commit is contained in:
Eric Anholt
2019-12-20 09:02:07 -08:00
parent 2dc2055157
commit 10dc4ac4c5
8 changed files with 48 additions and 86 deletions

View File

@@ -32,17 +32,13 @@
#endif #endif
/* /*
* Remap atomic counters to SSBOs. Atomic counters get remapped to * Remap atomic counters to SSBOs, starting from the passed in ssbo_offset.
* SSBO binding points [0..ssbo_offset) and the original SSBOs are
* remapped to [ssbo_offset..n) (mostly to align with what mesa/st
* does.
*/ */
static bool static bool
lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b) lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b)
{ {
nir_intrinsic_op op; nir_intrinsic_op op;
int idx_src;
b->cursor = nir_before_instr(&instr->instr); b->cursor = nir_before_instr(&instr->instr);
@@ -54,32 +50,6 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b)
instr->intrinsic = nir_intrinsic_memory_barrier_buffer; instr->intrinsic = nir_intrinsic_memory_barrier_buffer;
return true; return true;
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_ssbo_atomic_fadd:
case nir_intrinsic_ssbo_atomic_fmin:
case nir_intrinsic_ssbo_atomic_fmax:
case nir_intrinsic_ssbo_atomic_fcomp_swap:
case nir_intrinsic_store_ssbo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_get_buffer_size:
/* easy case, keep same opcode and just remap SSBO buffer index: */
op = instr->intrinsic;
idx_src = (op == nir_intrinsic_store_ssbo) ? 1 : 0;
nir_ssa_def *old_idx = nir_ssa_for_src(b, instr->src[idx_src], 1);
nir_ssa_def *new_idx = nir_iadd(b, old_idx, nir_imm_int(b, ssbo_offset));
nir_instr_rewrite_src(&instr->instr,
&instr->src[idx_src],
nir_src_for_ssa(new_idx));
return true;
case nir_intrinsic_atomic_counter_inc: case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_add: case nir_intrinsic_atomic_counter_add:
case nir_intrinsic_atomic_counter_pre_dec: case nir_intrinsic_atomic_counter_pre_dec:
@@ -115,7 +85,7 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b)
return false; return false;
} }
nir_ssa_def *buffer = nir_imm_int(b, nir_intrinsic_base(instr)); nir_ssa_def *buffer = nir_imm_int(b, ssbo_offset + nir_intrinsic_base(instr));
nir_ssa_def *temp = NULL; nir_ssa_def *temp = NULL;
nir_intrinsic_instr *new_instr = nir_intrinsic_instr *new_instr =
nir_intrinsic_instr_create(ralloc_parent(instr), op); nir_intrinsic_instr_create(ralloc_parent(instr), op);
@@ -231,7 +201,21 @@ nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset)
snprintf(name, sizeof(name), "counter%d", var->data.binding); snprintf(name, sizeof(name), "counter%d", var->data.binding);
ssbo = nir_variable_create(shader, nir_var_mem_ssbo, type, name); ssbo = nir_variable_create(shader, nir_var_mem_ssbo, type, name);
ssbo->data.binding = var->data.binding; ssbo->data.binding = ssbo_offset + var->data.binding;
/* We can't use num_abos, because it only represents the number of
* active atomic counters, and currently unlike SSBO's they aren't
* compacted so num_abos actually isn't a bound on the index passed
* to nir_intrinsic_atomic_counter_*. e.g. if we have a single atomic
* counter declared like:
*
* layout(binding=1) atomic_uint counter0;
*
* then when we lower accesses to it the atomic_counter_* intrinsics
* will have 1 as the index but num_abos will still be 1.
*/
shader->info.num_ssbos = MAX2(shader->info.num_ssbos,
ssbo->data.binding + 1);
struct glsl_struct_field field = { struct glsl_struct_field field = {
.type = type, .type = type,
@@ -247,25 +231,6 @@ nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset)
} }
} }
/* Make sure that shader->info.num_ssbos still reflects the maximum SSBO
* index that can be used in the shader.
*/
if (shader->info.num_ssbos > 0) {
shader->info.num_ssbos += ssbo_offset;
} else {
/* We can't use num_abos, because it only represents the number of
* active atomic counters, and currently unlike SSBO's they aren't
* compacted so num_abos actually isn't a bound on the index passed
* to nir_intrinsic_atomic_counter_*. e.g. if we have a single atomic
* counter declared like:
*
* layout(binding=1) atomic_uint counter0;
*
* then when we lower accesses to it the atomic_counter_* intrinsics
* will have 1 as the index but num_abos will still be 1.
* */
shader->info.num_ssbos = util_last_bit(replaced);
}
shader->info.num_abos = 0; shader->info.num_abos = 0;
} }

View File

@@ -333,7 +333,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false); NIR_PASS_V(prog->nir, gl_nir_lower_atomics, shProg, false);
NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo, NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo,
prog->nir->info.num_abos); prog->nir->info.num_ssbos);
nir_sweep(prog->nir); nir_sweep(prog->nir);

View File

@@ -1374,9 +1374,9 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_program *prog,
ISL_FORMAT_R32G32B32A32_FLOAT, 0); ISL_FORMAT_R32G32B32A32_FLOAT, 0);
} }
uint32_t *abo_surf_offsets = uint32_t *ssbo_surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.ssbo_start]; &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
uint32_t *ssbo_surf_offsets = abo_surf_offsets + prog->info.num_abos; uint32_t *abo_surf_offsets = ssbo_surf_offsets + prog->info.num_ssbos;
for (int i = 0; i < prog->info.num_abos; i++) { for (int i = 0; i < prog->info.num_abos; i++) {
struct gl_buffer_binding *binding = struct gl_buffer_binding *binding =

View File

@@ -66,13 +66,19 @@ st_binding_to_sb(struct gl_buffer_binding *binding,
static void static void
st_bind_atomics(struct st_context *st, struct gl_program *prog, st_bind_atomics(struct st_context *st, struct gl_program *prog,
enum pipe_shader_type shader_type) gl_shader_stage stage)
{ {
unsigned i; unsigned i;
enum pipe_shader_type shader_type = pipe_shader_type_from_mesa(stage);
if (!prog || !st->pipe->set_shader_buffers || st->has_hw_atomics) if (!prog || !st->pipe->set_shader_buffers || st->has_hw_atomics)
return; return;
/* For !has_hw_atomics, the atomic counters have been rewritten to be above
* the SSBO range.
*/
unsigned buffer_base = st->ctx->Const.Program[stage].MaxShaderStorageBlocks;
for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) { for (i = 0; i < prog->sh.data->NumAtomicBuffers; i++) {
struct gl_active_atomic_buffer *atomic = struct gl_active_atomic_buffer *atomic =
&prog->sh.data->AtomicBuffers[i]; &prog->sh.data->AtomicBuffers[i];
@@ -81,7 +87,7 @@ st_bind_atomics(struct st_context *st, struct gl_program *prog,
st_binding_to_sb(&st->ctx->AtomicBufferBindings[atomic->Binding], &sb); st_binding_to_sb(&st->ctx->AtomicBufferBindings[atomic->Binding], &sb);
st->pipe->set_shader_buffers(st->pipe, shader_type, st->pipe->set_shader_buffers(st->pipe, shader_type,
atomic->Binding, 1, &sb, 0x1); buffer_base + atomic->Binding, 1, &sb, 0x1);
} }
} }
@@ -91,7 +97,7 @@ st_bind_vs_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
st_bind_atomics(st, prog, PIPE_SHADER_VERTEX); st_bind_atomics(st, prog, MESA_SHADER_VERTEX);
} }
void void
@@ -100,7 +106,7 @@ st_bind_fs_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
st_bind_atomics(st, prog, PIPE_SHADER_FRAGMENT); st_bind_atomics(st, prog, MESA_SHADER_FRAGMENT);
} }
void void
@@ -109,7 +115,7 @@ st_bind_gs_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
st_bind_atomics(st, prog, PIPE_SHADER_GEOMETRY); st_bind_atomics(st, prog, MESA_SHADER_GEOMETRY);
} }
void void
@@ -118,7 +124,7 @@ st_bind_tcs_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
st_bind_atomics(st, prog, PIPE_SHADER_TESS_CTRL); st_bind_atomics(st, prog, MESA_SHADER_TESS_CTRL);
} }
void void
@@ -127,7 +133,7 @@ st_bind_tes_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
st_bind_atomics(st, prog, PIPE_SHADER_TESS_EVAL); st_bind_atomics(st, prog, MESA_SHADER_TESS_EVAL);
} }
void void
@@ -140,7 +146,7 @@ st_bind_cs_atomics(struct st_context *st)
struct gl_program *prog = struct gl_program *prog =
st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE); st_bind_atomics(st, prog, MESA_SHADER_COMPUTE);
} }
void void

View File

@@ -47,14 +47,11 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
unsigned i; unsigned i;
struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS]; struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
struct gl_program_constants *c; struct gl_program_constants *c;
int buffer_base;
if (!prog || !st->pipe->set_shader_buffers) if (!prog || !st->pipe->set_shader_buffers)
return; return;
c = &st->ctx->Const.Program[prog->info.stage]; c = &st->ctx->Const.Program[prog->info.stage];
buffer_base = st->has_hw_atomics ? 0 : c->MaxAtomicBuffers;
for (i = 0; i < prog->info.num_ssbos; i++) { for (i = 0; i < prog->info.num_ssbos; i++) {
struct gl_buffer_binding *binding; struct gl_buffer_binding *binding;
struct st_buffer_object *st_obj; struct st_buffer_object *st_obj;
@@ -81,14 +78,14 @@ st_bind_ssbos(struct st_context *st, struct gl_program *prog,
sb->buffer_size = 0; sb->buffer_size = 0;
} }
} }
st->pipe->set_shader_buffers(st->pipe, shader_type, buffer_base, st->pipe->set_shader_buffers(st->pipe, shader_type, 0,
prog->info.num_ssbos, buffers, prog->info.num_ssbos, buffers,
prog->sh.ShaderStorageBlocksWriteAccess); prog->sh.ShaderStorageBlocksWriteAccess);
/* clear out any stale shader buffers */ /* clear out any stale shader buffers */
if (prog->info.num_ssbos < c->MaxShaderStorageBlocks) if (prog->info.num_ssbos < c->MaxShaderStorageBlocks)
st->pipe->set_shader_buffers( st->pipe->set_shader_buffers(
st->pipe, shader_type, st->pipe, shader_type,
buffer_base + prog->info.num_ssbos, prog->info.num_ssbos,
c->MaxShaderStorageBlocks - prog->info.num_ssbos, c->MaxShaderStorageBlocks - prog->info.num_ssbos,
NULL, 0); NULL, 0);
} }

View File

@@ -259,8 +259,6 @@ st_feedback_draw_vbo(struct gl_context *ctx,
/* shader buffers */ /* shader buffers */
/* TODO: atomic counter buffers */ /* TODO: atomic counter buffers */
struct pipe_transfer *ssbo_transfer[PIPE_MAX_SHADER_BUFFERS] = {0}; struct pipe_transfer *ssbo_transfer[PIPE_MAX_SHADER_BUFFERS] = {0};
unsigned ssbo_first_slot = st->has_hw_atomics ? 0 :
st->ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers;
for (unsigned i = 0; i < prog->info.num_ssbos; i++) { for (unsigned i = 0; i < prog->info.num_ssbos; i++) {
struct gl_buffer_binding *binding = struct gl_buffer_binding *binding =
@@ -285,7 +283,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
PIPE_TRANSFER_READ, &ssbo_transfer[i]); PIPE_TRANSFER_READ, &ssbo_transfer[i]);
draw_set_mapped_shader_buffer(draw, PIPE_SHADER_VERTEX, draw_set_mapped_shader_buffer(draw, PIPE_SHADER_VERTEX,
ssbo_first_slot + i, ptr, size); i, ptr, size);
} }
/* samplers */ /* samplers */

View File

@@ -505,7 +505,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
if (!st->has_hw_atomics) if (!st->has_hw_atomics)
NIR_PASS_V(nir, nir_lower_atomics_to_ssbo, NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers); st->ctx->Const.Program[nir->info.stage].MaxShaderStorageBlocks);
st_finalize_nir_before_variants(nir); st_finalize_nir_before_variants(nir);

View File

@@ -2237,11 +2237,9 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
case ir_unop_get_buffer_size: { case ir_unop_get_buffer_size: {
ir_constant *const_offset = ir->operands[0]->as_constant(); ir_constant *const_offset = ir->operands[0]->as_constant();
int buf_base = ctx->st->has_hw_atomics
? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
st_src_reg buffer( st_src_reg buffer(
PROGRAM_BUFFER, PROGRAM_BUFFER,
buf_base + (const_offset ? const_offset->value.u[0] : 0), const_offset ? const_offset->value.u[0] : 0,
GLSL_TYPE_UINT); GLSL_TYPE_UINT);
if (!const_offset) { if (!const_offset) {
buffer.reladdr = ralloc(mem_ctx, st_src_reg); buffer.reladdr = ralloc(mem_ctx, st_src_reg);
@@ -3449,7 +3447,9 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
resource = buffer; resource = buffer;
} else { } else {
st_src_reg buffer(PROGRAM_BUFFER, location->data.binding, st_src_reg buffer(PROGRAM_BUFFER,
ctx->Const.Program[shader->Stage].MaxShaderStorageBlocks +
location->data.binding,
GLSL_TYPE_ATOMIC_UINT); GLSL_TYPE_ATOMIC_UINT);
if (offset.file != PROGRAM_UNDEFINED) { if (offset.file != PROGRAM_UNDEFINED) {
@@ -3537,11 +3537,9 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
ir_constant *const_block = block->as_constant(); ir_constant *const_block = block->as_constant();
int buf_base = st_context(ctx)->has_hw_atomics
? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
st_src_reg buffer( st_src_reg buffer(
PROGRAM_BUFFER, PROGRAM_BUFFER,
buf_base + (const_block ? const_block->value.u[0] : 0), const_block ? const_block->value.u[0] : 0,
GLSL_TYPE_UINT); GLSL_TYPE_UINT);
if (!const_block) { if (!const_block) {
@@ -7053,8 +7051,10 @@ st_translate_program(
if (!st_context(ctx)->has_hw_atomics) { if (!st_context(ctx)->has_hw_atomics) {
for (i = 0; i < prog->info.num_abos; i++) { for (i = 0; i < prog->info.num_abos; i++) {
unsigned index = prog->sh.AtomicBuffers[i]->Binding; unsigned index = (frag_const->MaxShaderStorageBlocks +
assert(index < frag_const->MaxAtomicBuffers); prog->sh.AtomicBuffers[i]->Binding);
assert(prog->sh.AtomicBuffers[i]->Binding <
frag_const->MaxAtomicBuffers);
t->buffers[index] = ureg_DECL_buffer(ureg, index, true); t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
} }
} else { } else {
@@ -7069,11 +7069,7 @@ st_translate_program(
assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
for (i = 0; i < prog->info.num_ssbos; i++) { for (i = 0; i < prog->info.num_ssbos; i++) {
unsigned index = i; t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
if (!st_context(ctx)->has_hw_atomics)
index += frag_const->MaxAtomicBuffers;
t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
} }
} }