ac/nir: add ac_nir_lower_legacy_gs
Add a lowering pass for legacy (non-NGG) GS that lowers outputs to memory stores and adds the shader query when required. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20158>
This commit is contained in:
@@ -438,3 +438,231 @@ ac_nir_gs_shader_query(nir_builder *b,
|
|||||||
nir_pop_if(b, if_shader_query);
|
nir_pop_if(b, if_shader_query);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
nir_ssa_def *outputs[64][4];
|
||||||
|
nir_ssa_def *outputs_16bit_lo[16][4];
|
||||||
|
nir_ssa_def *outputs_16bit_hi[16][4];
|
||||||
|
|
||||||
|
ac_nir_gs_output_info *info;
|
||||||
|
|
||||||
|
nir_ssa_def *vertex_count[4];
|
||||||
|
nir_ssa_def *primitive_count[4];
|
||||||
|
} lower_legacy_gs_state;
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
|
lower_legacy_gs_state *s)
|
||||||
|
{
|
||||||
|
/* Assume:
|
||||||
|
* - the shader used nir_lower_io_to_temporaries
|
||||||
|
* - 64-bit outputs are lowered
|
||||||
|
* - no indirect indexing is present
|
||||||
|
*/
|
||||||
|
assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
|
||||||
|
|
||||||
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
|
|
||||||
|
unsigned component = nir_intrinsic_component(intrin);
|
||||||
|
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||||
|
|
||||||
|
nir_ssa_def **outputs;
|
||||||
|
if (sem.location < VARYING_SLOT_VAR0_16BIT) {
|
||||||
|
outputs = s->outputs[sem.location];
|
||||||
|
} else {
|
||||||
|
unsigned index = sem.location - VARYING_SLOT_VAR0_16BIT;
|
||||||
|
if (sem.high_16bits)
|
||||||
|
outputs = s->outputs_16bit_hi[index];
|
||||||
|
else
|
||||||
|
outputs = s->outputs_16bit_lo[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_ssa_def *store_val = intrin->src[0].ssa;
|
||||||
|
/* 64bit output has been lowered to 32bit */
|
||||||
|
assert(store_val->bit_size <= 32);
|
||||||
|
|
||||||
|
u_foreach_bit (i, write_mask) {
|
||||||
|
unsigned comp = component + i;
|
||||||
|
outputs[comp] = nir_channel(b, store_val, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_instr_remove(&intrin->instr);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
|
lower_legacy_gs_state *s)
|
||||||
|
{
|
||||||
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
|
|
||||||
|
unsigned stream = nir_intrinsic_stream_id(intrin);
|
||||||
|
nir_ssa_def *vtxidx = intrin->src[0].ssa;
|
||||||
|
|
||||||
|
nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
|
||||||
|
nir_ssa_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
|
||||||
|
|
||||||
|
unsigned offset = 0;
|
||||||
|
u_foreach_bit64 (i, b->shader->info.outputs_written) {
|
||||||
|
for (unsigned j = 0; j < 4; j++) {
|
||||||
|
nir_ssa_def *output = s->outputs[i][j];
|
||||||
|
/* Next vertex emit need a new value, reset all outputs. */
|
||||||
|
s->outputs[i][j] = NULL;
|
||||||
|
|
||||||
|
if (!(s->info->usage_mask[i] & (1 << j)) ||
|
||||||
|
((s->info->streams[i] >> (j * 2)) & 0x3) != stream)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned base = offset * b->shader->info.gs.vertices_out;
|
||||||
|
offset++;
|
||||||
|
|
||||||
|
/* no one set this output, skip the buffer store */
|
||||||
|
if (!output)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nir_ssa_def *voffset = nir_iadd_imm(b, vtxidx, base);
|
||||||
|
voffset = nir_ishl_imm(b, voffset, 2);
|
||||||
|
|
||||||
|
/* extend 8/16 bit to 32 bit, 64 bit has been lowered */
|
||||||
|
nir_ssa_def *data = nir_u2uN(b, output, 32);
|
||||||
|
|
||||||
|
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||||
|
.is_swizzled = true, .slc_amd = true,
|
||||||
|
.access = ACCESS_COHERENT,
|
||||||
|
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||||
|
.memory_modes = nir_var_shader_out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
|
||||||
|
for (unsigned j = 0; j < 4; j++) {
|
||||||
|
nir_ssa_def *output_lo = s->outputs_16bit_lo[i][j];
|
||||||
|
nir_ssa_def *output_hi = s->outputs_16bit_hi[i][j];
|
||||||
|
/* Next vertex emit need a new value, reset all outputs. */
|
||||||
|
s->outputs_16bit_lo[i][j] = NULL;
|
||||||
|
s->outputs_16bit_hi[i][j] = NULL;
|
||||||
|
|
||||||
|
bool has_lo_16bit = (s->info->usage_mask_16bit_lo[i] & (1 << j)) &&
|
||||||
|
((s->info->streams_16bit_lo[i] >> (j * 2)) & 0x3) == stream;
|
||||||
|
bool has_hi_16bit = (s->info->usage_mask_16bit_hi[i] & (1 << j)) &&
|
||||||
|
((s->info->streams_16bit_hi[i] >> (j * 2)) & 0x3) == stream;
|
||||||
|
if (!has_lo_16bit && !has_hi_16bit)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned base = offset * b->shader->info.gs.vertices_out;
|
||||||
|
offset++;
|
||||||
|
|
||||||
|
bool has_lo_16bit_out = has_lo_16bit && output_lo;
|
||||||
|
bool has_hi_16bit_out = has_hi_16bit && output_hi;
|
||||||
|
|
||||||
|
/* no one set needed output, skip the buffer store */
|
||||||
|
if (!has_lo_16bit_out && !has_hi_16bit_out)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!has_lo_16bit_out)
|
||||||
|
output_lo = nir_ssa_undef(b, 1, 16);
|
||||||
|
|
||||||
|
if (!has_hi_16bit_out)
|
||||||
|
output_hi = nir_ssa_undef(b, 1, 16);
|
||||||
|
|
||||||
|
nir_ssa_def *voffset = nir_iadd_imm(b, vtxidx, base);
|
||||||
|
voffset = nir_ishl_imm(b, voffset, 2);
|
||||||
|
|
||||||
|
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
|
||||||
|
gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
|
||||||
|
.is_swizzled = true, .slc_amd = true,
|
||||||
|
.access = ACCESS_COHERENT,
|
||||||
|
/* For ACO to not reorder this store around EmitVertex/EndPrimitve */
|
||||||
|
.memory_modes = nir_var_shader_out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Keep this instruction to signal vertex emission. */
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_legacy_gs_set_vertex_and_primitive_count(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||||
|
lower_legacy_gs_state *s)
|
||||||
|
{
|
||||||
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
|
|
||||||
|
unsigned stream = nir_intrinsic_stream_id(intrin);
|
||||||
|
|
||||||
|
s->vertex_count[stream] = intrin->src[0].ssa;
|
||||||
|
s->primitive_count[stream] = intrin->src[1].ssa;
|
||||||
|
|
||||||
|
nir_instr_remove(&intrin->instr);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
lower_legacy_gs_intrinsic(nir_builder *b, nir_instr *instr, void *state)
|
||||||
|
{
|
||||||
|
lower_legacy_gs_state *s = (lower_legacy_gs_state *) state;
|
||||||
|
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
|
|
||||||
|
if (intrin->intrinsic == nir_intrinsic_store_output)
|
||||||
|
return lower_legacy_gs_store_output(b, intrin, s);
|
||||||
|
else if (intrin->intrinsic == nir_intrinsic_emit_vertex_with_counter)
|
||||||
|
return lower_legacy_gs_emit_vertex_with_counter(b, intrin, s);
|
||||||
|
else if (intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count)
|
||||||
|
return lower_legacy_gs_set_vertex_and_primitive_count(b, intrin, s);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_nir_lower_legacy_gs(nir_shader *nir,
|
||||||
|
bool has_gen_prim_query,
|
||||||
|
bool has_pipeline_stats_query,
|
||||||
|
ac_nir_gs_output_info *output_info)
|
||||||
|
{
|
||||||
|
lower_legacy_gs_state s = {
|
||||||
|
.info = output_info,
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned num_vertices_per_primitive = 0;
|
||||||
|
switch (nir->info.gs.output_primitive) {
|
||||||
|
case SHADER_PRIM_POINTS:
|
||||||
|
num_vertices_per_primitive = 1;
|
||||||
|
break;
|
||||||
|
case SHADER_PRIM_LINE_STRIP:
|
||||||
|
num_vertices_per_primitive = 2;
|
||||||
|
break;
|
||||||
|
case SHADER_PRIM_TRIANGLE_STRIP:
|
||||||
|
num_vertices_per_primitive = 3;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Invalid GS output primitive.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_shader_instructions_pass(nir, lower_legacy_gs_intrinsic,
|
||||||
|
nir_metadata_block_index | nir_metadata_dominance, &s);
|
||||||
|
|
||||||
|
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||||
|
|
||||||
|
nir_builder builder;
|
||||||
|
nir_builder *b = &builder;
|
||||||
|
nir_builder_init(b, impl);
|
||||||
|
|
||||||
|
b->cursor = nir_after_cf_list(&impl->body);
|
||||||
|
|
||||||
|
/* Emit shader query for mix use legacy/NGG GS */
|
||||||
|
bool progress = ac_nir_gs_shader_query(b,
|
||||||
|
has_gen_prim_query,
|
||||||
|
has_pipeline_stats_query,
|
||||||
|
num_vertices_per_primitive,
|
||||||
|
64,
|
||||||
|
s.vertex_count,
|
||||||
|
s.primitive_count);
|
||||||
|
if (progress)
|
||||||
|
nir_metadata_preserve(impl, nir_metadata_none);
|
||||||
|
}
|
||||||
|
@@ -187,6 +187,16 @@ ac_nir_lower_global_access(nir_shader *shader);
|
|||||||
|
|
||||||
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
|
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
|
||||||
|
|
||||||
|
/* Describes the geometry shader outputs to ac_nir_lower_legacy_gs().
 *
 * All arrays are indexed by output slot; the *_16bit_lo / *_16bit_hi variants
 * cover the low and high halves of the 16-bit varying slots.
 */
typedef struct ac_nir_gs_output_info {
   /* Stream of each component: 2 bits per component, component N in bits
    * [2N+1:2N].
    */
   const uint8_t *streams;
   const uint8_t *streams_16bit_lo;
   const uint8_t *streams_16bit_hi;

   /* Bit N is set when component N of the slot is used. */
   const uint8_t *usage_mask;
   const uint8_t *usage_mask_16bit_lo;
   const uint8_t *usage_mask_16bit_hi;
} ac_nir_gs_output_info;
|
||||||
|
|
||||||
nir_shader *
|
nir_shader *
|
||||||
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
||||||
bool disable_streamout,
|
bool disable_streamout,
|
||||||
@@ -207,6 +217,12 @@ ac_nir_gs_shader_query(nir_builder *b,
|
|||||||
nir_ssa_def *vertex_count[4],
|
nir_ssa_def *vertex_count[4],
|
||||||
nir_ssa_def *primitive_count[4]);
|
nir_ssa_def *primitive_count[4]);
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_nir_lower_legacy_gs(nir_shader *nir,
|
||||||
|
bool has_gen_prim_query,
|
||||||
|
bool has_pipeline_stats_query,
|
||||||
|
ac_nir_gs_output_info *output_info);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user