ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter)

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
Qiang Yu
2022-12-24 14:55:29 +08:00
committed by Marge Bot
parent f084e9bbe8
commit 80506be31b
4 changed files with 57 additions and 51 deletions

View File

@@ -156,20 +156,23 @@ typedef struct {
unsigned max_workgroup_size;
unsigned wave_size;
uint32_t clipdist_enable_mask;
const uint8_t *vs_output_param_offset; /* GFX11+ */
bool has_param_exports;
bool can_cull;
bool disable_streamout;
bool has_gen_prim_query;
bool has_xfb_prim_query;
bool kill_pointsize;
bool force_vrs;
/* VS */
unsigned num_vertices_per_primitive;
bool early_prim_export;
bool passthrough;
bool use_edgeflags;
int primitive_id_location;
bool export_primitive_id;
uint32_t instance_rate_inputs;
uint32_t clipdist_enable_mask;
uint32_t user_clip_plane_enable_mask;
/* GS */

View File

@@ -82,7 +82,6 @@ typedef struct
bool streamout_enabled;
bool has_user_edgeflags;
unsigned max_num_waves;
unsigned position_store_base;
/* LDS params */
unsigned pervertex_lds_bytes;
@@ -597,14 +596,7 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b, lower_ngg_nogs_state *st)
prim_id = nir_load_primitive_id(b);
}
nir_io_semantics io_sem = {
.location = VARYING_SLOT_PRIMITIVE_ID,
.num_slots = 1,
};
nir_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
.base = st->options->primitive_id_location,
.src_type = nir_type_uint32, .io_semantics = io_sem);
st->outputs[VARYING_SLOT_PRIMITIVE_ID][0] = prim_id;
/* Update outputs_written to reflect that the pass added a new output. */
b->shader->info.outputs_written |= VARYING_BIT_PRIMITIVE_ID;
@@ -777,9 +769,6 @@ remove_extra_pos_output(nir_builder *b, nir_instr *instr, void *state)
nir_ssa_def *store_val = intrin->src[0].ssa;
unsigned store_pos_component = nir_intrinsic_component(intrin);
/* save the store base for re-construct store output instruction */
s->position_store_base = nir_intrinsic_base(intrin);
nir_instr_remove(instr);
if (store_val->parent_instr->type == nir_instr_type_alu) {
@@ -2105,12 +2094,8 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
type[c] = src_type;
}
/* remove the edge flag output anyway as it should not be passed to next stage */
bool is_edge_slot = slot == VARYING_SLOT_EDGE;
/* remove non-pos-export slot when GFX11, they are written to buffer memory */
bool is_pos_export_slot = slot < VARYING_SLOT_MAX && (BITFIELD64_BIT(slot) & POS_EXPORT_MASK);
if (is_edge_slot || (s->options->gfx_level >= GFX11 && !is_pos_export_slot))
nir_instr_remove(instr);
/* remove all store output instructions */
nir_instr_remove(instr);
}
}
}
@@ -2260,9 +2245,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
};
const bool need_prim_id_store_shared =
options->primitive_id_location >= 0 && shader->info.stage == MESA_SHADER_VERTEX;
options->export_primitive_id && shader->info.stage == MESA_SHADER_VERTEX;
if (options->primitive_id_location >= 0) {
if (options->export_primitive_id) {
nir_variable *prim_id_var = nir_variable_create(shader, nir_var_shader_out, glsl_uint_type(), "ngg_prim_id");
prim_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
prim_id_var->data.driver_location = VARYING_SLOT_PRIMITIVE_ID;
@@ -2326,7 +2311,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
ngg_nogs_get_pervertex_lds_size(shader->info.stage,
shader->num_outputs,
state.streamout_enabled,
options->primitive_id_location >= 0,
options->export_primitive_id,
state.has_user_edgeflags);
if (need_prim_id_store_shared) {
@@ -2337,7 +2322,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
}
nir_intrinsic_instr *export_vertex_instr;
nir_ssa_def *es_thread =
options->can_cull ? nir_load_var(b, es_accepted_var) : has_input_vertex(b);
@@ -2357,11 +2341,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
nir_cf_reinsert(&extracted, b->cursor);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
if (options->primitive_id_location >= 0)
if (options->export_primitive_id)
emit_store_ngg_nogs_es_primitive_id(b, &state);
/* Export all vertex attributes (including the primitive ID) */
export_vertex_instr = nir_export_vertex_amd(b);
}
nir_pop_if(b, if_es_thread);
@@ -2376,12 +2357,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
* it seems that it's best to put the position export always at the end, and
* then let ACO schedule it up (slightly) only when early prim export is used.
*/
b->cursor = nir_before_instr(&export_vertex_instr->instr);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
nir_store_output(b, pos_val, nir_imm_int(b, 0), .base = state.position_store_base,
.component = 0, .io_semantics = io_sem, .src_type = nir_type_float32);
for (int i = 0; i < 4; i++)
state.outputs[VARYING_SLOT_POS][i] = nir_channel(b, pos_val, i);
}
/* Gather outputs data and types */
@@ -2407,23 +2387,40 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var));
}
/* Export varyings for GFX11+ */
if (state.options->gfx_level >= GFX11) {
vs_output outputs[64];
uint64_t export_outputs = shader->info.outputs_written;
if (options->kill_pointsize)
export_outputs &= ~VARYING_BIT_PSIZ;
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
ac_nir_export_position(b, options->gfx_level,
options->clipdist_enable_mask,
!options->has_param_exports,
options->force_vrs,
export_outputs, state.outputs);
if (num_outputs) {
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
create_vertex_param_phis(b, num_outputs, outputs);
if (options->has_param_exports) {
if (state.options->gfx_level >= GFX11) {
/* Export varyings for GFX11+ */
vs_output outputs[64];
unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
b->cursor = nir_after_cf_list(&impl->body);
if (num_outputs) {
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
create_vertex_param_phis(b, num_outputs, outputs);
if (!num_es_threads)
num_es_threads = nir_load_merged_wave_info_amd(b);
export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
options->vs_output_param_offset);
b->cursor = nir_after_cf_list(&impl->body);
if (!num_es_threads)
num_es_threads = nir_load_merged_wave_info_amd(b);
export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
options->vs_output_param_offset);
}
} else {
ac_nir_export_parameter(b, options->vs_output_param_offset,
shader->info.outputs_written,
shader->info.outputs_written_16bit,
state.outputs, state.outputs_16bit_lo,
state.outputs_16bit_hi);
}
}

View File

@@ -1444,7 +1444,9 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
options.gfx_level = device->physical_device->rad_info.gfx_level;
options.max_workgroup_size = info->workgroup_size;
options.wave_size = info->wave_size;
options.clipdist_enable_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.has_param_exports = info->outinfo.param_exports;
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
options.disable_streamout = !device->physical_device->use_ngg_streamout;
options.has_gen_prim_query = info->has_ngg_prim_query;
@@ -1460,7 +1462,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
options.num_vertices_per_primitive = num_vertices_per_prim;
options.early_prim_export = info->has_ngg_early_prim_export;
options.passthrough = info->is_ngg_passthrough;
options.primitive_id_location = info->outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1;
options.export_primitive_id = info->outinfo.export_prim_id;
options.instance_rate_inputs = pl_key->vs.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);

View File

@@ -1604,6 +1604,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
const union si_shader_key *key = &shader->key;
assert(key->ge.as_ngg);
unsigned clipdist_mask =
(sel->info.clipdist_mask & ~key->ge.opt.kill_clip_distances) |
sel->info.culldist_mask;
ac_nir_lower_ngg_options options = {
.family = sel->screen->info.family,
.gfx_level = sel->screen->info.gfx_level,
@@ -1612,6 +1616,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
.can_cull = !!key->ge.opt.ngg_culling,
.disable_streamout = key->ge.opt.remove_streamout,
.vs_output_param_offset = shader->info.vs_output_param_offset,
.has_param_exports = shader->info.nr_param_exports,
.clipdist_enable_mask = clipdist_mask,
.kill_pointsize = key->ge.opt.kill_pointsize,
.force_vrs = sel->screen->options.vrs2x2,
};
if (nir->info.stage == MESA_SHADER_VERTEX ||
@@ -1635,8 +1643,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
unsigned clip_plane_enable =
SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(key->ge.opt.ngg_culling);
unsigned clipdist_mask =
(sel->info.clipdist_mask & clip_plane_enable) | sel->info.culldist_mask;
options.num_vertices_per_primitive = gfx10_ngg_get_vertices_per_prim(shader);
options.early_prim_export = gfx10_ngg_export_prim_early(shader);
@@ -1644,10 +1650,8 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
options.use_edgeflags = gfx10_edgeflags_have_effect(shader);
options.has_gen_prim_query = options.has_xfb_prim_query =
sel->screen->use_ngg_streamout && !sel->info.base.vs.blit_sgprs_amd;
options.primitive_id_location =
key->ge.mono.u.vs_export_prim_id ? sel->info.num_outputs : -1;
options.export_primitive_id = key->ge.mono.u.vs_export_prim_id;
options.instance_rate_inputs = instance_rate_inputs;
options.clipdist_enable_mask = clipdist_mask;
options.user_clip_plane_enable_mask = clip_plane_enable;
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);