ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter)
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
@@ -156,20 +156,23 @@ typedef struct {
|
||||
|
||||
unsigned max_workgroup_size;
|
||||
unsigned wave_size;
|
||||
uint32_t clipdist_enable_mask;
|
||||
const uint8_t *vs_output_param_offset; /* GFX11+ */
|
||||
bool has_param_exports;
|
||||
bool can_cull;
|
||||
bool disable_streamout;
|
||||
bool has_gen_prim_query;
|
||||
bool has_xfb_prim_query;
|
||||
bool kill_pointsize;
|
||||
bool force_vrs;
|
||||
|
||||
/* VS */
|
||||
unsigned num_vertices_per_primitive;
|
||||
bool early_prim_export;
|
||||
bool passthrough;
|
||||
bool use_edgeflags;
|
||||
int primitive_id_location;
|
||||
bool export_primitive_id;
|
||||
uint32_t instance_rate_inputs;
|
||||
uint32_t clipdist_enable_mask;
|
||||
uint32_t user_clip_plane_enable_mask;
|
||||
|
||||
/* GS */
|
||||
|
@@ -82,7 +82,6 @@ typedef struct
|
||||
bool streamout_enabled;
|
||||
bool has_user_edgeflags;
|
||||
unsigned max_num_waves;
|
||||
unsigned position_store_base;
|
||||
|
||||
/* LDS params */
|
||||
unsigned pervertex_lds_bytes;
|
||||
@@ -597,14 +596,7 @@ emit_store_ngg_nogs_es_primitive_id(nir_builder *b, lower_ngg_nogs_state *st)
|
||||
prim_id = nir_load_primitive_id(b);
|
||||
}
|
||||
|
||||
nir_io_semantics io_sem = {
|
||||
.location = VARYING_SLOT_PRIMITIVE_ID,
|
||||
.num_slots = 1,
|
||||
};
|
||||
|
||||
nir_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
|
||||
.base = st->options->primitive_id_location,
|
||||
.src_type = nir_type_uint32, .io_semantics = io_sem);
|
||||
st->outputs[VARYING_SLOT_PRIMITIVE_ID][0] = prim_id;
|
||||
|
||||
/* Update outputs_written to reflect that the pass added a new output. */
|
||||
b->shader->info.outputs_written |= VARYING_BIT_PRIMITIVE_ID;
|
||||
@@ -777,9 +769,6 @@ remove_extra_pos_output(nir_builder *b, nir_instr *instr, void *state)
|
||||
nir_ssa_def *store_val = intrin->src[0].ssa;
|
||||
unsigned store_pos_component = nir_intrinsic_component(intrin);
|
||||
|
||||
/* save the store base for re-construct store output instruction */
|
||||
s->position_store_base = nir_intrinsic_base(intrin);
|
||||
|
||||
nir_instr_remove(instr);
|
||||
|
||||
if (store_val->parent_instr->type == nir_instr_type_alu) {
|
||||
@@ -2105,12 +2094,8 @@ ngg_nogs_gather_outputs(nir_builder *b, struct exec_list *cf_list, lower_ngg_nog
|
||||
type[c] = src_type;
|
||||
}
|
||||
|
||||
/* remove the edge flag output anyway as it should not be passed to next stage */
|
||||
bool is_edge_slot = slot == VARYING_SLOT_EDGE;
|
||||
/* remove non-pos-export slot when GFX11, they are written to buffer memory */
|
||||
bool is_pos_export_slot = slot < VARYING_SLOT_MAX && (BITFIELD64_BIT(slot) & POS_EXPORT_MASK);
|
||||
if (is_edge_slot || (s->options->gfx_level >= GFX11 && !is_pos_export_slot))
|
||||
nir_instr_remove(instr);
|
||||
/* remove all store output instructions */
|
||||
nir_instr_remove(instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2260,9 +2245,9 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
};
|
||||
|
||||
const bool need_prim_id_store_shared =
|
||||
options->primitive_id_location >= 0 && shader->info.stage == MESA_SHADER_VERTEX;
|
||||
options->export_primitive_id && shader->info.stage == MESA_SHADER_VERTEX;
|
||||
|
||||
if (options->primitive_id_location >= 0) {
|
||||
if (options->export_primitive_id) {
|
||||
nir_variable *prim_id_var = nir_variable_create(shader, nir_var_shader_out, glsl_uint_type(), "ngg_prim_id");
|
||||
prim_id_var->data.location = VARYING_SLOT_PRIMITIVE_ID;
|
||||
prim_id_var->data.driver_location = VARYING_SLOT_PRIMITIVE_ID;
|
||||
@@ -2326,7 +2311,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
ngg_nogs_get_pervertex_lds_size(shader->info.stage,
|
||||
shader->num_outputs,
|
||||
state.streamout_enabled,
|
||||
options->primitive_id_location >= 0,
|
||||
options->export_primitive_id,
|
||||
state.has_user_edgeflags);
|
||||
|
||||
if (need_prim_id_store_shared) {
|
||||
@@ -2337,7 +2322,6 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *export_vertex_instr;
|
||||
nir_ssa_def *es_thread =
|
||||
options->can_cull ? nir_load_var(b, es_accepted_var) : has_input_vertex(b);
|
||||
|
||||
@@ -2357,11 +2341,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
nir_cf_reinsert(&extracted, b->cursor);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
|
||||
if (options->primitive_id_location >= 0)
|
||||
if (options->export_primitive_id)
|
||||
emit_store_ngg_nogs_es_primitive_id(b, &state);
|
||||
|
||||
/* Export all vertex attributes (including the primitive ID) */
|
||||
export_vertex_instr = nir_export_vertex_amd(b);
|
||||
}
|
||||
nir_pop_if(b, if_es_thread);
|
||||
|
||||
@@ -2376,12 +2357,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
* it seems that it's best to put the position export always at the end, and
|
||||
* then let ACO schedule it up (slightly) only when early prim export is used.
|
||||
*/
|
||||
b->cursor = nir_before_instr(&export_vertex_instr->instr);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
|
||||
nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
|
||||
nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
|
||||
nir_store_output(b, pos_val, nir_imm_int(b, 0), .base = state.position_store_base,
|
||||
.component = 0, .io_semantics = io_sem, .src_type = nir_type_float32);
|
||||
for (int i = 0; i < 4; i++)
|
||||
state.outputs[VARYING_SLOT_POS][i] = nir_channel(b, pos_val, i);
|
||||
}
|
||||
|
||||
/* Gather outputs data and types */
|
||||
@@ -2407,23 +2387,40 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
|
||||
emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var));
|
||||
}
|
||||
|
||||
/* Export varyings for GFX11+ */
|
||||
if (state.options->gfx_level >= GFX11) {
|
||||
vs_output outputs[64];
|
||||
uint64_t export_outputs = shader->info.outputs_written;
|
||||
if (options->kill_pointsize)
|
||||
export_outputs &= ~VARYING_BIT_PSIZ;
|
||||
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
|
||||
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
|
||||
ac_nir_export_position(b, options->gfx_level,
|
||||
options->clipdist_enable_mask,
|
||||
!options->has_param_exports,
|
||||
options->force_vrs,
|
||||
export_outputs, state.outputs);
|
||||
|
||||
if (num_outputs) {
|
||||
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
|
||||
create_vertex_param_phis(b, num_outputs, outputs);
|
||||
if (options->has_param_exports) {
|
||||
if (state.options->gfx_level >= GFX11) {
|
||||
/* Export varyings for GFX11+ */
|
||||
vs_output outputs[64];
|
||||
unsigned num_outputs = gather_vs_outputs(b, outputs, &state);
|
||||
|
||||
b->cursor = nir_after_cf_list(&impl->body);
|
||||
if (num_outputs) {
|
||||
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
|
||||
create_vertex_param_phis(b, num_outputs, outputs);
|
||||
|
||||
if (!num_es_threads)
|
||||
num_es_threads = nir_load_merged_wave_info_amd(b);
|
||||
export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
|
||||
options->vs_output_param_offset);
|
||||
b->cursor = nir_after_cf_list(&impl->body);
|
||||
|
||||
if (!num_es_threads)
|
||||
num_es_threads = nir_load_merged_wave_info_amd(b);
|
||||
export_vertex_params_gfx11(b, NULL, num_es_threads, num_outputs, outputs,
|
||||
options->vs_output_param_offset);
|
||||
}
|
||||
} else {
|
||||
ac_nir_export_parameter(b, options->vs_output_param_offset,
|
||||
shader->info.outputs_written,
|
||||
shader->info.outputs_written_16bit,
|
||||
state.outputs, state.outputs_16bit_lo,
|
||||
state.outputs_16bit_hi);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1444,7 +1444,9 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
|
||||
options.gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
options.max_workgroup_size = info->workgroup_size;
|
||||
options.wave_size = info->wave_size;
|
||||
options.clipdist_enable_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
|
||||
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
|
||||
options.has_param_exports = info->outinfo.param_exports;
|
||||
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
|
||||
options.disable_streamout = !device->physical_device->use_ngg_streamout;
|
||||
options.has_gen_prim_query = info->has_ngg_prim_query;
|
||||
@@ -1460,7 +1462,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
|
||||
options.num_vertices_per_primitive = num_vertices_per_prim;
|
||||
options.early_prim_export = info->has_ngg_early_prim_export;
|
||||
options.passthrough = info->is_ngg_passthrough;
|
||||
options.primitive_id_location = info->outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1;
|
||||
options.export_primitive_id = info->outinfo.export_prim_id;
|
||||
options.instance_rate_inputs = pl_key->vs.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
|
||||
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
|
||||
|
@@ -1604,6 +1604,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
||||
const union si_shader_key *key = &shader->key;
|
||||
assert(key->ge.as_ngg);
|
||||
|
||||
unsigned clipdist_mask =
|
||||
(sel->info.clipdist_mask & ~key->ge.opt.kill_clip_distances) |
|
||||
sel->info.culldist_mask;
|
||||
|
||||
ac_nir_lower_ngg_options options = {
|
||||
.family = sel->screen->info.family,
|
||||
.gfx_level = sel->screen->info.gfx_level,
|
||||
@@ -1612,6 +1616,10 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
||||
.can_cull = !!key->ge.opt.ngg_culling,
|
||||
.disable_streamout = key->ge.opt.remove_streamout,
|
||||
.vs_output_param_offset = shader->info.vs_output_param_offset,
|
||||
.has_param_exports = shader->info.nr_param_exports,
|
||||
.clipdist_enable_mask = clipdist_mask,
|
||||
.kill_pointsize = key->ge.opt.kill_pointsize,
|
||||
.force_vrs = sel->screen->options.vrs2x2,
|
||||
};
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX ||
|
||||
@@ -1635,8 +1643,6 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
||||
|
||||
unsigned clip_plane_enable =
|
||||
SI_NGG_CULL_GET_CLIP_PLANE_ENABLE(key->ge.opt.ngg_culling);
|
||||
unsigned clipdist_mask =
|
||||
(sel->info.clipdist_mask & clip_plane_enable) | sel->info.culldist_mask;
|
||||
|
||||
options.num_vertices_per_primitive = gfx10_ngg_get_vertices_per_prim(shader);
|
||||
options.early_prim_export = gfx10_ngg_export_prim_early(shader);
|
||||
@@ -1644,10 +1650,8 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
||||
options.use_edgeflags = gfx10_edgeflags_have_effect(shader);
|
||||
options.has_gen_prim_query = options.has_xfb_prim_query =
|
||||
sel->screen->use_ngg_streamout && !sel->info.base.vs.blit_sgprs_amd;
|
||||
options.primitive_id_location =
|
||||
key->ge.mono.u.vs_export_prim_id ? sel->info.num_outputs : -1;
|
||||
options.export_primitive_id = key->ge.mono.u.vs_export_prim_id;
|
||||
options.instance_rate_inputs = instance_rate_inputs;
|
||||
options.clipdist_enable_mask = clipdist_mask;
|
||||
options.user_clip_plane_enable_mask = clip_plane_enable;
|
||||
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
|
||||
|
Reference in New Issue
Block a user