ac/nir: add pass_tessfactors_by_reg param to hs output lower
radeonsi won't emit the tess factor write in the lowering pass, so the tess factor output needs to be kept for the LLVM backend, which passes it as a parameter. radeonsi uses this as an optimization to save an LDS write. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>
This commit is contained in:
@@ -89,6 +89,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
||||
uint64_t tes_patch_inputs_read,
|
||||
unsigned num_reserved_tcs_outputs,
|
||||
unsigned num_reserved_tcs_patch_outputs,
|
||||
bool pass_tessfactors_by_reg,
|
||||
bool emit_tess_factor_write);
|
||||
|
||||
void
|
||||
|
@@ -154,6 +154,11 @@ typedef struct {
|
||||
* subgroup that reads them.
|
||||
*/
|
||||
bool tcs_out_patch_fits_subgroup;
|
||||
|
||||
/* Set if all invocations will write to all tess factors, so tess factors
|
||||
* can be passed by register.
|
||||
*/
|
||||
bool tcs_pass_tessfactors_by_reg;
|
||||
} lower_tess_io_state;
|
||||
|
||||
static bool
|
||||
@@ -402,7 +407,7 @@ lower_hs_per_vertex_input_load(nir_builder *b,
|
||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
||||
}
|
||||
|
||||
static void
|
||||
static nir_ssa_def *
|
||||
lower_hs_output_store(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
lower_tess_io_state *st)
|
||||
@@ -416,7 +421,8 @@ lower_hs_output_store(nir_builder *b,
|
||||
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER;
|
||||
bool write_to_vmem = !is_tess_factor && tcs_output_needs_vmem(intrin, st);
|
||||
bool write_to_lds = is_tess_factor || tcs_output_needs_lds(intrin, b->shader);
|
||||
bool write_to_lds = (is_tess_factor && !st->tcs_pass_tessfactors_by_reg) ||
|
||||
tcs_output_needs_lds(intrin, b->shader);
|
||||
|
||||
if (write_to_vmem) {
|
||||
nir_ssa_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
|
||||
@@ -439,6 +445,13 @@ lower_hs_output_store(nir_builder *b,
|
||||
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask,
|
||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
||||
}
|
||||
|
||||
/* Keep the tess factor nir_store_output instruction if it's going to be passed
|
||||
* by reg instead of LDS, because the radeonsi LLVM backend uses it to generate
|
||||
* an LLVM variable which is read by the final LLVM tess factor write epilog.
|
||||
*/
|
||||
return is_tess_factor && st->tcs_pass_tessfactors_by_reg ?
|
||||
NIR_LOWER_INSTR_PROGRESS : NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
@@ -481,8 +494,7 @@ lower_hs_output_access(nir_builder *b,
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_store_output ||
|
||||
intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
|
||||
lower_hs_output_store(b, intrin, st);
|
||||
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
return lower_hs_output_store(b, intrin, st);
|
||||
} else if (intrin->intrinsic == nir_intrinsic_load_output ||
|
||||
intrin->intrinsic == nir_intrinsic_load_per_vertex_output) {
|
||||
return lower_hs_output_load(b, intrin, st);
|
||||
@@ -693,6 +705,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
||||
uint64_t tes_patch_inputs_read,
|
||||
unsigned num_reserved_tcs_outputs,
|
||||
unsigned num_reserved_tcs_patch_outputs,
|
||||
bool pass_tessfactors_by_reg,
|
||||
bool emit_tess_factor_write)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_TESS_CTRL);
|
||||
@@ -705,6 +718,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
||||
.tcs_num_reserved_outputs = num_reserved_tcs_outputs,
|
||||
.tcs_num_reserved_patch_outputs = num_reserved_tcs_patch_outputs,
|
||||
.tcs_out_patch_fits_subgroup = 32 % shader->info.tess.tcs_vertices_out == 0,
|
||||
.tcs_pass_tessfactors_by_reg = pass_tessfactors_by_reg,
|
||||
.map_io = map,
|
||||
};
|
||||
|
||||
|
@@ -1124,7 +1124,7 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
||||
device->physical_device->rad_info.gfx_level,
|
||||
info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read,
|
||||
info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs,
|
||||
info->tcs.num_linked_patch_outputs, true);
|
||||
info->tcs.num_linked_patch_outputs, false, true);
|
||||
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
|
Reference in New Issue
Block a user