ac/nir: support passing tess factors by register in the TCS tess factor write

For radeonsi usage.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21437>
This commit is contained in:
Qiang Yu
2023-02-14 10:48:18 +08:00
committed by Marge Bot
parent e070a9e8d0
commit c06329eb3f

View File

@@ -164,6 +164,13 @@ typedef struct {
* In that case, no LDS is allocated for TCS inputs.
*/
bool tcs_no_inputs_in_lds;
/* Whether to emit TCS tess factor write. */
bool tcs_emit_tess_factor_write;
/* Save TCS tess factor for tess factor writer. */
nir_variable *tcs_tess_level_outer;
nir_variable *tcs_tess_level_inner;
} lower_tess_io_state;
static bool
@@ -428,6 +435,7 @@ lower_hs_output_store(nir_builder *b,
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
nir_ssa_def *store_val = intrin->src[0].ssa;
unsigned component = nir_intrinsic_component(intrin);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER;
@@ -457,15 +465,27 @@ lower_hs_output_store(nir_builder *b,
nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin);
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask,
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
.align_mul = 16u, .align_offset = (component * 4u) % 16u);
}
/* Keep tess factor nir_store_output instruction if it's going to be passed
* by reg instead of LDS, because it's used by radeonsi llvm backend to generate
* llvm variable which is read by the final llvm tess factor write epilog.
*/
return is_tess_factor && st->tcs_pass_tessfactors_by_reg ?
NIR_LOWER_INSTR_PROGRESS : NIR_LOWER_INSTR_PROGRESS_REPLACE;
nir_ssa_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
if (is_tess_factor && st->tcs_pass_tessfactors_by_reg) {
if (st->tcs_emit_tess_factor_write) {
nir_variable *var = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ?
st->tcs_tess_level_inner : st->tcs_tess_level_outer;
/* Save to a temporary variable so the tess factor writer can read it. */
ac_nir_store_var_components(b, var, store_val, component, write_mask);
} else {
/* Keep tess factor nir_store_output instruction if it's going to be passed
* by reg instead of LDS and we use a compiler backend TCS epilog.
*/
ret = NIR_LOWER_INSTR_PROGRESS;
}
}
return ret;
}
static nir_ssa_def *
@@ -559,10 +579,14 @@ hs_emit_write_tess_factors(nir_shader *shader,
nir_builder_init(b, impl);
b->cursor = nir_after_block(last_block);
nir_scope scope =
st->tcs_out_patch_fits_subgroup ? NIR_SCOPE_SUBGROUP : NIR_SCOPE_WORKGROUP;
nir_scoped_barrier(b, .execution_scope = scope, .memory_scope = scope,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
/* If tess factors are loaded from LDS, wait for previous LDS stores to complete. */
if (!st->tcs_pass_tessfactors_by_reg) {
nir_scope scope = st->tcs_out_patch_fits_subgroup ?
NIR_SCOPE_SUBGROUP : NIR_SCOPE_WORKGROUP;
nir_scoped_barrier(b, .execution_scope = scope, .memory_scope = scope,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
}
nir_ssa_def *invocation_id = nir_load_invocation_id(b);
@@ -576,20 +600,37 @@ hs_emit_write_tess_factors(nir_shader *shader,
if (shader->info.tess.tcs_vertices_out <= 32)
invocation_id_zero->control = nir_selection_control_divergent_always_taken;
nir_ssa_def *tessfactors_outer = NULL;
nir_ssa_def *tessfactors_inner = NULL;
if (st->tcs_pass_tessfactors_by_reg) {
tessfactors_outer = nir_load_var(b, st->tcs_tess_level_outer);
tessfactors_outer = nir_trim_vector(b, tessfactors_outer, outer_comps);
if (inner_comps) {
tessfactors_inner = nir_load_var(b, st->tcs_tess_level_inner);
tessfactors_inner = nir_trim_vector(b, tessfactors_inner, inner_comps);
}
} else {
/* Base LDS address of per-patch outputs in the current patch. */
nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
/* Load all tessellation factors (aka. tess levels) from LDS. */
tessfactors_outer = nir_load_shared(b, outer_comps, 32, lds_base,
.base = st->tcs_tess_lvl_out_loc,
.align_mul = 16u,
.align_offset = st->tcs_tess_lvl_out_loc % 16u);
if (inner_comps) {
tessfactors_inner = nir_load_shared(b, inner_comps, 32, lds_base,
.base = st->tcs_tess_lvl_in_loc,
.align_mul = 16u,
.align_offset = st->tcs_tess_lvl_in_loc % 16u);
}
}
/* The descriptor where tess factors have to be stored by the shader. */
nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
/* Base LDS address of per-patch outputs in the current patch. */
nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
/* Load all tessellation factors (aka. tess levels) from LDS. */
nir_ssa_def *tessfactors_outer = nir_load_shared(b, outer_comps, 32, lds_base, .base = st->tcs_tess_lvl_out_loc,
.align_mul = 16u, .align_offset = st->tcs_tess_lvl_out_loc % 16u);
nir_ssa_def *tessfactors_inner = inner_comps
? nir_load_shared(b, inner_comps, 32, lds_base, .base = st->tcs_tess_lvl_in_loc,
.align_mul = 16u, .align_offset = st->tcs_tess_lvl_in_loc % 16u)
: NULL;
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
@@ -758,9 +799,18 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
.tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,
.tcs_pass_tessfactors_by_reg = pass_tessfactors_by_reg,
.tcs_no_inputs_in_lds = no_inputs_in_lds,
.tcs_emit_tess_factor_write = emit_tess_factor_write,
.map_io = map,
};
if (pass_tessfactors_by_reg) {
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
state.tcs_tess_level_outer =
nir_local_variable_create(impl, glsl_vec4_type(), "tess outer");
state.tcs_tess_level_inner =
nir_local_variable_create(impl, glsl_vec4_type(), "tess inner");
}
nir_shader_lower_instructions(shader,
filter_hs_output_access,
lower_hs_output_access,