ac/nir: tcs write tess factor support pass by reg
For radeonsi usage. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21437>
This commit is contained in:
@@ -164,6 +164,13 @@ typedef struct {
|
|||||||
* In that case, no LDS is allocated for TCS inputs.
|
* In that case, no LDS is allocated for TCS inputs.
|
||||||
*/
|
*/
|
||||||
bool tcs_no_inputs_in_lds;
|
bool tcs_no_inputs_in_lds;
|
||||||
|
|
||||||
|
/* Whether to emit TCS tess factor write. */
|
||||||
|
bool tcs_emit_tess_factor_write;
|
||||||
|
|
||||||
|
/* Save TCS tess factor for tess factor writer. */
|
||||||
|
nir_variable *tcs_tess_level_outer;
|
||||||
|
nir_variable *tcs_tess_level_inner;
|
||||||
} lower_tess_io_state;
|
} lower_tess_io_state;
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -428,6 +435,7 @@ lower_hs_output_store(nir_builder *b,
|
|||||||
|
|
||||||
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
|
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
|
||||||
nir_ssa_def *store_val = intrin->src[0].ssa;
|
nir_ssa_def *store_val = intrin->src[0].ssa;
|
||||||
|
unsigned component = nir_intrinsic_component(intrin);
|
||||||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||||
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||||
semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER;
|
semantics.location == VARYING_SLOT_TESS_LEVEL_OUTER;
|
||||||
@@ -457,15 +465,27 @@ lower_hs_output_store(nir_builder *b,
|
|||||||
|
|
||||||
nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin);
|
nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin);
|
||||||
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask,
|
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask,
|
||||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
.align_mul = 16u, .align_offset = (component * 4u) % 16u);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Keep tess factor nir_store_output instruction if it's going to be passed
|
nir_ssa_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||||
* by reg instead of LDS, because it's used by radeonsi llvm backend to generate
|
|
||||||
* llvm variable which is read by the final llvm tess factor write epilog.
|
if (is_tess_factor && st->tcs_pass_tessfactors_by_reg) {
|
||||||
*/
|
if (st->tcs_emit_tess_factor_write) {
|
||||||
return is_tess_factor && st->tcs_pass_tessfactors_by_reg ?
|
nir_variable *var = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ?
|
||||||
NIR_LOWER_INSTR_PROGRESS : NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
st->tcs_tess_level_inner : st->tcs_tess_level_outer;
|
||||||
|
|
||||||
|
/* Save to temp variable for read by tess factor writer. */
|
||||||
|
ac_nir_store_var_components(b, var, store_val, component, write_mask);
|
||||||
|
} else {
|
||||||
|
/* Keep tess factor nir_store_output instruction if it's going to be passed
|
||||||
|
* by reg instead of LDS and we use a compiler backend TCS epilog.
|
||||||
|
*/
|
||||||
|
ret = NIR_LOWER_INSTR_PROGRESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static nir_ssa_def *
|
static nir_ssa_def *
|
||||||
@@ -559,10 +579,14 @@ hs_emit_write_tess_factors(nir_shader *shader,
|
|||||||
nir_builder_init(b, impl);
|
nir_builder_init(b, impl);
|
||||||
b->cursor = nir_after_block(last_block);
|
b->cursor = nir_after_block(last_block);
|
||||||
|
|
||||||
nir_scope scope =
|
/* If tess factors are load from LDS, wait previous LDS stores done. */
|
||||||
st->tcs_out_patch_fits_subgroup ? NIR_SCOPE_SUBGROUP : NIR_SCOPE_WORKGROUP;
|
if (!st->tcs_pass_tessfactors_by_reg) {
|
||||||
nir_scoped_barrier(b, .execution_scope = scope, .memory_scope = scope,
|
nir_scope scope = st->tcs_out_patch_fits_subgroup ?
|
||||||
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
|
NIR_SCOPE_SUBGROUP : NIR_SCOPE_WORKGROUP;
|
||||||
|
|
||||||
|
nir_scoped_barrier(b, .execution_scope = scope, .memory_scope = scope,
|
||||||
|
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
|
||||||
|
}
|
||||||
|
|
||||||
nir_ssa_def *invocation_id = nir_load_invocation_id(b);
|
nir_ssa_def *invocation_id = nir_load_invocation_id(b);
|
||||||
|
|
||||||
@@ -576,20 +600,37 @@ hs_emit_write_tess_factors(nir_shader *shader,
|
|||||||
if (shader->info.tess.tcs_vertices_out <= 32)
|
if (shader->info.tess.tcs_vertices_out <= 32)
|
||||||
invocation_id_zero->control = nir_selection_control_divergent_always_taken;
|
invocation_id_zero->control = nir_selection_control_divergent_always_taken;
|
||||||
|
|
||||||
|
nir_ssa_def *tessfactors_outer = NULL;
|
||||||
|
nir_ssa_def *tessfactors_inner = NULL;
|
||||||
|
if (st->tcs_pass_tessfactors_by_reg) {
|
||||||
|
tessfactors_outer = nir_load_var(b, st->tcs_tess_level_outer);
|
||||||
|
tessfactors_outer = nir_trim_vector(b, tessfactors_outer, outer_comps);
|
||||||
|
|
||||||
|
if (inner_comps) {
|
||||||
|
tessfactors_inner = nir_load_var(b, st->tcs_tess_level_inner);
|
||||||
|
tessfactors_inner = nir_trim_vector(b, tessfactors_inner, inner_comps);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Base LDS address of per-patch outputs in the current patch. */
|
||||||
|
nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
|
||||||
|
|
||||||
|
/* Load all tessellation factors (aka. tess levels) from LDS. */
|
||||||
|
tessfactors_outer = nir_load_shared(b, outer_comps, 32, lds_base,
|
||||||
|
.base = st->tcs_tess_lvl_out_loc,
|
||||||
|
.align_mul = 16u,
|
||||||
|
.align_offset = st->tcs_tess_lvl_out_loc % 16u);
|
||||||
|
|
||||||
|
if (inner_comps) {
|
||||||
|
tessfactors_inner = nir_load_shared(b, inner_comps, 32, lds_base,
|
||||||
|
.base = st->tcs_tess_lvl_in_loc,
|
||||||
|
.align_mul = 16u,
|
||||||
|
.align_offset = st->tcs_tess_lvl_in_loc % 16u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* The descriptor where tess factors have to be stored by the shader. */
|
/* The descriptor where tess factors have to be stored by the shader. */
|
||||||
nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
|
nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
|
||||||
|
|
||||||
/* Base LDS address of per-patch outputs in the current patch. */
|
|
||||||
nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
|
|
||||||
|
|
||||||
/* Load all tessellation factors (aka. tess levels) from LDS. */
|
|
||||||
nir_ssa_def *tessfactors_outer = nir_load_shared(b, outer_comps, 32, lds_base, .base = st->tcs_tess_lvl_out_loc,
|
|
||||||
.align_mul = 16u, .align_offset = st->tcs_tess_lvl_out_loc % 16u);
|
|
||||||
nir_ssa_def *tessfactors_inner = inner_comps
|
|
||||||
? nir_load_shared(b, inner_comps, 32, lds_base, .base = st->tcs_tess_lvl_in_loc,
|
|
||||||
.align_mul = 16u, .align_offset = st->tcs_tess_lvl_in_loc % 16u)
|
|
||||||
: NULL;
|
|
||||||
|
|
||||||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||||
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||||
nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
|
nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
|
||||||
@@ -758,9 +799,18 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
|
|||||||
.tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,
|
.tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,
|
||||||
.tcs_pass_tessfactors_by_reg = pass_tessfactors_by_reg,
|
.tcs_pass_tessfactors_by_reg = pass_tessfactors_by_reg,
|
||||||
.tcs_no_inputs_in_lds = no_inputs_in_lds,
|
.tcs_no_inputs_in_lds = no_inputs_in_lds,
|
||||||
|
.tcs_emit_tess_factor_write = emit_tess_factor_write,
|
||||||
.map_io = map,
|
.map_io = map,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (pass_tessfactors_by_reg) {
|
||||||
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||||
|
state.tcs_tess_level_outer =
|
||||||
|
nir_local_variable_create(impl, glsl_vec4_type(), "tess outer");
|
||||||
|
state.tcs_tess_level_inner =
|
||||||
|
nir_local_variable_create(impl, glsl_vec4_type(), "tess inner");
|
||||||
|
}
|
||||||
|
|
||||||
nir_shader_lower_instructions(shader,
|
nir_shader_lower_instructions(shader,
|
||||||
filter_hs_output_access,
|
filter_hs_output_access,
|
||||||
lower_hs_output_access,
|
lower_hs_output_access,
|
||||||
|
Reference in New Issue
Block a user