radeonsi: monolithic TCS emit tessfactor in nir directly

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21437>
This commit is contained in:
Qiang Yu
2023-02-14 12:01:46 +08:00
committed by Marge Bot
parent 3f5d42a28a
commit 51e725df29
3 changed files with 39 additions and 43 deletions

View File

@@ -456,13 +456,18 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
/* For monolithic shaders, the TCS epilog code is generated by
* ac_nir_lower_hs_outputs_to_mem.
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
if (!shader->is_monolithic) {
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
*/
for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
}
break;
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
@@ -520,16 +525,21 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
}
/* TCS return values are inputs to the TCS epilog.
*
* param_tcs_offchip_offset, param_tcs_factor_offset,
* param_tcs_offchip_layout, and internal_bindings
* should be passed to the epilog.
/* For monolithic shaders, the TCS epilog code is generated by
* ac_nir_lower_hs_outputs_to_mem.
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
if (!shader->is_monolithic) {
/* TCS return values are inputs to the TCS epilog.
*
* param_tcs_offchip_offset, param_tcs_factor_offset,
* param_tcs_offchip_layout, and internal_bindings
* should be passed to the epilog.
*/
for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 11; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
}
}
break;
@@ -1587,9 +1597,14 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
key->ge.opt.same_patch_vertices);
/* Used by hs_emit_write_tess_factors() when the shader is monolithic. */
nir->info.tess._primitive_mode = key->ge.part.tcs.epilog.prim_mode;
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location,
sel->screen->info.gfx_level,
false, /* does not matter as we disabled final tess factor write */
/* Used by hs_emit_write_tess_factors() when the shader is monolithic. */
key->ge.part.tcs.epilog.tes_reads_tess_factors,
~0ULL, ~0ULL, /* no TES inputs filter */
util_last_bit64(sel->info.outputs_written),
util_last_bit64(sel->info.patch_outputs_written),
@@ -1597,7 +1612,9 @@ static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
/* ALL TCS inputs are passed by register. */
key->ge.opt.same_patch_vertices &&
!(sel->info.base.inputs_read & ~sel->info.tcs_vgpr_only_inputs),
sel->info.tessfactors_are_def_in_all_invocs, false);
sel->info.tessfactors_are_def_in_all_invocs,
/* Emit the epilog only when the shader is monolithic. */
shader->is_monolithic);
return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, si_map_io_driver_location);

View File

@@ -1035,7 +1035,8 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
break;
case MESA_SHADER_TESS_CTRL:
si_llvm_tcs_build_end(ctx);
if (!shader->is_monolithic)
si_llvm_tcs_build_end(ctx);
break;
case MESA_SHADER_TESS_EVAL:
@@ -1122,19 +1123,13 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
if (sscreen->info.gfx_level >= GFX9) {
struct si_shader_selector *ls = shader->key.ge.part.tcs.ls;
struct ac_llvm_pointer parts[4];
struct ac_llvm_pointer parts[3];
bool vs_needs_prolog =
si_vs_needs_prolog(ls, &shader->key.ge.part.tcs.ls_prolog);
/* TCS main part */
parts[2] = ctx.main_fn;
/* TCS epilog */
union si_shader_part_key tcs_epilog_key;
si_get_tcs_epilog_key(shader, &tcs_epilog_key);
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key, false);
parts[3] = ctx.main_fn;
struct si_shader shader_ls = {};
shader_ls.selector = ls;
shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
@@ -1176,26 +1171,10 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
ctx.shader = shader;
ctx.stage = MESA_SHADER_TESS_CTRL;
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 4 - !vs_needs_prolog,
si_build_wrapper_function(&ctx, parts + !vs_needs_prolog, 3 - !vs_needs_prolog,
vs_needs_prolog, vs_needs_prolog ? 2 : 1,
main_arg_types,
shader->key.ge.opt.same_patch_vertices);
} else {
struct ac_llvm_pointer parts[2];
union si_shader_part_key epilog_key;
parts[0] = ctx.main_fn;
for (int i = 0; i < ctx.args->ac.arg_count; i++)
main_arg_types[i] = ctx.args->ac.args[i].type;
main_arg_types[MIN2(AC_MAX_ARGS - 1, ctx.args->ac.arg_count)] = AC_ARG_INVALID;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.ge.part.tcs.epilog;
si_llvm_build_tcs_epilog(&ctx, &epilog_key, false);
parts[1] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0, 0, main_arg_types, false);
}
} else if (shader->is_monolithic && sel->stage == MESA_SHADER_GEOMETRY) {
if (ctx.screen->info.gfx_level >= GFX9) {

View File

@@ -430,7 +430,7 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
invocation_id = si_unpack_param(ctx, ctx->args->ac.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
if (ctx->screen->info.gfx_level >= GFX9 && !ctx->shader->is_monolithic) {
if (ctx->screen->info.gfx_level >= GFX9) {
LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
LLVMValueRef values[2];