radeonsi: merge 2 conditional blocks with same condition into 1 in culling code

The block only loads input VGPRs from LDS, and the next block uses them.
The entering condition is the same, even though the second block is
the next shader part beginning with the prolog.

Simply move the VGPR loads into the prolog.

This decreases the shader code size by 12 bytes.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11102>
This commit is contained in:
Marek Olšák
2021-05-11 11:47:10 -04:00
committed by Marge Bot
parent 786678a017
commit 1e9cc86511
5 changed files with 55 additions and 43 deletions

View File

@@ -1090,7 +1090,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
if (shader->is_monolithic && ctx.stage == MESA_SHADER_VERTEX) {
LLVMValueRef parts[4];
unsigned num_parts = 0;
bool has_prolog = false;
bool first_is_prolog = false;
LLVMValueRef main_fn = ctx.main_fn;
if (ngg_cull_main_fn) {
@@ -1101,7 +1101,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
has_prolog = true;
first_is_prolog = true;
}
parts[num_parts++] = ngg_cull_main_fn;
}
@@ -1113,21 +1113,34 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
has_prolog = true;
if (num_parts == 1)
first_is_prolog = true;
}
parts[num_parts++] = main_fn;
si_build_wrapper_function(&ctx, parts, num_parts, has_prolog ? 1 : 0, 0, false);
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false);
if (ctx.shader->key.opt.vs_as_prim_discard_cs)
si_build_prim_discard_compute_shader(&ctx);
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) {
LLVMValueRef parts[2];
LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn;
/* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */
union si_shader_part_key prolog_key;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
prolog_key.vs_prolog.num_merged_next_stage_vgprs = 5;
prolog_key.vs_prolog.as_ngg = 1;
prolog_key.vs_prolog.load_vgprs_after_culling = 1;
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
prolog = ctx.main_fn;
parts[0] = ngg_cull_main_fn;
parts[1] = ctx.main_fn;
parts[1] = prolog;
parts[2] = main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0, 0, false);
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) {
if (sscreen->info.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;