radeonsi: merge 2 conditional blocks with same condition into 1 in culling code
The first block only loads input VGPRs from LDS, and the next block uses them. The entry condition is the same, even though the second block is the next shader part, which begins with the prolog. Simply move the VGPR loads into the prolog. This decreases the shader code size by 12 bytes.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11102>
This commit is contained in:
@@ -1090,7 +1090,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
if (shader->is_monolithic && ctx.stage == MESA_SHADER_VERTEX) {
|
||||
LLVMValueRef parts[4];
|
||||
unsigned num_parts = 0;
|
||||
bool has_prolog = false;
|
||||
bool first_is_prolog = false;
|
||||
LLVMValueRef main_fn = ctx.main_fn;
|
||||
|
||||
if (ngg_cull_main_fn) {
|
||||
@@ -1101,7 +1101,7 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &prolog_key);
|
||||
parts[num_parts++] = ctx.main_fn;
|
||||
has_prolog = true;
|
||||
first_is_prolog = true;
|
||||
}
|
||||
parts[num_parts++] = ngg_cull_main_fn;
|
||||
}
|
||||
@@ -1113,21 +1113,34 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &prolog_key);
|
||||
parts[num_parts++] = ctx.main_fn;
|
||||
has_prolog = true;
|
||||
if (num_parts == 1)
|
||||
first_is_prolog = true;
|
||||
}
|
||||
parts[num_parts++] = main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, has_prolog ? 1 : 0, 0, false);
|
||||
si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false);
|
||||
|
||||
if (ctx.shader->key.opt.vs_as_prim_discard_cs)
|
||||
si_build_prim_discard_compute_shader(&ctx);
|
||||
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) {
|
||||
LLVMValueRef parts[2];
|
||||
LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn;
|
||||
|
||||
/* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */
|
||||
union si_shader_part_key prolog_key;
|
||||
memset(&prolog_key, 0, sizeof(prolog_key));
|
||||
prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
|
||||
prolog_key.vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
prolog_key.vs_prolog.as_ngg = 1;
|
||||
prolog_key.vs_prolog.load_vgprs_after_culling = 1;
|
||||
prolog_key.vs_prolog.is_monolithic = true;
|
||||
si_llvm_build_vs_prolog(&ctx, &prolog_key);
|
||||
prolog = ctx.main_fn;
|
||||
|
||||
parts[0] = ngg_cull_main_fn;
|
||||
parts[1] = ctx.main_fn;
|
||||
parts[1] = prolog;
|
||||
parts[2] = main_fn;
|
||||
|
||||
si_build_wrapper_function(&ctx, parts, 2, 0, 0, false);
|
||||
si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
|
||||
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) {
|
||||
if (sscreen->info.chip_class >= GFX9) {
|
||||
struct si_shader_selector *ls = shader->key.part.tcs.ls;
|
||||
|
Reference in New Issue
Block a user