From 3a9f8730f52191f6493f4b7d8748a6bfd6d30993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 6 Jun 2023 10:59:46 -0400 Subject: [PATCH] amd: drop support for LLVM 12 The demote emulation can be removed, and FS_CORRECT_DERIVS_AFTER_KILL can be removed because it's always enabled on LLVM >= 13. Reviewed-by: Qiang Yu Reviewed-by: Samuel Pitoiset Part-of: --- meson.build | 4 +- src/amd/llvm/ac_llvm_build.c | 41 +----- src/amd/llvm/ac_llvm_build.h | 9 -- src/amd/llvm/ac_llvm_helper.cpp | 4 - src/amd/llvm/ac_llvm_util.c | 4 +- src/amd/llvm/ac_nir_to_llvm.c | 118 +----------------- src/gallium/drivers/radeonsi/si_get.c | 4 +- src/gallium/drivers/radeonsi/si_pipe.c | 10 -- src/gallium/drivers/radeonsi/si_pipe.h | 1 - src/gallium/drivers/radeonsi/si_shader_nir.c | 4 +- .../drivers/radeonsi/si_state_shaders.cpp | 9 +- 11 files changed, 15 insertions(+), 193 deletions(-) diff --git a/meson.build b/meson.build index eb69551e746..d86b71e08ea 100644 --- a/meson.build +++ b/meson.build @@ -1641,10 +1641,8 @@ if draw_with_llvm llvm_optional_modules += ['lto'] endif -if with_intel_clc +if with_intel_clc or with_amd_vk or with_gallium_radeonsi _llvm_version = '>= 13.0.0' -elif with_amd_vk or with_gallium_radeonsi - _llvm_version = '>= 12.0.0' elif with_gallium_opencl _llvm_version = '>= 11.0.0' elif with_clc diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index b1bf94aee9b..32b930a33c5 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -2553,22 +2553,6 @@ void ac_build_else(struct ac_llvm_context *ctx, int label_id) current_branch->next_block = endif_block; } -/* Invoked after a branch is exited. */ -static void ac_branch_exited(struct ac_llvm_context *ctx) -{ - if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) { - /* The previous conditional branch contained demote. Kill threads - * after all conditional blocks because amdgcn.wqm.vote doesn't - * return usable values inside the blocks. - * - * This is an optional optimization that only kills whole inactive quads. 
- */ - LLVMValueRef cond = LLVMBuildLoad2(ctx->builder, ctx->i1, ctx->postponed_kill, ""); - ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond)); - ctx->conditional_demote_seen = false; - } -} - void ac_build_endif(struct ac_llvm_context *ctx, int label_id) { struct ac_llvm_flow *current_branch = get_current_flow(ctx); @@ -2580,7 +2564,6 @@ void ac_build_endif(struct ac_llvm_context *ctx, int label_id) set_basicblock_name(current_branch->next_block, "endif", label_id); ctx->flow->depth--; - ac_branch_exited(ctx); } void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) @@ -2594,7 +2577,6 @@ void ac_build_endloop(struct ac_llvm_context *ctx, int label_id) LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block); set_basicblock_name(current_loop->next_block, "endloop", label_id); ctx->flow->depth--; - ac_branch_exited(ctx); } void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id) @@ -3653,32 +3635,11 @@ LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef inte LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx) { - LLVMValueRef result; + LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.live.mask", ctx->i1, NULL, 0, 0); - if (LLVM_VERSION_MAJOR >= 13) { - result = ac_build_intrinsic(ctx, "llvm.amdgcn.live.mask", ctx->i1, NULL, 0, 0); - } else { - result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, 0); - } return LLVMBuildNot(ctx->builder, result, ""); } -LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx) -{ - if (!ctx->postponed_kill) - return ac_build_load_helper_invocation(ctx); - - /* postponed_kill should be NULL on LLVM 13+ */ - assert(LLVM_VERSION_MAJOR < 13); - - /* !(exact && postponed) */ - LLVMValueRef exact = - ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, 0); - - LLVMValueRef postponed = LLVMBuildLoad2(ctx->builder, ctx->i1, ctx->postponed_kill, ""); - return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), ""); -} - LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, LLVMValueRef *args, unsigned num_args) { diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 368925918c5..a36a0871705 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -112,13 +112,6 @@ struct ac_llvm_context { LLVMValueRef i1true; LLVMValueRef i1false; - /* Temporary helper to implement demote_to_helper: - * True = live lanes - * False = demoted lanes - */ - LLVMValueRef postponed_kill; - bool conditional_demote_seen; - /* Since ac_nir_translate makes a local copy of ac_llvm_context, there * are two ac_llvm_contexts. Declare a pointer here, so that the control * flow stack is shared by both ac_llvm_contexts. 
@@ -496,8 +489,6 @@ LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef inte LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx); -LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx); - LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMTypeRef fn_type, LLVMValueRef func, LLVMValueRef *args, unsigned num_args); diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp index e8bd5609247..a7cbe4542b2 100644 --- a/src/amd/llvm/ac_llvm_helper.cpp +++ b/src/amd/llvm/ac_llvm_helper.cpp @@ -367,9 +367,7 @@ LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp unsigned SSID = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); return wrap(unwrap(ctx->builder) ->CreateAtomicRMW(binop, unwrap(ptr), unwrap(val), -#if LLVM_VERSION_MAJOR >= 13 MaybeAlign(0), -#endif AtomicOrdering::SequentiallyConsistent, SSID)); } @@ -380,9 +378,7 @@ LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef return wrap(unwrap(ctx->builder) ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), unwrap(val), -#if LLVM_VERSION_MAJOR >= 13 MaybeAlign(0), -#endif AtomicOrdering::SequentiallyConsistent, AtomicOrdering::SequentiallyConsistent, SSID)); } diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c index 63b0880b0ce..fce275583c8 100644 --- a/src/amd/llvm/ac_llvm_util.c +++ b/src/amd/llvm/ac_llvm_util.c @@ -150,9 +150,9 @@ const char *ac_get_llvm_processor_name(enum radeon_family family) case CHIP_VANGOGH: return "gfx1033"; case CHIP_NAVI24: - return LLVM_VERSION_MAJOR >= 13 ? "gfx1034" : "gfx1030"; + return "gfx1034"; case CHIP_REMBRANDT: - return LLVM_VERSION_MAJOR >= 13 ? "gfx1035" : "gfx1030"; + return "gfx1035"; case CHIP_RAPHAEL_MENDOCINO: return LLVM_VERSION_MAJOR >= 15 ? 
"gfx1036" : "gfx1030"; case CHIP_GFX1100: diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 163edc3fbce..43d911fb6c1 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1698,11 +1698,6 @@ static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct wate static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7000); - } - LLVMValueRef src_data = get_src(ctx, instr->src[0]); int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; unsigned writemask = nir_intrinsic_write_mask(instr); @@ -1789,9 +1784,6 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in } exit_waterfall(ctx, &wctx, NULL); - - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7000); } static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor, @@ -1896,11 +1888,6 @@ translate_atomic_op(nir_atomic_op op) static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7001); - } - nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr); const char *op = translate_atomic_op_str(nir_op); bool is_float = nir_atomic_op_type(nir_op) == nir_type_float; @@ -1951,10 +1938,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_ } } - result = exit_waterfall(ctx, &wctx, result); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7001); - return result; + return exit_waterfall(ctx, &wctx, result); } static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) @@ -2073,11 +2057,6 @@ static LLVMValueRef visit_load_global(struct ac_nir_context *ctx, static void visit_store_global(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7002); - } - LLVMValueRef data = get_src(ctx, instr->src[0]); LLVMTypeRef type = LLVMTypeOf(data); LLVMValueRef addr = get_global_address(ctx, instr, type); @@ -2089,19 +2068,11 @@ static void visit_store_global(struct ac_nir_context *ctx, LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic); LLVMSetAlignment(val, ac_get_type_size(type)); } - - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7002); } static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7002); - } - LLVMValueRef data = get_src(ctx, instr->src[1]); LLVMAtomicRMWBinOp op; LLVMValueRef result; @@ -2140,9 +2111,6 @@ static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx, result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope); } - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7002); - return result; } @@ -2174,11 +2142,6 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrin static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - 
LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7002); - } - unsigned base = nir_intrinsic_base(instr); unsigned writemask = nir_intrinsic_write_mask(instr); unsigned component = nir_intrinsic_component(instr); @@ -2232,9 +2195,6 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr * } LLVMBuildStore(ctx->ac.builder, value, output_addr); } - - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7002); } static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) @@ -2450,11 +2410,6 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7003); - } - enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); bool is_array = nir_intrinsic_image_array(instr); @@ -2502,17 +2457,10 @@ static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_in } exit_waterfall(ctx, &wctx, NULL); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7003); } static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7004); - } - LLVMValueRef params[7]; int param_count = 0; @@ -2627,10 +2575,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int result = ac_build_image_opcode(&ctx->ac, &args); } - result = exit_waterfall(ctx, &wctx, result); - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7004); - return result; + return exit_waterfall(ctx, &wctx, result); } static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr) @@ -2660,43 +2605,8 @@ static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *i cond = ctx->ac.i1false; } - if (LLVM_VERSION_MAJOR >= 13) { - /* This demotes the pixel if the condition is false. */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wqm.demote", ctx->ac.voidt, &cond, 1, 0); - return; - } - - LLVMValueRef mask = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, ""); - LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill); - - if (!ctx->info->fs.needs_all_helper_invocations) { - /* This is an optional optimization that only kills whole inactive quads. - * It's not used when subgroup operations can possibly use all helper - * invocations. - */ - if (ctx->ac.flow->depth == 0) { - ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond)); - } else { - /* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why. - * - * The problem is that kill(wqm.vote(0)) kills all active threads within - * the block, which breaks the whole quad mode outside the block if - * the conditional block has partially active quads (2x2 pixel blocks). - * E.g. threads 0-3 are active outside the block, but only thread 0 is - * active inside the block. Thread 0 shouldn't be killed by demote, - * because threads 1-3 are still active outside the block. - * - * The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond) - * instead of S_WQM(cond). 
- * - * The less efficient workaround we do here is to save the kill condition - * to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we - * exit the conditional block. - */ - ctx->ac.conditional_demote_seen = true; - } - } + /* This demotes the pixel if the condition is false. */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wqm.demote", ctx->ac.voidt, &cond, 1, 0); } static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx) @@ -2837,11 +2747,6 @@ static void visit_store_shared2_amd(struct ac_nir_context *ctx, const nir_intrin static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr, LLVMValueRef ptr, int src_idx) { - if (ctx->ac.postponed_kill) { - LLVMValueRef cond = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.i1, ctx->ac.postponed_kill, ""); - ac_build_ifcc(&ctx->ac, cond, 7005); - } - LLVMValueRef result; LLVMValueRef src = get_src(ctx, instr->src[src_idx]); nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr); @@ -2890,8 +2795,6 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intri } } - if (ctx->ac.postponed_kill) - ac_build_endif(&ctx->ac, 7005); return result; } @@ -3276,10 +3179,8 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins result = emit_i2b(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->front_face)); break; case nir_intrinsic_load_helper_invocation: - result = ac_build_load_helper_invocation(&ctx->ac); - break; case nir_intrinsic_is_helper_invocation: - result = ac_build_is_helper_invocation(&ctx->ac); + result = ac_build_load_helper_invocation(&ctx->ac); break; case nir_intrinsic_load_user_data_amd: assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32); @@ -4575,20 +4476,11 @@ bool ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, if (gl_shader_stage_is_compute(nir->info.stage)) setup_shared(&ctx, nir); - if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote && - LLVM_VERSION_MAJOR < 13) { - /* true = don't kill. */ - ctx.ac.postponed_kill = ac_build_alloca_init(&ctx.ac, ctx.ac.i1true, ""); - } - if (!visit_cf_list(&ctx, &func->impl->body)) return false; phi_post_pass(&ctx); - if (ctx.ac.postponed_kill) - ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad2(ctx.ac.builder, ctx.ac.i1, ctx.ac.postponed_kill, "")); - free(ctx.ssa_defs); ralloc_free(ctx.defs); ralloc_free(ctx.phis); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 100335d7adf..1999bd11118 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1294,8 +1294,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .has_udot_4x8 = sscreen->info.has_accelerated_dot_product, .has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11, .optimize_sample_mask_in = true, - .max_unroll_iterations = LLVM_VERSION_MAJOR >= 13 ? 128 : 32, - .max_unroll_iterations_aggressive = LLVM_VERSION_MAJOR >= 13 ? 
128 : 32, + .max_unroll_iterations = 128, + .max_unroll_iterations_aggressive = 128, .use_interpolated_input_intrinsics = true, .lower_uniforms_to_ubo = true, .support_16bit_alu = sscreen->info.gfx_level >= GFX8, diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4230a34fd15..89e306878a7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1188,16 +1188,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->max_texel_buffer_elements = sscreen->b.get_param( &sscreen->b, PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT); - /* Set these flags in debug_flags early, so that the shader cache takes - * them into account. - * - * Enable FS_CORRECT_DERIVS_AFTER_KILL by default if LLVM is >= 13. This makes - * nir_opt_move_discards_to_top more effective. - */ - if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard") || - LLVM_VERSION_MAJOR >= 13) - sscreen->debug_flags |= DBG(FS_CORRECT_DERIVS_AFTER_KILL); - if (sscreen->debug_flags & DBG(INFO)) ac_print_gpu_info(&sscreen->info, stdout); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 7b081609450..8367a05b385 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -184,7 +184,6 @@ enum DBG_STATS, /* Shader compiler options the shader cache should be aware of: */ - DBG_FS_CORRECT_DERIVS_AFTER_KILL, DBG_W32_GE, DBG_W32_PS, DBG_W32_PS_DISCARD, diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 6a0bbd87f66..f363e8d5aa1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -293,9 +293,7 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir) NIR_PASS_V(nir, nir_lower_subgroups, &si_nir_subgroups_options); - NIR_PASS_V(nir, nir_lower_discard_or_demote, - (sscreen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) || - nir->info.use_legacy_math_rules); + NIR_PASS_V(nir, nir_lower_discard_or_demote, true); /* Lower load constants to scalar and then clean up the mess */ NIR_PASS_V(nir, nir_lower_load_const_to_scalar); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 4d646a2aac6..1daf8acf2b1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -159,12 +159,9 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, shader_variant_flags |= 1 << 1; if (wave_size == 32) shader_variant_flags |= 1 << 2; - if (sel->stage == MESA_SHADER_FRAGMENT && - /* Derivatives imply helper invocations so check for needs_quad_helper_invocations. */ - sel->info.base.fs.needs_quad_helper_invocations && - sel->info.base.fs.uses_discard && - sel->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL)) - shader_variant_flags |= 1 << 3; + + /* bit gap */ + /* use_ngg_culling disables NGG passthrough for non-culling shaders to reduce context * rolls, which can be changed with AMD_DEBUG=nonggc or AMD_DEBUG=nggc. */
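
Editorial note (illustrative, not part of the patch): with LLVM 13 as the floor, demote
lowers directly to the llvm.amdgcn.wqm.demote intrinsic and is_helper_invocation becomes
a plain negation of llvm.amdgcn.live.mask, with no postponed_kill bookkeeping. The sketch
below is a minimal standalone version of that lowering against the public LLVM-C API
rather than Mesa's internal ac_build_intrinsic() helper; the names get_intrinsic(),
build_demote(), and build_is_helper() are hypothetical and exist only for this example,
and it assumes a module targeting amdgcn.

    /* Minimal sketch using the LLVM-C API. Assumes an amdgcn target module.
     * get_intrinsic()/build_demote()/build_is_helper() are illustrative names,
     * not Mesa code. */
    #include <llvm-c/Core.h>

    static LLVMValueRef get_intrinsic(LLVMModuleRef mod, const char *name,
                                      LLVMTypeRef fn_type)
    {
       /* Reuse the declaration if it already exists in the module. */
       LLVMValueRef fn = LLVMGetNamedFunction(mod, name);
       return fn ? fn : LLVMAddFunction(mod, name, fn_type);
    }

    /* Demote the pixel when cond is false:
     * call void @llvm.amdgcn.wqm.demote(i1 cond). */
    static void build_demote(LLVMModuleRef mod, LLVMBuilderRef b,
                             LLVMContextRef ctx, LLVMValueRef cond)
    {
       LLVMTypeRef i1 = LLVMInt1TypeInContext(ctx);
       LLVMTypeRef fn_type =
          LLVMFunctionType(LLVMVoidTypeInContext(ctx), &i1, 1, 0);
       LLVMBuildCall2(b, fn_type,
                      get_intrinsic(mod, "llvm.amdgcn.wqm.demote", fn_type),
                      &cond, 1, "");
    }

    /* is_helper_invocation: !(call i1 @llvm.amdgcn.live.mask()). */
    static LLVMValueRef build_is_helper(LLVMModuleRef mod, LLVMBuilderRef b,
                                        LLVMContextRef ctx)
    {
       LLVMTypeRef fn_type =
          LLVMFunctionType(LLVMInt1TypeInContext(ctx), NULL, 0, 0);
       LLVMValueRef live =
          LLVMBuildCall2(b, fn_type,
                         get_intrinsic(mod, "llvm.amdgcn.live.mask", fn_type),
                         NULL, 0, "live");
       return LLVMBuildNot(b, live, "is_helper");
    }

Both intrinsics are only available on LLVM >= 13, which lines up with the meson.build
version floor moving to 13.0.0 for RADV and RadeonSI in this patch.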