From 1621080df71008c83c3a47c6f21b208cc8f0f5c3 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 3 Mar 2025 20:17:34 -0500 Subject: [PATCH] compiler,nir: Gather needs_full_quad_helper_invocations info This is needed on Qualcomm, where there are separate fields to enable just 3 fragments and all 4 fragments. Reviewed-by: Danylo Piliaiev Reviewed-by: Rob Clark Reviewed-by: Alyssa Rosenzweig Fixes: 264d8a67664 ("ir3: Set need_full_quad depending on info.fs.require_full_quads") Part-of: --- src/compiler/nir/nir_gather_info.c | 21 +++++++++++++++++++-- src/compiler/nir/nir_print.c | 1 + src/compiler/shader_info.h | 9 +++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index dd0a161d290..13ad829d237 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -827,14 +827,26 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, shader->info.writes_memory = true; if (nir_intrinsic_has_semantic(instr, NIR_INTRINSIC_QUADGROUP)) { - if (shader->info.stage == MESA_SHADER_FRAGMENT) + if (shader->info.stage == MESA_SHADER_FRAGMENT) { shader->info.fs.needs_coarse_quad_helper_invocations = true; + /* For now assume that plain ddx/ddy are always coarse. This is + * true for most backends. + * TODO: Switch ddx to ddx_coarse for remaining backends. + */ + if (instr->intrinsic != nir_intrinsic_ddx && + instr->intrinsic != nir_intrinsic_ddy && + instr->intrinsic != nir_intrinsic_ddx_coarse && + instr->intrinsic != nir_intrinsic_ddy_coarse) + shader->info.fs.needs_full_quad_helper_invocations = true; + } } else if (nir_intrinsic_has_semantic(instr, NIR_INTRINSIC_SUBGROUP)) { shader->info.uses_wide_subgroup_intrinsics = true; if (shader->info.stage == MESA_SHADER_FRAGMENT && - shader->info.fs.require_full_quads) + shader->info.fs.require_full_quads) { shader->info.fs.needs_coarse_quad_helper_invocations = true; + shader->info.fs.needs_full_quad_helper_invocations = true; + } } if (instr->intrinsic == nir_intrinsic_image_levels || @@ -854,6 +866,10 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, static void gather_tex_info(nir_tex_instr *instr, nir_shader *shader) { + /* For now we assume that implicit derivatives use coarse derivatives. + * Drivers that need to assume otherwise might have to plumb through a + * property. + */ if (shader->info.stage == MESA_SHADER_FRAGMENT && nir_tex_instr_has_implicit_derivative(instr)) shader->info.fs.needs_coarse_quad_helper_invocations = true; @@ -998,6 +1014,7 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) shader->info.fs.color_is_dual_source = false; shader->info.fs.uses_fbfetch_output = false; shader->info.fs.needs_coarse_quad_helper_invocations = false; + shader->info.fs.needs_full_quad_helper_invocations = false; } if (shader->info.stage == MESA_SHADER_TESS_CTRL) { shader->info.tess.tcs_same_invocation_inputs_read = 0; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index ac681b2f04d..59794125106 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -2703,6 +2703,7 @@ print_shader_info(const struct shader_info *info, FILE *fp) print_nz_bool(fp, "require_full_quads", info->fs.require_full_quads); print_nz_bool(fp, "needs_coarse_quad_helper_invocations", info->fs.needs_coarse_quad_helper_invocations); + print_nz_bool(fp, "needs_full_quad_helper_invocations", info->fs.needs_full_quad_helper_invocations); print_nz_bool(fp, "uses_sample_qualifier", info->fs.uses_sample_qualifier); print_nz_bool(fp, "uses_sample_shading", info->fs.uses_sample_shading); print_nz_bool(fp, "early_fragment_tests", info->fs.early_fragment_tests); diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index b00a02cb14f..ac1527fe4ae 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -365,6 +365,15 @@ typedef struct shader_info { */ bool needs_coarse_quad_helper_invocations:1; + /** + * True if this fragment shader requires helper invocations for all + * four fragments in the quad. This can be caused by all the same + * things as needs_coarse_quad_helper_invocations, except that coarse + * derivatives don't count as they usually only use 3 out of the 4 + * fragments in a quad. + */ + bool needs_full_quad_helper_invocations:1; + /** * Whether any inputs are declared with the "sample" qualifier. */