nir/fold_16bit_tex_image: Add type granularity for dst folding

Some HW may be able to fold only some of the dst types; e.g. on Adreno, folding i32 -> i16 could produce a different result, since the folded variant clamps the result instead of masking it. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Reviewed-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20396>
2022-12-20 16:23:15 +01:00
parent c5231025be
commit 1c9ee30838
6 changed files with 12 additions and 8 deletions
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3745,7 +3745,7 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
      };
      struct nir_fold_16bit_tex_image_options fold_16bit_options = {
         .rounding_mode = nir_rounding_mode_rtne,
-         .fold_tex_dest = true,
+         .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
         .fold_image_load_store_data = true,
         .fold_image_srcs = !radv_use_llvm_for_stage(device, stage->stage),
         .fold_srcs_options_count = separate_g16 ? 2 : 1,
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5491,7 +5491,7 @@ struct nir_fold_tex_srcs_options {

 struct nir_fold_16bit_tex_image_options {
   nir_rounding_mode rounding_mode;
-   bool fold_tex_dest;
+   nir_alu_type fold_tex_dest_types;
   bool fold_image_load_store_data;
   bool fold_image_srcs;
   unsigned fold_srcs_options_count;
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -901,7 +901,7 @@ fold_16bit_load_data(nir_builder *b, nir_intrinsic_instr *instr,

 static bool
 fold_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
-                    nir_rounding_mode rdm)
+                    nir_alu_type allowed_types, nir_rounding_mode rdm)
 {
   /* Skip sparse residency */
   if (tex->is_sparse)
@@ -918,6 +918,9 @@ fold_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
       tex->op != nir_texop_fragment_fetch_amd)
      return false;

+   if (!(nir_alu_type_get_base_type(tex->dest_type) & allowed_types))
+      return false;
+
   if (!fold_16bit_destination(&tex->dest.ssa, tex->dest_type, exec_mode, rdm))
      return false;

@@ -1086,8 +1089,9 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

-      if (options->fold_tex_dest)
-         progress |= fold_16bit_tex_dest(tex, exec_mode, options->rounding_mode);
+      if (options->fold_tex_dest_types)
+         progress |= fold_16bit_tex_dest(tex, exec_mode, options->fold_tex_dest_types,
+                                         options->rounding_mode);

      for (unsigned i = 0; i < options->fold_srcs_options_count; i++) {
         progress |= fold_16bit_tex_srcs(b, tex, &options->fold_srcs_options[i]);
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -769,7 +769,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
         };
         struct nir_fold_16bit_tex_image_options fold_16bit_options = {
            .rounding_mode = nir_rounding_mode_rtz,
-            .fold_tex_dest = true,
+            .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
            /* blob dumps have no half regs on pixel 2's ldib or stib, so only enable for a6xx+. */
            .fold_image_load_store_data = so->compiler->gen >= 6,
            .fold_srcs_options_count = 1,
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -196,7 +196,7 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
   };
   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
      .rounding_mode = nir_rounding_mode_rtne,
-      .fold_tex_dest = true,
+      .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
      .fold_image_load_store_data = true,
      .fold_srcs_options_count = has_g16 ? 2 : 1,
      .fold_srcs_options = fold_srcs_options,
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -489,7 +489,7 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
   /* Skip if there are potentially conflicting rounding modes */
   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
      .rounding_mode = nir_rounding_mode_undef,
-      .fold_tex_dest = true,
+      .fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
   };
   NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);