amd,zink: remove options.varying_estimate_instr_cost callbacks

They are a maintenance burden since they would need changes to support
any new instruction types that nir_opt_varyings becomes able to move
between shaders, and they are almost identical to
default_varying_estimate_instr_cost, so just use that.

The cost threshold is adjusted for AMD because
default_varying_estimate_instr_cost is slightly different.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32424>
Marek Olšák
2024-11-30 22:55:41 -05:00
committed by Marge Bot
parent c0de78f120
commit d8468d5463
5 changed files with 1 addition and 207 deletions
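
For context on what these callbacks feed into: nir_opt_varyings estimates the cost of an expression it could move from the consumer into the producer by summing a per-instruction estimate, and only moves the code if the total stays within the driver's varying_expression_max_cost threshold. The sketch below is purely illustrative, not the actual pass code; the helper names (try_move_expression, move_into_producer) and the exact comparison are assumptions, while the two option fields and default_varying_estimate_instr_cost come from this commit. It shows why dropping the driver estimator in favour of the default one also means retuning the threshold.

/* Illustrative sketch only: helper names and control flow are made up and
 * do not match the real nir_opt_varyings implementation. */
static bool
try_move_expression(nir_shader *producer, nir_shader *consumer,
                    const nir_shader_compiler_options *opts,
                    nir_instr **expr, unsigned num_instrs)
{
   unsigned max_cost = opts->varying_expression_max_cost(producer, consumer);
   unsigned cost = 0;

   for (unsigned i = 0; i < num_instrs; i++) {
      /* After this commit, AMD and zink leave varying_estimate_instr_cost
       * NULL, so the pass falls back to its built-in default estimator. */
      cost += opts->varying_estimate_instr_cost ?
              opts->varying_estimate_instr_cost(expr[i]) :
              default_varying_estimate_instr_cost(expr[i]);
   }

   if (cost > max_cost)
      return false;

   move_into_producer(expr, num_instrs); /* hypothetical helper */
   return true;
}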

@@ -1601,112 +1601,13 @@ ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
/* TCS->TES and VS->TES (OpenGL only) */
case MESA_SHADER_FRAGMENT:
/* Up to 3 uniforms and 5 ALUs. */
return 14;
return 12;
default:
unreachable("unexpected shader stage");
}
}
unsigned
ac_nir_varying_estimate_instr_cost(nir_instr *instr)
{
unsigned dst_bit_size, src_bit_size, num_dst_dwords;
nir_op alu_op;
/* This is a very loose approximation based on gfx10. */
switch (instr->type) {
case nir_instr_type_alu:
dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
alu_op = nir_instr_as_alu(instr)->op;
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
switch (alu_op) {
case nir_op_mov:
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
return 0;
case nir_op_imul:
case nir_op_umul_low:
return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
case nir_op_imul_high:
case nir_op_umul_high:
case nir_op_imul_2x32_64:
case nir_op_umul_2x32_64:
return 4;
case nir_op_fexp2:
case nir_op_flog2:
case nir_op_frcp:
case nir_op_frsq:
case nir_op_fsqrt:
case nir_op_fsin:
case nir_op_fcos:
case nir_op_fsin_amd:
case nir_op_fcos_amd:
return 4; /* FP16 & FP32. */
case nir_op_fpow:
return 4 + 1 + 4; /* log2 + mul + exp2 */
case nir_op_fsign:
return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
case nir_op_idiv:
case nir_op_udiv:
case nir_op_imod:
case nir_op_umod:
case nir_op_irem:
return dst_bit_size == 64 ? 80 : 40;
case nir_op_fdiv:
return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
case nir_op_fmod:
case nir_op_frem:
return dst_bit_size == 64 ? 80 : 8;
default:
/* Double opcodes. Comparisons have always full performance. */
if ((dst_bit_size == 64 &&
nir_op_infos[alu_op].output_type & nir_type_float) ||
(dst_bit_size >= 8 && src_bit_size == 64 &&
nir_op_infos[alu_op].input_types[0] & nir_type_float))
return 16;
return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
}
case nir_instr_type_intrinsic:
dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
case nir_intrinsic_load_deref:
/* Uniform or UBO load.
* Set a low cost to balance the number of scalar loads and ALUs.
*/
return 3 * num_dst_dwords;
default:
unreachable("unexpected intrinsic");
}
default:
unreachable("unexpected instr type");
}
}
typedef struct {
enum amd_gfx_level gfx_level;
bool use_llvm;

@@ -317,9 +317,6 @@ ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
unsigned
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
unsigned
ac_nir_varying_estimate_instr_cost(nir_instr *instr);
bool
ac_nir_opt_shared_append(nir_shader *shader);

@@ -64,7 +64,6 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
options->max_unroll_iterations_aggressive = 128;
options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
options->io_options |= nir_io_mediump_is_32bit;
options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
}

@@ -1595,5 +1595,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
options->varying_expression_max_cost = si_varying_expression_max_cost;
options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
}
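
As the radv and radeonsi hunks above show, drivers now wire up only the threshold callback and leave the estimator slot alone. A minimal sketch of the resulting setup: the surrounding function name is illustrative, while the two option fields and ac_nir_varying_expression_max_cost are taken from the diff.

/* Illustrative driver-side setup after this commit. */
static void
init_varying_cost_options(nir_shader_compiler_options *options)
{
   /* Keep a driver-specific threshold for movable varying expressions... */
   options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
   /* ...but install no custom estimator: leaving this NULL makes
    * nir_opt_varyings use default_varying_estimate_instr_cost instead. */
   options->varying_estimate_instr_cost = NULL;
}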

@@ -1265,106 +1265,6 @@ amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
}
}
/* from radeonsi */
static unsigned
amd_varying_estimate_instr_cost(nir_instr *instr)
{
unsigned dst_bit_size, src_bit_size, num_dst_dwords;
nir_op alu_op;
/* This is a very loose approximation based on gfx10. */
switch (instr->type) {
case nir_instr_type_alu:
dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
alu_op = nir_instr_as_alu(instr)->op;
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
switch (alu_op) {
case nir_op_mov:
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
return 0;
case nir_op_imul:
case nir_op_umul_low:
return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
case nir_op_imul_high:
case nir_op_umul_high:
case nir_op_imul_2x32_64:
case nir_op_umul_2x32_64:
return 4;
case nir_op_fexp2:
case nir_op_flog2:
case nir_op_frcp:
case nir_op_frsq:
case nir_op_fsqrt:
case nir_op_fsin:
case nir_op_fcos:
case nir_op_fsin_amd:
case nir_op_fcos_amd:
return 4; /* FP16 & FP32. */
case nir_op_fpow:
return 4 + 1 + 4; /* log2 + mul + exp2 */
case nir_op_fsign:
return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
case nir_op_idiv:
case nir_op_udiv:
case nir_op_imod:
case nir_op_umod:
case nir_op_irem:
return dst_bit_size == 64 ? 80 : 40;
case nir_op_fdiv:
return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
case nir_op_fmod:
case nir_op_frem:
return dst_bit_size == 64 ? 80 : 8;
default:
/* Double opcodes. Comparisons have always full performance. */
if ((dst_bit_size == 64 &&
nir_op_infos[alu_op].output_type & nir_type_float) ||
(dst_bit_size >= 8 && src_bit_size == 64 &&
nir_op_infos[alu_op].input_types[0] & nir_type_float))
return 16;
return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
}
case nir_instr_type_intrinsic:
dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
case nir_intrinsic_load_deref:
/* Uniform or UBO load.
* Set a low cost to balance the number of scalar loads and ALUs.
*/
return 3 * num_dst_dwords;
default:
unreachable("unexpected intrinsic");
}
default:
unreachable("unexpected instr type");
}
}
void
zink_screen_init_compiler(struct zink_screen *screen)
{
@@ -1438,12 +1338,10 @@ zink_screen_init_compiler(struct zink_screen *screen)
case VK_DRIVER_ID_AMD_OPEN_SOURCE:
case VK_DRIVER_ID_AMD_PROPRIETARY:
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
break;
default:
mesa_logw("zink: instruction costs not implemented for this implementation!");
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
}
} else {
screen->nir_options.io_options |= nir_io_dont_optimize;