nir,amd: remove trinary_minmax opcodes

These consist of the variations nir_op_{i|u|f}{min|max|med}3, which are either lowered in the backend (LLVM) anyway or can be recombined by the backend (ACO).

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6421>
2020-06-18 15:14:20 +01:00
parent 1fa43a4a8e
commit a79dad950b
9 changed files with 20 additions and 239 deletions
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -838,12 +838,6 @@ nir_lower_int64_op_to_options_mask(nir_op opcode)
   case nir_op_imax:
   case nir_op_umin:
   case nir_op_umax:
-   case nir_op_imin3:
-   case nir_op_imax3:
-   case nir_op_umin3:
-   case nir_op_umax3:
-   case nir_op_imed3:
-   case nir_op_umed3:
      return nir_lower_minmax64;
   case nir_op_iabs:
      return nir_lower_iabs64;
@@ -944,18 +938,6 @@ lower_int64_alu_instr(nir_builder *b, nir_instr *instr, void *_state)
      return lower_umin64(b, src[0], src[1]);
   case nir_op_umax:
      return lower_umax64(b, src[0], src[1]);
-   case nir_op_imin3:
-      return lower_imin64(b, src[0], lower_imin64(b, src[1], src[2]));
-   case nir_op_imax3:
-      return lower_imax64(b, src[0], lower_imax64(b, src[1], src[2]));
-   case nir_op_umin3:
-      return lower_umin64(b, src[0], lower_umin64(b, src[1], src[2]));
-   case nir_op_umax3:
-      return lower_umax64(b, src[0], lower_umax64(b, src[1], src[2]));
-   case nir_op_imed3:
-      return lower_imax64(b, lower_imin64(b, lower_imax64(b, src[0], src[1]), src[2]), lower_imin64(b, src[0], src[1]));
-   case nir_op_umed3:
-      return lower_umax64(b, lower_umin64(b, lower_umax64(b, src[0], src[1]), src[2]), lower_umin64(b, src[0], src[1]));
   case nir_op_iabs:
      return lower_iabs64(b, src[0]);
   case nir_op_ineg:
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -950,22 +950,8 @@ triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
 # component on vectors). There are two versions, one for floating point
 # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).

-
 triop("fcsel", tfloat32, "", "(src0 != 0.0f) ? src1 : src2")

-# 3 way min/max/med
-triop("fmin3", tfloat, "", "fminf(src0, fminf(src1, src2))")
-triop("imin3", tint, "", "MIN2(src0, MIN2(src1, src2))")
-triop("umin3", tuint, "", "MIN2(src0, MIN2(src1, src2))")
-
-triop("fmax3", tfloat, "", "fmaxf(src0, fmaxf(src1, src2))")
-triop("imax3", tint, "", "MAX2(src0, MAX2(src1, src2))")
-triop("umax3", tuint, "", "MAX2(src0, MAX2(src1, src2))")
-
-triop("fmed3", tfloat, "", "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1))")
-triop("imed3", tint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
-triop("umed3", tuint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
-
 opcode("bcsel", 0, tuint, [0, 0, 0],
       [tbool1, tuint, tuint], False, "", "src0 ? src1 : src2")
 opcode("b8csel", 0, tuint, [0, 0, 0],
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1153,10 +1153,6 @@ optimizations.extend([
   (('bcsel', a, ('bcsel', b, c, d), d), ('bcsel', ('iand', a, b), c, d)),
   (('bcsel', a, b, ('bcsel', c, b, d)), ('bcsel', ('ior', a, c), b, d)),

-   (('fmin3@64', a, b, c), ('fmin@64', a, ('fmin@64', b, c))),
-   (('fmax3@64', a, b, c), ('fmax@64', a, ('fmax@64', b, c))),
-   (('fmed3@64', a, b, c), ('fmax@64', ('fmin@64', ('fmax@64', a, b), c), ('fmin@64', a, b))),
-
   # Misc. lowering
   (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
   (('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1319,10 +1319,6 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
      case nir_op_udiv:
      case nir_op_bcsel:
      case nir_op_b32csel:
-      case nir_op_imax3:
-      case nir_op_imin3:
-      case nir_op_umax3:
-      case nir_op_umin3:
      case nir_op_ubfe:
      case nir_op_bfm:
      case nir_op_f2u32:
@@ -1405,16 +1401,6 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
      case nir_op_b32csel:
         res = src1 > src2 ? src1 : src2;
         break;
-      case nir_op_imax3:
-      case nir_op_imin3:
-      case nir_op_umax3:
-         src0 = src0 > src1 ? src0 : src1;
-         res = src0 > src2 ? src0 : src2;
-         break;
-      case nir_op_umin3:
-         src0 = src0 < src1 ? src0 : src1;
-         res = src0 < src2 ? src0 : src2;
-         break;
      case nir_op_ubfe:
         res = bitmask(MIN2(src2, scalar.def->bit_size));
         break;
--- a/src/compiler/spirv/vtn_amd.c
+++ b/src/compiler/spirv/vtn_amd.c
@@ -126,34 +126,45 @@ vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ex
   for (unsigned i = 0; i < num_inputs; i++)
      src[i] = vtn_get_nir_ssa(b, w[i + 5]);

+   /* place constants at src[1-2] for easier constant-folding */
+   for (unsigned i = 1; i <= 2; i++) {
+      if (nir_src_as_const_value(nir_src_for_ssa(src[0]))) {
+         nir_ssa_def* tmp = src[i];
+         src[i] = src[0];
+         src[0] = tmp;
+      }
+   }
   nir_ssa_def *def;
   switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
   case FMin3AMD:
-      def = nir_fmin3(nb, src[0], src[1], src[2]);
+      def = nir_fmin(nb, src[0], nir_fmin(nb, src[1], src[2]));
      break;
   case UMin3AMD:
-      def = nir_umin3(nb, src[0], src[1], src[2]);
+      def = nir_umin(nb, src[0], nir_umin(nb, src[1], src[2]));
      break;
   case SMin3AMD:
-      def = nir_imin3(nb, src[0], src[1], src[2]);
+      def = nir_imin(nb, src[0], nir_imin(nb, src[1], src[2]));
      break;
   case FMax3AMD:
-      def = nir_fmax3(nb, src[0], src[1], src[2]);
+      def = nir_fmax(nb, src[0], nir_fmax(nb, src[1], src[2]));
      break;
   case UMax3AMD:
-      def = nir_umax3(nb, src[0], src[1], src[2]);
+      def = nir_umax(nb, src[0], nir_umax(nb, src[1], src[2]));
      break;
   case SMax3AMD:
-      def = nir_imax3(nb, src[0], src[1], src[2]);
+      def = nir_imax(nb, src[0], nir_imax(nb, src[1], src[2]));
      break;
   case FMid3AMD:
-      def = nir_fmed3(nb, src[0], src[1], src[2]);
+      def = nir_fmin(nb, nir_fmax(nb, src[0], nir_fmin(nb, src[1], src[2])),
+                     nir_fmax(nb, src[1], src[2]));
      break;
   case UMid3AMD:
-      def = nir_umed3(nb, src[0], src[1], src[2]);
+      def = nir_umin(nb, nir_umax(nb, src[0], nir_umin(nb, src[1], src[2])),
+                     nir_umax(nb, src[1], src[2]));
      break;
   case SMid3AMD:
-      def = nir_imed3(nb, src[0], src[1], src[2]);
+      def = nir_imin(nb, nir_imax(nb, src[0], nir_imin(nb, src[1], src[2])),
+                     nir_imax(nb, src[1], src[2]));
      break;
   default:
      unreachable("unknown opcode\n");