zink: fix isNan mismatch between NIR and SPIR-V

SPIR-V and Vulkan allow implementations to optimize float ops assuming inputs are not NaN unless the SignedZeroInfNanPreserve execution mode is enabled. This means isnan, which is turned into nir_op_fneu(a,a) and then emitted as SpvOpFUnordNotEqual, could potentially be optimised to a const 0. isnan is special in this respect, so recognise this pattern and emit SpvOpIsNan instead of enabling SignedZeroInfNanPreserve, which would hurt performance of some Vulkan drivers. Fixes dEQP-GL45-ES31.functional.shaders.builtin_functions.common.isnan.* Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18910> (cherry picked from commit 69c22dd817)
2022-09-30 16:08:27 +01:00
parent c7ff53ae51
commit d9f989928a
2 changed files with 26 additions and 10 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -157,7 +157,7 @@
        "description": "zink: fix isNan mismatch between NIR and SPIR-V",
        "nominated": true,
        "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": null
    },
--- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
+++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
@@ -1811,29 +1811,29 @@ alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
 }

 static SpvId
-get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
+get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, SpvId *raw_value)
 {
-   SpvId raw_value = get_alu_src_raw(ctx, alu, src);
+   *raw_value = get_alu_src_raw(ctx, alu, src);

   unsigned num_components = alu_instr_src_components(alu, src);
   unsigned bit_size = nir_src_bit_size(alu->src[src].src);
   nir_alu_type type = nir_op_infos[alu->op].input_types[src];

   if (bit_size == 1)
-      return raw_value;
+      return *raw_value;
   else {
      switch (nir_alu_type_get_base_type(type)) {
      case nir_type_bool:
         unreachable("bool should have bit-size 1");

      case nir_type_int:
-         return bitcast_to_ivec(ctx, raw_value, bit_size, num_components);
+         return bitcast_to_ivec(ctx, *raw_value, bit_size, num_components);

      case nir_type_uint:
-         return raw_value;
+         return *raw_value;

      case nir_type_float:
-         return bitcast_to_fvec(ctx, raw_value, bit_size, num_components);
+         return bitcast_to_fvec(ctx, *raw_value, bit_size, num_components);

      default:
         unreachable("unknown nir_alu_type");
@@ -1908,8 +1908,9 @@ static void
 emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
 {
   SpvId src[NIR_MAX_VEC_COMPONENTS];
+   SpvId raw_src[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
-      src[i] = get_alu_src(ctx, alu, i);
+      src[i] = get_alu_src(ctx, alu, i, &raw_src[i]);

   SpvId dest_type = get_dest_type(ctx, &alu->dest.dest,
                                   nir_op_infos[alu->op].output_type);
@@ -2086,8 +2087,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
   BINOP(nir_op_uge, SpvOpUGreaterThanEqual)
   BINOP(nir_op_flt, SpvOpFOrdLessThan)
   BINOP(nir_op_fge, SpvOpFOrdGreaterThanEqual)
-   BINOP(nir_op_feq, SpvOpFOrdEqual)
-   BINOP(nir_op_fneu, SpvOpFUnordNotEqual)
   BINOP(nir_op_frem, SpvOpFRem)
 #undef BINOP

@@ -2152,6 +2151,23 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
   case nir_op_sge:
      unreachable("should already be lowered away");

+   case nir_op_fneu:
+      assert(nir_op_infos[alu->op].num_inputs == 2);
+      if (raw_src[0] == raw_src[1])
+         result =  emit_unop(ctx, SpvOpIsNan, dest_type, src[0]);
+      else
+         result = emit_binop(ctx, SpvOpFUnordNotEqual, dest_type, src[0], src[1]);
+      break;
+
+   case nir_op_feq:
+      assert(nir_op_infos[alu->op].num_inputs == 2);
+      if (raw_src[0] == raw_src[1])
+         result =  emit_unop(ctx, SpvOpLogicalNot, dest_type,
+                             emit_unop(ctx, SpvOpIsNan, dest_type, src[0]));
+      else
+         result = emit_binop(ctx, SpvOpFOrdEqual, dest_type, src[0], src[1]);
+      break;
+
   case nir_op_flrp:
      assert(nir_op_infos[alu->op].num_inputs == 3);
      result = emit_builtin_triop(ctx, GLSLstd450FMix, dest_type,