aco/gfx11.5: select SALU fsign

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
2023-09-22 10:17:29 +02:00
parent b1b5a0c6ad
commit 314053a3e3
2 changed files with 19 additions and 4 deletions
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2917,10 +2917,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
      break;
   }
   case nir_op_fsign: {
-      Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
+      Temp src = get_alu_src(ctx, instr->src[0]);
      if (dst.regClass() == v2b) {
         /* replace negative zero with positive zero */
-         src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
+         src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), as_vgpr(ctx, src));
         if (ctx->program->gfx_level >= GFX9) {
            src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src,
                           Operand::c16(1u));
@@ -2936,10 +2936,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          * the correctly signed Inf. After that, we only need to clamp between -1.0 and +1.0.
          */
         Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000));
-         src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, src);
+         src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, as_vgpr(ctx, src));
         bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src,
                  Operand::c32(0xbf800000));
      } else if (dst.regClass() == v2) {
+         src = as_vgpr(ctx, src);
         Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
         Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
         Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
@@ -2950,6 +2951,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
         upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);

         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
+      } else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
+         Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f16, bld.def(s1, scc), Operand::c16(0), src);
+         src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3c00), src,
+                        bld.scc(cond));
+         cond = bld.sopc(aco_opcode::s_cmp_ge_f16, bld.def(s1, scc), src, Operand::c16(0));
+         bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbc00),
+                  bld.scc(cond));
+      } else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
+         Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f32, bld.def(s1, scc), Operand::c32(0), src);
+         src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3f800000), src,
+                        bld.scc(cond));
+         cond = bld.sopc(aco_opcode::s_cmp_ge_f32, bld.def(s1, scc), src, Operand::c32(0));
+         bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbf800000),
+                  bld.scc(cond));
      } else {
         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
      }
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -332,7 +332,6 @@ init_context(isel_context* ctx, nir_shader* shader)
               case nir_op_ffmaz:
               case nir_op_fneg:
               case nir_op_fabs:
-               case nir_op_fsign:
               case nir_op_frcp:
               case nir_op_frsq:
               case nir_op_fsqrt:
@@ -377,6 +376,7 @@ init_context(isel_context* ctx, nir_shader* shader)
               case nir_op_fmax:
               case nir_op_fmin:
               case nir_op_fsat:
+               case nir_op_fsign:
               case nir_op_i2f16:
               case nir_op_i2f32:
               case nir_op_u2f16: