diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f5faa7fb353..78bbb42a2f7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2917,10 +2917,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
       break;
    }
    case nir_op_fsign: {
-      Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
+      Temp src = get_alu_src(ctx, instr->src[0]);
       if (dst.regClass() == v2b) {
          /* replace negative zero with positive zero */
-         src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
+         src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), as_vgpr(ctx, src));
          if (ctx->program->gfx_level >= GFX9) {
             src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src,
                            Operand::c16(1u));
@@ -2936,10 +2936,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           * the correctly signed Inf. After that, we only need to clamp between -1.0 and +1.0.
           */
          Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000));
-         src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, src);
+         src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, as_vgpr(ctx, src));
          bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src,
                   Operand::c32(0xbf800000));
       } else if (dst.regClass() == v2) {
+         src = as_vgpr(ctx, src);
          Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
          Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
          Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
@@ -2950,6 +2951,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
 
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
+      } else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
+         Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f16, bld.def(s1, scc), Operand::c16(0), src);
+         src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3c00), src,
+                        bld.scc(cond));
+         cond = bld.sopc(aco_opcode::s_cmp_ge_f16, bld.def(s1, scc), src, Operand::c16(0));
+         bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbc00),
+                  bld.scc(cond));
+      } else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
+         Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f32, bld.def(s1, scc), Operand::c32(0), src);
+         src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3f800000), src,
+                        bld.scc(cond));
+         cond = bld.sopc(aco_opcode::s_cmp_ge_f32, bld.def(s1, scc), src, Operand::c32(0));
+         bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbf800000),
+                  bld.scc(cond));
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
       }
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index f26778aed77..f06cbaf6453 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -332,7 +332,6 @@ init_context(isel_context* ctx, nir_shader* shader)
             case nir_op_ffmaz:
             case nir_op_fneg:
             case nir_op_fabs:
-            case nir_op_fsign:
             case nir_op_frcp:
             case nir_op_frsq:
             case nir_op_fsqrt:
@@ -377,6 +376,7 @@ init_context(isel_context* ctx, nir_shader* shader)
             case nir_op_fmax:
             case nir_op_fmin:
             case nir_op_fsat:
+            case nir_op_fsign:
             case nir_op_i2f16:
             case nir_op_i2f32:
             case nir_op_u2f16:
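
For reference, a minimal standalone C++ sketch of the compare/select sequence the new SALU branches emit for the 32-bit case. This is illustration only, not the ACO builder API; the function name and test values are made up. The first s_cmp_lt_f32/s_cselect_b32 pair collapses strictly positive inputs to +1.0, the second s_cmp_ge_f32/s_cselect_b32 pair collapses the remaining negative inputs to -1.0, and +-0.0 passes through unchanged.

   #include <cstdio>

   // Illustrative sketch only, assuming the scc-based compares map onto plain
   // C++ comparisons; the real lowering uses s_cmp_lt_f32/s_cmp_ge_f32 and
   // s_cselect_b32 on SGPRs rather than branches.
   static float fsign_salu_sketch(float src)
   {
      bool scc = 0.0f < src;    // s_cmp_lt_f32: strictly positive?
      src = scc ? 1.0f : src;   // s_cselect_b32: positives become +1.0
      scc = src >= 0.0f;        // s_cmp_ge_f32: +1.0 and +-0.0 pass
      return scc ? src : -1.0f; // s_cselect_b32: negatives become -1.0
   }

   int main()
   {
      for (float x : {3.5f, -2.0f, 0.0f, -0.0f})
         std::printf("fsign(%g) = %g\n", x, fsign_salu_sketch(x));
      return 0;
   }

The 16-bit branch in the patch follows the same structure, with s_cmp_lt_f16/s_cmp_ge_f16 and the half-precision constants 0x3c00 (+1.0) and 0xbc00 (-1.0).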