aco/gfx11.5: select SALU fsign

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
Georg Lehmann
2023-09-22 10:17:29 +02:00
committed by Marge Bot
parent b1b5a0c6ad
commit 314053a3e3
2 changed files with 19 additions and 4 deletions

View File

@@ -2917,10 +2917,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
break;
}
case nir_op_fsign: {
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
Temp src = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == v2b) {
/* replace negative zero with positive zero */
src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), as_vgpr(ctx, src));
if (ctx->program->gfx_level >= GFX9) {
src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src,
Operand::c16(1u));
@@ -2936,10 +2936,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
* the correctly signed Inf. After that, we only need to clamp between -1.0 and +1.0.
*/
Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000));
src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, src);
src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, as_vgpr(ctx, src));
bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src,
Operand::c32(0xbf800000));
} else if (dst.regClass() == v2) {
src = as_vgpr(ctx, src);
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
@@ -2950,6 +2951,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f16, bld.def(s1, scc), Operand::c16(0), src);
src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3c00), src,
bld.scc(cond));
cond = bld.sopc(aco_opcode::s_cmp_ge_f16, bld.def(s1, scc), src, Operand::c16(0));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbc00),
bld.scc(cond));
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f32, bld.def(s1, scc), Operand::c32(0), src);
src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3f800000), src,
bld.scc(cond));
cond = bld.sopc(aco_opcode::s_cmp_ge_f32, bld.def(s1, scc), src, Operand::c32(0));
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbf800000),
bld.scc(cond));
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}

View File

@@ -332,7 +332,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_ffmaz:
case nir_op_fneg:
case nir_op_fabs:
case nir_op_fsign:
case nir_op_frcp:
case nir_op_frsq:
case nir_op_fsqrt:
@@ -377,6 +376,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_fmax:
case nir_op_fmin:
case nir_op_fsat:
case nir_op_fsign:
case nir_op_i2f16:
case nir_op_i2f32:
case nir_op_u2f16: