aco/gfx11.5: select SALU fsign
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
@@ -2917,10 +2917,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_op_fsign: {
|
||||
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (dst.regClass() == v2b) {
|
||||
/* replace negative zero with positive zero */
|
||||
src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src);
|
||||
src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), as_vgpr(ctx, src));
|
||||
if (ctx->program->gfx_level >= GFX9) {
|
||||
src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src,
|
||||
Operand::c16(1u));
|
||||
@@ -2936,10 +2936,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
* the correctly signed Inf. After that, we only need to clamp between -1.0 and +1.0.
|
||||
*/
|
||||
Temp inf = bld.copy(bld.def(s1), Operand::c32(0x7f800000));
|
||||
src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, src);
|
||||
src = bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), inf, as_vgpr(ctx, src));
|
||||
bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::c32(0x3f800000), src,
|
||||
Operand::c32(0xbf800000));
|
||||
} else if (dst.regClass() == v2) {
|
||||
src = as_vgpr(ctx, src);
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.def(bld.lm), Operand::zero(), src);
|
||||
Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u));
|
||||
Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp,
|
||||
@@ -2950,6 +2951,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
|
||||
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper);
|
||||
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||
Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f16, bld.def(s1, scc), Operand::c16(0), src);
|
||||
src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3c00), src,
|
||||
bld.scc(cond));
|
||||
cond = bld.sopc(aco_opcode::s_cmp_ge_f16, bld.def(s1, scc), src, Operand::c16(0));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbc00),
|
||||
bld.scc(cond));
|
||||
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||
Temp cond = bld.sopc(aco_opcode::s_cmp_lt_f32, bld.def(s1, scc), Operand::c32(0), src);
|
||||
src = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(0x3f800000), src,
|
||||
bld.scc(cond));
|
||||
cond = bld.sopc(aco_opcode::s_cmp_ge_f32, bld.def(s1, scc), src, Operand::c32(0));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), src, Operand::c32(0xbf800000),
|
||||
bld.scc(cond));
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
@@ -332,7 +332,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
case nir_op_ffmaz:
|
||||
case nir_op_fneg:
|
||||
case nir_op_fabs:
|
||||
case nir_op_fsign:
|
||||
case nir_op_frcp:
|
||||
case nir_op_frsq:
|
||||
case nir_op_fsqrt:
|
||||
@@ -377,6 +376,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
case nir_op_fmax:
|
||||
case nir_op_fmin:
|
||||
case nir_op_fsat:
|
||||
case nir_op_fsign:
|
||||
case nir_op_i2f16:
|
||||
case nir_op_i2f32:
|
||||
case nir_op_u2f16:
|
||||
|
Reference in New Issue
Block a user