aco/gfx11.5: select SOP2 float instructions
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
@@ -2325,6 +2325,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
|
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
|
||||||
} else if (dst.regClass() == v2) {
|
} else if (dst.regClass() == v2) {
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_f64_e64, dst);
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_f64_e64, dst);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_f16, dst, false);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -2347,6 +2351,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
|
emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
|
||||||
} else if (dst.regClass() == v2) {
|
} else if (dst.regClass() == v2) {
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_f64_e64, dst);
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_f64_e64, dst);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_add_f16, dst, false);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_add_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -2377,6 +2385,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), as_vgpr(ctx, src0),
|
Instruction* add = bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), as_vgpr(ctx, src0),
|
||||||
as_vgpr(ctx, src1));
|
as_vgpr(ctx, src1));
|
||||||
add->valu().neg[1] = true;
|
add->valu().neg[1] = true;
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_sub_f16, dst, false);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_sub_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -2406,6 +2418,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
ctx->block->fp_mode.must_flush_denorms32, 3);
|
ctx->block->fp_mode.must_flush_denorms32, 3);
|
||||||
} else if (dst.regClass() == v2) {
|
} else if (dst.regClass() == v2) {
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
|
||||||
|
} else if (dst.regClass() == s1) {
|
||||||
|
Temp src0 = get_alu_src(ctx, instr->src[0]);
|
||||||
|
Temp src1 = get_alu_src(ctx, instr->src[1]);
|
||||||
|
Temp src2 = get_alu_src(ctx, instr->src[2]);
|
||||||
|
aco_opcode op =
|
||||||
|
instr->def.bit_size == 16 ? aco_opcode::s_fmac_f16 : aco_opcode::s_fmac_f32;
|
||||||
|
bld.sop2(op, Definition(dst), src0, src1, src2);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -2432,6 +2451,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
} else if (dst.regClass() == v2) {
|
} else if (dst.regClass() == v2) {
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_f64_e64, dst,
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_f64_e64, dst,
|
||||||
ctx->block->fp_mode.must_flush_denorms16_64);
|
ctx->block->fp_mode.must_flush_denorms16_64);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_max_f16, dst, false);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_max_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -2449,6 +2472,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
} else if (dst.regClass() == v2) {
|
} else if (dst.regClass() == v2) {
|
||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_f64_e64, dst,
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_f64_e64, dst,
|
||||||
ctx->block->fp_mode.must_flush_denorms16_64);
|
ctx->block->fp_mode.must_flush_denorms16_64);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 16) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_min_f16, dst, false);
|
||||||
|
} else if (dst.regClass() == s1 && instr->def.bit_size == 32) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_min_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
@@ -3415,6 +3442,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
|
emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
|
||||||
else
|
else
|
||||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
|
emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
|
||||||
|
} else if (dst.regClass() == s1) {
|
||||||
|
emit_sop2_instruction(ctx, instr, aco_opcode::s_cvt_pk_rtz_f16_f32, dst, false);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
|
@@ -328,14 +328,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||||||
case nir_op_b2f16:
|
case nir_op_b2f16:
|
||||||
case nir_op_b2f32:
|
case nir_op_b2f32:
|
||||||
case nir_op_mov: break;
|
case nir_op_mov: break;
|
||||||
case nir_op_fmul:
|
|
||||||
case nir_op_fmulz:
|
case nir_op_fmulz:
|
||||||
case nir_op_fadd:
|
|
||||||
case nir_op_fsub:
|
|
||||||
case nir_op_ffma:
|
|
||||||
case nir_op_ffmaz:
|
case nir_op_ffmaz:
|
||||||
case nir_op_fmax:
|
|
||||||
case nir_op_fmin:
|
|
||||||
case nir_op_fneg:
|
case nir_op_fneg:
|
||||||
case nir_op_fabs:
|
case nir_op_fabs:
|
||||||
case nir_op_fsat:
|
case nir_op_fsat:
|
||||||
@@ -350,8 +344,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||||||
case nir_op_f2f64:
|
case nir_op_f2f64:
|
||||||
case nir_op_u2f64:
|
case nir_op_u2f64:
|
||||||
case nir_op_i2f64:
|
case nir_op_i2f64:
|
||||||
case nir_op_pack_half_2x16_rtz_split:
|
|
||||||
case nir_op_pack_half_2x16_split:
|
|
||||||
case nir_op_pack_unorm_2x16:
|
case nir_op_pack_unorm_2x16:
|
||||||
case nir_op_pack_snorm_2x16:
|
case nir_op_pack_snorm_2x16:
|
||||||
case nir_op_pack_uint_2x16:
|
case nir_op_pack_uint_2x16:
|
||||||
@@ -379,6 +371,12 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||||||
case nir_op_sdot_2x16_iadd:
|
case nir_op_sdot_2x16_iadd:
|
||||||
case nir_op_udot_2x16_uadd_sat:
|
case nir_op_udot_2x16_uadd_sat:
|
||||||
case nir_op_sdot_2x16_iadd_sat: type = RegType::vgpr; break;
|
case nir_op_sdot_2x16_iadd_sat: type = RegType::vgpr; break;
|
||||||
|
case nir_op_fmul:
|
||||||
|
case nir_op_ffma:
|
||||||
|
case nir_op_fadd:
|
||||||
|
case nir_op_fsub:
|
||||||
|
case nir_op_fmax:
|
||||||
|
case nir_op_fmin:
|
||||||
case nir_op_i2f16:
|
case nir_op_i2f16:
|
||||||
case nir_op_i2f32:
|
case nir_op_i2f32:
|
||||||
case nir_op_u2f16:
|
case nir_op_u2f16:
|
||||||
@@ -392,6 +390,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||||||
case nir_op_fceil:
|
case nir_op_fceil:
|
||||||
case nir_op_ftrunc:
|
case nir_op_ftrunc:
|
||||||
case nir_op_fround_even:
|
case nir_op_fround_even:
|
||||||
|
case nir_op_pack_half_2x16_rtz_split:
|
||||||
|
case nir_op_pack_half_2x16_split:
|
||||||
case nir_op_unpack_half_2x16_split_x:
|
case nir_op_unpack_half_2x16_split_x:
|
||||||
case nir_op_unpack_half_2x16_split_y: {
|
case nir_op_unpack_half_2x16_split_y: {
|
||||||
if (ctx->program->gfx_level < GFX11_5 ||
|
if (ctx->program->gfx_level < GFX11_5 ||
|
||||||
|
Reference in New Issue
Block a user