aco/gfx11.5: select SOPC float instructions

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
Georg Lehmann
2023-09-22 08:52:03 +02:00
committed by Marge Bot
parent 4bd229ac50
commit ee0e183700

View File

@@ -1037,12 +1037,13 @@ emit_sopc_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te
void
emit_comparison(isel_context* ctx, nir_alu_instr* instr, Temp dst, aco_opcode v16_op,
aco_opcode v32_op, aco_opcode v64_op, aco_opcode s32_op = aco_opcode::num_opcodes,
aco_opcode v32_op, aco_opcode v64_op, aco_opcode s16_op = aco_opcode::num_opcodes,
aco_opcode s32_op = aco_opcode::num_opcodes,
aco_opcode s64_op = aco_opcode::num_opcodes)
{
aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op
: instr->src[0].src.ssa->bit_size == 32 ? s32_op
: aco_opcode::num_opcodes;
: s16_op;
aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op
: instr->src[0].src.ssa->bit_size == 32 ? v32_op
: v16_op;
@@ -3801,63 +3802,91 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
break;
}
case nir_op_flt: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_f16, aco_opcode::v_cmp_lt_f32,
aco_opcode::v_cmp_lt_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_lt_f16, aco_opcode::v_cmp_lt_f32,
aco_opcode::v_cmp_lt_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_lt_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_lt_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fge: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_f16, aco_opcode::v_cmp_ge_f32,
aco_opcode::v_cmp_ge_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_ge_f16, aco_opcode::v_cmp_ge_f32,
aco_opcode::v_cmp_ge_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_ge_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_ge_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fltu: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_nge_f16, aco_opcode::v_cmp_nge_f32,
aco_opcode::v_cmp_nge_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_nge_f16, aco_opcode::v_cmp_nge_f32,
aco_opcode::v_cmp_nge_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nge_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nge_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fgeu: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_nlt_f16, aco_opcode::v_cmp_nlt_f32,
aco_opcode::v_cmp_nlt_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_nlt_f16, aco_opcode::v_cmp_nlt_f32,
aco_opcode::v_cmp_nlt_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nlt_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nlt_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_feq: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32,
aco_opcode::v_cmp_eq_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32,
aco_opcode::v_cmp_eq_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_eq_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_eq_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fneu: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32,
aco_opcode::v_cmp_neq_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32,
aco_opcode::v_cmp_neq_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_neq_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_neq_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fequ: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_nlg_f16, aco_opcode::v_cmp_nlg_f32,
aco_opcode::v_cmp_nlg_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_nlg_f16, aco_opcode::v_cmp_nlg_f32,
aco_opcode::v_cmp_nlg_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nlg_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_nlg_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_fneo: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lg_f16, aco_opcode::v_cmp_lg_f32,
aco_opcode::v_cmp_lg_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_lg_f16, aco_opcode::v_cmp_lg_f32,
aco_opcode::v_cmp_lg_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_lg_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_lg_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_funord: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_u_f16, aco_opcode::v_cmp_u_f32,
aco_opcode::v_cmp_u_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_u_f16, aco_opcode::v_cmp_u_f32, aco_opcode::v_cmp_u_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_u_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_u_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_ford: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_o_f16, aco_opcode::v_cmp_o_f32,
aco_opcode::v_cmp_o_f64);
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_o_f16, aco_opcode::v_cmp_o_f32, aco_opcode::v_cmp_o_f64,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_o_f16 : aco_opcode::num_opcodes,
ctx->program->gfx_level >= GFX11_5 ? aco_opcode::s_cmp_o_f32 : aco_opcode::num_opcodes);
break;
}
case nir_op_ilt: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_i16, aco_opcode::v_cmp_lt_i32,
aco_opcode::v_cmp_lt_i64, aco_opcode::s_cmp_lt_i32);
aco_opcode::v_cmp_lt_i64, aco_opcode::num_opcodes, aco_opcode::s_cmp_lt_i32);
break;
}
case nir_op_ige: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_i16, aco_opcode::v_cmp_ge_i32,
aco_opcode::v_cmp_ge_i64, aco_opcode::s_cmp_ge_i32);
aco_opcode::v_cmp_ge_i64, aco_opcode::num_opcodes, aco_opcode::s_cmp_ge_i32);
break;
}
case nir_op_ieq: {
@@ -3866,7 +3895,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
else
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_eq_i16, aco_opcode::v_cmp_eq_i32,
aco_opcode::v_cmp_eq_i64, aco_opcode::s_cmp_eq_i32,
aco_opcode::v_cmp_eq_i64, aco_opcode::num_opcodes, aco_opcode::s_cmp_eq_i32,
ctx->program->gfx_level >= GFX8 ? aco_opcode::s_cmp_eq_u64 : aco_opcode::num_opcodes);
break;
}
@@ -3876,18 +3905,18 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
else
emit_comparison(
ctx, instr, dst, aco_opcode::v_cmp_lg_i16, aco_opcode::v_cmp_lg_i32,
aco_opcode::v_cmp_lg_i64, aco_opcode::s_cmp_lg_i32,
aco_opcode::v_cmp_lg_i64, aco_opcode::num_opcodes, aco_opcode::s_cmp_lg_i32,
ctx->program->gfx_level >= GFX8 ? aco_opcode::s_cmp_lg_u64 : aco_opcode::num_opcodes);
break;
}
case nir_op_ult: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_lt_u16, aco_opcode::v_cmp_lt_u32,
aco_opcode::v_cmp_lt_u64, aco_opcode::s_cmp_lt_u32);
aco_opcode::v_cmp_lt_u64, aco_opcode::num_opcodes, aco_opcode::s_cmp_lt_u32);
break;
}
case nir_op_uge: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_ge_u16, aco_opcode::v_cmp_ge_u32,
aco_opcode::v_cmp_ge_u64, aco_opcode::s_cmp_ge_u32);
aco_opcode::v_cmp_ge_u64, aco_opcode::num_opcodes, aco_opcode::s_cmp_ge_u32);
break;
}
case nir_op_bitz: