aco/isel: refactor emit_vop3a_instruction() to handle 2-operand instructions

Only shaders from AC:O are affected.

Totals from 4 (0.00% of 136546) affected shaders (RAVEN):
CodeSize: 16428 -> 16420 (-0.05%)
Instrs: 3294 -> 3292 (-0.06%)
Cycles: 14208 -> 14200 (-0.06%)
VMEM: 936 -> 978 (+4.49%)
VClause: 80 -> 77 (-3.75%)
Copies: 211 -> 209 (-0.95%)
PreVGPRs: 127 -> 126 (-0.79%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6635>
This commit is contained in:
Daniel Schürmann
2020-09-05 00:47:30 +01:00
committed by Marge Bot
parent 5b31056257
commit 0b6448bbe7

View File

@@ -782,29 +782,36 @@ void emit_vop2_instruction_logic64(isel_context *ctx, nir_alu_instr *instr,
} }
void emit_vop3a_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst, void emit_vop3a_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst,
bool flush_denorms = false) bool flush_denorms = false, unsigned num_sources = 2)
{ {
Temp src0 = get_alu_src(ctx, instr->src[0]); assert(num_sources == 2 || num_sources == 3);
Temp src1 = get_alu_src(ctx, instr->src[1]); Temp src[3] = { Temp(0, v1), Temp(0, v1), Temp(0, v1) };
Temp src2 = get_alu_src(ctx, instr->src[2]); bool has_sgpr = false;
for (unsigned i = 0; i < num_sources; i++) {
/* ensure that the instruction has at most 1 sgpr operand src[i] = get_alu_src(ctx, instr->src[i]);
* The optimizer will inline constants for us */ if (has_sgpr)
if (src0.type() == RegType::sgpr && src1.type() == RegType::sgpr) src[i] = as_vgpr(ctx, src[i]);
src0 = as_vgpr(ctx, src0); else
if (src1.type() == RegType::sgpr && src2.type() == RegType::sgpr) has_sgpr = src[i].type() == RegType::sgpr;
src1 = as_vgpr(ctx, src1); }
if (src2.type() == RegType::sgpr && src0.type() == RegType::sgpr)
src2 = as_vgpr(ctx, src2);
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
bld.is_precise = instr->exact; bld.is_precise = instr->exact;
if (flush_denorms && ctx->program->chip_class < GFX9) { if (flush_denorms && ctx->program->chip_class < GFX9) {
assert(dst.size() == 1); assert(dst.size() == 1);
Temp tmp = bld.vop3(op, Definition(dst), src0, src1, src2); Temp tmp;
bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0x3f800000u), tmp); if (num_sources == 3)
tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1], src[2]);
else
tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]);
if (dst.size() == 1)
bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0x3f800000u), tmp);
else
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(0x3FF0000000000000lu), tmp);
} else if (num_sources == 3) {
bld.vop3(op, Definition(dst), src[0], src[1], src[2]);
} else { } else {
bld.vop3(op, Definition(dst), src0, src1, src2); bld.vop3(op, Definition(dst), src[0], src[1]);
} }
} }
@@ -1407,8 +1414,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst), bld.vop3(aco_opcode::v_lshrrev_b64, Definition(dst),
get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0])); get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0]));
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_lshr_b64, Definition(dst), emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshr_b64, dst);
get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1]));
} else if (dst.regClass() == s2) { } else if (dst.regClass() == s2) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b64, dst, true); emit_sop2_instruction(ctx, instr, aco_opcode::s_lshr_b64, dst, true);
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
@@ -1425,8 +1431,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst),
get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0])); get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0]));
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_lshl_b64, Definition(dst), emit_vop3a_instruction(ctx, instr, aco_opcode::v_lshl_b64, dst);
get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1]));
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b32, dst, true); emit_sop2_instruction(ctx, instr, aco_opcode::s_lshl_b32, dst, true);
} else if (dst.regClass() == s2) { } else if (dst.regClass() == s2) {
@@ -1443,8 +1448,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst), bld.vop3(aco_opcode::v_ashrrev_i64, Definition(dst),
get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0])); get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0]));
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_ashr_i64, Definition(dst), emit_vop3a_instruction(ctx, instr, aco_opcode::v_ashr_i64, dst);
get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1]));
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i32, dst, true); emit_sop2_instruction(ctx, instr, aco_opcode::s_ashr_i32, dst, true);
} else if (dst.regClass() == s2) { } else if (dst.regClass() == s2) {
@@ -1672,8 +1676,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} }
case nir_op_imul: { case nir_op_imul: {
if (dst.regClass() == v1) { if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_mul_lo_u32, Definition(dst), emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_lo_u32, dst);
get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1]));
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false); emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_i32, dst, false);
} else { } else {
@@ -1683,7 +1686,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} }
case nir_op_umul_high: { case nir_op_umul_high: {
if (dst.regClass() == v1) { if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_mul_hi_u32, Definition(dst), get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1])); emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32, dst);
} else if (dst.regClass() == s1 && ctx->options->chip_class >= GFX9) { } else if (dst.regClass() == s1 && ctx->options->chip_class >= GFX9) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_u32, dst, false); emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_u32, dst, false);
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
@@ -1697,7 +1700,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} }
case nir_op_imul_high: { case nir_op_imul_high: {
if (dst.regClass() == v1) { if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_mul_hi_i32, Definition(dst), get_alu_src(ctx, instr->src[0]), get_alu_src(ctx, instr->src[1])); emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_i32, dst);
} else if (dst.regClass() == s1 && ctx->options->chip_class >= GFX9) { } else if (dst.regClass() == s1 && ctx->options->chip_class >= GFX9) {
emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_i32, dst, false); emit_sop2_instruction(ctx, instr, aco_opcode::s_mul_hi_i32, dst, false);
} else if (dst.regClass() == s1) { } else if (dst.regClass() == s1) {
@@ -1715,9 +1718,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true); emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_f32, dst, true);
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
Temp src0 = get_alu_src(ctx, instr->src[0]); emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_f64, dst);
Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), src0, src1);
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -1729,9 +1730,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true); emit_vop2_instruction(ctx, instr, aco_opcode::v_add_f32, dst, true);
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
Temp src0 = get_alu_src(ctx, instr->src[0]); emit_vop3a_instruction(ctx, instr, aco_opcode::v_add_f64, dst);
Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, src1);
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -1767,14 +1766,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32); emit_vop2_instruction(ctx, instr, aco_opcode::v_max_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32);
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
Temp src0 = get_alu_src(ctx, instr->src[0]); emit_vop3a_instruction(ctx, instr, aco_opcode::v_max_f64, dst, ctx->block->fp_mode.must_flush_denorms16_64);
Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
if (ctx->block->fp_mode.must_flush_denorms16_64 && ctx->program->chip_class < GFX9) {
Temp tmp = bld.vop3(aco_opcode::v_max_f64, bld.def(v2), src0, src1);
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(0x3FF0000000000000lu), tmp);
} else {
bld.vop3(aco_opcode::v_max_f64, Definition(dst), src0, src1);
}
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -1787,14 +1779,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32); emit_vop2_instruction(ctx, instr, aco_opcode::v_min_f32, dst, true, false, ctx->block->fp_mode.must_flush_denorms32);
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
Temp src0 = get_alu_src(ctx, instr->src[0]); emit_vop3a_instruction(ctx, instr, aco_opcode::v_min_f64, dst, ctx->block->fp_mode.must_flush_denorms16_64);
Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1]));
if (ctx->block->fp_mode.must_flush_denorms16_64 && ctx->program->chip_class < GFX9) {
Temp tmp = bld.vop3(aco_opcode::v_min_f64, bld.def(v2), src0, src1);
bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(0x3FF0000000000000lu), tmp);
} else {
bld.vop3(aco_opcode::v_min_f64, Definition(dst), src0, src1);
}
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -2083,14 +2068,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
break; break;
} }
case nir_op_ldexp: { case nir_op_ldexp: {
Temp src0 = get_alu_src(ctx, instr->src[0]);
Temp src1 = get_alu_src(ctx, instr->src[1]);
if (dst.regClass() == v2b) { if (dst.regClass() == v2b) {
emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false); emit_vop2_instruction(ctx, instr, aco_opcode::v_ldexp_f16, dst, false);
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
bld.vop3(aco_opcode::v_ldexp_f32, Definition(dst), as_vgpr(ctx, src0), src1); emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f32, dst);
} else if (dst.regClass() == v2) { } else if (dst.regClass() == v2) {
bld.vop3(aco_opcode::v_ldexp_f64, Definition(dst), as_vgpr(ctx, src0), src1); emit_vop3a_instruction(ctx, instr, aco_opcode::v_ldexp_f64, dst);
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -2719,13 +2702,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
break; break;
} }
case nir_op_bitfield_select: { case nir_op_bitfield_select: {
/* (mask & insert) | (~mask & base) */
Temp bitmask = get_alu_src(ctx, instr->src[0]);
Temp insert = get_alu_src(ctx, instr->src[1]);
Temp base = get_alu_src(ctx, instr->src[2]);
/* dst = (insert & bitmask) | (base & ~bitmask) */ /* dst = (insert & bitmask) | (base & ~bitmask) */
if (dst.regClass() == s1) { if (dst.regClass() == s1) {
Temp bitmask = get_alu_src(ctx, instr->src[0]);
Temp insert = get_alu_src(ctx, instr->src[1]);
Temp base = get_alu_src(ctx, instr->src[2]);
aco_ptr<Instruction> sop2; aco_ptr<Instruction> sop2;
nir_const_value* const_bitmask = nir_src_as_const_value(instr->src[0].src); nir_const_value* const_bitmask = nir_src_as_const_value(instr->src[0].src);
nir_const_value* const_insert = nir_src_as_const_value(instr->src[1].src); nir_const_value* const_insert = nir_src_as_const_value(instr->src[1].src);
@@ -2749,13 +2731,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs); bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), rhs, lhs);
} else if (dst.regClass() == v1) { } else if (dst.regClass() == v1) {
if (base.type() == RegType::sgpr && (bitmask.type() == RegType::sgpr || (insert.type() == RegType::sgpr))) emit_vop3a_instruction(ctx, instr, aco_opcode::v_bfi_b32, dst, false, 3);
base = as_vgpr(ctx, base);
if (insert.type() == RegType::sgpr && bitmask.type() == RegType::sgpr)
insert = as_vgpr(ctx, insert);
bld.vop3(aco_opcode::v_bfi_b32, Definition(dst), bitmask, insert, base);
} else { } else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size"); isel_err(&instr->instr, "Unimplemented NIR instr bit size");
} }
@@ -2796,7 +2772,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else { } else {
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32; aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;
emit_vop3a_instruction(ctx, instr, opcode, dst); emit_vop3a_instruction(ctx, instr, opcode, dst, false, 3);
} }
break; break;
} }