r600/sfn: Make use of variable-length DOT
This frees some ALU slots for better group scheduling.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20205>
This commit is contained in:
@@ -694,7 +694,18 @@ AluInstr::split(ValueFactory& vf)
|
|||||||
|
|
||||||
m_dest->del_parent(this);
|
m_dest->del_parent(this);
|
||||||
|
|
||||||
for (int s = 0; s < m_alu_slots; ++s) {
|
int start_slot = 0;
|
||||||
|
bool is_dot = m_opcode == op2_dot || opcode() == op2_dot_ieee;
|
||||||
|
auto last_opcode = m_opcode;
|
||||||
|
|
||||||
|
if (is_dot) {
|
||||||
|
start_slot = m_dest->chan();
|
||||||
|
last_opcode = m_opcode == op2_dot ? op2_mul : op2_mul_ieee;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (int k = 0; k < m_alu_slots; ++k) {
|
||||||
|
int s = k + start_slot;
|
||||||
|
|
||||||
PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
|
PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
|
||||||
if (dst->pin() != pin_chgr) {
|
if (dst->pin() != pin_chgr) {
|
||||||
@@ -720,7 +731,10 @@ AluInstr::split(ValueFactory& vf)
|
|||||||
src.push_back(old_src);
|
src.push_back(old_src);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto instr = new AluInstr(m_opcode, dst, src, {}, 1);
|
auto opcode = k < m_alu_slots -1 ? m_opcode : last_opcode;
|
||||||
|
|
||||||
|
|
||||||
|
auto instr = new AluInstr(opcode, dst, src, {}, 1);
|
||||||
instr->set_blockid(block_id(), index());
|
instr->set_blockid(block_id(), index());
|
||||||
|
|
||||||
if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
|
if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
|
||||||
@@ -1239,6 +1253,8 @@ emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);
|
|||||||
static bool
|
static bool
|
||||||
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
|
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
|
||||||
static bool
|
static bool
|
||||||
|
emit_dot4(const nir_alu_instr& alu, Shader& shader);
|
||||||
|
static bool
|
||||||
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
|
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -1524,7 +1540,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
|
|||||||
case nir_op_fdot3:
|
case nir_op_fdot3:
|
||||||
return emit_dot(*alu, 3, shader);
|
return emit_dot(*alu, 3, shader);
|
||||||
case nir_op_fdot4:
|
case nir_op_fdot4:
|
||||||
return emit_dot(*alu, 4, shader);
|
return emit_dot4(*alu, shader);
|
||||||
|
|
||||||
case nir_op_feq32:
|
case nir_op_feq32:
|
||||||
case nir_op_feq:
|
case nir_op_feq:
|
||||||
@@ -2463,18 +2479,49 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
|
|||||||
const nir_alu_src& src0 = alu.src[0];
|
const nir_alu_src& src0 = alu.src[0];
|
||||||
const nir_alu_src& src1 = alu.src[1];
|
const nir_alu_src& src1 = alu.src[1];
|
||||||
|
|
||||||
auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
|
auto dest = value_factory.dest(alu.dest.dest, 0, pin_chan);
|
||||||
|
|
||||||
AluInstr::SrcValues srcs(8);
|
AluInstr::SrcValues srcs(2 * n);
|
||||||
|
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
srcs[2 * i] = value_factory.src(src0, i);
|
srcs[2 * i] = value_factory.src(src0, i);
|
||||||
srcs[2 * i + 1] = value_factory.src(src1, i);
|
srcs[2 * i + 1] = value_factory.src(src1, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = n; i < 4; ++i) {
|
auto op =
|
||||||
srcs[2 * i] = value_factory.zero();
|
unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot : op2_dot_ieee;
|
||||||
srcs[2 * i + 1] = value_factory.zero();
|
AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, n);
|
||||||
|
|
||||||
|
if (src0.negate)
|
||||||
|
ir->set_alu_flag(alu_src0_neg);
|
||||||
|
if (src0.abs)
|
||||||
|
ir->set_alu_flag(alu_src0_abs);
|
||||||
|
if (src1.negate)
|
||||||
|
ir->set_alu_flag(alu_src1_neg);
|
||||||
|
if (src1.abs)
|
||||||
|
ir->set_alu_flag(alu_src1_abs);
|
||||||
|
|
||||||
|
if (alu.dest.saturate)
|
||||||
|
ir->set_alu_flag(alu_dst_clamp);
|
||||||
|
|
||||||
|
shader.emit_instruction(ir);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
emit_dot4(const nir_alu_instr& alu, Shader& shader)
|
||||||
|
{
|
||||||
|
auto& value_factory = shader.value_factory();
|
||||||
|
const nir_alu_src& src0 = alu.src[0];
|
||||||
|
const nir_alu_src& src1 = alu.src[1];
|
||||||
|
|
||||||
|
auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
|
||||||
|
|
||||||
|
AluInstr::SrcValues srcs(8);
|
||||||
|
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
srcs[2 * i] = value_factory.src(src0, i);
|
||||||
|
srcs[2 * i + 1] = value_factory.src(src1, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto op =
|
auto op =
|
||||||
@@ -2497,6 +2544,7 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
emit_fdph(const nir_alu_instr& alu, Shader& shader)
|
emit_fdph(const nir_alu_instr& alu, Shader& shader)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user