r600: Force NOPs when loading AR on R600 class hardware

Loading indirectly from a register that was just written to
doesn't work on R600 class hardware, so add a NOP group, with
the address register load being emitted in the t-slot, to make
sure that the register write has finished.

Fixes: 33765aa92a
     r600/sfn: Enable NIR for pre RG hardware

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18130>
(cherry picked from commit 404d95ca49)
This commit is contained in:
Gert Wollny
2022-08-15 18:52:09 +02:00
committed by Dylan Baker
parent 5f562b1e00
commit 96073f7f98
9 changed files with 30 additions and 22 deletions

View File

@@ -166,7 +166,7 @@
"description": "r600: Force NOPs when loading AR on R600 class hardware",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "33765aa92aa5c150873fc210e9d6c1fe22cf8646"
},

View File

@@ -1196,7 +1196,7 @@ static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
}
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
static int load_ar_r6xx(struct r600_bytecode *bc)
static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
{
struct r600_bytecode_alu alu;
int r;
@@ -1207,6 +1207,10 @@ static int load_ar_r6xx(struct r600_bytecode *bc)
/* hack to avoid making MOVA the last instruction in the clause */
if ((bc->cf_last->ndw>>1) >= 110)
bc->force_add_cf = 1;
else if (for_src) {
insert_nop_r6xx(bc, 4);
bc->nalu_groups++;
}
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_MOVA_GPR_INT;
@@ -1224,13 +1228,13 @@ static int load_ar_r6xx(struct r600_bytecode *bc)
}
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
int r600_load_ar(struct r600_bytecode *bc)
int r600_load_ar(struct r600_bytecode *bc, bool for_src)
{
struct r600_bytecode_alu alu;
int r;
if (bc->ar_handling)
return load_ar_r6xx(bc);
return load_ar_r6xx(bc, for_src);
if (bc->ar_loaded)
return 0;
@@ -1306,10 +1310,10 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
/* Check AR usage and load it if required */
for (i = 0; i < 3; i++)
if (nalu->src[i].rel && !bc->ar_loaded)
r600_load_ar(bc);
r600_load_ar(bc, true);
if (nalu->dst.rel && !bc->ar_loaded)
r600_load_ar(bc);
r600_load_ar(bc, false);
/* Setup the kcache for this ALU instruction. This will start a new
* ALU clause if needed. */

View File

@@ -329,7 +329,7 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r600_load_ar(struct r600_bytecode *bc);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);
int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
@@ -355,7 +355,7 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
unsigned *num_format, unsigned *format_comp, unsigned *endian);
int r600_load_ar(struct r600_bytecode *bc);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);
static inline int fp64_switch(int i)
{

View File

@@ -413,7 +413,7 @@ void AssamblerVisitor::visit(const AluGroup& group)
m_last_addr = addr.first;
m_bc->ar_loaded = 0;
r600_load_ar(m_bc);
r600_load_ar(m_bc, group.addr_for_src());
}
} else {
emit_index_reg(*addr.first, 0);
@@ -849,7 +849,7 @@ void AssamblerVisitor::visit(const IfInstr& instr)
}
auto pred = instr.predicate();
auto [addr, dummy ] = pred->indirect_addr(); {}
auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {}
if (addr) {
if (!m_last_addr || !m_bc->ar_loaded ||
!m_last_addr->equal_to(*addr)) {
@@ -858,7 +858,7 @@ void AssamblerVisitor::visit(const IfInstr& instr)
m_last_addr = addr;
m_bc->ar_loaded = 0;
r600_load_ar(m_bc);
r600_load_ar(m_bc, true);
}
}

View File

@@ -622,23 +622,23 @@ void ResolveIndirectArrayAddr::visit(const UniformValue& value)
}
}
std::pair<PRegister, bool> AluInstr::indirect_addr() const
std::tuple<PRegister, bool, bool> AluInstr::indirect_addr() const
{
ResolveIndirectArrayAddr visitor;
if (m_dest) {
m_dest->accept(visitor);
if (visitor.addr)
return {visitor.addr, false};
return {visitor.addr, false, false};
}
for (auto s: m_src) {
s->accept(visitor);
if (visitor.addr) {
return {visitor.addr, visitor.is_index};
return {visitor.addr, !visitor.is_index, visitor.is_index};
}
}
return {nullptr, false};
return {nullptr, false, false};
}
AluGroup *AluInstr::split(ValueFactory& vf)

View File

@@ -145,7 +145,7 @@ public:
static const std::set<AluModifiers> last;
static const std::set<AluModifiers> last_write;
std::pair<PRegister, bool> indirect_addr() const;
std::tuple<PRegister, bool, bool> indirect_addr() const;
void add_extra_dependency(PVirtualValue reg);

View File

@@ -231,13 +231,14 @@ bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
bool AluGroup::update_indirect_access(AluInstr *instr)
{
auto indirect_addr = instr->indirect_addr();
auto [indirect_addr, for_src, is_index ] = instr->indirect_addr();
if (indirect_addr.first) {
if (indirect_addr) {
if (!m_addr_used) {
m_addr_used = indirect_addr.first;
m_addr_is_index = indirect_addr.second;
} else if (!indirect_addr.first->equal_to(*m_addr_used)) {
m_addr_used = indirect_addr;
m_addr_for_src = for_src;
m_addr_is_index = is_index;
} else if (!indirect_addr->equal_to(*m_addr_used)) {
return false;
}
}

View File

@@ -86,6 +86,8 @@ public:
static bool has_t() { return s_max_slots == 5;}
bool addr_for_src() const { return m_addr_for_src;}
private:
void forward_set_blockid(int id, int index) override;
bool do_ready() const override;
@@ -108,6 +110,7 @@ private:
int m_nesting_depth{0};
bool m_has_lds_op{false};
bool m_addr_is_index{false};
bool m_addr_for_src{false};
};

View File

@@ -824,7 +824,7 @@ bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::lis
auto opinfo = alu_ops.find((*i)->opcode());
assert(opinfo != alu_ops.end());
if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
!(*i)->indirect_addr().first)
!std::get<0>((*i)->indirect_addr()))
priority = -1;
}