r600: Force NOPs when loading AR on R600 class hardware
Loading indirectly from a register that was just written to doesn't work on R600 class hardware, so add a NOP group, with the address register load being emitted in the t-slot, to make sure that the register write has finished. Fixes: 33765aa92a
r600/sfn: Enable NIR for pre RG hardware Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18130> (cherry picked from commit 404d95ca49)
This commit is contained in:
@@ -166,7 +166,7 @@
|
||||
"description": "r600: Force NOPs when loading AR on R600 class hardware",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "33765aa92aa5c150873fc210e9d6c1fe22cf8646"
|
||||
},
|
||||
|
@@ -1196,7 +1196,7 @@ static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
|
||||
}
|
||||
|
||||
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
|
||||
static int load_ar_r6xx(struct r600_bytecode *bc)
|
||||
static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
@@ -1207,6 +1207,10 @@ static int load_ar_r6xx(struct r600_bytecode *bc)
|
||||
/* hack to avoid making MOVA the last instruction in the clause */
|
||||
if ((bc->cf_last->ndw>>1) >= 110)
|
||||
bc->force_add_cf = 1;
|
||||
else if (for_src) {
|
||||
insert_nop_r6xx(bc, 4);
|
||||
bc->nalu_groups++;
|
||||
}
|
||||
|
||||
memset(&alu, 0, sizeof(alu));
|
||||
alu.op = ALU_OP1_MOVA_GPR_INT;
|
||||
@@ -1224,13 +1228,13 @@ static int load_ar_r6xx(struct r600_bytecode *bc)
|
||||
}
|
||||
|
||||
/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
|
||||
int r600_load_ar(struct r600_bytecode *bc)
|
||||
int r600_load_ar(struct r600_bytecode *bc, bool for_src)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
|
||||
if (bc->ar_handling)
|
||||
return load_ar_r6xx(bc);
|
||||
return load_ar_r6xx(bc, for_src);
|
||||
|
||||
if (bc->ar_loaded)
|
||||
return 0;
|
||||
@@ -1306,10 +1310,10 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
|
||||
/* Check AR usage and load it if required */
|
||||
for (i = 0; i < 3; i++)
|
||||
if (nalu->src[i].rel && !bc->ar_loaded)
|
||||
r600_load_ar(bc);
|
||||
r600_load_ar(bc, true);
|
||||
|
||||
if (nalu->dst.rel && !bc->ar_loaded)
|
||||
r600_load_ar(bc);
|
||||
r600_load_ar(bc, false);
|
||||
|
||||
/* Setup the kcache for this ALU instruction. This will start a new
|
||||
* ALU clause if needed. */
|
||||
|
@@ -329,7 +329,7 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
|
||||
void r600_bytecode_disasm(struct r600_bytecode *bc);
|
||||
void r600_bytecode_alu_read(struct r600_bytecode *bc,
|
||||
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
|
||||
int r600_load_ar(struct r600_bytecode *bc);
|
||||
int r600_load_ar(struct r600_bytecode *bc, bool for_src);
|
||||
|
||||
int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
|
||||
|
||||
@@ -355,7 +355,7 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
|
||||
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
|
||||
unsigned *num_format, unsigned *format_comp, unsigned *endian);
|
||||
|
||||
int r600_load_ar(struct r600_bytecode *bc);
|
||||
int r600_load_ar(struct r600_bytecode *bc, bool for_src);
|
||||
|
||||
static inline int fp64_switch(int i)
|
||||
{
|
||||
|
@@ -413,7 +413,7 @@ void AssamblerVisitor::visit(const AluGroup& group)
|
||||
m_last_addr = addr.first;
|
||||
m_bc->ar_loaded = 0;
|
||||
|
||||
r600_load_ar(m_bc);
|
||||
r600_load_ar(m_bc, group.addr_for_src());
|
||||
}
|
||||
} else {
|
||||
emit_index_reg(*addr.first, 0);
|
||||
@@ -849,7 +849,7 @@ void AssamblerVisitor::visit(const IfInstr& instr)
|
||||
}
|
||||
|
||||
auto pred = instr.predicate();
|
||||
auto [addr, dummy ] = pred->indirect_addr(); {}
|
||||
auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {}
|
||||
if (addr) {
|
||||
if (!m_last_addr || !m_bc->ar_loaded ||
|
||||
!m_last_addr->equal_to(*addr)) {
|
||||
@@ -858,7 +858,7 @@ void AssamblerVisitor::visit(const IfInstr& instr)
|
||||
m_last_addr = addr;
|
||||
m_bc->ar_loaded = 0;
|
||||
|
||||
r600_load_ar(m_bc);
|
||||
r600_load_ar(m_bc, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -622,23 +622,23 @@ void ResolveIndirectArrayAddr::visit(const UniformValue& value)
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<PRegister, bool> AluInstr::indirect_addr() const
|
||||
std::tuple<PRegister, bool, bool> AluInstr::indirect_addr() const
|
||||
{
|
||||
ResolveIndirectArrayAddr visitor;
|
||||
|
||||
if (m_dest) {
|
||||
m_dest->accept(visitor);
|
||||
if (visitor.addr)
|
||||
return {visitor.addr, false};
|
||||
return {visitor.addr, false, false};
|
||||
}
|
||||
|
||||
for (auto s: m_src) {
|
||||
s->accept(visitor);
|
||||
if (visitor.addr) {
|
||||
return {visitor.addr, visitor.is_index};
|
||||
return {visitor.addr, !visitor.is_index, visitor.is_index};
|
||||
}
|
||||
}
|
||||
return {nullptr, false};
|
||||
return {nullptr, false, false};
|
||||
}
|
||||
|
||||
AluGroup *AluInstr::split(ValueFactory& vf)
|
||||
|
@@ -145,7 +145,7 @@ public:
|
||||
static const std::set<AluModifiers> last;
|
||||
static const std::set<AluModifiers> last_write;
|
||||
|
||||
std::pair<PRegister, bool> indirect_addr() const;
|
||||
std::tuple<PRegister, bool, bool> indirect_addr() const;
|
||||
|
||||
void add_extra_dependency(PVirtualValue reg);
|
||||
|
||||
|
@@ -231,13 +231,14 @@ bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
|
||||
|
||||
bool AluGroup::update_indirect_access(AluInstr *instr)
|
||||
{
|
||||
auto indirect_addr = instr->indirect_addr();
|
||||
auto [indirect_addr, for_src, is_index ] = instr->indirect_addr();
|
||||
|
||||
if (indirect_addr.first) {
|
||||
if (indirect_addr) {
|
||||
if (!m_addr_used) {
|
||||
m_addr_used = indirect_addr.first;
|
||||
m_addr_is_index = indirect_addr.second;
|
||||
} else if (!indirect_addr.first->equal_to(*m_addr_used)) {
|
||||
m_addr_used = indirect_addr;
|
||||
m_addr_for_src = for_src;
|
||||
m_addr_is_index = is_index;
|
||||
} else if (!indirect_addr->equal_to(*m_addr_used)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@@ -86,6 +86,8 @@ public:
|
||||
|
||||
static bool has_t() { return s_max_slots == 5;}
|
||||
|
||||
bool addr_for_src() const { return m_addr_for_src;}
|
||||
|
||||
private:
|
||||
void forward_set_blockid(int id, int index) override;
|
||||
bool do_ready() const override;
|
||||
@@ -108,6 +110,7 @@ private:
|
||||
int m_nesting_depth{0};
|
||||
bool m_has_lds_op{false};
|
||||
bool m_addr_is_index{false};
|
||||
bool m_addr_for_src{false};
|
||||
};
|
||||
|
||||
|
||||
|
@@ -824,7 +824,7 @@ bool BlockSheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, std::lis
|
||||
auto opinfo = alu_ops.find((*i)->opcode());
|
||||
assert(opinfo != alu_ops.end());
|
||||
if (opinfo->second.can_channel(AluOp::t, m_chip_class) &&
|
||||
!(*i)->indirect_addr().first)
|
||||
!std::get<0>((*i)->indirect_addr()))
|
||||
priority = -1;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user