aco: fix VMEMtoScalarWriteHazard s_waitcnt mitigation
It doesn't make sense for a "s_waitcnt vmcnt(0)" to affect a store or DS
instruction.
LLVM checks for "s_waitcnt vmcnt(0) lgkmcnt(0) expcnt(0)" but ignores
s_waitcnt_vscnt (which I assume is a bug).
No fossil-db changes.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: bcf94bb933
("aco: properly recognize that s_waitcnt mitigates VMEMtoScalarWriteHazard")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18270>
This commit is contained in:
@@ -220,7 +220,7 @@ VMEM/FLAT/GLOBAL/SCRATCH/DS instruction reads an SGPR (or EXEC, or M0).
|
|||||||
Then, a SALU/SMEM instruction writes the same SGPR.
|
Then, a SALU/SMEM instruction writes the same SGPR.
|
||||||
|
|
||||||
Mitigated by:
|
Mitigated by:
|
||||||
A VALU instruction or an `s_waitcnt vmcnt(0)` between the two instructions.
|
A VALU instruction or an `s_waitcnt` between the two instructions.
|
||||||
|
|
||||||
### SMEMtoVectorWriteHazard
|
### SMEMtoVectorWriteHazard
|
||||||
|
|
||||||
|
@@ -159,6 +159,8 @@ struct NOP_ctx_gfx10 {
|
|||||||
bool has_NSA_MIMG = false;
|
bool has_NSA_MIMG = false;
|
||||||
bool has_writelane = false;
|
bool has_writelane = false;
|
||||||
std::bitset<128> sgprs_read_by_VMEM;
|
std::bitset<128> sgprs_read_by_VMEM;
|
||||||
|
std::bitset<128> sgprs_read_by_VMEM_store;
|
||||||
|
std::bitset<128> sgprs_read_by_DS;
|
||||||
std::bitset<128> sgprs_read_by_SMEM;
|
std::bitset<128> sgprs_read_by_SMEM;
|
||||||
|
|
||||||
void join(const NOP_ctx_gfx10& other)
|
void join(const NOP_ctx_gfx10& other)
|
||||||
@@ -172,6 +174,8 @@ struct NOP_ctx_gfx10 {
|
|||||||
has_NSA_MIMG |= other.has_NSA_MIMG;
|
has_NSA_MIMG |= other.has_NSA_MIMG;
|
||||||
has_writelane |= other.has_writelane;
|
has_writelane |= other.has_writelane;
|
||||||
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
|
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
|
||||||
|
sgprs_read_by_DS |= other.sgprs_read_by_DS;
|
||||||
|
sgprs_read_by_VMEM_store |= other.sgprs_read_by_VMEM_store;
|
||||||
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
|
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -183,6 +187,8 @@ struct NOP_ctx_gfx10 {
|
|||||||
has_branch_after_DS == other.has_branch_after_DS &&
|
has_branch_after_DS == other.has_branch_after_DS &&
|
||||||
has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane &&
|
has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane &&
|
||||||
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
|
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
|
||||||
|
sgprs_read_by_DS == other.sgprs_read_by_DS &&
|
||||||
|
sgprs_read_by_VMEM_store == other.sgprs_read_by_VMEM_store &&
|
||||||
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
|
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -583,6 +589,16 @@ mark_read_regs(const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <std::size_t N>
|
||||||
|
void
|
||||||
|
mark_read_regs_exec(State& state, const aco_ptr<Instruction>& instr, std::bitset<N>& reg_reads)
|
||||||
|
{
|
||||||
|
mark_read_regs(instr, reg_reads);
|
||||||
|
reg_reads.set(exec);
|
||||||
|
if (state.program->wave_size == 64)
|
||||||
|
reg_reads.set(exec_hi);
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
VALU_writes_sgpr(aco_ptr<Instruction>& instr)
|
VALU_writes_sgpr(aco_ptr<Instruction>& instr)
|
||||||
{
|
{
|
||||||
@@ -641,31 +657,36 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||||||
Builder bld(state.program, &new_instructions);
|
Builder bld(state.program, &new_instructions);
|
||||||
|
|
||||||
/* VMEMtoScalarWriteHazard
|
/* VMEMtoScalarWriteHazard
|
||||||
* Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)"
|
* Handle EXEC/M0/SGPR write following a VMEM/DS instruction without a VALU or "waitcnt vmcnt(0)"
|
||||||
* in-between.
|
* in-between.
|
||||||
*/
|
*/
|
||||||
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) {
|
if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) {
|
||||||
/* Remember all SGPRs that are read by the VMEM instruction */
|
/* Remember all SGPRs that are read by the VMEM/DS instruction */
|
||||||
mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
|
if (instr->isVMEM() || instr->isFlatLike())
|
||||||
ctx.sgprs_read_by_VMEM.set(exec);
|
mark_read_regs_exec(
|
||||||
if (state.program->wave_size == 64)
|
state, instr,
|
||||||
ctx.sgprs_read_by_VMEM.set(exec_hi);
|
instr->definitions.empty() ? ctx.sgprs_read_by_VMEM_store : ctx.sgprs_read_by_VMEM);
|
||||||
|
if (instr->isFlat() || instr->isDS())
|
||||||
|
mark_read_regs_exec(state, instr, ctx.sgprs_read_by_DS);
|
||||||
} else if (instr->isSALU() || instr->isSMEM()) {
|
} else if (instr->isSALU() || instr->isSMEM()) {
|
||||||
if (instr->opcode == aco_opcode::s_waitcnt) {
|
if (instr->opcode == aco_opcode::s_waitcnt) {
|
||||||
/* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
|
wait_imm imm(state.program->gfx_level, instr->sopp().imm);
|
||||||
uint16_t imm = instr->sopp().imm;
|
if (imm.vm == 0)
|
||||||
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
|
|
||||||
if (vmcnt == 0)
|
|
||||||
ctx.sgprs_read_by_VMEM.reset();
|
ctx.sgprs_read_by_VMEM.reset();
|
||||||
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
|
} else if (instr->opcode == aco_opcode::s_waitcnt_depctr && instr->sopp().imm == 0xffe3) {
|
||||||
/* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
|
/* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
|
||||||
if (instr->sopp().imm == 0xffe3)
|
ctx.sgprs_read_by_VMEM.reset();
|
||||||
ctx.sgprs_read_by_VMEM.reset();
|
ctx.sgprs_read_by_DS.reset();
|
||||||
|
ctx.sgprs_read_by_VMEM_store.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check if SALU writes an SGPR that was previously read by the VALU */
|
/* Check if SALU writes an SGPR that was previously read by the VALU */
|
||||||
if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
|
if (check_written_regs(instr, ctx.sgprs_read_by_VMEM) ||
|
||||||
|
check_written_regs(instr, ctx.sgprs_read_by_DS) ||
|
||||||
|
check_written_regs(instr, ctx.sgprs_read_by_VMEM_store)) {
|
||||||
ctx.sgprs_read_by_VMEM.reset();
|
ctx.sgprs_read_by_VMEM.reset();
|
||||||
|
ctx.sgprs_read_by_DS.reset();
|
||||||
|
ctx.sgprs_read_by_VMEM_store.reset();
|
||||||
|
|
||||||
/* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
|
/* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
|
||||||
aco_ptr<SOPP_instruction> depctr{
|
aco_ptr<SOPP_instruction> depctr{
|
||||||
@@ -677,6 +698,8 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
|
|||||||
} else if (instr->isVALU()) {
|
} else if (instr->isVALU()) {
|
||||||
/* Hazard is mitigated by any VALU instruction */
|
/* Hazard is mitigated by any VALU instruction */
|
||||||
ctx.sgprs_read_by_VMEM.reset();
|
ctx.sgprs_read_by_VMEM.reset();
|
||||||
|
ctx.sgprs_read_by_DS.reset();
|
||||||
|
ctx.sgprs_read_by_VMEM_store.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* VcmpxPermlaneHazard
|
/* VcmpxPermlaneHazard
|
||||||
|
Reference in New Issue
Block a user