aco: avoid WAW hazard with BVH MIMG and other VMEM

According to LLVM, image_bvh64_intersect_ray does not write results in
order with other VMEM instructions.

fossil-db (navi21):
Totals from 7 (0.00% of 162293) affected shaders:
Instrs: 39978 -> 39985 (+0.02%)
CodeSize: 219356 -> 219384 (+0.01%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17079>
This commit is contained in:
Rhys Perry
2022-05-25 17:21:10 +01:00
committed by Marge Bot
parent 7d34044908
commit c66206cbed

View File

@@ -79,6 +79,12 @@ enum counter_type : uint8_t {
num_counters = 4,
};
enum vmem_type : uint8_t {
vmem_nosampler = 1 << 0,
vmem_sampler = 1 << 1,
vmem_bvh = 1 << 2,
};
static const uint16_t exp_events =
event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock;
static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
@@ -111,27 +117,22 @@ struct wait_entry {
uint8_t counters; /* use counter_type notion */
bool wait_on_read : 1;
bool logical : 1;
bool has_vmem_nosampler : 1;
bool has_vmem_sampler : 1;
uint8_t vmem_types : 4;
wait_entry(wait_event event_, wait_imm imm_, bool logical_, bool wait_on_read_)
: imm(imm_), events(event_), counters(get_counters_for_event(event_)),
wait_on_read(wait_on_read_), logical(logical_), has_vmem_nosampler(false),
has_vmem_sampler(false)
wait_on_read(wait_on_read_), logical(logical_), vmem_types(0)
{}
bool join(const wait_entry& other)
{
bool changed = (other.events & ~events) || (other.counters & ~counters) ||
(other.wait_on_read && !wait_on_read) ||
(other.has_vmem_nosampler && !has_vmem_nosampler) ||
(other.has_vmem_sampler && !has_vmem_sampler);
(other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types);
events |= other.events;
counters |= other.counters;
changed |= imm.combine(other.imm);
wait_on_read |= other.wait_on_read;
has_vmem_nosampler |= other.has_vmem_nosampler;
has_vmem_sampler |= other.has_vmem_sampler;
vmem_types |= other.vmem_types;
assert(logical == other.logical);
return changed;
}
@@ -148,8 +149,7 @@ struct wait_entry {
if (counter == counter_vm) {
imm.vm = wait_imm::unset_counter;
events &= ~event_vmem;
has_vmem_nosampler = false;
has_vmem_sampler = false;
vmem_types = 0;
}
if (counter == counter_exp) {
@@ -242,6 +242,19 @@ struct wait_ctx {
}
};
uint8_t
get_vmem_type(Instruction* instr)
{
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray)
return vmem_bvh;
else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
instr->operands[1].regClass() == s4)
return vmem_sampler;
else if (instr->isVMEM())
return vmem_nosampler;
return 0;
}
void
check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
{
@@ -270,11 +283,9 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
continue;
/* Vector Memory reads and writes return in the order they were issued */
bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() &&
instr->operands[1].regClass() == s4;
if (instr->isVMEM() && ((it->second.events & vm_events) == event_vmem) &&
it->second.has_vmem_nosampler == !has_sampler &&
it->second.has_vmem_sampler == has_sampler)
uint8_t vmem_type = get_vmem_type(instr);
if (vmem_type && ((it->second.events & vm_events) == event_vmem) &&
it->second.vmem_types == vmem_type)
continue;
/* LDS reads and writes return in the order they were issued. same for GDS */
@@ -568,7 +579,7 @@ update_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync
void
insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read,
bool has_sampler = false)
uint8_t vmem_types = 0)
{
uint16_t counters = get_counters_for_event(event);
wait_imm imm;
@@ -582,8 +593,7 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
imm.vs = 0;
wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read);
new_entry.has_vmem_nosampler = (event & event_vmem) && !has_sampler;
new_entry.has_vmem_sampler = (event & event_vmem) && has_sampler;
new_entry.vmem_types |= vmem_types;
for (unsigned i = 0; i < rc.size(); i++) {
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg() + i}, new_entry);
@@ -593,16 +603,16 @@ insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, boo
}
void
insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, bool has_sampler = false)
insert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, uint8_t vmem_types = 0)
{
if (!op.isConstant() && !op.isUndefined())
insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, has_sampler);
insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, vmem_types);
}
void
insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, bool has_sampler = false)
insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0)
{
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, has_sampler);
insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types);
}
void
@@ -679,11 +689,8 @@ gen(Instruction* instr, wait_ctx& ctx)
!instr->definitions.empty() || ctx.gfx_level < GFX10 ? event_vmem : event_vmem_store;
update_counters(ctx, ev, get_sync_info(instr));
bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() &&
instr->operands[1].regClass() == s4;
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], ev, has_sampler);
insert_wait_entry(ctx, instr->definitions[0], ev, get_vmem_type(instr));
if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) {
ctx.exp_cnt++;