aco: update VS prolog waitcnt for GFX12

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29225>
This commit is contained in:
Rhys Perry
2024-05-03 12:05:00 +01:00
committed by Marge Bot
parent f01cac835f
commit 418fed1805

View File

@@ -12098,6 +12098,30 @@ load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned
return count;
}
void
wait_for_smem_loads(Builder& bld)
{
if (bld.program->gfx_level >= GFX12) {
bld.sopp(aco_opcode::s_wait_kmcnt, 0);
} else {
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
}
}
void
wait_for_vmem_loads(Builder& bld)
{
if (bld.program->gfx_level >= GFX12) {
bld.sopp(aco_opcode::s_wait_loadcnt, 0);
} else {
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
}
}
Operand
calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
const struct aco_vs_prolog_info* pinfo, unsigned index,
@@ -12107,9 +12131,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u));
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
wait_for_smem_loads(bld);
Definition fetch_index_def(tmp_vgpr0, v1);
Operand fetch_index(tmp_vgpr0, v1);
@@ -12489,9 +12511,7 @@ convert_current_unaligned_vs_attribs(Builder& bld, UnalignedVsAttribLoadState* s
if (state->current_loads.empty())
return;
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
wait_for_vmem_loads(bld);
for (UnalignedVsAttribLoad load : state->current_loads)
convert_unaligned_vs_attrib(bld, load);
@@ -12577,9 +12597,6 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
/* choose sgprs */
PhysReg vertex_buffers(align(max_user_sgprs + 14, 2));
PhysReg prolog_input = vertex_buffers.advance(8);
@@ -12682,7 +12699,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
}
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
wait_for_smem_loads(bld);
for (unsigned i = 0; i < num_descs;) {
PhysReg dest(attributes_start.reg() + loc * 4u);
@@ -12773,11 +12790,8 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
convert_current_unaligned_vs_attribs(bld, &unaligned_state);
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) {
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(program->gfx_level));
}
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)
wait_for_vmem_loads(bld);
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
* so we may need to fix it up. */
@@ -12817,7 +12831,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
if (has_nontrivial_divisors) {
bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
get_arg_fixed(args, pinfo->inputs), Operand::c32(0u));
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
wait_for_smem_loads(bld);
continue_pc = Operand(prolog_input, s2);
}