aco: update VS prolog waitcnt for GFX12
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29225>
This commit is contained in:
@@ -12098,6 +12098,30 @@ load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned
|
||||
return count;
|
||||
}
|
||||
|
||||
void
|
||||
wait_for_smem_loads(Builder& bld)
|
||||
{
|
||||
if (bld.program->gfx_level >= GFX12) {
|
||||
bld.sopp(aco_opcode::s_wait_kmcnt, 0);
|
||||
} else {
|
||||
wait_imm lgkm_imm;
|
||||
lgkm_imm.lgkm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
wait_for_vmem_loads(Builder& bld)
|
||||
{
|
||||
if (bld.program->gfx_level >= GFX12) {
|
||||
bld.sopp(aco_opcode::s_wait_loadcnt, 0);
|
||||
} else {
|
||||
wait_imm vm_imm;
|
||||
vm_imm.vm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
|
||||
}
|
||||
}
|
||||
|
||||
Operand
|
||||
calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
|
||||
const struct aco_vs_prolog_info* pinfo, unsigned index,
|
||||
@@ -12107,9 +12131,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
|
||||
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
|
||||
get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u));
|
||||
|
||||
wait_imm lgkm_imm;
|
||||
lgkm_imm.lgkm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
|
||||
wait_for_smem_loads(bld);
|
||||
|
||||
Definition fetch_index_def(tmp_vgpr0, v1);
|
||||
Operand fetch_index(tmp_vgpr0, v1);
|
||||
@@ -12489,9 +12511,7 @@ convert_current_unaligned_vs_attribs(Builder& bld, UnalignedVsAttribLoadState* s
|
||||
if (state->current_loads.empty())
|
||||
return;
|
||||
|
||||
wait_imm vm_imm;
|
||||
vm_imm.vm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
|
||||
wait_for_vmem_loads(bld);
|
||||
|
||||
for (UnalignedVsAttribLoad load : state->current_loads)
|
||||
convert_unaligned_vs_attrib(bld, load);
|
||||
@@ -12577,9 +12597,6 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
|
||||
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
|
||||
|
||||
wait_imm lgkm_imm;
|
||||
lgkm_imm.lgkm = 0;
|
||||
|
||||
/* choose sgprs */
|
||||
PhysReg vertex_buffers(align(max_user_sgprs + 14, 2));
|
||||
PhysReg prolog_input = vertex_buffers.advance(8);
|
||||
@@ -12682,7 +12699,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
|
||||
}
|
||||
|
||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
|
||||
wait_for_smem_loads(bld);
|
||||
|
||||
for (unsigned i = 0; i < num_descs;) {
|
||||
PhysReg dest(attributes_start.reg() + loc * 4u);
|
||||
@@ -12773,11 +12790,8 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||
|
||||
convert_current_unaligned_vs_attribs(bld, &unaligned_state);
|
||||
|
||||
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) {
|
||||
wait_imm vm_imm;
|
||||
vm_imm.vm = 0;
|
||||
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(program->gfx_level));
|
||||
}
|
||||
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)
|
||||
wait_for_vmem_loads(bld);
|
||||
|
||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
||||
* so we may need to fix it up. */
|
||||
@@ -12817,7 +12831,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||
if (has_nontrivial_divisors) {
|
||||
bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
|
||||
get_arg_fixed(args, pinfo->inputs), Operand::c32(0u));
|
||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
|
||||
wait_for_smem_loads(bld);
|
||||
continue_pc = Operand(prolog_input, s2);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user