aco: update VS prolog waitcnt for GFX12

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29225>
This commit is contained in:
Rhys Perry
2024-05-03 12:05:00 +01:00
committed by Marge Bot
parent f01cac835f
commit 418fed1805

View File

@@ -12098,6 +12098,30 @@ load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned
return count; return count;
} }
void
wait_for_smem_loads(Builder& bld)
{
if (bld.program->gfx_level >= GFX12) {
bld.sopp(aco_opcode::s_wait_kmcnt, 0);
} else {
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
}
}
void
wait_for_vmem_loads(Builder& bld)
{
if (bld.program->gfx_level >= GFX12) {
bld.sopp(aco_opcode::s_wait_loadcnt, 0);
} else {
wait_imm vm_imm;
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
}
}
Operand Operand
calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args, calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
const struct aco_vs_prolog_info* pinfo, unsigned index, const struct aco_vs_prolog_info* pinfo, unsigned index,
@@ -12107,9 +12131,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2), bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u)); get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u));
wait_imm lgkm_imm; wait_for_smem_loads(bld);
lgkm_imm.lgkm = 0;
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
Definition fetch_index_def(tmp_vgpr0, v1); Definition fetch_index_def(tmp_vgpr0, v1);
Operand fetch_index(tmp_vgpr0, v1); Operand fetch_index(tmp_vgpr0, v1);
@@ -12489,9 +12511,7 @@ convert_current_unaligned_vs_attribs(Builder& bld, UnalignedVsAttribLoadState* s
if (state->current_loads.empty()) if (state->current_loads.empty())
return; return;
wait_imm vm_imm; wait_for_vmem_loads(bld);
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
for (UnalignedVsAttribLoad load : state->current_loads) for (UnalignedVsAttribLoad load : state->current_loads)
convert_unaligned_vs_attrib(bld, load); convert_unaligned_vs_attrib(bld, load);
@@ -12577,9 +12597,6 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes); uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
bool has_nontrivial_divisors = pinfo->nontrivial_divisors; bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
wait_imm lgkm_imm;
lgkm_imm.lgkm = 0;
/* choose sgprs */ /* choose sgprs */
PhysReg vertex_buffers(align(max_user_sgprs + 14, 2)); PhysReg vertex_buffers(align(max_user_sgprs + 14, 2));
PhysReg prolog_input = vertex_buffers.advance(8); PhysReg prolog_input = vertex_buffers.advance(8);
@@ -12682,7 +12699,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance); bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
} }
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level)); wait_for_smem_loads(bld);
for (unsigned i = 0; i < num_descs;) { for (unsigned i = 0; i < num_descs;) {
PhysReg dest(attributes_start.reg() + loc * 4u); PhysReg dest(attributes_start.reg() + loc * 4u);
@@ -12773,11 +12790,8 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
convert_current_unaligned_vs_attribs(bld, &unaligned_state); convert_current_unaligned_vs_attribs(bld, &unaligned_state);
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) { if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)
wait_imm vm_imm; wait_for_vmem_loads(bld);
vm_imm.vm = 0;
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(program->gfx_level));
}
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
* so we may need to fix it up. */ * so we may need to fix it up. */
@@ -12817,7 +12831,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
if (has_nontrivial_divisors) { if (has_nontrivial_divisors) {
bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2), bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
get_arg_fixed(args, pinfo->inputs), Operand::c32(0u)); get_arg_fixed(args, pinfo->inputs), Operand::c32(0u));
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level)); wait_for_smem_loads(bld);
continue_pc = Operand(prolog_input, s2); continue_pc = Operand(prolog_input, s2);
} }