aco: update VS prolog waitcnt for GFX12
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29225>
This commit is contained in:
@@ -12098,6 +12098,30 @@ load_vb_descs(Builder& bld, PhysReg dest, Operand base, unsigned start, unsigned
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
wait_for_smem_loads(Builder& bld)
|
||||||
|
{
|
||||||
|
if (bld.program->gfx_level >= GFX12) {
|
||||||
|
bld.sopp(aco_opcode::s_wait_kmcnt, 0);
|
||||||
|
} else {
|
||||||
|
wait_imm lgkm_imm;
|
||||||
|
lgkm_imm.lgkm = 0;
|
||||||
|
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
wait_for_vmem_loads(Builder& bld)
|
||||||
|
{
|
||||||
|
if (bld.program->gfx_level >= GFX12) {
|
||||||
|
bld.sopp(aco_opcode::s_wait_loadcnt, 0);
|
||||||
|
} else {
|
||||||
|
wait_imm vm_imm;
|
||||||
|
vm_imm.vm = 0;
|
||||||
|
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Operand
|
Operand
|
||||||
calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
|
calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
|
||||||
const struct aco_vs_prolog_info* pinfo, unsigned index,
|
const struct aco_vs_prolog_info* pinfo, unsigned index,
|
||||||
@@ -12107,9 +12131,7 @@ calc_nontrivial_instance_id(Builder& bld, const struct ac_shader_args* args,
|
|||||||
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
|
bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_sgpr, s2),
|
||||||
get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u));
|
get_arg_fixed(args, pinfo->inputs), Operand::c32(8u + index * 8u));
|
||||||
|
|
||||||
wait_imm lgkm_imm;
|
wait_for_smem_loads(bld);
|
||||||
lgkm_imm.lgkm = 0;
|
|
||||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(bld.program->gfx_level));
|
|
||||||
|
|
||||||
Definition fetch_index_def(tmp_vgpr0, v1);
|
Definition fetch_index_def(tmp_vgpr0, v1);
|
||||||
Operand fetch_index(tmp_vgpr0, v1);
|
Operand fetch_index(tmp_vgpr0, v1);
|
||||||
@@ -12489,9 +12511,7 @@ convert_current_unaligned_vs_attribs(Builder& bld, UnalignedVsAttribLoadState* s
|
|||||||
if (state->current_loads.empty())
|
if (state->current_loads.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wait_imm vm_imm;
|
wait_for_vmem_loads(bld);
|
||||||
vm_imm.vm = 0;
|
|
||||||
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(bld.program->gfx_level));
|
|
||||||
|
|
||||||
for (UnalignedVsAttribLoad load : state->current_loads)
|
for (UnalignedVsAttribLoad load : state->current_loads)
|
||||||
convert_unaligned_vs_attrib(bld, load);
|
convert_unaligned_vs_attrib(bld, load);
|
||||||
@@ -12577,9 +12597,6 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||||||
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
|
uint32_t attrib_mask = BITFIELD_MASK(pinfo->num_attributes);
|
||||||
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
|
bool has_nontrivial_divisors = pinfo->nontrivial_divisors;
|
||||||
|
|
||||||
wait_imm lgkm_imm;
|
|
||||||
lgkm_imm.lgkm = 0;
|
|
||||||
|
|
||||||
/* choose sgprs */
|
/* choose sgprs */
|
||||||
PhysReg vertex_buffers(align(max_user_sgprs + 14, 2));
|
PhysReg vertex_buffers(align(max_user_sgprs + 14, 2));
|
||||||
PhysReg prolog_input = vertex_buffers.advance(8);
|
PhysReg prolog_input = vertex_buffers.advance(8);
|
||||||
@@ -12682,7 +12699,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||||||
bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
|
bld.vop1(aco_opcode::v_mov_b32, Definition(start_instance_vgpr, v1), start_instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
|
wait_for_smem_loads(bld);
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_descs;) {
|
for (unsigned i = 0; i < num_descs;) {
|
||||||
PhysReg dest(attributes_start.reg() + loc * 4u);
|
PhysReg dest(attributes_start.reg() + loc * 4u);
|
||||||
@@ -12773,11 +12790,8 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||||||
|
|
||||||
convert_current_unaligned_vs_attribs(bld, &unaligned_state);
|
convert_current_unaligned_vs_attribs(bld, &unaligned_state);
|
||||||
|
|
||||||
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi) {
|
if (pinfo->alpha_adjust_lo | pinfo->alpha_adjust_hi)
|
||||||
wait_imm vm_imm;
|
wait_for_vmem_loads(bld);
|
||||||
vm_imm.vm = 0;
|
|
||||||
bld.sopp(aco_opcode::s_waitcnt, vm_imm.pack(program->gfx_level));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
||||||
* so we may need to fix it up. */
|
* so we may need to fix it up. */
|
||||||
@@ -12817,7 +12831,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
|||||||
if (has_nontrivial_divisors) {
|
if (has_nontrivial_divisors) {
|
||||||
bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
|
bld.smem(aco_opcode::s_load_dwordx2, Definition(prolog_input, s2),
|
||||||
get_arg_fixed(args, pinfo->inputs), Operand::c32(0u));
|
get_arg_fixed(args, pinfo->inputs), Operand::c32(0u));
|
||||||
bld.sopp(aco_opcode::s_waitcnt, lgkm_imm.pack(program->gfx_level));
|
wait_for_smem_loads(bld);
|
||||||
continue_pc = Operand(prolog_input, s2);
|
continue_pc = Operand(prolog_input, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user