aco/stats: support GFX12 in collect_preasm_stats()

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29225>
This commit is contained in:
Rhys Perry
2024-05-03 12:04:59 +01:00
committed by Marge Bot
parent 9e9cabd2fa
commit f01cac835f
4 changed files with 39 additions and 27 deletions

View File

@@ -74,12 +74,6 @@ enum counter_type : uint8_t {
wait_counters = BITFIELD_MASK(wait_type_num),
};
/* Categories that get_vmem_type() sorts VMEM instructions into.
 * Every enumerator occupies its own bit. */
enum vmem_type : uint8_t {
   vmem_nosampler = 0x1,
   vmem_sampler = 0x2,
   vmem_bvh = 0x4,
};
/* On GFX11+ the SIMD frontend doesn't switch to issuing instructions from a different
* wave if there is an ALU stall. Hence we have an instruction (s_delay_alu) to signal
* that we should switch to a different wave and contains info on dependencies as to
@@ -349,21 +343,6 @@ struct wait_ctx {
}
};
uint8_t
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
{
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray)
return vmem_bvh;
else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
return vmem_sampler;
else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
instr->operands[1].regClass() == s4)
return vmem_sampler;
else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal())
return vmem_nosampler;
return 0;
}
wait_event
get_vmem_event(wait_ctx& ctx, Instruction* instr, uint8_t type)
{

View File

@@ -1414,6 +1414,21 @@ get_op_fixed_to_def(Instruction* instr)
return -1;
}
uint8_t
get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
{
if (instr->opcode == aco_opcode::image_bvh64_intersect_ray)
return vmem_bvh;
else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
return vmem_sampler;
else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
instr->operands[1].regClass() == s4)
return vmem_sampler;
else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal())
return vmem_nosampler;
return 0;
}
bool
dealloc_vgprs(Program* program)
{

View File

@@ -1781,6 +1781,17 @@ unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index);
bool should_form_clause(const Instruction* a, const Instruction* b);
/* Categories that get_vmem_type() sorts VMEM instructions into.
 * Every enumerator occupies its own bit. */
enum vmem_type : uint8_t {
   vmem_nosampler = 0x1,
   vmem_sampler = 0x2,
   vmem_bvh = 0x4,
};
/* Returns the vmem_type of the given instruction, or 0 if it falls into none of the
 * vmem_type categories.
 *
 * VMEM instructions of the same type return in-order. For GFX12+, this determines which counter
 * is used.
 */
uint8_t get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr);
enum block_kind {
/* uniform indicates that leaving this block,
* all active lanes stay active */

View File

@@ -288,10 +288,11 @@ get_wait_counter_info(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
else
info[wait_type_vs] = 320;
} else if (instr->isSMEM()) {
wait_type type = gfx_level >= GFX12 ? wait_type_km : wait_type_lgkm;
if (instr->definitions.empty()) {
info[wait_type_lgkm] = 200;
info[type] = 200;
} else if (instr->operands.empty()) { /* s_memtime and s_memrealtime */
info[wait_type_lgkm] = 1;
info[type] = 1;
} else {
bool likely_desc_load = instr->operands[0].size() == 2;
bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
@@ -299,15 +300,21 @@ get_wait_counter_info(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
instr->operands[1].isConstant() && (!soe || instr->operands.back().isConstant());
if (likely_desc_load || const_offset)
info[wait_type_lgkm] = 30; /* likely to hit L0 cache */
info[type] = 30; /* likely to hit L0 cache */
else
info[wait_type_lgkm] = 200;
info[type] = 200;
}
} else if (instr->isDS()) {
info[wait_type_lgkm] = 20;
} else if (instr->isVMEM() && instr->definitions.empty() && gfx_level >= GFX10) {
info[wait_type_vs] = 320;
} else if (instr->isVMEM()) {
wait_type type =
instr->definitions.empty() && gfx_level >= GFX10 ? wait_type_vs : wait_type_vm;
uint8_t vm_type = get_vmem_type(gfx_level, instr.get());
wait_type type = wait_type_vm;
if (gfx_level >= GFX12 && vm_type == vmem_bvh)
type = wait_type_bvh;
else if (gfx_level >= GFX12 && vm_type == vmem_sampler)
type = wait_type_sample;
info[type] = 320;
}