aco/gfx11: deallocate VGPRs at the end of the shader
fossil-db (gfx1100):
Totals from 65987 (40.81% of 161689) affected shaders:
Instrs: 57123207 -> 57199947 (+0.13%)
CodeSize: 308402500 -> 308709460 (+0.10%)
Latency: 680527139 -> 680527160 (+0.00%)
InvThroughput: 131620026 -> 131620045 (+0.00%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17710>
This commit is contained in:
@@ -943,4 +943,28 @@ should_form_clause(const Instruction* a, const Instruction* b)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
dealloc_vgprs(Program* program)
|
||||
{
|
||||
if (program->gfx_level < GFX11)
|
||||
return false;
|
||||
|
||||
/* skip if deallocating VGPRs won't increase occupancy */
|
||||
uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size);
|
||||
max_waves = max_suitable_waves(program, max_waves);
|
||||
if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves))
|
||||
return false;
|
||||
|
||||
Block& block = program->blocks.back();
|
||||
|
||||
/* don't bother checking if there is a pending VMEM store or export: there almost always is */
|
||||
Builder bld(program);
|
||||
if (!block.instructions.empty() && block.instructions.back()->opcode == aco_opcode::s_endpgm) {
|
||||
bld.reset(&block.instructions, block.instructions.begin() + (block.instructions.size() - 1));
|
||||
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace aco
|
||||
|
@@ -2289,6 +2289,7 @@ void lower_to_hw_instr(Program* program);
|
||||
void schedule_program(Program* program, live& live_vars);
|
||||
void spill(Program* program, live& live_vars);
|
||||
void insert_wait_states(Program* program);
|
||||
bool dealloc_vgprs(Program* program);
|
||||
void insert_NOPs(Program* program);
|
||||
void form_hard_clauses(Program* program);
|
||||
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
|
||||
|
@@ -2004,6 +2004,8 @@ lower_to_hw_instr(Program* program)
|
||||
{
|
||||
Block* discard_block = NULL;
|
||||
|
||||
bool should_dealloc_vgprs = dealloc_vgprs(program);
|
||||
|
||||
for (int block_idx = program->blocks.size() - 1; block_idx >= 0; block_idx--) {
|
||||
Block* block = &program->blocks[block_idx];
|
||||
lower_context ctx;
|
||||
@@ -2126,6 +2128,8 @@ lower_to_hw_instr(Program* program)
|
||||
block = &program->blocks[block_idx];
|
||||
|
||||
bld.reset(discard_block);
|
||||
if (should_dealloc_vgprs)
|
||||
bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
|
||||
bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
|
||||
program->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL,
|
||||
false, true, true);
|
||||
|
Reference in New Issue
Block a user