aco: add ACO_DEBUG=perfinfo
This prints the program with each instruction's contribution to its latency and various factors for the calculation of the Inverse Throughput statistic. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>
This commit is contained in:
@@ -167,7 +167,7 @@ void aco_compile_shader(unsigned shader_count,
|
|||||||
if (program->chip_class >= GFX10)
|
if (program->chip_class >= GFX10)
|
||||||
aco::form_hard_clauses(program.get());
|
aco::form_hard_clauses(program.get());
|
||||||
|
|
||||||
if (program->collect_statistics)
|
if (program->collect_statistics || (aco::debug_flags & aco::DEBUG_PERF_INFO))
|
||||||
aco::collect_preasm_stats(program.get());
|
aco::collect_preasm_stats(program.get());
|
||||||
|
|
||||||
/* Assembly */
|
/* Assembly */
|
||||||
|
@@ -38,6 +38,7 @@ static const struct debug_control aco_debug_options[] = {
|
|||||||
{"novn", DEBUG_NO_VN},
|
{"novn", DEBUG_NO_VN},
|
||||||
{"noopt", DEBUG_NO_OPT},
|
{"noopt", DEBUG_NO_OPT},
|
||||||
{"nosched", DEBUG_NO_SCHED},
|
{"nosched", DEBUG_NO_SCHED},
|
||||||
|
{"perfinfo", DEBUG_PERF_INFO},
|
||||||
{NULL, 0}
|
{NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -55,6 +55,7 @@ enum {
|
|||||||
DEBUG_NO_VN = 0x10,
|
DEBUG_NO_VN = 0x10,
|
||||||
DEBUG_NO_OPT = 0x20,
|
DEBUG_NO_OPT = 0x20,
|
||||||
DEBUG_NO_SCHED = 0x40,
|
DEBUG_NO_SCHED = 0x40,
|
||||||
|
DEBUG_PERF_INFO = 0x80,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -2012,6 +2013,7 @@ void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
|
|||||||
|
|
||||||
enum print_flags {
|
enum print_flags {
|
||||||
print_no_ssa = 0x1,
|
print_no_ssa = 0x1,
|
||||||
|
print_perf_info = 0x2,
|
||||||
};
|
};
|
||||||
|
|
||||||
void aco_print_operand(const Operand *operand, FILE *output, unsigned flags=0);
|
void aco_print_operand(const Operand *operand, FILE *output, unsigned flags=0);
|
||||||
|
@@ -875,6 +875,9 @@ void aco_print_block(const Block* block, FILE *output, unsigned flags)
|
|||||||
fprintf(output, "*/\n");
|
fprintf(output, "*/\n");
|
||||||
for (auto const& instr : block->instructions) {
|
for (auto const& instr : block->instructions) {
|
||||||
fprintf(output, "\t");
|
fprintf(output, "\t");
|
||||||
|
if (flags & print_perf_info)
|
||||||
|
fprintf(output, "(%3u clk) ", instr->pass_flags);
|
||||||
|
|
||||||
aco_print_instr(instr.get(), output, flags);
|
aco_print_instr(instr.get(), output, flags);
|
||||||
fprintf(output, "\n");
|
fprintf(output, "\n");
|
||||||
}
|
}
|
||||||
|
@@ -488,8 +488,11 @@ void collect_preasm_stats(Program *program)
|
|||||||
for (unsigned pred : block.linear_preds)
|
for (unsigned pred : block.linear_preds)
|
||||||
block_est.join(blocks[pred]);
|
block_est.join(blocks[pred]);
|
||||||
|
|
||||||
for (aco_ptr<Instruction>& instr : block.instructions)
|
for (aco_ptr<Instruction>& instr : block.instructions) {
|
||||||
|
unsigned before = block_est.cur_cycle;
|
||||||
block_est.add(instr);
|
block_est.add(instr);
|
||||||
|
instr->pass_flags = block_est.cur_cycle - before;
|
||||||
|
}
|
||||||
|
|
||||||
/* TODO: it would be nice to be able to consider estimated loop trip
|
/* TODO: it would be nice to be able to consider estimated loop trip
|
||||||
* counts used for loop unrolling.
|
* counts used for loop unrolling.
|
||||||
@@ -541,6 +544,24 @@ void collect_preasm_stats(Program *program)
|
|||||||
|
|
||||||
program->statistics[statistic_latency] = round(latency);
|
program->statistics[statistic_latency] = round(latency);
|
||||||
program->statistics[statistic_inv_throughput] = round(1.0 / wave64_per_cycle);
|
program->statistics[statistic_inv_throughput] = round(1.0 / wave64_per_cycle);
|
||||||
|
|
||||||
|
if (debug_flags & DEBUG_PERF_INFO) {
|
||||||
|
aco_print_program(program, stderr, print_no_ssa | print_perf_info);
|
||||||
|
|
||||||
|
fprintf(stderr, "num_waves: %u\n", program->num_waves);
|
||||||
|
fprintf(stderr, "salu_smem_usage: %f\n", usage[(int)BlockCycleEstimator::scalar]);
|
||||||
|
fprintf(stderr, "branch_sendmsg_usage: %f\n", usage[(int)BlockCycleEstimator::branch_sendmsg]);
|
||||||
|
fprintf(stderr, "valu_usage: %f\n", usage[(int)BlockCycleEstimator::valu]);
|
||||||
|
fprintf(stderr, "valu_complex_usage: %f\n", usage[(int)BlockCycleEstimator::valu_complex]);
|
||||||
|
fprintf(stderr, "lds_usage: %f\n", usage[(int)BlockCycleEstimator::lds]);
|
||||||
|
fprintf(stderr, "export_gds_usage: %f\n", usage[(int)BlockCycleEstimator::export_gds]);
|
||||||
|
fprintf(stderr, "vmem_usage: %f\n", usage[(int)BlockCycleEstimator::vmem]);
|
||||||
|
fprintf(stderr, "latency: %f\n", latency);
|
||||||
|
fprintf(stderr, "parallelism: %f\n", parallelism);
|
||||||
|
fprintf(stderr, "max_utilization: %f\n", max_utilization);
|
||||||
|
fprintf(stderr, "wave64_per_cycle: %f\n", wave64_per_cycle);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
|
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
|
||||||
|
Reference in New Issue
Block a user