intel/compiler: report max dispatch width statistic

Most tools looking at shader stats assume that a single input produces
only a single resulting binary shader. On Intel HW this is not always
the case. Having a statistic on each variant that reports the maximum
dispatch width therefore helps show improvements on a single shader in
terms of how wide we manage to compile it.

For shaders that can be compiled in multiple SIMD widths (like fragment
shaders), this will report the maximum dispatch width in the
statistics of each variant.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22014>
This commit is contained in:
Lionel Landwerlin
2023-03-19 15:03:33 +02:00
committed by Marge Bot
parent 1e28f2a6f2
commit 2acc2f18ea
4 changed files with 15 additions and 0 deletions

View File

@@ -1696,6 +1696,7 @@ DEFINE_PROG_DATA_DOWNCAST(sf, true)
struct brw_compile_stats {
uint32_t dispatch_width; /**< 0 for vec4 */
uint32_t max_dispatch_width;
uint32_t instructions;
uint32_t sends;
uint32_t loops;

View File

@@ -7631,12 +7631,14 @@ brw_compile_fs(const struct brw_compiler *compiler,
}
struct brw_compile_stats *stats = params->stats;
uint32_t max_dispatch_width = 0;
if (simd8_cfg) {
prog_data->dispatch_8 = true;
g.generate_code(simd8_cfg, 8, v8->shader_stats,
v8->performance_analysis.require(), stats);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 8;
}
if (simd16_cfg) {
@@ -7645,6 +7647,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
simd16_cfg, 16, v16->shader_stats,
v16->performance_analysis.require(), stats);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 16;
}
if (simd32_cfg) {
@@ -7653,8 +7656,12 @@ brw_compile_fs(const struct brw_compiler *compiler,
simd32_cfg, 32, v32->shader_stats,
v32->performance_analysis.require(), stats);
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 32;
}
for (struct brw_compile_stats *s = params->stats; s != NULL && s != stats; s++)
s->max_dispatch_width = max_dispatch_width;
g.add_const_data(nir->constant_data, nir->constant_data_size);
return g.get_assembly();
}
@@ -7890,6 +7897,8 @@ brw_compile_cs(const struct brw_compiler *compiler,
g.enable_debug(name);
}
uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
struct brw_compile_stats *stats = params->stats;
for (unsigned simd = 0; simd < 3; simd++) {
if (prog_data->prog_mask & (1u << simd)) {
@@ -7897,7 +7906,10 @@ brw_compile_cs(const struct brw_compiler *compiler,
prog_data->prog_offset[simd] =
g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
v[simd]->performance_analysis.require(), stats);
if (stats)
stats->max_dispatch_width = max_dispatch_width;
stats = stats ? stats + 1 : NULL;
max_dispatch_width = 8u << simd;
}
}

View File

@@ -2485,6 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
before_size, after_size);
if (stats) {
stats->dispatch_width = dispatch_width;
stats->max_dispatch_width = dispatch_width;
stats->instructions = before_size / 16 - nop_count;
stats->sends = send_count;
stats->loops = loop_count;

View File

@@ -2269,6 +2269,7 @@ generate_code(struct brw_codegen *p,
fill_count, send_count, before_size, after_size);
if (stats) {
stats->dispatch_width = 0;
stats->max_dispatch_width = 0;
stats->instructions = before_size / 16;
stats->sends = send_count;
stats->loops = loop_count;