From 2acc2f18ea74e5f59b1e6f67fec49fd0c6b1e466 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 19 Mar 2023 15:03:33 +0200 Subject: [PATCH] intel/compiler: report max dispatch width statistic Most tools looking at shader stats assume that there is only a single resulting binary shader out of a single input. On Intel HW this is not always the case. So having a statistic on each variant that reports the maximum dispatch width helps show improvement on a single shader in terms of how large we manage to compile it. For shaders that can be compiled in multiple SIMD widths (like fragment shaders), this will report the maximum dispatch width in the statistics of each variant. Signed-off-by: Lionel Landwerlin Reviewed-by: Sagar Ghuge Part-of: --- src/intel/compiler/brw_compiler.h | 1 + src/intel/compiler/brw_fs.cpp | 12 ++++++++++++ src/intel/compiler/brw_fs_generator.cpp | 1 + src/intel/compiler/brw_vec4_generator.cpp | 1 + 4 files changed, 15 insertions(+) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index a714aeab010..f456acc35ae 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1696,6 +1696,7 @@ DEFINE_PROG_DATA_DOWNCAST(sf, true) struct brw_compile_stats { uint32_t dispatch_width; /**< 0 for vec4 */ + uint32_t max_dispatch_width; uint32_t instructions; uint32_t sends; uint32_t loops; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2335e80eabe..e481791ad5a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7631,12 +7631,14 @@ brw_compile_fs(const struct brw_compiler *compiler, } struct brw_compile_stats *stats = params->stats; + uint32_t max_dispatch_width = 0; if (simd8_cfg) { prog_data->dispatch_8 = true; g.generate_code(simd8_cfg, 8, v8->shader_stats, v8->performance_analysis.require(), stats); stats = stats ? 
stats + 1 : NULL; + max_dispatch_width = 8; } if (simd16_cfg) { @@ -7645,6 +7647,7 @@ brw_compile_fs(const struct brw_compiler *compiler, simd16_cfg, 16, v16->shader_stats, v16->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; + max_dispatch_width = 16; } if (simd32_cfg) { @@ -7653,8 +7656,12 @@ brw_compile_fs(const struct brw_compiler *compiler, simd32_cfg, 32, v32->shader_stats, v32->performance_analysis.require(), stats); stats = stats ? stats + 1 : NULL; + max_dispatch_width = 32; } + for (struct brw_compile_stats *s = params->stats; s != NULL && s != stats; s++) + s->max_dispatch_width = max_dispatch_width; + g.add_const_data(nir->constant_data, nir->constant_data_size); return g.get_assembly(); } @@ -7890,6 +7897,8 @@ brw_compile_cs(const struct brw_compiler *compiler, g.enable_debug(name); } + uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1); + struct brw_compile_stats *stats = params->stats; for (unsigned simd = 0; simd < 3; simd++) { if (prog_data->prog_mask & (1u << simd)) { @@ -7897,7 +7906,10 @@ brw_compile_cs(const struct brw_compiler *compiler, prog_data->prog_offset[simd] = g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats, v[simd]->performance_analysis.require(), stats); + if (stats) + stats->max_dispatch_width = max_dispatch_width; stats = stats ? 
stats + 1 : NULL; + max_dispatch_width = 8u << simd; } } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 95227e9655d..56f7815fa5d 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2485,6 +2485,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, before_size, after_size); if (stats) { stats->dispatch_width = dispatch_width; + stats->max_dispatch_width = dispatch_width; stats->instructions = before_size / 16 - nop_count; stats->sends = send_count; stats->loops = loop_count; diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index c6bee0141ff..f47fee23a73 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -2269,6 +2269,7 @@ generate_code(struct brw_codegen *p, fill_count, send_count, before_size, after_size); if (stats) { stats->dispatch_width = 0; + stats->max_dispatch_width = 0; stats->instructions = before_size / 16; stats->sends = send_count; stats->loops = loop_count;