intel/ir: Use brw::performance object instead of CFG cycle counts for codegen stats.
These should be more accurate than the current cycle counts, since among other things they consider the effect of post-scheduling passes like the software scoreboard on TGL. In addition, this change will enable us to clean up some of the now-redundant cycle-count estimation functionality in the instruction scheduler.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -8845,21 +8845,24 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
|
|
||||||
if (simd8_cfg) {
|
if (simd8_cfg) {
|
||||||
prog_data->dispatch_8 = true;
|
prog_data->dispatch_8 = true;
|
||||||
g.generate_code(simd8_cfg, 8, v8->shader_stats, stats);
|
g.generate_code(simd8_cfg, 8, v8->shader_stats,
|
||||||
|
v8->performance_analysis.require(), stats);
|
||||||
stats = stats ? stats + 1 : NULL;
|
stats = stats ? stats + 1 : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (simd16_cfg) {
|
if (simd16_cfg) {
|
||||||
prog_data->dispatch_16 = true;
|
prog_data->dispatch_16 = true;
|
||||||
prog_data->prog_offset_16 = g.generate_code(simd16_cfg, 16,
|
prog_data->prog_offset_16 = g.generate_code(
|
||||||
v16->shader_stats, stats);
|
simd16_cfg, 16, v16->shader_stats,
|
||||||
|
v16->performance_analysis.require(), stats);
|
||||||
stats = stats ? stats + 1 : NULL;
|
stats = stats ? stats + 1 : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (simd32_cfg) {
|
if (simd32_cfg) {
|
||||||
prog_data->dispatch_32 = true;
|
prog_data->dispatch_32 = true;
|
||||||
prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32,
|
prog_data->prog_offset_32 = g.generate_code(
|
||||||
v32->shader_stats, stats);
|
simd32_cfg, 32, v32->shader_stats,
|
||||||
|
v32->performance_analysis.require(), stats);
|
||||||
stats = stats ? stats + 1 : NULL;
|
stats = stats ? stats + 1 : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -9118,7 +9121,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
g.enable_debug(name);
|
g.enable_debug(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats, stats);
|
g.generate_code(v->cfg, prog_data->simd_size, v->shader_stats,
|
||||||
|
v->performance_analysis.require(), stats);
|
||||||
|
|
||||||
ret = g.get_assembly();
|
ret = g.get_assembly();
|
||||||
}
|
}
|
||||||
|
@@ -477,6 +477,7 @@ public:
|
|||||||
void enable_debug(const char *shader_name);
|
void enable_debug(const char *shader_name);
|
||||||
int generate_code(const cfg_t *cfg, int dispatch_width,
|
int generate_code(const cfg_t *cfg, int dispatch_width,
|
||||||
struct shader_stats shader_stats,
|
struct shader_stats shader_stats,
|
||||||
|
const brw::performance &perf,
|
||||||
struct brw_compile_stats *stats);
|
struct brw_compile_stats *stats);
|
||||||
const unsigned *get_assembly();
|
const unsigned *get_assembly();
|
||||||
|
|
||||||
|
@@ -1715,6 +1715,7 @@ fs_generator::enable_debug(const char *shader_name)
|
|||||||
int
|
int
|
||||||
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||||
struct shader_stats shader_stats,
|
struct shader_stats shader_stats,
|
||||||
|
const brw::performance &perf,
|
||||||
struct brw_compile_stats *stats)
|
struct brw_compile_stats *stats)
|
||||||
{
|
{
|
||||||
/* align to 64 byte boundary. */
|
/* align to 64 byte boundary. */
|
||||||
@@ -2462,7 +2463,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
"Compacted %d to %d bytes (%.0f%%)\n",
|
"Compacted %d to %d bytes (%.0f%%)\n",
|
||||||
shader_name, sha1buf,
|
shader_name, sha1buf,
|
||||||
dispatch_width, before_size / 16,
|
dispatch_width, before_size / 16,
|
||||||
loop_count, cfg->cycle_count,
|
loop_count, perf.latency,
|
||||||
spill_count, fill_count, send_count,
|
spill_count, fill_count, send_count,
|
||||||
shader_stats.scheduler_mode,
|
shader_stats.scheduler_mode,
|
||||||
shader_stats.promoted_constants,
|
shader_stats.promoted_constants,
|
||||||
@@ -2487,7 +2488,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
"compacted %d to %d bytes.",
|
"compacted %d to %d bytes.",
|
||||||
_mesa_shader_stage_to_abbrev(stage),
|
_mesa_shader_stage_to_abbrev(stage),
|
||||||
dispatch_width, before_size / 16 - nop_count,
|
dispatch_width, before_size / 16 - nop_count,
|
||||||
loop_count, cfg->cycle_count,
|
loop_count, perf.latency,
|
||||||
spill_count, fill_count, send_count,
|
spill_count, fill_count, send_count,
|
||||||
shader_stats.scheduler_mode,
|
shader_stats.scheduler_mode,
|
||||||
shader_stats.promoted_constants,
|
shader_stats.promoted_constants,
|
||||||
@@ -2497,7 +2498,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
stats->instructions = before_size / 16 - nop_count;
|
stats->instructions = before_size / 16 - nop_count;
|
||||||
stats->sends = send_count;
|
stats->sends = send_count;
|
||||||
stats->loops = loop_count;
|
stats->loops = loop_count;
|
||||||
stats->cycles = cfg->cycle_count;
|
stats->cycles = perf.latency;
|
||||||
stats->spills = spill_count;
|
stats->spills = spill_count;
|
||||||
stats->fills = fill_count;
|
stats->fills = fill_count;
|
||||||
}
|
}
|
||||||
|
@@ -1371,7 +1371,8 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||||||
nir->info.name));
|
nir->info.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
g.generate_code(v.cfg, 8, v.shader_stats, stats);
|
g.generate_code(v.cfg, 8, v.shader_stats,
|
||||||
|
v.performance_analysis.require(), stats);
|
||||||
|
|
||||||
assembly = g.get_assembly();
|
assembly = g.get_assembly();
|
||||||
} else {
|
} else {
|
||||||
@@ -1387,7 +1388,9 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||||||
v.dump_instructions();
|
v.dump_instructions();
|
||||||
|
|
||||||
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
|
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
|
||||||
&prog_data->base, v.cfg, stats);
|
&prog_data->base, v.cfg,
|
||||||
|
v.performance_analysis.require(),
|
||||||
|
stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
return assembly;
|
return assembly;
|
||||||
|
@@ -2999,7 +2999,8 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
|
|
||||||
g.enable_debug(debug_name);
|
g.enable_debug(debug_name);
|
||||||
}
|
}
|
||||||
g.generate_code(v.cfg, 8, v.shader_stats, stats);
|
g.generate_code(v.cfg, 8, v.shader_stats,
|
||||||
|
v.performance_analysis.require(), stats);
|
||||||
assembly = g.get_assembly();
|
assembly = g.get_assembly();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3017,7 +3018,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
|
|
||||||
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
|
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
|
||||||
shader, &prog_data->base,
|
shader, &prog_data->base,
|
||||||
v.cfg, stats);
|
v.cfg,
|
||||||
|
v.performance_analysis.require(),
|
||||||
|
stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
return assembly;
|
return assembly;
|
||||||
|
@@ -48,6 +48,7 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler,
|
|||||||
const nir_shader *nir,
|
const nir_shader *nir,
|
||||||
struct brw_vue_prog_data *prog_data,
|
struct brw_vue_prog_data *prog_data,
|
||||||
const struct cfg_t *cfg,
|
const struct cfg_t *cfg,
|
||||||
|
const brw::performance &perf,
|
||||||
struct brw_compile_stats *stats);
|
struct brw_compile_stats *stats);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@@ -1498,6 +1498,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
const nir_shader *nir,
|
const nir_shader *nir,
|
||||||
struct brw_vue_prog_data *prog_data,
|
struct brw_vue_prog_data *prog_data,
|
||||||
const struct cfg_t *cfg,
|
const struct cfg_t *cfg,
|
||||||
|
const performance &perf,
|
||||||
struct brw_compile_stats *stats)
|
struct brw_compile_stats *stats)
|
||||||
{
|
{
|
||||||
const struct gen_device_info *devinfo = p->devinfo;
|
const struct gen_device_info *devinfo = p->devinfo;
|
||||||
@@ -2220,7 +2221,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
|
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
|
||||||
"spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
|
"spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
|
||||||
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
|
stage_abbrev, before_size / 16, loop_count, perf.latency,
|
||||||
spill_count, fill_count, send_count, before_size, after_size,
|
spill_count, fill_count, send_count, before_size, after_size,
|
||||||
100.0f * (before_size - after_size) / before_size);
|
100.0f * (before_size - after_size) / before_size);
|
||||||
|
|
||||||
@@ -2239,14 +2240,14 @@ generate_code(struct brw_codegen *p,
|
|||||||
"%d:%d spills:fills, %u sends, "
|
"%d:%d spills:fills, %u sends, "
|
||||||
"compacted %d to %d bytes.",
|
"compacted %d to %d bytes.",
|
||||||
stage_abbrev, before_size / 16,
|
stage_abbrev, before_size / 16,
|
||||||
loop_count, cfg->cycle_count, spill_count,
|
loop_count, perf.latency, spill_count,
|
||||||
fill_count, send_count, before_size, after_size);
|
fill_count, send_count, before_size, after_size);
|
||||||
if (stats) {
|
if (stats) {
|
||||||
stats->dispatch_width = 0;
|
stats->dispatch_width = 0;
|
||||||
stats->instructions = before_size / 16;
|
stats->instructions = before_size / 16;
|
||||||
stats->sends = send_count;
|
stats->sends = send_count;
|
||||||
stats->loops = loop_count;
|
stats->loops = loop_count;
|
||||||
stats->cycles = cfg->cycle_count;
|
stats->cycles = perf.latency;
|
||||||
stats->spills = spill_count;
|
stats->spills = spill_count;
|
||||||
stats->fills = fill_count;
|
stats->fills = fill_count;
|
||||||
}
|
}
|
||||||
@@ -2259,13 +2260,14 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler,
|
|||||||
const nir_shader *nir,
|
const nir_shader *nir,
|
||||||
struct brw_vue_prog_data *prog_data,
|
struct brw_vue_prog_data *prog_data,
|
||||||
const struct cfg_t *cfg,
|
const struct cfg_t *cfg,
|
||||||
|
const performance &perf,
|
||||||
struct brw_compile_stats *stats)
|
struct brw_compile_stats *stats)
|
||||||
{
|
{
|
||||||
struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
|
struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
|
||||||
brw_init_codegen(compiler->devinfo, p, mem_ctx);
|
brw_init_codegen(compiler->devinfo, p, mem_ctx);
|
||||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||||
|
|
||||||
generate_code(p, compiler, log_data, nir, prog_data, cfg, stats);
|
generate_code(p, compiler, log_data, nir, prog_data, cfg, perf, stats);
|
||||||
|
|
||||||
return brw_get_program(p, &prog_data->base.program_size);
|
return brw_get_program(p, &prog_data->base.program_size);
|
||||||
}
|
}
|
||||||
|
@@ -865,7 +865,8 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
label, shader->info.name);
|
label, shader->info.name);
|
||||||
g.enable_debug(name);
|
g.enable_debug(name);
|
||||||
}
|
}
|
||||||
g.generate_code(v.cfg, 8, v.shader_stats, stats);
|
g.generate_code(v.cfg, 8, v.shader_stats,
|
||||||
|
v.performance_analysis.require(), stats);
|
||||||
return g.get_assembly();
|
return g.get_assembly();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -897,7 +898,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
ralloc_free(param);
|
ralloc_free(param);
|
||||||
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
|
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
|
||||||
shader, &prog_data->base,
|
shader, &prog_data->base,
|
||||||
v.cfg, stats);
|
v.cfg,
|
||||||
|
v.performance_analysis.require(),
|
||||||
|
stats);
|
||||||
} else {
|
} else {
|
||||||
/* These variables could be modified by the execution of the GS
|
/* These variables could be modified by the execution of the GS
|
||||||
* visitor if it packed the uniforms in the push constant buffer.
|
* visitor if it packed the uniforms in the push constant buffer.
|
||||||
@@ -960,7 +963,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
|
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
|
||||||
} else {
|
} else {
|
||||||
ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
|
ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
|
||||||
&prog_data->base, gs->cfg, stats);
|
&prog_data->base, gs->cfg,
|
||||||
|
gs->performance_analysis.require(),
|
||||||
|
stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
delete gs;
|
delete gs;
|
||||||
|
@@ -480,7 +480,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||||||
nir->info.name));
|
nir->info.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
g.generate_code(v.cfg, 8, v.shader_stats, stats);
|
g.generate_code(v.cfg, 8, v.shader_stats,
|
||||||
|
v.performance_analysis.require(), stats);
|
||||||
|
|
||||||
assembly = g.get_assembly();
|
assembly = g.get_assembly();
|
||||||
} else {
|
} else {
|
||||||
@@ -497,7 +498,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||||||
|
|
||||||
|
|
||||||
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
|
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
|
||||||
&prog_data->base, v.cfg, stats);
|
&prog_data->base, v.cfg,
|
||||||
|
v.performance_analysis.require(),
|
||||||
|
stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
return assembly;
|
return assembly;
|
||||||
|
Reference in New Issue
Block a user