intel/fs: track more steps with INTEL_DEBUG=optimizer

One particular nice thing to have is the first generated backend IR
before validation. Especially if you made a mistake in the NIR
translation, you can at least look at it before validation tells you
off.

Then the last 2 steps of the optimize() function can be interesting to
look at.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24552>
This commit is contained in:
Lionel Landwerlin
2023-08-06 15:46:12 +03:00
committed by Marge Bot
parent f9cd8446ef
commit 0e244d56e3
3 changed files with 51 additions and 27 deletions

View File

@@ -2540,10 +2540,11 @@ fs_visitor::get_pull_locs(const fs_reg &src,
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD * Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs. * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
*/ */
void bool
fs_visitor::lower_constant_loads() fs_visitor::lower_constant_loads()
{ {
unsigned index, pull_index; unsigned index, pull_index;
bool progress = false;
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
/* Set up the annotation tracking for new generated instructions. */ /* Set up the annotation tracking for new generated instructions. */
@@ -2581,6 +2582,8 @@ fs_visitor::lower_constant_loads()
inst->src[i].nr = dst.nr; inst->src[i].nr = dst.nr;
inst->src[i].offset = (base & (block_sz - 1)) + inst->src[i].offset = (base & (block_sz - 1)) +
inst->src[i].offset % 4; inst->src[i].offset % 4;
progress = true;
} }
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
@@ -2595,9 +2598,13 @@ fs_visitor::lower_constant_loads()
inst->src[1], inst->src[1],
pull_index * 4, 4); pull_index * 4, 4);
inst->remove(block); inst->remove(block);
progress = true;
} }
} }
invalidate_analysis(DEPENDENCY_INSTRUCTIONS); invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
return progress;
} }
static uint64_t static uint64_t
@@ -6185,9 +6192,28 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
regpressure_analysis.invalidate(c); regpressure_analysis.invalidate(c);
} }
void
fs_visitor::debug_optimizer(const char *pass_name,
int iteration, int pass_num) const
{
if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
return;
char *filename;
int ret = asprintf(&filename, "%s%d-%s-%02d-%02d-%s",
stage_abbrev, dispatch_width, nir->info.name,
iteration, pass_num, pass_name);
if (ret == -1)
return;
dump_instructions(filename);
free(filename);
}
void void
fs_visitor::optimize() fs_visitor::optimize()
{ {
debug_optimizer("start", 0, 0);
/* Start by validating the shader we currently have. */ /* Start by validating the shader we currently have. */
validate(); validate();
@@ -6204,22 +6230,16 @@ fs_visitor::optimize()
*/ */
bld = fs_builder(this, 64); bld = fs_builder(this, 64);
assign_constant_locations(); bool progress = false;
lower_constant_loads(); int iteration = 0;
int pass_num = 0;
validate();
#define OPT(pass, args...) ({ \ #define OPT(pass, args...) ({ \
pass_num++; \ pass_num++; \
bool this_progress = pass(args); \ bool this_progress = pass(args); \
\ \
if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \ if (this_progress) \
char filename[64]; \ debug_optimizer(#pass, iteration, pass_num); \
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
\
dump_instructions(filename); \
} \
\ \
validate(); \ validate(); \
\ \
@@ -6227,17 +6247,10 @@ fs_visitor::optimize()
this_progress; \ this_progress; \
}) })
if (INTEL_DEBUG(DEBUG_OPTIMIZER)) { assign_constant_locations();
char filename[64]; OPT(lower_constant_loads);
snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
dump_instructions(filename); validate();
}
bool progress = false;
int iteration = 0;
int pass_num = 0;
OPT(split_virtual_grfs); OPT(split_virtual_grfs);
@@ -6360,9 +6373,9 @@ fs_visitor::optimize()
OPT(fixup_sends_duplicate_payload); OPT(fixup_sends_duplicate_payload);
lower_uniform_pull_constant_loads(); OPT(lower_uniform_pull_constant_loads);
lower_find_live_channel(); OPT(lower_find_live_channel);
validate(); validate();
} }
@@ -6733,6 +6746,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
if (needs_register_pressure) if (needs_register_pressure)
shader_stats.max_register_pressure = compute_max_register_pressure(); shader_stats.max_register_pressure = compute_max_register_pressure();
debug_optimizer("pre_register_allocate", 99, 99);
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS); bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
/* Before we schedule anything, stash off the instruction order as an array /* Before we schedule anything, stash off the instruction order as an array

View File

@@ -254,7 +254,7 @@ public:
void assign_constant_locations(); void assign_constant_locations();
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index, bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
unsigned *out_pull_index); unsigned *out_pull_index);
void lower_constant_loads(); bool lower_constant_loads();
virtual void invalidate_analysis(brw::analysis_dependency_class c); virtual void invalidate_analysis(brw::analysis_dependency_class c);
void validate(); void validate();
bool opt_algebraic(); bool opt_algebraic();
@@ -286,7 +286,7 @@ public:
void vfail(const char *msg, va_list args); void vfail(const char *msg, va_list args);
void fail(const char *msg, ...); void fail(const char *msg, ...);
void limit_dispatch_width(unsigned n, const char *msg); void limit_dispatch_width(unsigned n, const char *msg);
void lower_uniform_pull_constant_loads(); bool lower_uniform_pull_constant_loads();
bool lower_load_payload(); bool lower_load_payload();
bool lower_pack(); bool lower_pack();
bool lower_regioning(); bool lower_regioning();
@@ -571,6 +571,9 @@ private:
void lower_mulh_inst(fs_inst *inst, bblock_t *block); void lower_mulh_inst(fs_inst *inst, bblock_t *block);
unsigned workgroup_size() const; unsigned workgroup_size() const;
void debug_optimizer(const char *pass_name,
int iteration, int pass_num) const;
}; };
/** /**

View File

@@ -3033,9 +3033,11 @@ fs_visitor::lower_logical_sends()
* mask, since a later instruction will use one of the result channels as a * mask, since a later instruction will use one of the result channels as a
* source operand for all 8 or 16 of its channels. * source operand for all 8 or 16 of its channels.
*/ */
void bool
fs_visitor::lower_uniform_pull_constant_loads() fs_visitor::lower_uniform_pull_constant_loads()
{ {
bool progress = false;
foreach_block_and_inst (block, fs_inst, inst, cfg) { foreach_block_and_inst (block, fs_inst, inst, cfg) {
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD) if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
continue; continue;
@@ -3125,5 +3127,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1; inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
inst->mlen = 1; inst->mlen = 1;
} }
progress = true;
} }
return progress;
} }