intel/fs: track more steps with INTEL_DEBUG=optimizer

One particular nice thing to have is the first generated backend IR
before validation. Especially if you made a mistake in the NIR
translation, you can at least look at it before validation tells you
off.

Then the last 2 steps of the optimize() function can be interesting to
look at.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24552>
This commit is contained in:
Lionel Landwerlin
2023-08-06 15:46:12 +03:00
committed by Marge Bot
parent f9cd8446ef
commit 0e244d56e3
3 changed files with 51 additions and 27 deletions

View File

@@ -2540,10 +2540,11 @@ fs_visitor::get_pull_locs(const fs_reg &src,
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
*/
void
bool
fs_visitor::lower_constant_loads()
{
unsigned index, pull_index;
bool progress = false;
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
/* Set up the annotation tracking for new generated instructions. */
@@ -2581,6 +2582,8 @@ fs_visitor::lower_constant_loads()
inst->src[i].nr = dst.nr;
inst->src[i].offset = (base & (block_sz - 1)) +
inst->src[i].offset % 4;
progress = true;
}
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
@@ -2595,9 +2598,13 @@ fs_visitor::lower_constant_loads()
inst->src[1],
pull_index * 4, 4);
inst->remove(block);
progress = true;
}
}
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
return progress;
}
static uint64_t
@@ -6185,9 +6192,28 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
regpressure_analysis.invalidate(c);
}
void
fs_visitor::debug_optimizer(const char *pass_name,
int iteration, int pass_num) const
{
if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
return;
char *filename;
int ret = asprintf(&filename, "%s%d-%s-%02d-%02d-%s",
stage_abbrev, dispatch_width, nir->info.name,
iteration, pass_num, pass_name);
if (ret == -1)
return;
dump_instructions(filename);
free(filename);
}
void
fs_visitor::optimize()
{
debug_optimizer("start", 0, 0);
/* Start by validating the shader we currently have. */
validate();
@@ -6204,22 +6230,16 @@ fs_visitor::optimize()
*/
bld = fs_builder(this, 64);
assign_constant_locations();
lower_constant_loads();
validate();
bool progress = false;
int iteration = 0;
int pass_num = 0;
#define OPT(pass, args...) ({ \
pass_num++; \
bool this_progress = pass(args); \
\
if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
\
dump_instructions(filename); \
} \
if (this_progress) \
debug_optimizer(#pass, iteration, pass_num); \
\
validate(); \
\
@@ -6227,17 +6247,10 @@ fs_visitor::optimize()
this_progress; \
})
if (INTEL_DEBUG(DEBUG_OPTIMIZER)) {
char filename[64];
snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
assign_constant_locations();
OPT(lower_constant_loads);
dump_instructions(filename);
}
bool progress = false;
int iteration = 0;
int pass_num = 0;
validate();
OPT(split_virtual_grfs);
@@ -6360,9 +6373,9 @@ fs_visitor::optimize()
OPT(fixup_sends_duplicate_payload);
lower_uniform_pull_constant_loads();
OPT(lower_uniform_pull_constant_loads);
lower_find_live_channel();
OPT(lower_find_live_channel);
validate();
}
@@ -6733,6 +6746,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
if (needs_register_pressure)
shader_stats.max_register_pressure = compute_max_register_pressure();
debug_optimizer("pre_register_allocate", 99, 99);
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
/* Before we schedule anything, stash off the instruction order as an array

View File

@@ -254,7 +254,7 @@ public:
void assign_constant_locations();
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
unsigned *out_pull_index);
void lower_constant_loads();
bool lower_constant_loads();
virtual void invalidate_analysis(brw::analysis_dependency_class c);
void validate();
bool opt_algebraic();
@@ -286,7 +286,7 @@ public:
void vfail(const char *msg, va_list args);
void fail(const char *msg, ...);
void limit_dispatch_width(unsigned n, const char *msg);
void lower_uniform_pull_constant_loads();
bool lower_uniform_pull_constant_loads();
bool lower_load_payload();
bool lower_pack();
bool lower_regioning();
@@ -571,6 +571,9 @@ private:
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
unsigned workgroup_size() const;
void debug_optimizer(const char *pass_name,
int iteration, int pass_num) const;
};
/**

View File

@@ -3033,9 +3033,11 @@ fs_visitor::lower_logical_sends()
* mask, since a later instruction will use one of the result channels as a
* source operand for all 8 or 16 of its channels.
*/
void
bool
fs_visitor::lower_uniform_pull_constant_loads()
{
bool progress = false;
foreach_block_and_inst (block, fs_inst, inst, cfg) {
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
continue;
@@ -3125,5 +3127,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
inst->mlen = 1;
}
progress = true;
}
return progress;
}