intel/fs: track more steps with INTEL_DEBUG=optimizer
One particular nice thing to have is the first generated backend IR before validation. Especially if you made a mistake in the NIR translation, you can at least look at it before validation tells you off. Then the last 2 steps of the optimize() function can be interesting to look at. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24552>
This commit is contained in:

committed by
Marge Bot

parent
f9cd8446ef
commit
0e244d56e3
@@ -2540,10 +2540,11 @@ fs_visitor::get_pull_locs(const fs_reg &src,
|
||||
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
|
||||
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
|
||||
*/
|
||||
void
|
||||
bool
|
||||
fs_visitor::lower_constant_loads()
|
||||
{
|
||||
unsigned index, pull_index;
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||
/* Set up the annotation tracking for new generated instructions. */
|
||||
@@ -2581,6 +2582,8 @@ fs_visitor::lower_constant_loads()
|
||||
inst->src[i].nr = dst.nr;
|
||||
inst->src[i].offset = (base & (block_sz - 1)) +
|
||||
inst->src[i].offset % 4;
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
|
||||
@@ -2595,9 +2598,13 @@ fs_visitor::lower_constant_loads()
|
||||
inst->src[1],
|
||||
pull_index * 4, 4);
|
||||
inst->remove(block);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
@@ -6185,9 +6192,28 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
|
||||
regpressure_analysis.invalidate(c);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::debug_optimizer(const char *pass_name,
|
||||
int iteration, int pass_num) const
|
||||
{
|
||||
if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
|
||||
return;
|
||||
|
||||
char *filename;
|
||||
int ret = asprintf(&filename, "%s%d-%s-%02d-%02d-%s",
|
||||
stage_abbrev, dispatch_width, nir->info.name,
|
||||
iteration, pass_num, pass_name);
|
||||
if (ret == -1)
|
||||
return;
|
||||
dump_instructions(filename);
|
||||
free(filename);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::optimize()
|
||||
{
|
||||
debug_optimizer("start", 0, 0);
|
||||
|
||||
/* Start by validating the shader we currently have. */
|
||||
validate();
|
||||
|
||||
@@ -6204,22 +6230,16 @@ fs_visitor::optimize()
|
||||
*/
|
||||
bld = fs_builder(this, 64);
|
||||
|
||||
assign_constant_locations();
|
||||
lower_constant_loads();
|
||||
|
||||
validate();
|
||||
bool progress = false;
|
||||
int iteration = 0;
|
||||
int pass_num = 0;
|
||||
|
||||
#define OPT(pass, args...) ({ \
|
||||
pass_num++; \
|
||||
bool this_progress = pass(args); \
|
||||
\
|
||||
if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \
|
||||
char filename[64]; \
|
||||
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
|
||||
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
|
||||
\
|
||||
dump_instructions(filename); \
|
||||
} \
|
||||
if (this_progress) \
|
||||
debug_optimizer(#pass, iteration, pass_num); \
|
||||
\
|
||||
validate(); \
|
||||
\
|
||||
@@ -6227,17 +6247,10 @@ fs_visitor::optimize()
|
||||
this_progress; \
|
||||
})
|
||||
|
||||
if (INTEL_DEBUG(DEBUG_OPTIMIZER)) {
|
||||
char filename[64];
|
||||
snprintf(filename, 64, "%s%d-%s-00-00-start",
|
||||
stage_abbrev, dispatch_width, nir->info.name);
|
||||
assign_constant_locations();
|
||||
OPT(lower_constant_loads);
|
||||
|
||||
dump_instructions(filename);
|
||||
}
|
||||
|
||||
bool progress = false;
|
||||
int iteration = 0;
|
||||
int pass_num = 0;
|
||||
validate();
|
||||
|
||||
OPT(split_virtual_grfs);
|
||||
|
||||
@@ -6360,9 +6373,9 @@ fs_visitor::optimize()
|
||||
|
||||
OPT(fixup_sends_duplicate_payload);
|
||||
|
||||
lower_uniform_pull_constant_loads();
|
||||
OPT(lower_uniform_pull_constant_loads);
|
||||
|
||||
lower_find_live_channel();
|
||||
OPT(lower_find_live_channel);
|
||||
|
||||
validate();
|
||||
}
|
||||
@@ -6733,6 +6746,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
||||
if (needs_register_pressure)
|
||||
shader_stats.max_register_pressure = compute_max_register_pressure();
|
||||
|
||||
debug_optimizer("pre_register_allocate", 99, 99);
|
||||
|
||||
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
|
||||
|
||||
/* Before we schedule anything, stash off the instruction order as an array
|
||||
|
@@ -254,7 +254,7 @@ public:
|
||||
void assign_constant_locations();
|
||||
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
|
||||
unsigned *out_pull_index);
|
||||
void lower_constant_loads();
|
||||
bool lower_constant_loads();
|
||||
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
||||
void validate();
|
||||
bool opt_algebraic();
|
||||
@@ -286,7 +286,7 @@ public:
|
||||
void vfail(const char *msg, va_list args);
|
||||
void fail(const char *msg, ...);
|
||||
void limit_dispatch_width(unsigned n, const char *msg);
|
||||
void lower_uniform_pull_constant_loads();
|
||||
bool lower_uniform_pull_constant_loads();
|
||||
bool lower_load_payload();
|
||||
bool lower_pack();
|
||||
bool lower_regioning();
|
||||
@@ -571,6 +571,9 @@ private:
|
||||
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
|
||||
|
||||
unsigned workgroup_size() const;
|
||||
|
||||
void debug_optimizer(const char *pass_name,
|
||||
int iteration, int pass_num) const;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -3033,9 +3033,11 @@ fs_visitor::lower_logical_sends()
|
||||
* mask, since a later instruction will use one of the result channels as a
|
||||
* source operand for all 8 or 16 of its channels.
|
||||
*/
|
||||
void
|
||||
bool
|
||||
fs_visitor::lower_uniform_pull_constant_loads()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
||||
continue;
|
||||
@@ -3125,5 +3127,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
||||
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
|
||||
inst->mlen = 1;
|
||||
}
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
Reference in New Issue
Block a user