intel/fs: track more steps with INTEL_DEBUG=optimizer
One particular nice thing to have is the first generated backend IR before validation. Especially if you made a mistake in the NIR translation, you can at least look at it before validation tells you off. Then the last 2 steps of the optimize() function can be interesting to look at. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24552>
This commit is contained in:

committed by
Marge Bot

parent
f9cd8446ef
commit
0e244d56e3
@@ -2540,10 +2540,11 @@ fs_visitor::get_pull_locs(const fs_reg &src,
|
|||||||
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
|
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
|
||||||
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
|
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
|
||||||
*/
|
*/
|
||||||
void
|
bool
|
||||||
fs_visitor::lower_constant_loads()
|
fs_visitor::lower_constant_loads()
|
||||||
{
|
{
|
||||||
unsigned index, pull_index;
|
unsigned index, pull_index;
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||||
/* Set up the annotation tracking for new generated instructions. */
|
/* Set up the annotation tracking for new generated instructions. */
|
||||||
@@ -2581,6 +2582,8 @@ fs_visitor::lower_constant_loads()
|
|||||||
inst->src[i].nr = dst.nr;
|
inst->src[i].nr = dst.nr;
|
||||||
inst->src[i].offset = (base & (block_sz - 1)) +
|
inst->src[i].offset = (base & (block_sz - 1)) +
|
||||||
inst->src[i].offset % 4;
|
inst->src[i].offset % 4;
|
||||||
|
|
||||||
|
progress = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
|
if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
|
||||||
@@ -2595,9 +2598,13 @@ fs_visitor::lower_constant_loads()
|
|||||||
inst->src[1],
|
inst->src[1],
|
||||||
pull_index * 4, 4);
|
pull_index * 4, 4);
|
||||||
inst->remove(block);
|
inst->remove(block);
|
||||||
|
|
||||||
|
progress = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||||
|
|
||||||
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
@@ -6185,9 +6192,28 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
|
|||||||
regpressure_analysis.invalidate(c);
|
regpressure_analysis.invalidate(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::debug_optimizer(const char *pass_name,
|
||||||
|
int iteration, int pass_num) const
|
||||||
|
{
|
||||||
|
if (!INTEL_DEBUG(DEBUG_OPTIMIZER))
|
||||||
|
return;
|
||||||
|
|
||||||
|
char *filename;
|
||||||
|
int ret = asprintf(&filename, "%s%d-%s-%02d-%02d-%s",
|
||||||
|
stage_abbrev, dispatch_width, nir->info.name,
|
||||||
|
iteration, pass_num, pass_name);
|
||||||
|
if (ret == -1)
|
||||||
|
return;
|
||||||
|
dump_instructions(filename);
|
||||||
|
free(filename);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::optimize()
|
fs_visitor::optimize()
|
||||||
{
|
{
|
||||||
|
debug_optimizer("start", 0, 0);
|
||||||
|
|
||||||
/* Start by validating the shader we currently have. */
|
/* Start by validating the shader we currently have. */
|
||||||
validate();
|
validate();
|
||||||
|
|
||||||
@@ -6204,22 +6230,16 @@ fs_visitor::optimize()
|
|||||||
*/
|
*/
|
||||||
bld = fs_builder(this, 64);
|
bld = fs_builder(this, 64);
|
||||||
|
|
||||||
assign_constant_locations();
|
bool progress = false;
|
||||||
lower_constant_loads();
|
int iteration = 0;
|
||||||
|
int pass_num = 0;
|
||||||
validate();
|
|
||||||
|
|
||||||
#define OPT(pass, args...) ({ \
|
#define OPT(pass, args...) ({ \
|
||||||
pass_num++; \
|
pass_num++; \
|
||||||
bool this_progress = pass(args); \
|
bool this_progress = pass(args); \
|
||||||
\
|
\
|
||||||
if (INTEL_DEBUG(DEBUG_OPTIMIZER) && this_progress) { \
|
if (this_progress) \
|
||||||
char filename[64]; \
|
debug_optimizer(#pass, iteration, pass_num); \
|
||||||
snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
|
|
||||||
stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
|
|
||||||
\
|
|
||||||
dump_instructions(filename); \
|
|
||||||
} \
|
|
||||||
\
|
\
|
||||||
validate(); \
|
validate(); \
|
||||||
\
|
\
|
||||||
@@ -6227,17 +6247,10 @@ fs_visitor::optimize()
|
|||||||
this_progress; \
|
this_progress; \
|
||||||
})
|
})
|
||||||
|
|
||||||
if (INTEL_DEBUG(DEBUG_OPTIMIZER)) {
|
assign_constant_locations();
|
||||||
char filename[64];
|
OPT(lower_constant_loads);
|
||||||
snprintf(filename, 64, "%s%d-%s-00-00-start",
|
|
||||||
stage_abbrev, dispatch_width, nir->info.name);
|
|
||||||
|
|
||||||
dump_instructions(filename);
|
validate();
|
||||||
}
|
|
||||||
|
|
||||||
bool progress = false;
|
|
||||||
int iteration = 0;
|
|
||||||
int pass_num = 0;
|
|
||||||
|
|
||||||
OPT(split_virtual_grfs);
|
OPT(split_virtual_grfs);
|
||||||
|
|
||||||
@@ -6360,9 +6373,9 @@ fs_visitor::optimize()
|
|||||||
|
|
||||||
OPT(fixup_sends_duplicate_payload);
|
OPT(fixup_sends_duplicate_payload);
|
||||||
|
|
||||||
lower_uniform_pull_constant_loads();
|
OPT(lower_uniform_pull_constant_loads);
|
||||||
|
|
||||||
lower_find_live_channel();
|
OPT(lower_find_live_channel);
|
||||||
|
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
@@ -6733,6 +6746,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
|||||||
if (needs_register_pressure)
|
if (needs_register_pressure)
|
||||||
shader_stats.max_register_pressure = compute_max_register_pressure();
|
shader_stats.max_register_pressure = compute_max_register_pressure();
|
||||||
|
|
||||||
|
debug_optimizer("pre_register_allocate", 99, 99);
|
||||||
|
|
||||||
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
|
bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS);
|
||||||
|
|
||||||
/* Before we schedule anything, stash off the instruction order as an array
|
/* Before we schedule anything, stash off the instruction order as an array
|
||||||
|
@@ -254,7 +254,7 @@ public:
|
|||||||
void assign_constant_locations();
|
void assign_constant_locations();
|
||||||
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
|
bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
|
||||||
unsigned *out_pull_index);
|
unsigned *out_pull_index);
|
||||||
void lower_constant_loads();
|
bool lower_constant_loads();
|
||||||
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
||||||
void validate();
|
void validate();
|
||||||
bool opt_algebraic();
|
bool opt_algebraic();
|
||||||
@@ -286,7 +286,7 @@ public:
|
|||||||
void vfail(const char *msg, va_list args);
|
void vfail(const char *msg, va_list args);
|
||||||
void fail(const char *msg, ...);
|
void fail(const char *msg, ...);
|
||||||
void limit_dispatch_width(unsigned n, const char *msg);
|
void limit_dispatch_width(unsigned n, const char *msg);
|
||||||
void lower_uniform_pull_constant_loads();
|
bool lower_uniform_pull_constant_loads();
|
||||||
bool lower_load_payload();
|
bool lower_load_payload();
|
||||||
bool lower_pack();
|
bool lower_pack();
|
||||||
bool lower_regioning();
|
bool lower_regioning();
|
||||||
@@ -571,6 +571,9 @@ private:
|
|||||||
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
|
void lower_mulh_inst(fs_inst *inst, bblock_t *block);
|
||||||
|
|
||||||
unsigned workgroup_size() const;
|
unsigned workgroup_size() const;
|
||||||
|
|
||||||
|
void debug_optimizer(const char *pass_name,
|
||||||
|
int iteration, int pass_num) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -3033,9 +3033,11 @@ fs_visitor::lower_logical_sends()
|
|||||||
* mask, since a later instruction will use one of the result channels as a
|
* mask, since a later instruction will use one of the result channels as a
|
||||||
* source operand for all 8 or 16 of its channels.
|
* source operand for all 8 or 16 of its channels.
|
||||||
*/
|
*/
|
||||||
void
|
bool
|
||||||
fs_visitor::lower_uniform_pull_constant_loads()
|
fs_visitor::lower_uniform_pull_constant_loads()
|
||||||
{
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
foreach_block_and_inst (block, fs_inst, inst, cfg) {
|
||||||
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
|
||||||
continue;
|
continue;
|
||||||
@@ -3125,5 +3127,9 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||||||
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
|
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->ver) + 1;
|
||||||
inst->mlen = 1;
|
inst->mlen = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return progress;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user