intel/brw: Add a new def analysis pass
This introduces a new analysis pass that opportunistically looks for VGRFs which happen to satisfy the SSA definition properties. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28666>
This commit is contained in:

committed by
Marge Bot

parent
ad9e414aa9
commit
0d144821f0
205
src/intel/compiler/brw_def_analysis.cpp
Normal file
205
src/intel/compiler/brw_def_analysis.cpp
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2023 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "brw_fs.h"
|
||||||
|
#include "brw_cfg.h"
|
||||||
|
#include "brw_ir_analysis.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An opportunistic SSA-def analysis pass.
|
||||||
|
*
|
||||||
|
* VGRFs are considered defs (SSA values) when:
|
||||||
|
*
|
||||||
|
* 1. One instruction wholly defines the register (including all offsets)
|
||||||
|
* 2. The single defining write dominates all uses
|
||||||
|
* 3. All VGRF sources of the definition are also defs
|
||||||
|
*
|
||||||
|
* We don't consider non-VGRF sources to prevent an instruction from forming
|
||||||
|
* an SSA def. The other files represent immediates, pushed uniforms, inputs
|
||||||
|
* to shaders, thread payload fields, and so on. In theory, we could mutate
|
||||||
|
* FIXED_GRF register values, but we don't today, so it isn't an issue.
|
||||||
|
*
|
||||||
|
* Limitations:
|
||||||
|
* - We do not track uses, only definitions.
|
||||||
|
* - We do not handle flags, address registers, or accumulators yet.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
*
|
||||||
|
* const def_analysis &defs = s.def_analysis.require();
|
||||||
|
* fs_inst *def = defs.get(inst->src[i]); // returns NULL if non-SSA
|
||||||
|
* bblock_t *block = defs.get_block(inst->src[i]); // block containing def
|
||||||
|
*
|
||||||
|
* Def analysis requires the dominator tree, but not liveness information.
|
||||||
|
*/
|
||||||
|
|
||||||
|
using namespace brw;
|
||||||
|
|
||||||
|
/* Sentinel stored in def_insts[] for a VGRF for which no write has been
 * seen yet.  It is a fake, non-NULL pointer value that the code in this
 * file only ever compares against; it does not point at an instruction.
 */
static fs_inst *const UNSEEN = (fs_inst *) (uintptr_t) 1;
|
||||||
|
|
||||||
|
void
|
||||||
|
def_analysis::mark_invalid(int nr)
|
||||||
|
{
|
||||||
|
def_blocks[nr] = NULL;
|
||||||
|
def_insts[nr] = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
def_analysis::update_for_reads(const idom_tree &idom,
|
||||||
|
bblock_t *block,
|
||||||
|
fs_inst *inst)
|
||||||
|
{
|
||||||
|
/* We don't track accumulator use for def analysis, so if an instruction
|
||||||
|
* implicitly reads the accumulator, we don't consider it to produce a def.
|
||||||
|
*/
|
||||||
|
if (inst->reads_accumulator_implicitly())
|
||||||
|
mark_invalid(inst->dst.nr);
|
||||||
|
|
||||||
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
|
const int nr = inst->src[i].nr;
|
||||||
|
|
||||||
|
if (inst->src[i].file != VGRF) {
|
||||||
|
/* Similarly, explicit reads of accumulators, address registers,
|
||||||
|
* and flags make the destination not a def, as we don't track those.
|
||||||
|
*/
|
||||||
|
if (inst->src[i].file == ARF &&
|
||||||
|
(nr == BRW_ARF_ADDRESS ||
|
||||||
|
nr == BRW_ARF_ACCUMULATOR ||
|
||||||
|
nr == BRW_ARF_FLAG))
|
||||||
|
mark_invalid(inst->dst.nr);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (def_insts[nr]) {
|
||||||
|
/* Mark the source def invalid in two cases:
|
||||||
|
*
|
||||||
|
* 1. The register is used before being written
|
||||||
|
* 2. The def doesn't dominate our use.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
if (def_insts[nr] == UNSEEN ||
|
||||||
|
!idom.dominates(def_blocks[nr], block))
|
||||||
|
mark_invalid(nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Additionally, if one of our sources is not a def, then our
|
||||||
|
* destination may have multiple dynamic assignments.
|
||||||
|
*/
|
||||||
|
if (!def_insts[nr] && inst->dst.file == VGRF)
|
||||||
|
mark_invalid(inst->dst.nr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
def_analysis::fully_defines(const fs_visitor *v, fs_inst *inst)
|
||||||
|
{
|
||||||
|
return v->alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
|
||||||
|
!inst->is_partial_write();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
def_analysis::update_for_write(const fs_visitor *v,
|
||||||
|
bblock_t *block,
|
||||||
|
fs_inst *inst)
|
||||||
|
{
|
||||||
|
const int nr = inst->dst.nr;
|
||||||
|
|
||||||
|
if (inst->dst.file != VGRF || !def_insts[nr])
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* If this is our first write to the destination, and it fully defines
|
||||||
|
* the destination, then consider it an SSA def for now.
|
||||||
|
*/
|
||||||
|
if (def_insts[nr] == UNSEEN && fully_defines(v, inst)) {
|
||||||
|
def_insts[nr] = inst;
|
||||||
|
def_blocks[nr] = block;
|
||||||
|
} else {
|
||||||
|
/* Otherwise this is a second write or a partial write, in which
|
||||||
|
* case we know with certainty that this isn't an SSA def.
|
||||||
|
*/
|
||||||
|
mark_invalid(nr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Run the analysis over the shader held by \p v.
 *
 * Phase 1 walks every instruction in CFG order, applying the read and
 * write rules.  Phase 2 repeatedly drops any remaining def that reads a
 * VGRF which is not itself a def, until nothing changes.
 */
def_analysis::def_analysis(const fs_visitor *v)
{
   const idom_tree &idom = v->idom_analysis.require();

   def_count = v->alloc.count;

   def_insts = new fs_inst*[def_count]();
   def_blocks = new bblock_t*[def_count]();

   /* Every register starts out "not written yet". */
   for (unsigned nr = 0; nr < def_count; nr++)
      def_insts[nr] = UNSEEN;

   /* Phase 1: SHADER_OPCODE_UNDEF instructions are skipped entirely. */
   foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
      if (inst->opcode != SHADER_OPCODE_UNDEF) {
         update_for_reads(idom, block, inst);
         update_for_write(v, block, inst);
      }
   }

   /* Phase 2: iterate to a fixed point. */
   bool changed = true;
   while (changed) {
      changed = false;

      for (unsigned d = 0; d < def_count; d++) {
         /* Anything still unseen was never written and thus dead code. */
         if (def_insts[d] == UNSEEN)
            def_insts[d] = NULL;

         fs_inst *const def = def_insts[d];
         if (def == NULL)
            continue;

         for (int s = 0; s < def->sources; s++) {
            if (def->src[s].file != VGRF)
               continue;

            fs_inst *const src_def = def_insts[def->src[s].nr];

            /* If our "def" reads a non-SSA source, then it isn't a def. */
            if (src_def == NULL || src_def == UNSEEN) {
               mark_invalid(def->dst.nr);
               changed = true;
               break;
            }
         }
      }
   }
}
|
||||||
|
|
||||||
|
/** Release the two per-VGRF tables allocated by the constructor. */
def_analysis::~def_analysis()
{
   delete[] def_blocks;
   delete[] def_insts;
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
def_analysis::validate(const fs_visitor *v) const
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < def_count; i++) {
|
||||||
|
assert(!def_insts[i] == !def_blocks[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
def_analysis::print_stats(const fs_visitor *v) const
|
||||||
|
{
|
||||||
|
unsigned defs = 0;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < def_count; i++) {
|
||||||
|
if (def_insts[i])
|
||||||
|
++defs;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "DEFS: %u registers, %u SSA, %u non-SSA => %.1f SSA\n",
|
||||||
|
def_count, defs, def_count - defs,
|
||||||
|
100.0f * float(defs) / float(def_count));
|
||||||
|
}
|
@@ -2739,6 +2739,7 @@ fs_visitor::invalidate_analysis(brw::analysis_dependency_class c)
|
|||||||
live_analysis.invalidate(c);
|
live_analysis.invalidate(c);
|
||||||
regpressure_analysis.invalidate(c);
|
regpressure_analysis.invalidate(c);
|
||||||
idom_analysis.invalidate(c);
|
idom_analysis.invalidate(c);
|
||||||
|
def_analysis.invalidate(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -69,6 +69,51 @@ namespace brw {
|
|||||||
|
|
||||||
unsigned *regs_live_at_ip;
|
unsigned *regs_live_at_ip;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class def_analysis {
|
||||||
|
public:
|
||||||
|
def_analysis(const fs_visitor *v);
|
||||||
|
~def_analysis();
|
||||||
|
|
||||||
|
fs_inst *
|
||||||
|
get(const fs_reg ®) const
|
||||||
|
{
|
||||||
|
return reg.file == VGRF && reg.nr < def_count ?
|
||||||
|
def_insts[reg.nr] : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
bblock_t *
|
||||||
|
get_block(const fs_reg ®) const
|
||||||
|
{
|
||||||
|
return reg.file == VGRF && reg.nr < def_count ?
|
||||||
|
def_blocks[reg.nr] : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned count() const { return def_count; }
|
||||||
|
|
||||||
|
void print_stats(const fs_visitor *) const;
|
||||||
|
|
||||||
|
analysis_dependency_class
|
||||||
|
dependency_class() const
|
||||||
|
{
|
||||||
|
return DEPENDENCY_INSTRUCTION_IDENTITY |
|
||||||
|
DEPENDENCY_INSTRUCTION_DATA_FLOW |
|
||||||
|
DEPENDENCY_VARIABLES |
|
||||||
|
DEPENDENCY_BLOCKS;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool validate(const fs_visitor *) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void mark_invalid(int);
|
||||||
|
bool fully_defines(const fs_visitor *v, fs_inst *);
|
||||||
|
void update_for_reads(const idom_tree &idom, bblock_t *block, fs_inst *);
|
||||||
|
void update_for_write(const fs_visitor *v, bblock_t *block, fs_inst *);
|
||||||
|
|
||||||
|
fs_inst **def_insts;
|
||||||
|
bblock_t **def_blocks;
|
||||||
|
unsigned def_count;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#define UBO_START ((1 << 16) - 4)
|
#define UBO_START ((1 << 16) - 4)
|
||||||
@@ -349,6 +394,7 @@ public:
|
|||||||
brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
|
brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
|
||||||
brw_analysis<brw::performance, fs_visitor> performance_analysis;
|
brw_analysis<brw::performance, fs_visitor> performance_analysis;
|
||||||
brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
|
brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
|
||||||
|
brw_analysis<brw::def_analysis, fs_visitor> def_analysis;
|
||||||
|
|
||||||
/** Number of uniform variable components visited. */
|
/** Number of uniform variable components visited. */
|
||||||
unsigned uniforms;
|
unsigned uniforms;
|
||||||
|
@@ -1036,7 +1036,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
|||||||
debug_enabled(debug_enabled),
|
debug_enabled(debug_enabled),
|
||||||
key(key), gs_compile(NULL), prog_data(prog_data),
|
key(key), gs_compile(NULL), prog_data(prog_data),
|
||||||
live_analysis(this), regpressure_analysis(this),
|
live_analysis(this), regpressure_analysis(this),
|
||||||
performance_analysis(this), idom_analysis(this),
|
performance_analysis(this), idom_analysis(this), def_analysis(this),
|
||||||
needs_register_pressure(needs_register_pressure),
|
needs_register_pressure(needs_register_pressure),
|
||||||
dispatch_width(dispatch_width),
|
dispatch_width(dispatch_width),
|
||||||
max_polygons(0),
|
max_polygons(0),
|
||||||
@@ -1060,7 +1060,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
|||||||
debug_enabled(debug_enabled),
|
debug_enabled(debug_enabled),
|
||||||
key(&key->base), gs_compile(NULL), prog_data(&prog_data->base),
|
key(&key->base), gs_compile(NULL), prog_data(&prog_data->base),
|
||||||
live_analysis(this), regpressure_analysis(this),
|
live_analysis(this), regpressure_analysis(this),
|
||||||
performance_analysis(this), idom_analysis(this),
|
performance_analysis(this), idom_analysis(this), def_analysis(this),
|
||||||
needs_register_pressure(needs_register_pressure),
|
needs_register_pressure(needs_register_pressure),
|
||||||
dispatch_width(dispatch_width),
|
dispatch_width(dispatch_width),
|
||||||
max_polygons(max_polygons),
|
max_polygons(max_polygons),
|
||||||
@@ -1088,7 +1088,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler,
|
|||||||
key(&c->key.base), gs_compile(c),
|
key(&c->key.base), gs_compile(c),
|
||||||
prog_data(&prog_data->base.base),
|
prog_data(&prog_data->base.base),
|
||||||
live_analysis(this), regpressure_analysis(this),
|
live_analysis(this), regpressure_analysis(this),
|
||||||
performance_analysis(this), idom_analysis(this),
|
performance_analysis(this), idom_analysis(this), def_analysis(this),
|
||||||
needs_register_pressure(needs_register_pressure),
|
needs_register_pressure(needs_register_pressure),
|
||||||
dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8),
|
dispatch_width(compiler->devinfo->ver >= 20 ? 16 : 8),
|
||||||
max_polygons(0),
|
max_polygons(0),
|
||||||
|
@@ -47,6 +47,7 @@ libintel_compiler_brw_files = files(
|
|||||||
'brw_compiler.h',
|
'brw_compiler.h',
|
||||||
'brw_dead_control_flow.cpp',
|
'brw_dead_control_flow.cpp',
|
||||||
'brw_debug_recompile.c',
|
'brw_debug_recompile.c',
|
||||||
|
'brw_def_analysis.cpp',
|
||||||
'brw_disasm.c',
|
'brw_disasm.c',
|
||||||
'brw_disasm_info.cpp',
|
'brw_disasm_info.cpp',
|
||||||
'brw_disasm_info.h',
|
'brw_disasm_info.h',
|
||||||
|
Reference in New Issue
Block a user