intel/fs/ra: Spill without destroying the interference graph
Instead of re-building the interference graph every time we spill, we
modify it in place so we can avoid recalculating liveness and the whole
O(n^2) interference graph building process.  To do so we make a
simplifying assumption: every spill/fill temporary register lives for
the entire duration of the instruction around which we're spilling.
This isn't quite true because, for instance, a temporary filled for one
of an instruction's sources doesn't need to interfere with its
destination.  Not re-calculating liveness also means that we aren't
adjusting spill costs based on the new liveness.  The combination of
these things results in a bit of churn in spilling, but it takes a
large cut out of the run-time of shader-db on my laptop.

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15311224 -> 15311360 (<.01%)
    instructions in affected programs: 77027 -> 77163 (0.18%)
    helped: 11
    HURT: 18

    total cycles in shared programs: 355544739 -> 355830749 (0.08%)
    cycles in affected programs: 203273745 -> 203559755 (0.14%)
    helped: 234
    HURT: 190

    total spills in shared programs: 12049 -> 12042 (-0.06%)
    spills in affected programs: 2465 -> 2458 (-0.28%)
    helped: 9
    HURT: 16

    total fills in shared programs: 25112 -> 25165 (0.21%)
    fills in affected programs: 6819 -> 6872 (0.78%)
    helped: 11
    HURT: 16

    Total CPU time (seconds): 2469.68 -> 2360.22 (-4.43%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
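The core of the change is easiest to see in isolation.  Below is a rough,
self-contained sketch of the idea using toy types and hypothetical names
(toy_graph, add_spill_temp, etc. are not the Mesa RA API): when a spill/fill
temporary is created around the instruction at ip, a node is appended to the
existing graph and conservatively marked as interfering with everything live
over [ip - 1, ip + 1], plus any other temporaries for the same instruction,
instead of recomputing liveness and rebuilding the whole graph.

    #include <cassert>
    #include <vector>

    struct toy_graph {
       std::vector<std::vector<int>> adj;

       int add_node()
       {
          adj.emplace_back();
          return (int)adj.size() - 1;
       }

       void add_interference(int a, int b)
       {
          adj[a].push_back(b);
          adj[b].push_back(a);
       }
    };

    struct live_range {
       int start, end; /* first and last ip at which the register is live */
    };

    /* Nodes 0..ranges.size()-1 are the ordinary registers; spill temporaries
     * are appended after them, so the s-th temporary has node index
     * ranges.size() + s.
     */
    int add_spill_temp(toy_graph &g, const std::vector<live_range> &ranges,
                       std::vector<int> &spill_ips, int ip)
    {
       int n = g.add_node();
       assert(n == (int)(ranges.size() + spill_ips.size()));

       /* Conservative liveness: the temporary interferes with every register
        * whose range overlaps [ip - 1, ip + 1].  This is the simplifying
        * assumption described above; a fill feeding a source doesn't really
        * need to conflict with the instruction's destination.
        */
       for (int v = 0; v < (int)ranges.size(); v++) {
          if (!(ip + 1 <= ranges[v].start || ranges[v].end <= ip - 1))
             g.add_interference(n, v);
       }

       /* Temporaries created around the same instruction must not share a
        * register with each other either.
        */
       for (int s = 0; s < (int)spill_ips.size(); s++) {
          if (spill_ips[s] == ip)
             g.add_interference(n, (int)ranges.size() + s);
       }

       spill_ips.push_back(ip);
       return n;
    }

The real implementation is alloc_spill_reg() in the diff below; it also grows
the spill_vgrf_ip array geometrically so the per-temporary bookkeeping stays
amortized O(1).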
@@ -412,6 +412,10 @@ public:
 
       /* Get payload IP information */
       payload_last_use_ip = ralloc_array(mem_ctx, int, payload_node_count);
+
+      spill_vgrf_ip = NULL;
+      spill_vgrf_ip_alloc = 0;
+      spill_node_count = 0;
    }
 
    ~fs_reg_alloc()
@@ -428,7 +432,9 @@ private:
 
    void build_interference_graph(bool allow_spilling);
 
+   void set_spill_costs();
    int choose_spill_reg();
+   fs_reg alloc_spill_reg(unsigned size, int ip);
    void spill_reg(unsigned spill_reg);
 
    void *mem_ctx;
@@ -449,6 +455,11 @@ private:
    int first_mrf_hack_node;
    int grf127_send_hack_node;
    int first_vgrf_node;
+   int first_spill_node;
+
+   int *spill_vgrf_ip;
+   int spill_vgrf_ip_alloc;
+   int spill_node_count;
 };
 
 /**
@@ -559,7 +570,8 @@ fs_reg_alloc::setup_live_interference(unsigned node,
     * node's. We only need to look at nodes below this one as the reflexivity
     * of interference will take care of the rest.
     */
-   for (unsigned n2 = first_vgrf_node; n2 < node; n2++) {
+   for (unsigned n2 = first_vgrf_node;
+        n2 < (unsigned)first_spill_node && n2 < node; n2++) {
       unsigned vgrf = n2 - first_vgrf_node;
       if (!(node_end_ip <= fs->virtual_grf_start[vgrf] ||
             fs->virtual_grf_end[vgrf] <= node_start_ip))
@@ -703,6 +715,7 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
    }
    first_vgrf_node = node_count;
    node_count += fs->alloc.count;
+   first_spill_node = node_count;
 
    fs->calculate_live_intervals();
    fs->calculate_payload_ranges(payload_node_count,
@@ -782,6 +795,9 @@ fs_reg_alloc::build_interference_graph(bool allow_spilling)
     */
    foreach_block_and_inst(block, fs_inst, inst, fs->cfg)
       setup_inst_interference(inst);
+
+   if (allow_spilling)
+      set_spill_costs();
 }
 
 static void
@@ -837,8 +853,8 @@ emit_spill(const fs_builder &bld, fs_reg src,
    }
 }
 
-int
-fs_reg_alloc::choose_spill_reg()
+void
+fs_reg_alloc::set_spill_costs()
 {
    float block_scale = 1.0;
    float spill_costs[fs->alloc.count];
@@ -913,7 +929,11 @@ fs_reg_alloc::choose_spill_reg()
       if (!no_spill[i])
          ra_set_node_spill_cost(g, first_vgrf_node + i, adjusted_cost);
    }
+}
 
+int
+fs_reg_alloc::choose_spill_reg()
+{
    int node = ra_get_best_spill_node(g);
    if (node < 0)
       return -1;
@@ -922,6 +942,38 @@ fs_reg_alloc::choose_spill_reg()
    return node - first_vgrf_node;
 }
 
+fs_reg
+fs_reg_alloc::alloc_spill_reg(unsigned size, int ip)
+{
+   int vgrf = fs->alloc.allocate(size);
+   int n = ra_add_node(g, compiler->fs_reg_sets[rsi].classes[size - 1]);
+   assert(n == first_vgrf_node + vgrf);
+   assert(n == first_spill_node + spill_node_count);
+
+   setup_live_interference(n, ip - 1, ip + 1);
+
+   /* Add interference between this spill node and any other spill nodes for
+    * the same instruction.
+    */
+   for (int s = 0; s < spill_node_count; s++) {
+      if (spill_vgrf_ip[s] == ip)
+         ra_add_node_interference(g, n, first_spill_node + s);
+   }
+
+   /* Add this spill node to the list for next time */
+   if (spill_node_count >= spill_vgrf_ip_alloc) {
+      if (spill_vgrf_ip_alloc == 0)
+         spill_vgrf_ip_alloc = 16;
+      else
+         spill_vgrf_ip_alloc *= 2;
+      spill_vgrf_ip = reralloc(mem_ctx, spill_vgrf_ip, int,
+                               spill_vgrf_ip_alloc);
+   }
+   spill_vgrf_ip[spill_node_count++] = ip;
+
+   return fs_reg(VGRF, vgrf);
+}
+
 void
 fs_reg_alloc::spill_reg(unsigned spill_reg)
 {
@@ -952,13 +1004,22 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
 
    fs->last_scratch += size * REG_SIZE;
 
+   /* We're about to replace all uses of this register.  It no longer
+    * conflicts with anything so we can get rid of its interference.
+    */
+   ra_set_node_spill_cost(g, first_vgrf_node + spill_reg, 0);
+   ra_reset_node_interference(g, first_vgrf_node + spill_reg);
+
    /* Generate spill/unspill instructions for the objects being
     * spilled.  Right now, we spill or unspill the whole thing to a
     * virtual grf of the same size.  For most instructions, though, we
    * could just spill/unspill the GRF being accessed.
    */
+   int ip = 0;
   foreach_block_and_inst (block, fs_inst, inst, fs->cfg) {
      const fs_builder ibld = fs_builder(fs, block, inst);
+      exec_node *before = inst->prev;
+      exec_node *after = inst->next;
 
      for (unsigned int i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == VGRF &&
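The two ra_* calls added above have a simple effect that a small standalone
sketch may make clearer (toy_node and neutralize_spilled_node are hypothetical
names, not the Mesa ra_* API): the node of the register being spilled away is
stripped of its edges and given a zero spill cost in place, so later rounds of
allocation neither conflict with it nor try to spill it again.

    #include <algorithm>
    #include <vector>

    struct toy_node {
       std::vector<int> adj;
       float spill_cost = 0.0f;
    };

    void neutralize_spilled_node(std::vector<toy_node> &nodes, int r)
    {
       /* Remove r from its neighbours' adjacency lists... */
       for (int other : nodes[r].adj) {
          auto &a = nodes[other].adj;
          a.erase(std::remove(a.begin(), a.end(), r), a.end());
       }
       /* ...drop r's own edges, and keep it from being picked again. */
       nodes[r].adj.clear();
       nodes[r].spill_cost = 0.0f;
    }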
@@ -966,7 +1027,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
             int count = regs_read(inst, i);
             int subset_spill_offset = spill_offset +
                ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
-            fs_reg unspill_dst(VGRF, fs->alloc.allocate(count));
+            fs_reg unspill_dst = alloc_spill_reg(count, ip);
 
             inst->src[i].nr = unspill_dst.nr;
             inst->src[i].offset %= REG_SIZE;
@@ -994,7 +1055,7 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
           inst->dst.nr == spill_reg) {
          int subset_spill_offset = spill_offset +
             ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
-         fs_reg spill_src(VGRF, fs->alloc.allocate(regs_written(inst)));
+         fs_reg spill_src = alloc_spill_reg(regs_written(inst), ip);
 
          inst->dst.nr = spill_src.nr;
          inst->dst.offset %= REG_SIZE;
@@ -1045,24 +1106,34 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
          emit_spill(ubld.at(block, inst->next), spill_src,
                     subset_spill_offset, regs_written(inst));
       }
-   }
 
-   fs->invalidate_live_intervals();
+      for (fs_inst *inst = (fs_inst *)before->next;
+           inst != after; inst = (fs_inst *)inst->next)
+         setup_inst_interference(inst);
+
+      /* We don't advance the ip for scratch read/write instructions
+       * because we consider them to have the same ip as instruction we're
+       * spilling around for the purposes of interference.
+       */
+      if (inst->opcode != SHADER_OPCODE_GEN4_SCRATCH_WRITE &&
+          inst->opcode != SHADER_OPCODE_GEN4_SCRATCH_READ &&
+          inst->opcode != SHADER_OPCODE_GEN7_SCRATCH_READ)
+         ip++;
+   }
 }
 
 bool
 fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all)
 {
-   while (1) {
-      build_interference_graph(fs->spilled_any_registers);
+   build_interference_graph(fs->spilled_any_registers || spill_all);
 
+   bool spilled = false;
+   while (1) {
       /* Debug of register spilling: Go spill everything. */
       if (unlikely(spill_all)) {
          int reg = choose_spill_reg();
          if (reg != -1) {
            spill_reg(reg);
-            ralloc_free(g);
-            g = NULL;
            continue;
         }
      }
@@ -1073,6 +1144,17 @@ fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all)
       if (!allow_spilling)
          return false;
 
+      /* If we're going to spill but we've never spilled before, we need to
+       * re-build the interference graph with MRFs enabled to allow spilling.
+       */
+      if (!fs->spilled_any_registers) {
+         ralloc_free(g);
+         g = NULL;
+         build_interference_graph(true);
+      }
+
+      spilled = true;
+
       /* Failed to allocate registers.  Spill a reg, and the caller will
        * loop back into here to try again.
        */
@@ -1081,10 +1163,11 @@ fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all)
         return false;
 
      spill_reg(reg);
-      ralloc_free(g);
-      g = NULL;
   }
 
+   if (spilled)
+      fs->invalidate_live_intervals();
+
   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.