lima/gpir: Rework the scheduler
Now, we do scheduling at the same time as value register allocation. The
ready list now acts similarly to the array of registers in value_regalloc,
keeping us from running out of slots. Before this, the value register
allocator wasn't aware of the scheduling constraints of the actual machine,
which meant that it sometimes chose the wrong false dependencies to insert.
Now, we assign value registers at the same time as we actually schedule
instructions, making its choices reflect reality much better. The old
allocator was also conservative in some cases where the new scheme doesn't
have to be. For example, in something like:

    1 = ld_att
    2 = ld_uni
    3 = add 1, 2

It's possible that one of 1 and 2 can't be scheduled in the same instruction
as 3, meaning that a move needs to be inserted, so the value register
allocator needs to assume that this sequence requires two registers. But when
actually scheduling, we could discover that 1, 2, and 3 can all be scheduled
together, so that they only require one register. The new scheduler
speculatively inserts the instruction under consideration, as well as all of
its child load instructions, and then counts the number of live value
registers after all is said and done. This lets us be more aggressive with
scheduling when we're close to the limit.

With the new scheduler, the kmscube vertex shader is now scheduled in 40
instructions, versus 66 before.

Acked-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
@@ -76,9 +76,13 @@ static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child)
|
||||
gpir_codegen_src_load_w, gpir_codegen_src_unused, gpir_codegen_src_unused },
|
||||
};
|
||||
|
||||
assert(child->sched.instr - parent->sched.instr < 3);
|
||||
int diff = child->sched.instr->index - parent->sched.instr->index;
|
||||
assert(diff < 3);
|
||||
assert(diff >= 0);
|
||||
|
||||
return slot_to_src[child->sched.pos][child->sched.instr - parent->sched.instr];
|
||||
int src = slot_to_src[child->sched.pos][diff];
|
||||
assert(src != gpir_codegen_src_unused);
|
||||
return src;
|
||||
}
|
||||
|
||||
static void gpir_codegen_mul0_slot(gpir_codegen_instr *code, gpir_instr *instr)
|
||||
|
@@ -131,8 +131,6 @@ typedef struct {
|
||||
GPIR_DEP_OFFSET, /* def is the offset of use (i.e. temp store) */
|
||||
GPIR_DEP_READ_AFTER_WRITE,
|
||||
GPIR_DEP_WRITE_AFTER_READ,
|
||||
GPIR_DEP_VREG_READ_AFTER_WRITE,
|
||||
GPIR_DEP_VREG_WRITE_AFTER_READ,
|
||||
} type;
|
||||
|
||||
/* node execute before succ */
|
||||
@@ -146,6 +144,9 @@ typedef struct {
|
||||
struct list_head succ_link;
|
||||
} gpir_dep;
|
||||
|
||||
struct gpir_instr;
|
||||
struct gpir_store_node;
|
||||
|
||||
typedef struct gpir_node {
|
||||
struct list_head list;
|
||||
gpir_op op;
|
||||
@@ -165,12 +166,14 @@ typedef struct gpir_node {
|
||||
int value_reg;
|
||||
union {
|
||||
struct {
|
||||
int instr;
|
||||
struct gpir_instr *instr;
|
||||
struct gpir_store_node *physreg_store;
|
||||
int pos;
|
||||
int dist;
|
||||
int index;
|
||||
bool ready;
|
||||
bool inserted;
|
||||
bool max_node, next_max_node;
|
||||
} sched;
|
||||
struct {
|
||||
int parent_index;
|
||||
@@ -223,7 +226,7 @@ typedef struct {
|
||||
struct list_head reg_link;
|
||||
} gpir_load_node;
|
||||
|
||||
typedef struct {
|
||||
typedef struct gpir_store_node {
|
||||
gpir_node node;
|
||||
|
||||
unsigned index;
|
||||
@@ -266,14 +269,43 @@ enum gpir_instr_slot {
|
||||
GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
typedef struct gpir_instr {
|
||||
int index;
|
||||
struct list_head list;
|
||||
|
||||
gpir_node *slots[GPIR_INSTR_SLOT_NUM];
|
||||
|
||||
/* The number of ALU slots free for moves. */
|
||||
int alu_num_slot_free;
|
||||
|
||||
/* The number of ALU slots free for moves, except for the complex slot. */
|
||||
int alu_non_cplx_slot_free;
|
||||
|
||||
/* We need to make sure that we can insert moves in the following cases:
|
||||
* (1) There was a use of a value two cycles ago.
|
||||
* (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
|
||||
* possibly satisfy (1) for the next cycle).
|
||||
* (3) There is a store instruction scheduled, but not its child.
|
||||
*
|
||||
* The complex slot cannot be used for a move in case (1), since it only
|
||||
* has a FIFO depth of 1, but it can be used for (2) and (3). In order to
|
||||
* ensure that we have enough space for all three, we maintain the
|
||||
* following invariants:
|
||||
*
|
||||
* (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
|
||||
* alu_num_slot_needed_by_max +
|
||||
* alu_num_slot_needed_by_next_max
|
||||
* (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max
|
||||
*/
|
||||
int alu_num_slot_needed_by_store;
|
||||
int alu_num_slot_needed_by_max;
|
||||
int alu_num_slot_needed_by_next_max;
|
||||
|
||||
/* Used to communicate to the scheduler how many slots need to be cleared
|
||||
* up in order to satisfy the invariants.
|
||||
*/
|
||||
int slot_difference;
|
||||
int non_cplx_slot_difference;
|
||||
|
||||
int reg0_use_count;
|
||||
bool reg0_is_attr;
|
||||
@@ -387,18 +419,12 @@ bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
|
||||
void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
|
||||
void gpir_instr_print_prog(gpir_compiler *comp);
|
||||
|
||||
/* Report whether the instruction has no spare ALU slot: true when the
 * number of free ALU slots does not exceed the number of slots that the
 * already-inserted store nodes still need.
 */
static inline bool gpir_instr_alu_slot_is_full(gpir_instr *instr)
{
   int needed_by_store = instr->alu_num_slot_needed_by_store;

   return !(instr->alu_num_slot_free > needed_by_store);
}
|
||||
|
||||
bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
|
||||
|
||||
bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
|
||||
bool gpir_post_rsched_lower_prog(gpir_compiler *comp);
|
||||
bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
|
||||
bool gpir_value_regalloc_prog(gpir_compiler *comp);
|
||||
bool gpir_physical_regalloc_prog(gpir_compiler *comp);
|
||||
bool gpir_regalloc_prog(gpir_compiler *comp);
|
||||
bool gpir_schedule_prog(gpir_compiler *comp);
|
||||
bool gpir_codegen_prog(gpir_compiler *comp);
|
||||
|
||||
|
@@ -36,6 +36,7 @@ gpir_instr *gpir_instr_create(gpir_block *block)
|
||||
|
||||
instr->index = block->sched.instr_index++;
|
||||
instr->alu_num_slot_free = 6;
|
||||
instr->alu_non_cplx_slot_free = 5;
|
||||
|
||||
list_add(&instr->list, &block->instr_list);
|
||||
return instr;
|
||||
@@ -85,6 +86,11 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
|
||||
return false;
|
||||
|
||||
int consume_slot = gpir_instr_get_consume_slot(instr, node);
|
||||
int non_cplx_consume_slot =
|
||||
node->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 0 : consume_slot;
|
||||
int store_reduce_slot = 0;
|
||||
int max_reduce_slot = node->sched.max_node ? 1 : 0;
|
||||
int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
|
||||
|
||||
/* check if this node is child of one store node.
|
||||
* complex1 won't be any of this instr's store node's child,
|
||||
@@ -93,25 +99,40 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
|
||||
for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
|
||||
gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
|
||||
if (s && s->child == node) {
|
||||
/* acc node may consume 2 slots, so even it's the child of a
|
||||
* store node, it may not be inserted successfully, in which
|
||||
* case we need a move node for it */
|
||||
if (instr->alu_num_slot_free - consume_slot <
|
||||
instr->alu_num_slot_needed_by_store - 1)
|
||||
return false;
|
||||
|
||||
instr->alu_num_slot_needed_by_store--;
|
||||
instr->alu_num_slot_free -= consume_slot;
|
||||
return true;
|
||||
store_reduce_slot = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* not a child of any store node, so must reserve alu slot for store node */
|
||||
if (instr->alu_num_slot_free - consume_slot <
|
||||
instr->alu_num_slot_needed_by_store)
|
||||
/* Check that the invariants will be maintained after we adjust everything
|
||||
*/
|
||||
|
||||
int slot_difference =
|
||||
instr->alu_num_slot_needed_by_store - store_reduce_slot +
|
||||
instr->alu_num_slot_needed_by_max - max_reduce_slot +
|
||||
MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
|
||||
(instr->alu_num_slot_free - consume_slot);
|
||||
if (slot_difference > 0) {
|
||||
gpir_debug("failed %d because of alu slot\n", node->index);
|
||||
instr->slot_difference = slot_difference;
|
||||
}
|
||||
|
||||
int non_cplx_slot_difference =
|
||||
instr->alu_num_slot_needed_by_max - max_reduce_slot -
|
||||
(instr->alu_non_cplx_slot_free - non_cplx_consume_slot);
|
||||
if (non_cplx_slot_difference > 0) {
|
||||
gpir_debug("failed %d because of alu slot\n", node->index);
|
||||
instr->non_cplx_slot_difference = non_cplx_slot_difference;
|
||||
}
|
||||
|
||||
if (slot_difference > 0 || non_cplx_slot_difference > 0)
|
||||
return false;
|
||||
|
||||
instr->alu_num_slot_free -= consume_slot;
|
||||
instr->alu_non_cplx_slot_free -= non_cplx_consume_slot;
|
||||
instr->alu_num_slot_needed_by_store -= store_reduce_slot;
|
||||
instr->alu_num_slot_needed_by_max -= max_reduce_slot;
|
||||
instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -123,12 +144,17 @@ static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
|
||||
gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
|
||||
if (s && s->child == node) {
|
||||
instr->alu_num_slot_needed_by_store++;
|
||||
instr->alu_num_slot_free += consume_slot;
|
||||
return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
instr->alu_num_slot_free += consume_slot;
|
||||
if (node->sched.pos != GPIR_INSTR_SLOT_COMPLEX)
|
||||
instr->alu_non_cplx_slot_free += consume_slot;
|
||||
if (node->sched.max_node)
|
||||
instr->alu_num_slot_needed_by_max++;
|
||||
if (node->sched.next_max_node)
|
||||
instr->alu_num_slot_needed_by_next_max++;
|
||||
}
|
||||
|
||||
static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
|
||||
@@ -269,12 +295,18 @@ static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* no store node has the same child as this node, and child is not
|
||||
* already in this instr's alu slot, so instr must have some free
|
||||
* alu slot to insert this node's child
|
||||
/* Check the invariants documented in gpir.h, similar to the ALU case.
|
||||
* Since the only thing that changes is alu_num_slot_needed_by_store, we
|
||||
* can get away with just checking the first one.
|
||||
*/
|
||||
if (gpir_instr_alu_slot_is_full(instr))
|
||||
int slot_difference = instr->alu_num_slot_needed_by_store + 1
|
||||
+ instr->alu_num_slot_needed_by_max +
|
||||
MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
|
||||
instr->alu_num_slot_free;
|
||||
if (slot_difference > 0) {
|
||||
instr->slot_difference = slot_difference;
|
||||
return false;
|
||||
}
|
||||
|
||||
instr->alu_num_slot_needed_by_store++;
|
||||
|
||||
@@ -299,6 +331,9 @@ static void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node)
|
||||
int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1);
|
||||
|
||||
for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {
|
||||
if (j == node->sched.pos)
|
||||
continue;
|
||||
|
||||
gpir_store_node *s = gpir_node_to_store(instr->slots[j]);
|
||||
if (s && s->child == store->child)
|
||||
goto out;
|
||||
@@ -369,6 +404,9 @@ static bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node)
|
||||
|
||||
bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node)
|
||||
{
|
||||
instr->slot_difference = 0;
|
||||
instr->non_cplx_slot_difference = 0;
|
||||
|
||||
if (!gpir_instr_slot_free(instr, node))
|
||||
return false;
|
||||
|
||||
@@ -413,7 +451,7 @@ void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
|
||||
/* This can happen if we merge duplicate loads in the scheduler. */
|
||||
if (instr->slots[node->sched.pos] != node) {
|
||||
node->sched.pos = -1;
|
||||
node->sched.instr = -1;
|
||||
node->sched.instr = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -439,7 +477,7 @@ void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
|
||||
instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL;
|
||||
|
||||
node->sched.pos = -1;
|
||||
node->sched.instr = -1;
|
||||
node->sched.instr = NULL;
|
||||
}
|
||||
|
||||
void gpir_instr_print_prog(gpir_compiler *comp)
|
||||
|
@@ -422,10 +422,7 @@ bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir)
|
||||
if (!gpir_post_rsched_lower_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_value_regalloc_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_physical_regalloc_prog(comp))
|
||||
if (!gpir_regalloc_prog(comp))
|
||||
goto err_out0;
|
||||
|
||||
if (!gpir_schedule_prog(comp))
|
||||
|
@@ -436,8 +436,6 @@ static void gpir_node_print_node(gpir_node *node, int type, int space)
|
||||
[GPIR_DEP_OFFSET] = "offset",
|
||||
[GPIR_DEP_READ_AFTER_WRITE] = "RaW",
|
||||
[GPIR_DEP_WRITE_AFTER_READ] = "WaR",
|
||||
[GPIR_DEP_VREG_READ_AFTER_WRITE] = "vRaW",
|
||||
[GPIR_DEP_VREG_WRITE_AFTER_READ] = "vWaR",
|
||||
};
|
||||
|
||||
for (int i = 0; i < space; i++)
|
||||
|
@@ -1,135 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Lima Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sub license,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Linear scan register alloc for physical reg alloc of each
|
||||
* load/store node
|
||||
*/
|
||||
|
||||
/* Debug dump of the physical register assignment.
 *
 * Does nothing unless LIMA_DEBUG_GP is set in lima_debug. Otherwise walks
 * every node of every block in list order and, for each load_reg/store_reg
 * node, prints the running node counter, the node's index and the index of
 * the register it references. A separator line is printed after each block.
 */
static void regalloc_print_result(gpir_compiler *comp)
{
   if ((lima_debug & LIMA_DEBUG_GP) == 0)
      return;

   printf("======== physical regalloc ========\n");

   /* Counts every node, not only the ones that get printed. */
   int seq = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         switch (node->op) {
         case gpir_op_load_reg: {
            gpir_load_node *load = gpir_node_to_load(node);
            printf("%03d: load %d use reg %d\n", seq, node->index, load->reg->index);
            break;
         }
         case gpir_op_store_reg: {
            gpir_store_node *store = gpir_node_to_store(node);
            printf("%03d: store %d use reg %d\n", seq, node->index, store->reg->index);
            break;
         }
         default:
            break;
         }
         seq++;
      }
      printf("----------------------------\n");
   }
}
|
||||
|
||||
/* Linear-scan allocation of physical registers for every gpir_reg in the
 * program.
 *
 * Steps:
 *   1. Number all nodes in block/list order (node->preg.index).
 *   2. For each reg, compute its live interval: start is the smallest index
 *      among its store (def) nodes, end is the largest index among its load
 *      (use) nodes.
 *   3. Re-sort comp->reg_list by ascending start (insertion sort).
 *   4. Walk the sorted regs, expiring intervals that ended at or before the
 *      current start and handing out the lowest free slot in
 *      [0, GPIR_PHYSICAL_REG_NUM).
 *   5. Copy the chosen slot into every load/store node that references the
 *      reg, split into index (slot >> 2) and component (slot % 4).
 *
 * Always returns true. Spilling is not implemented: running out of slots
 * trips the assert in step 4.
 */
bool gpir_physical_regalloc_prog(gpir_compiler *comp)
{
   int index = 0;
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         node->preg.index = index++;
      }
   }

   /* calculate each reg liveness interval */
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      reg->start = INT_MAX;
      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
         if (store->node.preg.index < reg->start)
            reg->start = store->node.preg.index;
      }

      reg->end = 0;
      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         if (load->node.preg.index > reg->end)
            reg->end = load->node.preg.index;
      }
   }

   /* sort reg list by start value: move everything to a temporary list and
    * re-insert each reg in front of the first entry with a larger start
    */
   struct list_head reg_list;
   list_replace(&comp->reg_list, &reg_list);
   list_inithead(&comp->reg_list);
   list_for_each_entry_safe(gpir_reg, reg, &reg_list, list) {
      /* default: append at the tail of the sorted list */
      struct list_head *insert_pos = &comp->reg_list;
      list_for_each_entry(gpir_reg, creg, &comp->reg_list, list) {
         if (creg->start > reg->start) {
            insert_pos = &creg->list;
            break;
         }
      }
      list_del(&reg->list);
      list_addtail(&reg->list, insert_pos);
   }

   /* do linear scan reg alloc */
   gpir_reg *active[GPIR_PHYSICAL_REG_NUM] = {0};
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      int i;

      /* if some reg is expired */
      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
         if (active[i] && active[i]->end <= reg->start)
            active[i] = NULL;
      }

      /* find a free reg value for this reg */
      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
         if (!active[i]) {
            active[i] = reg;
            reg->index = i;
            break;
         }
      }

      /* TODO: support spill to temp memory */
      assert(i < GPIR_PHYSICAL_REG_NUM);
   }

   /* update load/store node info for the real reg */
   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
         store->index = reg->index >> 2;
         store->component = reg->index % 4;
      }

      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
         load->index = reg->index >> 2;
         /* Set the component, mirroring the store case above; assigning
          * load->index a second time would clobber the register number.
          */
         load->component = reg->index % 4;
      }
   }

   regalloc_print_result(comp);
   return true;
}
|
@@ -24,60 +24,17 @@
|
||||
|
||||
#include "gpir.h"
|
||||
|
||||
/* Linear scan register alloc for value reg alloc of each node */
|
||||
|
||||
/* Free one value register by spilling the node that currently occupies it.
 *
 * active[] must hold GPIR_VALUE_REG_NUM non-NULL entries (every slot is
 * dereferenced). Among the entries whose op is not marked spillless, the
 * one whose last user has the largest vreg.index is chosen. A store_reg
 * node writing a freshly created gpir_reg is inserted for it, and every
 * successor of the spilled node is rewired to read through its own new
 * load_reg node instead.
 *
 * Returns the value register number that the spilled node occupied; the
 * caller is expected to reuse it.
 */
static int regalloc_spill_active_node(gpir_node *active[])
{
   gpir_node *spill = NULL;
   for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) {
      if (gpir_op_infos[active[i]->op].spillless)
         continue;

      /* spill farthest node */
      if (!spill ||
          spill->vreg.last->vreg.index < active[i]->vreg.last->vreg.index) {
         spill = active[i];
      }
   }

   assert(spill);
   gpir_debug("value regalloc spill node %d for value reg %d\n",
              spill->index, spill->value_reg);

   /* create store node for spilled node */
   gpir_store_node *store = gpir_node_create(spill->block, gpir_op_store_reg);
   store->child = spill;
   /* no need to calculate other vreg values because store & spill won't
    * be used in the following schedule again */
   store->node.value_reg = spill->value_reg;
   list_addtail(&store->node.list, &spill->list);

   gpir_reg *reg = gpir_create_reg(spill->block->comp);
   store->reg = reg;
   list_addtail(&store->reg_link, &reg->defs_list);

   /* _safe iteration: each dep is re-pointed at the new load while walking */
   gpir_node_foreach_succ_safe(spill, dep) {
      gpir_node *succ = dep->succ;
      gpir_load_node *load = gpir_node_create(succ->block, gpir_op_load_reg);
      gpir_node_replace_pred(dep, &load->node);
      gpir_node_replace_child(succ, spill, &load->node);
      list_addtail(&load->node.list, &succ->list);

      /* only valid for succ already scheduled, succ not scheduled will
       * re-write this value */
      load->node.value_reg = spill->value_reg;
      /* Place the load halfway between its two neighbours in the node list.
       * NOTE(review): the average is computed in float; confirm that
       * vreg.index is a floating-point field, otherwise it truncates.
       */
      load->node.vreg.index =
         (list_first_entry(&load->node.list, gpir_node, list)->vreg.index +
          list_last_entry(&load->node.list, gpir_node, list)->vreg.index) / 2.0f;
      load->node.vreg.last = succ;

      load->reg = reg;
      list_addtail(&load->reg_link, &reg->uses_list);
   }

   /* Added after the loop so the store itself is not rewired to a load. */
   gpir_node_add_dep(&store->node, spill, GPIR_DEP_INPUT);
   return spill->value_reg;
}
|
||||
/* Register allocation
|
||||
*
|
||||
* TODO: This needs to be rewritten when we support multiple basic blocks. We
|
||||
* need to do proper liveness analysis, combined with either linear scan,
|
||||
* graph coloring, or SSA-based allocation. We should also support spilling to
|
||||
* temporaries.
|
||||
*
|
||||
* For now, this only assigns fake registers to values, used to build the fake
|
||||
* dependencies that the scheduler relies on. In the future we should also be
|
||||
* assigning actual physreg numbers to load_reg/store_reg nodes.
|
||||
*/
|
||||
|
||||
static void regalloc_block(gpir_block *block)
|
||||
{
|
||||
@@ -99,7 +56,7 @@ static void regalloc_block(gpir_block *block)
|
||||
|
||||
/* do linear scan regalloc */
|
||||
int reg_search_start = 0;
|
||||
gpir_node *active[GPIR_VALUE_REG_NUM] = {0};
|
||||
gpir_node *active[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM] = {0};
|
||||
list_for_each_entry(gpir_node, node, &block->node_list, list) {
|
||||
/* if some reg is expired */
|
||||
gpir_node_foreach_pred(node, dep) {
|
||||
@@ -116,9 +73,9 @@ static void regalloc_block(gpir_block *block)
|
||||
|
||||
/* find a free reg for this node */
|
||||
int i;
|
||||
for (i = 0; i < GPIR_VALUE_REG_NUM; i++) {
|
||||
for (i = 0; i < GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM; i++) {
|
||||
/* round robin reg select to reduce false dep when schedule */
|
||||
int reg = (reg_search_start + i) % GPIR_VALUE_REG_NUM;
|
||||
int reg = (reg_search_start + i) % (GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
|
||||
if (!active[reg]) {
|
||||
active[reg] = node;
|
||||
node->value_reg = reg;
|
||||
@@ -127,14 +84,8 @@ static void regalloc_block(gpir_block *block)
|
||||
}
|
||||
}
|
||||
|
||||
/* need spill */
|
||||
if (i == GPIR_VALUE_REG_NUM) {
|
||||
int spilled_reg = regalloc_spill_active_node(active);
|
||||
active[spilled_reg] = node;
|
||||
node->value_reg = spilled_reg;
|
||||
gpir_debug("value regalloc node %d reuse reg %d\n",
|
||||
node->index, spilled_reg);
|
||||
}
|
||||
/* TODO: spill */
|
||||
assert(i != GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,7 +95,7 @@ static void regalloc_print_result(gpir_compiler *comp)
|
||||
return;
|
||||
|
||||
int index = 0;
|
||||
printf("======== value regalloc ========\n");
|
||||
printf("======== regalloc ========\n");
|
||||
list_for_each_entry(gpir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(gpir_node, node, &block->node_list, list) {
|
||||
printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
|
||||
@@ -159,7 +110,7 @@ static void regalloc_print_result(gpir_compiler *comp)
|
||||
}
|
||||
}
|
||||
|
||||
bool gpir_value_regalloc_prog(gpir_compiler *comp)
|
||||
bool gpir_regalloc_prog(gpir_compiler *comp)
|
||||
{
|
||||
list_for_each_entry(gpir_block, block, &comp->block_list, list) {
|
||||
regalloc_block(block);
|
File diff suppressed because it is too large
Load Diff
@@ -29,8 +29,7 @@ files_lima = files(
|
||||
'ir/gp/codegen.h',
|
||||
'ir/gp/codegen.c',
|
||||
'ir/gp/reduce_scheduler.c',
|
||||
'ir/gp/value_regalloc.c',
|
||||
'ir/gp/physical_regalloc.c',
|
||||
'ir/gp/regalloc.c',
|
||||
'ir/gp/disasm.c',
|
||||
|
||||
'ir/pp/ppir.h',
|
||||
|
Reference in New Issue
Block a user