lima/gpir: Rework the scheduler

Scheduling now happens at the same time as value register allocation. The
ready list acts similarly to the array of registers in value_regalloc,
keeping us from running out of slots. Before this, the value register
allocator wasn't aware of the scheduling constraints of the actual
machine, which meant that it sometimes chose the wrong false dependencies
to insert. Now that value registers are assigned at the same time as
instructions are actually scheduled, the allocator's choices reflect
reality much better. The old allocator was also conservative in some
cases where the new scheme doesn't have to be. For example, in something
like:

1 = ld_att
2 = ld_uni
3 = add 1, 2

It's possible that one of 1 and 2 can't be scheduled in the same
instruction as 3, meaning that a move needs to be inserted, so the value
register allocator needs to assume that this sequence requires two
registers. But when actually scheduling, we could discover that 1, 2,
and 3 can all be scheduled together, so that they only require one
register. The new scheduler speculatively inserts the instruction under
consideration, as well as all of its child load instructions, and then
counts the number of live value registers after all is said and done.
This lets us be more aggressive with scheduling when we're close to the
limit.
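
As a sketch of what that speculative loop might look like (node_is_load()
and count_live_value_regs() are hypothetical helper names used only for
illustration; gpir_instr_try_insert_node() and gpir_instr_remove_node() are
the real entry points changed below, and the actual scheduler diff is
suppressed for size):

static bool try_node_and_loads(gpir_instr *instr, gpir_node *node,
                               int max_live)
{
   if (!gpir_instr_try_insert_node(instr, node))
      return false;

   /* Speculatively pull each child load into the same instruction; a load
    * that fits no longer needs a value register of its own. */
   gpir_node_foreach_pred(node, dep) {
      if (node_is_load(dep->pred))
         gpir_instr_try_insert_node(instr, dep->pred);
   }

   /* Count the value registers live after all is said and done, and back
    * out the whole speculative placement if we went over the limit. */
   if (count_live_value_regs(instr) > max_live) {
      gpir_node_foreach_pred(node, dep) {
         if (node_is_load(dep->pred) && dep->pred->sched.instr == instr)
            gpir_instr_remove_node(instr, dep->pred);
      }
      gpir_instr_remove_node(instr, node);
      return false;
   }

   return true;
}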

With the new scheduler, the kmscube vertex shader is now scheduled in 40
instructions, versus 66 before.

Acked-by: Qiang Yu <yuq825@gmail.com>
Author: Connor Abbott
Date:   2018-01-11 18:35:58 -05:00
Parent: 12645e8714
Commit: 54434fe670

9 changed files with 1215 additions and 588 deletions


@@ -76,9 +76,13 @@ static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child)
        gpir_codegen_src_load_w, gpir_codegen_src_unused, gpir_codegen_src_unused },
    };
 
-   assert(child->sched.instr - parent->sched.instr < 3);
+   int diff = child->sched.instr->index - parent->sched.instr->index;
+   assert(diff < 3);
+   assert(diff >= 0);
 
-   return slot_to_src[child->sched.pos][child->sched.instr - parent->sched.instr];
+   int src = slot_to_src[child->sched.pos][diff];
+   assert(src != gpir_codegen_src_unused);
+   return src;
 }
 
 static void gpir_codegen_mul0_slot(gpir_codegen_instr *code, gpir_instr *instr)


@@ -131,8 +131,6 @@ typedef struct {
    GPIR_DEP_OFFSET, /* def is the offset of use (i.e. temp store) */
    GPIR_DEP_READ_AFTER_WRITE,
    GPIR_DEP_WRITE_AFTER_READ,
-   GPIR_DEP_VREG_READ_AFTER_WRITE,
-   GPIR_DEP_VREG_WRITE_AFTER_READ,
 } type;
 
 /* node execute before succ */
@@ -146,6 +144,9 @@ typedef struct {
    struct list_head succ_link;
 } gpir_dep;
 
+struct gpir_instr;
+struct gpir_store_node;
+
 typedef struct gpir_node {
    struct list_head list;
    gpir_op op;
@@ -165,12 +166,14 @@ typedef struct gpir_node {
    int value_reg;
 
    union {
       struct {
-         int instr;
+         struct gpir_instr *instr;
+         struct gpir_store_node *physreg_store;
          int pos;
          int dist;
          int index;
          bool ready;
          bool inserted;
+         bool max_node, next_max_node;
       } sched;
       struct {
          int parent_index;
@@ -223,7 +226,7 @@ typedef struct {
    struct list_head reg_link;
 } gpir_load_node;
 
-typedef struct {
+typedef struct gpir_store_node {
    gpir_node node;
 
    unsigned index;
@@ -266,14 +269,43 @@ enum gpir_instr_slot {
    GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS,
 };
 
-typedef struct {
+typedef struct gpir_instr {
    int index;
    struct list_head list;
 
    gpir_node *slots[GPIR_INSTR_SLOT_NUM];
 
+   /* The number of ALU slots free for moves. */
    int alu_num_slot_free;
+   /* The number of ALU slots free for moves, except for the complex slot. */
+   int alu_non_cplx_slot_free;
+   /* We need to make sure that we can insert moves in the following cases:
+    * (1) There was a use of a value two cycles ago.
+    * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
+    *     possibly satisfy (1) for the next cycle).
+    * (3) There is a store instruction scheduled, but not its child.
+    *
+    * The complex slot cannot be used for a move in case (1), since it only
+    * has a FIFO depth of 1, but it can be used for (2) and (3). In order to
+    * ensure that we have enough space for all three, we maintain the
+    * following invariants:
+    *
+    * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
+    *                          alu_num_slot_needed_by_max +
+    *                          alu_num_slot_needed_by_next_max
+    * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max
+    */
    int alu_num_slot_needed_by_store;
+   int alu_num_slot_needed_by_max;
+   int alu_num_slot_needed_by_next_max;
+
+   /* Used to communicate to the scheduler how many slots need to be cleared
+    * up in order to satisfy the invariants.
+    */
+   int slot_difference;
+   int non_cplx_slot_difference;
 
    int reg0_use_count;
    bool reg0_is_attr;
@@ -387,18 +419,12 @@ bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
 void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
 void gpir_instr_print_prog(gpir_compiler *comp);
 
-static inline bool gpir_instr_alu_slot_is_full(gpir_instr *instr)
-{
-   return instr->alu_num_slot_free <= instr->alu_num_slot_needed_by_store;
-}
-
 bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
 
 bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
 bool gpir_post_rsched_lower_prog(gpir_compiler *comp);
 bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
-bool gpir_value_regalloc_prog(gpir_compiler *comp);
-bool gpir_physical_regalloc_prog(gpir_compiler *comp);
+bool gpir_regalloc_prog(gpir_compiler *comp);
 bool gpir_schedule_prog(gpir_compiler *comp);
 bool gpir_codegen_prog(gpir_compiler *comp);
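
To make invariant (1) above concrete, here is a standalone toy check with
made-up numbers (not lima code):

#include <assert.h>

int main(void)
{
   /* Made-up instruction state. */
   int alu_num_slot_free = 4;
   int alu_num_slot_needed_by_store = 2;
   int alu_num_slot_needed_by_max = 1;
   int alu_num_slot_needed_by_next_max = 2;

   /* Invariant (1): the free ALU slots must cover every reserved move. */
   int slot_difference = alu_num_slot_needed_by_store +
                         alu_num_slot_needed_by_max +
                         alu_num_slot_needed_by_next_max -
                         alu_num_slot_free;

   /* 2 + 1 + 2 = 5 > 4: we are short by one slot, so an insertion that
    * doesn't reduce any of the "needed" counts must be rejected, with
    * slot_difference = 1 telling the scheduler how many ALU slots to free
    * up before retrying. */
   assert(slot_difference == 1);
   return 0;
}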


@@ -36,6 +36,7 @@ gpir_instr *gpir_instr_create(gpir_block *block)
    instr->index = block->sched.instr_index++;
    instr->alu_num_slot_free = 6;
+   instr->alu_non_cplx_slot_free = 5;
 
    list_add(&instr->list, &block->instr_list);
    return instr;
@@ -85,6 +86,11 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
       return false;
 
    int consume_slot = gpir_instr_get_consume_slot(instr, node);
+   int non_cplx_consume_slot =
+      node->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 0 : consume_slot;
+   int store_reduce_slot = 0;
+   int max_reduce_slot = node->sched.max_node ? 1 : 0;
+   int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
 
    /* check if this node is child of one store node.
     * complex1 won't be any of this instr's store node's child,
@@ -93,25 +99,40 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
    for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {
       gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
       if (s && s->child == node) {
-         /* acc node may consume 2 slots, so even it's the child of a
-          * store node, it may not be inserted successfully, in which
-          * case we need a move node for it */
-         if (instr->alu_num_slot_free - consume_slot <
-             instr->alu_num_slot_needed_by_store - 1)
-            return false;
-
-         instr->alu_num_slot_needed_by_store--;
-         instr->alu_num_slot_free -= consume_slot;
-         return true;
+         store_reduce_slot = 1;
+         break;
       }
    }
 
-   /* not a child of any store node, so must reserve alu slot for store node */
-   if (instr->alu_num_slot_free - consume_slot <
-       instr->alu_num_slot_needed_by_store)
+   /* Check that the invariants will be maintained after we adjust everything
+    */
+
+   int slot_difference =
+      instr->alu_num_slot_needed_by_store - store_reduce_slot +
+      instr->alu_num_slot_needed_by_max - max_reduce_slot +
+      MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
+      (instr->alu_num_slot_free - consume_slot);
+   if (slot_difference > 0) {
+      gpir_debug("failed %d because of alu slot\n", node->index);
+      instr->slot_difference = slot_difference;
+   }
+
+   int non_cplx_slot_difference =
+      instr->alu_num_slot_needed_by_max - max_reduce_slot -
+      (instr->alu_non_cplx_slot_free - non_cplx_consume_slot);
+   if (non_cplx_slot_difference > 0) {
+      gpir_debug("failed %d because of alu slot\n", node->index);
+      instr->non_cplx_slot_difference = non_cplx_slot_difference;
+   }
+
+   if (slot_difference > 0 || non_cplx_slot_difference > 0)
       return false;
 
    instr->alu_num_slot_free -= consume_slot;
+   instr->alu_non_cplx_slot_free -= non_cplx_consume_slot;
+   instr->alu_num_slot_needed_by_store -= store_reduce_slot;
+   instr->alu_num_slot_needed_by_max -= max_reduce_slot;
+   instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
+
    return true;
 }
@@ -123,12 +144,17 @@ static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
       gpir_store_node *s = gpir_node_to_store(instr->slots[i]);
       if (s && s->child == node) {
          instr->alu_num_slot_needed_by_store++;
-         instr->alu_num_slot_free += consume_slot;
-         return;
+         break;
       }
    }
 
    instr->alu_num_slot_free += consume_slot;
+   if (node->sched.pos != GPIR_INSTR_SLOT_COMPLEX)
+      instr->alu_non_cplx_slot_free += consume_slot;
+   if (node->sched.max_node)
+      instr->alu_num_slot_needed_by_max++;
+   if (node->sched.next_max_node)
+      instr->alu_num_slot_needed_by_next_max++;
 }
 
 static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
@@ -269,12 +295,18 @@ static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
       goto out;
    }
 
-   /* no store node has the same child as this node, and child is not
-    * already in this instr's alu slot, so instr must have some free
-    * alu slot to insert this node's child
+   /* Check the invariants documented in gpir.h, similar to the ALU case.
+    * Since the only thing that changes is alu_num_slot_needed_by_store, we
+    * can get away with just checking the first one.
     */
-   if (gpir_instr_alu_slot_is_full(instr))
+   int slot_difference = instr->alu_num_slot_needed_by_store + 1
+      + instr->alu_num_slot_needed_by_max +
+      MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
+      instr->alu_num_slot_free;
+   if (slot_difference > 0) {
+      instr->slot_difference = slot_difference;
       return false;
+   }
 
    instr->alu_num_slot_needed_by_store++;
@@ -299,6 +331,9 @@ static void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node)
    int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1);
    for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {
+      if (j == node->sched.pos)
+         continue;
+
       gpir_store_node *s = gpir_node_to_store(instr->slots[j]);
       if (s && s->child == store->child)
          goto out;
@@ -369,6 +404,9 @@ static bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node)
 bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node)
 {
+   instr->slot_difference = 0;
+   instr->non_cplx_slot_difference = 0;
+
    if (!gpir_instr_slot_free(instr, node))
       return false;
@@ -413,7 +451,7 @@ void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
    /* This can happen if we merge duplicate loads in the scheduler. */
    if (instr->slots[node->sched.pos] != node) {
       node->sched.pos = -1;
-      node->sched.instr = -1;
+      node->sched.instr = NULL;
       return;
    }
@@ -439,7 +477,7 @@ void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)
    instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL;
 
    node->sched.pos = -1;
-   node->sched.instr = -1;
+   node->sched.instr = NULL;
 }
 
 void gpir_instr_print_prog(gpir_compiler *comp)
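
The complex-slot accounting above can be exercised in isolation (a
self-contained toy mirroring the bookkeeping, not the driver code;
store/max handling omitted):

#include <assert.h>
#include <stdbool.h>

/* Toy version of the slot bookkeeping in gpir_instr_insert_alu_check()
 * (field names from gpir.h above). */
typedef struct {
   int alu_num_slot_free;      /* starts at 6: mul0/1, add0/1, complex, pass */
   int alu_non_cplx_slot_free; /* starts at 5: everything but complex */
} toy_instr;

static bool toy_insert(toy_instr *instr, bool complex_slot, int consume_slot)
{
   int non_cplx_consume = complex_slot ? 0 : consume_slot;
   if (instr->alu_num_slot_free < consume_slot ||
       instr->alu_non_cplx_slot_free < non_cplx_consume)
      return false;
   instr->alu_num_slot_free -= consume_slot;
   instr->alu_non_cplx_slot_free -= non_cplx_consume;
   return true;
}

int main(void)
{
   toy_instr instr = { .alu_num_slot_free = 6, .alu_non_cplx_slot_free = 5 };

   /* A node in the complex slot consumes an ALU slot overall but leaves
    * the non-complex count untouched, since complex can't host the moves
    * required by case (1) above. */
   assert(toy_insert(&instr, true, 1));
   assert(instr.alu_num_slot_free == 5 && instr.alu_non_cplx_slot_free == 5);

   /* An acc node in a normal slot may consume two slots of each count. */
   assert(toy_insert(&instr, false, 2));
   assert(instr.alu_num_slot_free == 3 && instr.alu_non_cplx_slot_free == 3);
   return 0;
}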


@@ -422,10 +422,7 @@ bool gpir_compile_nir(struct lima_vs_shader_state *prog, struct nir_shader *nir)
    if (!gpir_post_rsched_lower_prog(comp))
       goto err_out0;
 
-   if (!gpir_value_regalloc_prog(comp))
-      goto err_out0;
-
-   if (!gpir_physical_regalloc_prog(comp))
+   if (!gpir_regalloc_prog(comp))
       goto err_out0;
 
    if (!gpir_schedule_prog(comp))


@@ -436,8 +436,6 @@ static void gpir_node_print_node(gpir_node *node, int type, int space)
[GPIR_DEP_OFFSET] = "offset", [GPIR_DEP_OFFSET] = "offset",
[GPIR_DEP_READ_AFTER_WRITE] = "RaW", [GPIR_DEP_READ_AFTER_WRITE] = "RaW",
[GPIR_DEP_WRITE_AFTER_READ] = "WaR", [GPIR_DEP_WRITE_AFTER_READ] = "WaR",
[GPIR_DEP_VREG_READ_AFTER_WRITE] = "vRaW",
[GPIR_DEP_VREG_WRITE_AFTER_READ] = "vWaR",
}; };
for (int i = 0; i < space; i++) for (int i = 0; i < space; i++)


@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2017 Lima Project
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sub license,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <limits.h>
-
-#include "gpir.h"
-
-/* Linear scan register alloc for physical reg alloc of each
- * load/store node
- */
-
-static void regalloc_print_result(gpir_compiler *comp)
-{
-   if (!(lima_debug & LIMA_DEBUG_GP))
-      return;
-
-   int index = 0;
-   printf("======== physical regalloc ========\n");
-   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
-      list_for_each_entry(gpir_node, node, &block->node_list, list) {
-         if (node->op == gpir_op_load_reg) {
-            gpir_load_node *load = gpir_node_to_load(node);
-            printf("%03d: load %d use reg %d\n", index, node->index, load->reg->index);
-         }
-         else if (node->op == gpir_op_store_reg) {
-            gpir_store_node *store = gpir_node_to_store(node);
-            printf("%03d: store %d use reg %d\n", index, node->index, store->reg->index);
-         }
-         index++;
-      }
-      printf("----------------------------\n");
-   }
-}
-
-bool gpir_physical_regalloc_prog(gpir_compiler *comp)
-{
-   int index = 0;
-   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
-      list_for_each_entry(gpir_node, node, &block->node_list, list) {
-         node->preg.index = index++;
-      }
-   }
-
-   /* calculate each reg liveness interval */
-   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
-      reg->start = INT_MAX;
-      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
-         if (store->node.preg.index < reg->start)
-            reg->start = store->node.preg.index;
-      }
-
-      reg->end = 0;
-      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
-         if (load->node.preg.index > reg->end)
-            reg->end = load->node.preg.index;
-      }
-   }
-
-   /* sort reg list by start value */
-   struct list_head reg_list;
-   list_replace(&comp->reg_list, &reg_list);
-   list_inithead(&comp->reg_list);
-   list_for_each_entry_safe(gpir_reg, reg, &reg_list, list) {
-      struct list_head *insert_pos = &comp->reg_list;
-      list_for_each_entry(gpir_reg, creg, &comp->reg_list, list) {
-         if (creg->start > reg->start) {
-            insert_pos = &creg->list;
-            break;
-         }
-      }
-      list_del(&reg->list);
-      list_addtail(&reg->list, insert_pos);
-   }
-
-   /* do linear scan reg alloc */
-   gpir_reg *active[GPIR_PHYSICAL_REG_NUM] = {0};
-   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
-      int i;
-
-      /* if some reg is expired */
-      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
-         if (active[i] && active[i]->end <= reg->start)
-            active[i] = NULL;
-      }
-
-      /* find a free reg value for this reg */
-      for (i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
-         if (!active[i]) {
-            active[i] = reg;
-            reg->index = i;
-            break;
-         }
-      }
-
-      /* TODO: support spill to temp memory */
-      assert(i < GPIR_PHYSICAL_REG_NUM);
-   }
-
-   /* update load/store node info for the real reg */
-   list_for_each_entry(gpir_reg, reg, &comp->reg_list, list) {
-      list_for_each_entry(gpir_store_node, store, &reg->defs_list, reg_link) {
-         store->index = reg->index >> 2;
-         store->component = reg->index % 4;
-      }
-      list_for_each_entry(gpir_load_node, load, &reg->uses_list, reg_link) {
-         load->index = reg->index >> 2;
-         load->component = reg->index % 4;
-      }
-   }
-
-   regalloc_print_result(comp);
-   return true;
-}


@@ -24,60 +24,17 @@
#include "gpir.h" #include "gpir.h"
/* Linear scan register alloc for value reg alloc of each node */ /* Register allocation
*
static int regalloc_spill_active_node(gpir_node *active[]) * TODO: This needs to be rewritten when we support multiple basic blocks. We
{ * need to do proper liveness analysis, combined with either linear scan,
gpir_node *spill = NULL; * graph coloring, or SSA-based allocation. We should also support spilling to
for (int i = 0; i < GPIR_VALUE_REG_NUM; i++) { * temporaries.
if (gpir_op_infos[active[i]->op].spillless) *
continue; * For now, this only assigns fake registers to values, used to build the fake
* dependencies that the scheduler relies on. In the future we should also be
/* spill farest node */ * assigning actual physreg numbers to load_reg/store_reg nodes.
if (!spill || */
spill->vreg.last->vreg.index < active[i]->vreg.last->vreg.index) {
spill = active[i];
}
}
assert(spill);
gpir_debug("value regalloc spill node %d for value reg %d\n",
spill->index, spill->value_reg);
/* create store node for spilled node */
gpir_store_node *store = gpir_node_create(spill->block, gpir_op_store_reg);
store->child = spill;
/* no need to calculate other vreg values because store & spill won't
* be used in the following schedule again */
store->node.value_reg = spill->value_reg;
list_addtail(&store->node.list, &spill->list);
gpir_reg *reg = gpir_create_reg(spill->block->comp);
store->reg = reg;
list_addtail(&store->reg_link, &reg->defs_list);
gpir_node_foreach_succ_safe(spill, dep) {
gpir_node *succ = dep->succ;
gpir_load_node *load = gpir_node_create(succ->block, gpir_op_load_reg);
gpir_node_replace_pred(dep, &load->node);
gpir_node_replace_child(succ, spill, &load->node);
list_addtail(&load->node.list, &succ->list);
/* only valid for succ already scheduled, succ not scheduled will
* re-write this value */
load->node.value_reg = spill->value_reg;
load->node.vreg.index =
(list_first_entry(&load->node.list, gpir_node, list)->vreg.index +
list_last_entry(&load->node.list, gpir_node, list)->vreg.index) / 2.0f;
load->node.vreg.last = succ;
load->reg = reg;
list_addtail(&load->reg_link, &reg->uses_list);
}
gpir_node_add_dep(&store->node, spill, GPIR_DEP_INPUT);
return spill->value_reg;
}
static void regalloc_block(gpir_block *block) static void regalloc_block(gpir_block *block)
{ {
@@ -99,7 +56,7 @@ static void regalloc_block(gpir_block *block)
    /* do linear scan regalloc */
    int reg_search_start = 0;
-   gpir_node *active[GPIR_VALUE_REG_NUM] = {0};
+   gpir_node *active[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM] = {0};
 
    list_for_each_entry(gpir_node, node, &block->node_list, list) {
       /* if some reg is expired */
       gpir_node_foreach_pred(node, dep) {
@@ -116,9 +73,9 @@ static void regalloc_block(gpir_block *block)
       /* find a free reg for this node */
       int i;
-      for (i = 0; i < GPIR_VALUE_REG_NUM; i++) {
+      for (i = 0; i < GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM; i++) {
          /* round robin reg select to reduce false dep when schedule */
-         int reg = (reg_search_start + i) % GPIR_VALUE_REG_NUM;
+         int reg = (reg_search_start + i) % (GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
          if (!active[reg]) {
             active[reg] = node;
             node->value_reg = reg;
@@ -127,14 +84,8 @@ static void regalloc_block(gpir_block *block)
          }
       }
 
-      /* need spill */
-      if (i == GPIR_VALUE_REG_NUM) {
-         int spilled_reg = regalloc_spill_active_node(active);
-         active[spilled_reg] = node;
-         node->value_reg = spilled_reg;
-         gpir_debug("value regalloc node %d reuse reg %d\n",
-                    node->index, spilled_reg);
-      }
+      /* TODO: spill */
+      assert(i != GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
    }
 }
@@ -144,7 +95,7 @@ static void regalloc_print_result(gpir_compiler *comp)
       return;
 
    int index = 0;
-   printf("======== value regalloc ========\n");
+   printf("======== regalloc ========\n");
    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
       list_for_each_entry(gpir_node, node, &block->node_list, list) {
          printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
@@ -159,7 +110,7 @@ static void regalloc_print_result(gpir_compiler *comp)
    }
 }
 
-bool gpir_value_regalloc_prog(gpir_compiler *comp)
+bool gpir_regalloc_prog(gpir_compiler *comp)
 {
    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
       regalloc_block(block);
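
The round-robin register search above exists to reduce false dependencies:
a register that just expired is not immediately reused, so the scheduler
sees fewer artificial write-after-read edges. A standalone toy of the same
idea (made-up size; the real code scans GPIR_VALUE_REG_NUM +
GPIR_PHYSICAL_REG_NUM entries and presumably advances reg_search_start the
same way):

#include <assert.h>

#define NUM_REGS 4 /* stand-in for GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM */

static int search_start = 0;

/* Prefer the register after the last one handed out, so a short-lived
 * value doesn't immediately reuse a register. */
static int alloc_reg(const void *active[NUM_REGS])
{
   for (int i = 0; i < NUM_REGS; i++) {
      int reg = (search_start + i) % NUM_REGS;
      if (!active[reg]) {
         search_start = reg + 1;
         return reg;
      }
   }
   return -1; /* out of registers: would need to spill */
}

int main(void)
{
   const void *active[NUM_REGS] = {0};
   int a = alloc_reg(active);  /* reg 0 */
   active[a] = &a;
   active[a] = 0;              /* value expires immediately... */
   int b = alloc_reg(active);  /* ...but reg 1 is chosen, not reg 0 */
   assert(a == 0 && b == 1);
   return 0;
}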

File diff suppressed because it is too large


@@ -29,8 +29,7 @@ files_lima = files(
   'ir/gp/codegen.h',
   'ir/gp/codegen.c',
   'ir/gp/reduce_scheduler.c',
-  'ir/gp/value_regalloc.c',
-  'ir/gp/physical_regalloc.c',
+  'ir/gp/regalloc.c',
   'ir/gp/disasm.c',
   'ir/pp/ppir.h',