lima/gp: Support exp2 and log2

log2 is tricky because there cannot be a move between complex1 and
postlog2. We can't guarantee that scheduling complex1 will succeed when
we schedule postlog2, so we try to schedule complex1 and if it fails we
back out by rewriting the postlog2 as a move and introducing a new
postlog2 so that we can try again later.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Acked-by: Qiang Yu <yuq825@gmail.com>
This commit is contained in:
Connor Abbott
2019-04-21 21:46:46 +02:00
parent c2f48d8f32
commit 11a49f289d
5 changed files with 147 additions and 30 deletions

View File

@@ -376,6 +376,8 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
case gpir_op_mov:
case gpir_op_rcp_impl:
case gpir_op_rsqrt_impl:
case gpir_op_exp2_impl:
case gpir_op_log2_impl:
{
gpir_alu_node *alu = gpir_node_to_alu(node);
code->complex_src = gpir_get_alu_input(node, alu->children[0]);
@@ -395,6 +397,12 @@ static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *inst
case gpir_op_rsqrt_impl:
code->complex_op = gpir_codegen_complex_op_rsqrt;
break;
case gpir_op_exp2_impl:
code->complex_op = gpir_codegen_complex_op_exp2;
break;
case gpir_op_log2_impl:
code->complex_op = gpir_codegen_complex_op_log2;
break;
default:
assert(0);
}
@@ -410,14 +418,19 @@ static void gpir_codegen_pass_slot(gpir_codegen_instr *code, gpir_instr *instr)
return;
}
gpir_alu_node *alu = gpir_node_to_alu(node);
code->pass_src = gpir_get_alu_input(node, alu->children[0]);
switch (node->op) {
case gpir_op_mov:
{
gpir_alu_node *alu = gpir_node_to_alu(node);
code->pass_src = gpir_get_alu_input(node, alu->children[0]);
code->pass_op = gpir_codegen_pass_op_pass;
break;
}
case gpir_op_preexp2:
code->pass_op = gpir_codegen_pass_op_preexp2;
break;
case gpir_op_postlog2:
code->pass_op = gpir_codegen_pass_op_postlog2;
break;
default:
assert(0);
}

View File

@@ -177,6 +177,19 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
gpir_alu_node *alu = gpir_node_to_alu(node);
gpir_node *child = alu->children[0];
if (node->op == gpir_op_exp2) {
gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);
if (unlikely(!preexp2))
return false;
preexp2->children[0] = child;
preexp2->num_child = 1;
gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);
list_addtail(&preexp2->node.list, &node->list);
child = &preexp2->node;
}
gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);
if (unlikely(!complex2))
return false;
@@ -194,6 +207,12 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
case gpir_op_rsqrt:
impl_op = gpir_op_rsqrt_impl;
break;
case gpir_op_exp2:
impl_op = gpir_op_exp2_impl;
break;
case gpir_op_log2:
impl_op = gpir_op_log2_impl;
break;
default:
assert(0);
}
@@ -207,14 +226,33 @@ static bool gpir_lower_complex(gpir_block *block, gpir_node *node)
gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);
list_addtail(&impl->node.list, &node->list);
/* change node to complex1 node */
node->op = gpir_op_complex1;
alu->children[0] = &impl->node;
alu->children[1] = &complex2->node;
alu->children[2] = child;
alu->num_child = 3;
gpir_node_add_dep(node, &impl->node, GPIR_DEP_INPUT);
gpir_node_add_dep(node, &complex2->node, GPIR_DEP_INPUT);
gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);
complex1->children[0] = &impl->node;
complex1->children[1] = &complex2->node;
complex1->children[2] = child;
complex1->num_child = 3;
gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);
gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);
gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);
list_addtail(&complex1->node.list, &node->list);
gpir_node *result = &complex1->node;
if (node->op == gpir_op_log2) {
gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);
if (unlikely(!postlog2))
return false;
postlog2->children[0] = result;
postlog2->num_child = 1;
gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);
list_addtail(&postlog2->node.list, &node->list);
result = &postlog2->node;
}
gpir_node_replace_succ(result, node);
gpir_node_delete(node);
return true;
}
@@ -384,6 +422,8 @@ static bool (*gpir_post_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node
[gpir_op_neg] = gpir_lower_neg,
[gpir_op_rcp] = gpir_lower_complex,
[gpir_op_rsqrt] = gpir_lower_complex,
[gpir_op_exp2] = gpir_lower_complex,
[gpir_op_log2] = gpir_lower_complex,
[gpir_op_eq] = gpir_lower_eq_ne,
[gpir_op_ne] = gpir_lower_eq_ne,
[gpir_op_abs] = gpir_lower_abs,

View File

@@ -118,6 +118,8 @@ static int nir_to_gpir_opcodes[nir_num_opcodes] = {
[nir_op_fmax] = gpir_op_max,
[nir_op_frcp] = gpir_op_rcp,
[nir_op_frsq] = gpir_op_rsqrt,
[nir_op_fexp2] = gpir_op_exp2,
[nir_op_flog2] = gpir_op_log2,
[nir_op_slt] = gpir_op_lt,
[nir_op_sge] = gpir_op_ge,
[nir_op_fcsel] = gpir_op_select,

View File

@@ -141,15 +141,25 @@ const gpir_op_info gpir_op_infos[] = {
},
[gpir_op_preexp2] = {
.name = "preexp2",
.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_postlog2] = {
.name = "postlog2",
.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },
},
[gpir_op_exp2_impl] = {
.name = "exp2_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_log2_impl] = {
.name = "log2_impl",
.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
.spillless = true,
.schedule_first = true,
},
[gpir_op_rcp_impl] = {
.name = "rcp_impl",

View File

@@ -627,23 +627,26 @@ static bool schedule_try_place_node(sched_ctx *ctx, gpir_node *node,
return true;
}
static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
/* Create a new node with "node" as the child, replace all uses of "node" with
* this new node, and replace "node" with it in the ready list.
*/
static gpir_node *create_replacement(sched_ctx *ctx, gpir_node *node,
gpir_op op)
{
gpir_alu_node *move = gpir_node_create(node->block, gpir_op_mov);
if (unlikely(!move))
gpir_alu_node *new_node = gpir_node_create(node->block, op);
if (unlikely(!new_node))
return NULL;
move->children[0] = node;
move->num_child = 1;
new_node->children[0] = node;
new_node->num_child = 1;
move->node.sched.instr = NULL;
move->node.sched.pos = -1;
move->node.sched.dist = node->sched.dist;
move->node.sched.max_node = node->sched.max_node;
move->node.sched.next_max_node = node->sched.next_max_node;
move->node.sched.complex_allowed = node->sched.complex_allowed;
gpir_debug("create move %d for %d\n", move->node.index, node->index);
new_node->node.sched.instr = NULL;
new_node->node.sched.pos = -1;
new_node->node.sched.dist = node->sched.dist;
new_node->node.sched.max_node = node->sched.max_node;
new_node->node.sched.next_max_node = node->sched.next_max_node;
new_node->node.sched.complex_allowed = node->sched.complex_allowed;
ctx->ready_list_slots--;
list_del(&node->list);
@@ -651,12 +654,26 @@ static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
node->sched.next_max_node = false;
node->sched.ready = false;
node->sched.inserted = false;
gpir_node_replace_succ(&move->node, node);
gpir_node_add_dep(&move->node, node, GPIR_DEP_INPUT);
schedule_insert_ready_list(ctx, &move->node);
return &move->node;
gpir_node_replace_succ(&new_node->node, node);
gpir_node_add_dep(&new_node->node, node, GPIR_DEP_INPUT);
schedule_insert_ready_list(ctx, &new_node->node);
return &new_node->node;
}
/* Insert a mov between "node" and all of its current users.
 *
 * Thin wrapper around create_replacement() with gpir_op_mov that also logs
 * the newly created node.
 *
 * Returns the new mov node, or NULL if create_replacement() could not
 * allocate it.
 */
static gpir_node *create_move(sched_ctx *ctx, gpir_node *node)
{
   gpir_node *move = create_replacement(ctx, node, gpir_op_mov);

   /* create_replacement() returns NULL when node creation fails; don't
    * dereference the result for the debug message in that case.
    */
   if (move)
      gpir_debug("create move %d for %d\n", move->index, node->index);

   return move;
}
/* Insert a postlog2 between "node" and all of its current users.
 *
 * "node" must be a complex1 node (asserted). Thin wrapper around
 * create_replacement() with gpir_op_postlog2 that also logs the newly
 * created node.
 *
 * Returns the new postlog2 node, or NULL if create_replacement() could not
 * allocate it.
 */
static gpir_node *create_postlog2(sched_ctx *ctx, gpir_node *node)
{
   assert(node->op == gpir_op_complex1);

   gpir_node *postlog2 = create_replacement(ctx, node, gpir_op_postlog2);

   /* create_replacement() returns NULL when node creation fails; don't
    * dereference the result for the debug message in that case.
    */
   if (postlog2)
      gpir_debug("create postlog2 %d for %d\n", postlog2->index, node->index);

   return postlog2;
}
/* Once we schedule the successor, would the predecessor be fully ready? */
static bool pred_almost_ready(gpir_dep *dep)
@@ -936,7 +953,22 @@ static bool used_by_store(gpir_node *node, gpir_instr *instr)
return false;
}
/* If "node" is a complex1 whose first input-type successor is a postlog2,
 * return that postlog2. Return NULL otherwise: "node" is not a complex1, it
 * has no input-type successor, or the first such successor is some other op.
 *
 * Only the first GPIR_DEP_INPUT successor is inspected; non-input
 * dependencies are skipped.
 */
static gpir_node *consuming_postlog2(gpir_node *node)
{
   if (node->op == gpir_op_complex1) {
      gpir_node_foreach_succ(node, dep) {
         if (dep->type == GPIR_DEP_INPUT) {
            /* The first input consumer decides the answer. */
            gpir_node *user = dep->succ;
            return user->op == gpir_op_postlog2 ? user : NULL;
         }
      }
   }

   return NULL;
}
static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
{
@@ -961,6 +993,16 @@ static bool try_spill_node(sched_ctx *ctx, gpir_node *node)
if (available == 0)
return false;
/* Don't spill complex1 if it's used by postlog2, turn the postlog2 into a
* move, replace the complex1 with postlog2 and spill that instead. The
* store needs a move anyways so the postlog2 is usually free.
*/
gpir_node *postlog2 = consuming_postlog2(node);
if (postlog2) {
postlog2->op = gpir_op_mov;
node = create_postlog2(ctx, node);
}
/* TODO: use a better heuristic for choosing an available register? */
int physreg = ffsll(available) - 1;
@@ -1305,7 +1347,17 @@ static bool sched_move(sched_ctx *ctx)
{
list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
if (node->sched.max_node) {
place_move(ctx, node);
/* For complex1 that is consumed by a postlog2, we cannot allow any
* moves in between. Convert the postlog2 to a move and insert a new
* postlog2, and try to schedule it again in try_node().
*/
gpir_node *postlog2 = consuming_postlog2(node);
if (postlog2) {
postlog2->op = gpir_op_mov;
create_postlog2(ctx, node);
} else {
place_move(ctx, node);
}
return true;
}
}