lima/ppir: implement discard and discard_if

This commit also adds codegen for branch since we need it
for discard_if.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
This commit is contained in:
Vasily Khoruzhick
2019-05-10 19:17:40 -07:00
parent 7a7be61398
commit af0de6b91c
7 changed files with 253 additions and 10 deletions

View File

@@ -507,6 +507,42 @@ static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
code[i] = util_float_to_half(constant->value[i].f);
}
/* Encode an unconditional discard into the branch field of an instruction.
 *
 * node: node placed in the branch slot; expected to have op ppir_op_discard.
 * code: output buffer, interpreted as a ppir_codegen_field_branch; its three
 *       discard words are filled with the fixed PPIR_CODEGEN_DISCARD_WORD*
 *       values.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;

   /* This must be a comparison: an assignment here would overwrite
    * node->op whenever assertions are enabled instead of checking it. */
   assert(node->op == ppir_op_discard);

   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
/* Encode the node occupying the branch slot of an instruction.
 *
 * A ppir_op_discard node is forwarded to ppir_codegen_encode_discard();
 * anything else is expected to be a ppir_op_branch node and is encoded as
 * a conditional branch comparing src[0] against src[1].
 *
 * node: node placed in PPIR_INSTR_SLOT_BRANCH.
 * code: output buffer, interpreted as a ppir_codegen_field_branch.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch;
   ppir_instr *target_instr;

   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   /* This must be a comparison: an assignment here would silently turn any
    * node into a branch when assertions are enabled and never fail. */
   assert(node->op == ppir_op_branch);
   branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]);
   b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]);
   b->branch.cond_gt = branch->cond_gt;
   b->branch.cond_eq = branch->cond_eq;
   b->branch.cond_lt = branch->cond_lt;
   b->branch.unknown_1 = 0x0;
   b->branch.unknown_2 = 0x3;

   /* The target is stored relative to this instruction: offset of the
    * first instruction of the target block minus our own offset. */
   target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
   b->branch.target = target_instr->offset - node->instr->offset;
}
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
static const ppir_codegen_instr_slot_encode_func
@@ -520,6 +556,7 @@ ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
[PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
[PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
[PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
static const int ppir_codegen_field_size[] = {
@@ -634,7 +671,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp)
printf("========ppir codegen========\n");
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
printf("%03d: ", instr->index);
printf("%03d (@%6ld): ", instr->index, instr->offset);
int n = prog[0] & 0x1f;
for (int i = 0; i < n; i++) {
if (i && i % 6 == 0)
@@ -655,6 +692,7 @@ bool ppir_codegen_prog(ppir_compiler *comp)
int size = 0;
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
instr->offset = size;
size += get_instr_encode_size(instr);
}
}

View File

@@ -135,6 +135,20 @@ static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src,
return true;
}
/* Redirect a single source operand to a pipeline register.
 *
 * If src refers to the same target as dest (per ppir_node_target_equal),
 * src is retyped to ppir_target_pipeline and bound to the given pipeline.
 * When a swizzle table is supplied, each of the four swizzle components of
 * src is additionally remapped through it. A non-matching src is left
 * untouched.
 */
static void ppir_update_src_pipeline(ppir_pipeline pipeline, ppir_src *src,
                                     ppir_dest *dest, uint8_t *swizzle)
{
   if (!ppir_node_target_equal(src, dest))
      return;

   src->type = ppir_target_pipeline;
   src->pipeline = pipeline;

   if (!swizzle)
      return;

   for (int comp = 0; comp < 4; comp++)
      src->swizzle[comp] = swizzle[src->swizzle[comp]];
}
/* make alu node src reflect the pipeline reg */
static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline,
ppir_dest *dest, uint8_t *swizzle)
@@ -146,15 +160,16 @@ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipe
ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]);
for (int j = 0; j < alu->num_src; j++) {
ppir_src *src = alu->src + j;
if (ppir_node_target_equal(src, dest)) {
src->type = ppir_target_pipeline;
src->pipeline = pipeline;
ppir_update_src_pipeline(pipeline, src, dest, swizzle);
}
}
if (swizzle) {
for (int k = 0; k < 4; k++)
src->swizzle[k] = swizzle[src->swizzle[k]];
}
}
ppir_node *branch_node = instr->slots[PPIR_INSTR_SLOT_BRANCH];
if (branch_node && (branch_node->type == ppir_node_type_branch)) {
ppir_branch_node *branch = ppir_node_to_branch(branch_node);
for (int j = 0; j < 2; j++) {
ppir_src *src = branch->src + j;
ppir_update_src_pipeline(pipeline, src, dest, swizzle);
}
}
}
@@ -234,6 +249,7 @@ static struct {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" },
[PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" },
[PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" },
[PPIR_INSTR_SLOT_BRANCH] = { 4, "brch" },
};
void ppir_instr_print_list(ppir_compiler *comp)

View File

@@ -400,6 +400,40 @@ static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
return true;
}
/* Lower a branch node by giving it a constant zero as its second operand.
 *
 * A new single-component SSA constant holding 0 is created in block and
 * linked into the node list at the branch's own list entry. The branch is
 * then set up to compare its first source against that constant with both
 * the "greater than" and "less than" conditions enabled.
 *
 * Returns false if the constant node cannot be created, true otherwise.
 */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
   if (!zero)
      return false;

   ppir_branch_node *branch = ppir_node_to_branch(node);

   /* Constant payload: a single float 0. */
   zero->constant.num = 1;
   zero->constant.value[0].f = 0;

   /* Destination: one-component SSA value writing only component 0. */
   zero->dest.type = ppir_target_ssa;
   zero->dest.write_mask = 0x01;
   zero->dest.ssa.num_components = 1;
   zero->dest.ssa.live_in = INT_MAX;
   zero->dest.ssa.live_out = 0;

   list_addtail(&zero->node.list, &node->list);

   /* For now the branch condition is simply compared with 0. In the
    * future we should check whether the comparison node can be folded
    * into the branch itself, keeping the current approach as a fallback
    * for complex conditions.
    */
   branch->src[1].type = ppir_target_ssa;
   branch->src[1].ssa = &zero->dest.ssa;
   branch->cond_gt = true;
   branch->cond_lt = true;

   ppir_node_add_dep(&branch->node, &zero->node);

   return true;
}
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_const] = ppir_lower_const,
[ppir_op_dot2] = ppir_lower_dot,
@@ -417,6 +451,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_load_texture] = ppir_lower_texture,
[ppir_op_select] = ppir_lower_select,
[ppir_op_trunc] = ppir_lower_trunc,
[ppir_op_branch] = ppir_lower_branch,
};
bool ppir_lower_prog(ppir_compiler *comp)

View File

@@ -204,6 +204,57 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
return &node->node;
}
static ppir_block *ppir_block_create(ppir_compiler *comp);
/* Create the compiler's shared discard block.
 *
 * Allocates a new block, records it in comp->discard_block, and puts a
 * single ppir_op_discard node on its node list.
 *
 * Returns false if either the block or the discard node cannot be created;
 * in the latter case comp->discard_block has already been set.
 */
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *blk = ppir_block_create(comp);
   if (!blk)
      return false;

   comp->discard_block = blk;
   blk->comp = comp;

   ppir_discard_node *discard_node =
      ppir_node_create(blk, ppir_op_discard, -1, 0);
   if (!discard_node)
      return false;

   list_addtail(&discard_node->node.list, &blk->node_list);
   return true;
}
/* Emit a NIR discard_if intrinsic as a branch to the discard block.
 *
 * The discard block is created on first use. The resulting branch node
 * takes the intrinsic's first source as src[0] and targets
 * comp->discard_block; its second source and condition flags are left to
 * be filled in during lowering.
 *
 * Returns the new node, or NULL if the discard block or the branch node
 * cannot be created.
 */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(ni);
   ppir_compiler *comp = block->comp;

   if (!comp->discard_block) {
      if (!ppir_emit_discard_block(comp))
         return NULL;
   }

   ppir_node *branch_node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!branch_node)
      return NULL;

   ppir_branch_node *branch = ppir_node_to_branch(branch_node);

   /* second src and condition will be updated during lowering */
   unsigned src_mask = u_bit_consecutive(0, intrin->num_components);
   ppir_node_add_src(comp, branch_node, &branch->src[0],
                     &intrin->src[0], src_mask);
   branch->target = comp->discard_block;

   return branch_node;
}
/* Emit an unconditional discard: a fresh ppir_op_discard node in block.
 * The NIR instruction itself is not inspected. Returns whatever
 * ppir_node_create() yields.
 */
static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   return ppir_node_create(block, ppir_op_discard, -1, 0);
}
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
@@ -264,6 +315,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
return &snode->node;
case nir_intrinsic_discard:
return ppir_emit_discard(block, ni);
case nir_intrinsic_discard_if:
return ppir_emit_discard_if(block, ni);
default:
ppir_error("unsupported nir_intrinsic_instr %s\n",
nir_intrinsic_infos[instr->intrinsic].name);
@@ -452,6 +509,46 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
return comp;
}
/* Chain discard, store and branch nodes of every block in program order. */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics have no explicit dependencies between them and rely
    * purely on instruction order. Take discard_if and store_output: without
    * a fake dependency between them the scheduler may place store_output
    * first, and since store_output terminates the shader on Utgard PP the
    * rest would never be executed. So each discard/branch/store node is
    * made dependent on the previous such node in its block.
    *
    * TODO: the scheduler should place discard_if as early as possible,
    * otherwise we may get suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * Here store depends on both discard_if and s4, but because dependencies
    * may be scheduled in any order the result can be:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *last_ordered = NULL;

      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         switch (node->type) {
         case ppir_node_type_discard:
         case ppir_node_type_store:
         case ppir_node_type_branch:
            /* The first ordered node of a block has nothing to wait for. */
            if (last_ordered)
               ppir_node_add_dep(node, last_ordered);
            last_ordered = node;
            break;
         default:
            break;
         }
      }
   }
}
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
struct ra_regs *ra)
{
@@ -477,6 +574,13 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
if (!ppir_emit_cf_list(comp, &func->body))
goto err_out0;
/* If we have discard block add it to the very end */
if (comp->discard_block)
list_addtail(&comp->discard_block->list, &comp->block_list);
ppir_add_ordering_deps(comp);
ppir_node_print_prog(comp);
if (!ppir_lower_prog(comp))

View File

@@ -281,6 +281,20 @@ const ppir_op_info ppir_op_infos[] = {
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
},
},
[ppir_op_discard] = {
.name = "discard",
.type = ppir_node_type_discard,
.slots = (int []) {
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
},
},
[ppir_op_branch] = {
.name = "branch",
.type = ppir_node_type_branch,
.slots = (int []) {
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
},
},
};
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
@@ -292,6 +306,8 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
[ppir_node_type_load] = sizeof(ppir_load_node),
[ppir_node_type_store] = sizeof(ppir_store_node),
[ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
[ppir_node_type_discard] = sizeof(ppir_discard_node),
[ppir_node_type_branch] = sizeof(ppir_branch_node),
};
ppir_node_type type = ppir_op_infos[op].type;

View File

@@ -93,7 +93,8 @@ static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
ppir_node_foreach_succ_safe(node, dep) {
ppir_node *succ = dep->succ;
assert(succ->type == ppir_node_type_alu);
assert(succ->type == ppir_node_type_alu ||
succ->type == ppir_node_type_branch);
if (!ppir_instr_insert_node(succ->instr, node)) {
/* create a move node to insert for failed node */
@@ -323,6 +324,15 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
node = move;
break;
}
case ppir_node_type_discard:
if (!create_new_instr(block, node))
return false;
node->instr->is_end = true;
break;
case ppir_node_type_branch:
if (!create_new_instr(block, node))
return false;
break;
default:
return false;
}

View File

@@ -108,6 +108,9 @@ typedef enum {
ppir_op_const,
ppir_op_discard,
ppir_op_branch,
ppir_op_num,
} ppir_op;
@@ -117,6 +120,8 @@ typedef enum {
ppir_node_type_load,
ppir_node_type_store,
ppir_node_type_load_texture,
ppir_node_type_discard,
ppir_node_type_branch,
} ppir_node_type;
typedef struct {
@@ -254,6 +259,10 @@ typedef struct {
int sampler_dim;
} ppir_load_texture_node;
/* Node for an unconditional discard (ppir_op_discard). It carries no data
 * beyond the common node header. */
typedef struct {
ppir_node node; /* common node header; kept first so ppir_node_to_discard() can cast */
} ppir_discard_node;
enum ppir_instr_slot {
PPIR_INSTR_SLOT_VARYING,
PPIR_INSTR_SLOT_TEXLD,
@@ -264,6 +273,7 @@ enum ppir_instr_slot {
PPIR_INSTR_SLOT_ALU_SCL_ADD,
PPIR_INSTR_SLOT_ALU_COMBINE,
PPIR_INSTR_SLOT_STORE_TEMP,
PPIR_INSTR_SLOT_BRANCH,
PPIR_INSTR_SLOT_NUM,
PPIR_INSTR_SLOT_END,
PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL,
@@ -287,6 +297,7 @@ typedef struct ppir_instr {
int est; /* earliest start time */
int parent_index;
bool scheduled;
off_t offset;
} ppir_instr;
typedef struct ppir_block {
@@ -300,6 +311,15 @@ typedef struct ppir_block {
int sched_instr_base;
} ppir_block;
/* Node for a conditional branch (ppir_op_branch). Codegen encodes the two
 * sources, the three condition flags, and the target's offset relative to
 * the branching instruction. */
typedef struct {
ppir_node node; /* common node header; kept first so ppir_node_to_branch() can cast */
ppir_src src[2]; /* the two operands that are compared */
bool cond_gt; /* copied into the encoded branch's cond_gt bit */
bool cond_eq; /* copied into the encoded branch's cond_eq bit */
bool cond_lt; /* copied into the encoded branch's cond_lt bit */
ppir_block *target; /* block whose first instruction is the branch destination */
} ppir_branch_node;
struct ra_regs;
struct lima_fs_shader_state;
@@ -322,6 +342,8 @@ typedef struct ppir_compiler {
/* for regalloc spilling debug */
int force_spilling;
ppir_block *discard_block;
} ppir_compiler;
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask);
@@ -377,6 +399,8 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node)
#define ppir_node_to_load(node) ((ppir_load_node *)(node))
#define ppir_node_to_store(node) ((ppir_store_node *)(node))
#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node))
#define ppir_node_to_discard(node) ((ppir_discard_node *)(node))
#define ppir_node_to_branch(node) ((ppir_branch_node *)(node))
static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
{