lima/ppir: implement discard and discard_if
This commit also adds codegen for branch since we need it for discard_if. Reviewed-by: Qiang Yu <yuq825@gmail.com> Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
This commit is contained in:
@@ -507,6 +507,42 @@ static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
|
||||
code[i] = util_float_to_half(constant->value[i].f);
|
||||
}
|
||||
|
||||
/*
 * Encode a discard node into the branch field of an instruction.
 *
 * node: node being encoded; expected to carry ppir_op_discard.
 * code: destination buffer, written as a ppir_codegen_field_branch.
 *
 * Discard takes no operands: the field is filled with three fixed words.
 */
static void ppir_codegen_encode_discard(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;

   /* Must be a comparison: '=' would overwrite node->op and the assert
    * could never fire.
    */
   assert(node->op == ppir_op_discard);

   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
}
|
||||
|
||||
/*
 * Encode the node occupying the branch slot.
 *
 * node: either a discard node (delegated to ppir_codegen_encode_discard)
 *       or a branch node.
 * code: destination buffer, written as a ppir_codegen_field_branch.
 *
 * For a branch, both source register indices and the three condition
 * flags are copied into the field, and the target is stored relative to
 * this instruction: offset of the first instruction in the target block
 * minus the offset of the instruction holding the branch.
 */
static void ppir_codegen_encode_branch(ppir_node *node, void *code)
{
   ppir_codegen_field_branch *b = code;
   ppir_branch_node *branch;
   ppir_instr *target_instr;

   if (node->op == ppir_op_discard) {
      ppir_codegen_encode_discard(node, code);
      return;
   }

   /* Must be a comparison: '=' would overwrite node->op and the assert
    * could never fire.
    */
   assert(node->op == ppir_op_branch);
   branch = ppir_node_to_branch(node);

   b->branch.unknown_0 = 0x0;
   b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]);
   b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]);
   b->branch.cond_gt = branch->cond_gt;
   b->branch.cond_eq = branch->cond_eq;
   b->branch.cond_lt = branch->cond_lt;
   b->branch.unknown_1 = 0x0;
   b->branch.unknown_2 = 0x3;

   /* NOTE(review): assumes the target block already has at least one
    * instruction with a valid offset - confirm against the caller.
    */
   target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
   b->branch.target = target_instr->offset - node->instr->offset;
}
|
||||
|
||||
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
|
||||
|
||||
static const ppir_codegen_instr_slot_encode_func
|
||||
@@ -520,6 +556,7 @@ ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
|
||||
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
|
||||
[PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
|
||||
[PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
|
||||
[PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
|
||||
};
|
||||
|
||||
static const int ppir_codegen_field_size[] = {
|
||||
@@ -634,7 +671,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp)
|
||||
printf("========ppir codegen========\n");
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
|
||||
printf("%03d: ", instr->index);
|
||||
printf("%03d (@%6ld): ", instr->index, instr->offset);
|
||||
int n = prog[0] & 0x1f;
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (i && i % 6 == 0)
|
||||
@@ -655,6 +692,7 @@ bool ppir_codegen_prog(ppir_compiler *comp)
|
||||
int size = 0;
|
||||
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
|
||||
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
|
||||
instr->offset = size;
|
||||
size += get_instr_encode_size(instr);
|
||||
}
|
||||
}
|
||||
|
@@ -135,6 +135,20 @@ static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void ppir_update_src_pipeline(ppir_pipeline pipeline, ppir_src *src,
|
||||
ppir_dest *dest, uint8_t *swizzle)
|
||||
{
|
||||
if (ppir_node_target_equal(src, dest)) {
|
||||
src->type = ppir_target_pipeline;
|
||||
src->pipeline = pipeline;
|
||||
|
||||
if (swizzle) {
|
||||
for (int k = 0; k < 4; k++)
|
||||
src->swizzle[k] = swizzle[src->swizzle[k]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* make alu node src reflect the pipeline reg */
|
||||
static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline,
|
||||
ppir_dest *dest, uint8_t *swizzle)
|
||||
@@ -146,15 +160,16 @@ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipe
|
||||
ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]);
|
||||
for (int j = 0; j < alu->num_src; j++) {
|
||||
ppir_src *src = alu->src + j;
|
||||
if (ppir_node_target_equal(src, dest)) {
|
||||
src->type = ppir_target_pipeline;
|
||||
src->pipeline = pipeline;
|
||||
ppir_update_src_pipeline(pipeline, src, dest, swizzle);
|
||||
}
|
||||
}
|
||||
|
||||
if (swizzle) {
|
||||
for (int k = 0; k < 4; k++)
|
||||
src->swizzle[k] = swizzle[src->swizzle[k]];
|
||||
}
|
||||
}
|
||||
ppir_node *branch_node = instr->slots[PPIR_INSTR_SLOT_BRANCH];
|
||||
if (branch_node && (branch_node->type == ppir_node_type_branch)) {
|
||||
ppir_branch_node *branch = ppir_node_to_branch(branch_node);
|
||||
for (int j = 0; j < 2; j++) {
|
||||
ppir_src *src = branch->src + j;
|
||||
ppir_update_src_pipeline(pipeline, src, dest, swizzle);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -234,6 +249,7 @@ static struct {
|
||||
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" },
|
||||
[PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" },
|
||||
[PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" },
|
||||
[PPIR_INSTR_SLOT_BRANCH] = { 4, "brch" },
|
||||
};
|
||||
|
||||
void ppir_instr_print_list(ppir_compiler *comp)
|
||||
|
@@ -400,6 +400,40 @@ static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Lower a branch node by giving it a constant zero as second operand.
 *
 * A single-component SSA constant holding 0 is created, linked into the
 * node list relative to the branch, wired up as src[1], and registered
 * as a dependency of the branch.
 *
 * Returns false if the constant node cannot be created, true otherwise.
 */
static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
{
   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
   if (!zero)
      return false;

   /* One-component constant 0.0 ... */
   zero->constant.num = 1;
   zero->constant.value[0].f = 0;

   /* ... written to a one-component SSA destination. */
   zero->dest.type = ppir_target_ssa;
   zero->dest.write_mask = 0x01;
   zero->dest.ssa.num_components = 1;
   zero->dest.ssa.live_in = INT_MAX;
   zero->dest.ssa.live_out = 0;

   list_addtail(&zero->node.list, &node->list);

   /* For now the branch condition is simply compared against 0. In the
    * future we should check whether the comparison node can be folded
    * into the branch itself, keeping the current approach as a fallback
    * for complex conditions.
    */
   ppir_branch_node *branch = ppir_node_to_branch(node);
   branch->src[1].type = ppir_target_ssa;
   branch->src[1].ssa = &zero->dest.ssa;

   /* Only "greater" and "less" are requested; cond_eq is left untouched
    * (presumably this amounts to src[0] != 0).
    */
   branch->cond_gt = true;
   branch->cond_lt = true;

   ppir_node_add_dep(&branch->node, &zero->node);

   return true;
}
|
||||
|
||||
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
|
||||
[ppir_op_const] = ppir_lower_const,
|
||||
[ppir_op_dot2] = ppir_lower_dot,
|
||||
@@ -417,6 +451,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
|
||||
[ppir_op_load_texture] = ppir_lower_texture,
|
||||
[ppir_op_select] = ppir_lower_select,
|
||||
[ppir_op_trunc] = ppir_lower_trunc,
|
||||
[ppir_op_branch] = ppir_lower_branch,
|
||||
};
|
||||
|
||||
bool ppir_lower_prog(ppir_compiler *comp)
|
||||
|
@@ -204,6 +204,57 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
|
||||
return &node->node;
|
||||
}
|
||||
|
||||
static ppir_block *ppir_block_create(ppir_compiler *comp);
|
||||
|
||||
/*
 * Create the compiler's discard block: a block containing one discard
 * node, recorded in comp->discard_block.
 *
 * Returns false if either the block or the node cannot be created. Note
 * that comp->discard_block is assigned before the node is created.
 */
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *blk = ppir_block_create(comp);
   if (!blk)
      return false;

   comp->discard_block = blk;
   blk->comp = comp;

   ppir_discard_node *dnode = ppir_node_create(blk, ppir_op_discard, -1, 0);
   if (!dnode)
      return false;

   list_addtail(&dnode->node.list, &blk->node_list);
   return true;
}
|
||||
|
||||
/*
 * Emit a NIR discard_if as a branch into the shared discard block.
 *
 * The discard block is created on first use. The branch takes the
 * intrinsic's first source as src[0]; its second source and condition
 * flags are filled in later, during lowering.
 *
 * Returns the new branch node, or NULL if the discard block or the node
 * cannot be created.
 */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ni);
   ppir_compiler *comp = block->comp;

   if (!comp->discard_block) {
      if (!ppir_emit_discard_block(comp))
         return NULL;
   }

   ppir_node *br = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!br)
      return NULL;

   ppir_branch_node *branch = ppir_node_to_branch(br);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(comp, br, &branch->src[0], &intr->src[0],
                     u_bit_consecutive(0, intr->num_components));
   branch->target = comp->discard_block;

   return br;
}
|
||||
|
||||
/*
 * Emit an unconditional discard: a new ppir_op_discard node in block.
 * The NIR instruction itself is not inspected. The result is whatever
 * ppir_node_create returns.
 */
static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   return ppir_node_create(block, ppir_op_discard, -1, 0);
}
|
||||
|
||||
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
||||
{
|
||||
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
|
||||
@@ -264,6 +315,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
|
||||
|
||||
return &snode->node;
|
||||
|
||||
case nir_intrinsic_discard:
|
||||
return ppir_emit_discard(block, ni);
|
||||
|
||||
case nir_intrinsic_discard_if:
|
||||
return ppir_emit_discard_if(block, ni);
|
||||
|
||||
default:
|
||||
ppir_error("unsupported nir_intrinsic_instr %s\n",
|
||||
nir_intrinsic_infos[instr->intrinsic].name);
|
||||
@@ -452,6 +509,46 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
|
||||
return comp;
|
||||
}
|
||||
|
||||
/*
 * Chain discard, store and branch nodes within each block so that each
 * one depends on the previous such node in the block.
 */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics have no explicit dependencies and therefore rely on
    * instruction order. Take discard_if and store_output: without a fake
    * dependency of store_output on discard_if, the scheduler may place
    * store_output first, and since store_output terminates the shader on
    * Utgard PP, the rest of it would never run.
    * Fake dependencies between discard/branch/store nodes preserve the
    * instruction order.
    *
    * TODO: the scheduler should place discard_if as early as possible,
    * otherwise we may get suboptimal code in cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * Here store depends on both discard_if and s4, but because
    * dependencies may be scheduled in any order the result can be:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *last_ordered = NULL;

      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         /* Only these node types take part in the ordering chain. */
         switch (node->type) {
         case ppir_node_type_discard:
         case ppir_node_type_store:
         case ppir_node_type_branch:
            break;
         default:
            continue;
         }

         if (last_ordered)
            ppir_node_add_dep(node, last_ordered);
         last_ordered = node;
      }
   }
}
|
||||
|
||||
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
|
||||
struct ra_regs *ra)
|
||||
{
|
||||
@@ -477,6 +574,13 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
|
||||
|
||||
if (!ppir_emit_cf_list(comp, &func->body))
|
||||
goto err_out0;
|
||||
|
||||
/* If we have discard block add it to the very end */
|
||||
if (comp->discard_block)
|
||||
list_addtail(&comp->discard_block->list, &comp->block_list);
|
||||
|
||||
ppir_add_ordering_deps(comp);
|
||||
|
||||
ppir_node_print_prog(comp);
|
||||
|
||||
if (!ppir_lower_prog(comp))
|
||||
|
@@ -281,6 +281,20 @@ const ppir_op_info ppir_op_infos[] = {
|
||||
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_discard] = {
|
||||
.name = "discard",
|
||||
.type = ppir_node_type_discard,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
[ppir_op_branch] = {
|
||||
.name = "branch",
|
||||
.type = ppir_node_type_branch,
|
||||
.slots = (int []) {
|
||||
PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
|
||||
@@ -292,6 +306,8 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
|
||||
[ppir_node_type_load] = sizeof(ppir_load_node),
|
||||
[ppir_node_type_store] = sizeof(ppir_store_node),
|
||||
[ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
|
||||
[ppir_node_type_discard] = sizeof(ppir_discard_node),
|
||||
[ppir_node_type_branch] = sizeof(ppir_branch_node),
|
||||
};
|
||||
|
||||
ppir_node_type type = ppir_op_infos[op].type;
|
||||
|
@@ -93,7 +93,8 @@ static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
|
||||
|
||||
ppir_node_foreach_succ_safe(node, dep) {
|
||||
ppir_node *succ = dep->succ;
|
||||
assert(succ->type == ppir_node_type_alu);
|
||||
assert(succ->type == ppir_node_type_alu ||
|
||||
succ->type == ppir_node_type_branch);
|
||||
|
||||
if (!ppir_instr_insert_node(succ->instr, node)) {
|
||||
/* create a move node to insert for failed node */
|
||||
@@ -323,6 +324,15 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
|
||||
node = move;
|
||||
break;
|
||||
}
|
||||
case ppir_node_type_discard:
|
||||
if (!create_new_instr(block, node))
|
||||
return false;
|
||||
node->instr->is_end = true;
|
||||
break;
|
||||
case ppir_node_type_branch:
|
||||
if (!create_new_instr(block, node))
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@@ -108,6 +108,9 @@ typedef enum {
|
||||
|
||||
ppir_op_const,
|
||||
|
||||
ppir_op_discard,
|
||||
ppir_op_branch,
|
||||
|
||||
ppir_op_num,
|
||||
} ppir_op;
|
||||
|
||||
@@ -117,6 +120,8 @@ typedef enum {
|
||||
ppir_node_type_load,
|
||||
ppir_node_type_store,
|
||||
ppir_node_type_load_texture,
|
||||
ppir_node_type_discard,
|
||||
ppir_node_type_branch,
|
||||
} ppir_node_type;
|
||||
|
||||
typedef struct {
|
||||
@@ -254,6 +259,10 @@ typedef struct {
|
||||
int sampler_dim;
|
||||
} ppir_load_texture_node;
|
||||
|
||||
/* Node type for ppir_op_discard. It holds nothing beyond the common node
 * header; ppir_node_to_discard() casts a ppir_node pointer straight to
 * this type.
 */
typedef struct {
|
||||
ppir_node node;
|
||||
} ppir_discard_node;
|
||||
|
||||
enum ppir_instr_slot {
|
||||
PPIR_INSTR_SLOT_VARYING,
|
||||
PPIR_INSTR_SLOT_TEXLD,
|
||||
@@ -264,6 +273,7 @@ enum ppir_instr_slot {
|
||||
PPIR_INSTR_SLOT_ALU_SCL_ADD,
|
||||
PPIR_INSTR_SLOT_ALU_COMBINE,
|
||||
PPIR_INSTR_SLOT_STORE_TEMP,
|
||||
PPIR_INSTR_SLOT_BRANCH,
|
||||
PPIR_INSTR_SLOT_NUM,
|
||||
PPIR_INSTR_SLOT_END,
|
||||
PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL,
|
||||
@@ -287,6 +297,7 @@ typedef struct ppir_instr {
|
||||
int est; /* earliest start time */
|
||||
int parent_index;
|
||||
bool scheduled;
|
||||
off_t offset;
|
||||
} ppir_instr;
|
||||
|
||||
typedef struct ppir_block {
|
||||
@@ -300,6 +311,15 @@ typedef struct ppir_block {
|
||||
int sched_instr_base;
|
||||
} ppir_block;
|
||||
|
||||
/* Node type for ppir_op_branch. ppir_node_to_branch() casts a ppir_node
 * pointer straight to this type, so the node header comes first.
 */
typedef struct {
|
||||
ppir_node node;
|
||||
/* The two operands whose register indices are encoded into the branch
 * field. discard_if fills src[0] at emit time; lowering points src[1]
 * at a constant zero.
 */
ppir_src src[2];
|
||||
/* Condition flags, copied as-is into the encoded branch field. */
bool cond_gt;
|
||||
bool cond_eq;
|
||||
bool cond_lt;
|
||||
/* Destination block; codegen branches to its first instruction. */
ppir_block *target;
|
||||
} ppir_branch_node;
|
||||
|
||||
struct ra_regs;
|
||||
struct lima_fs_shader_state;
|
||||
|
||||
@@ -322,6 +342,8 @@ typedef struct ppir_compiler {
|
||||
|
||||
/* for regalloc spilling debug */
|
||||
int force_spilling;
|
||||
|
||||
ppir_block *discard_block;
|
||||
} ppir_compiler;
|
||||
|
||||
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask);
|
||||
@@ -377,6 +399,8 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node)
|
||||
#define ppir_node_to_load(node) ((ppir_load_node *)(node))
|
||||
#define ppir_node_to_store(node) ((ppir_store_node *)(node))
|
||||
#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node))
|
||||
#define ppir_node_to_discard(node) ((ppir_discard_node *)(node))
|
||||
#define ppir_node_to_branch(node) ((ppir_branch_node *)(node))
|
||||
|
||||
static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
|
||||
{
|
||||
|
Reference in New Issue
Block a user