freedreno/ir3/cp: extract valid_flags

We'll also need this in the postsched-cp pass.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5280>
This commit is contained in:
Rob Clark
2020-05-14 09:31:32 -07:00
committed by Marge Bot
parent 5f1f8f7b17
commit a60d48a863
3 changed files with 178 additions and 174 deletions

View File

@@ -1285,3 +1285,173 @@ ir3_fixup_src_type(struct ir3_instruction *instr)
break;
}
}
static unsigned
cp_flags(unsigned flags)
{
/* only considering these flags (at least for now): */
flags &= (IR3_REG_CONST | IR3_REG_IMMED |
IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_SNEG | IR3_REG_SABS |
IR3_REG_BNOT | IR3_REG_RELATIV);
return flags;
}
bool
ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
if ((flags & IR3_REG_HIGH) &&
(opc_cat(instr->opc) > 1) &&
(compiler->gpu_id >= 600))
return false;
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
* I don't think so..
*/
if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
(flags & IR3_REG_RELATIV))
return false;
if (flags & IR3_REG_RELATIV) {
/* TODO need to test on earlier gens.. pretty sure the earlier
* problem was just that we didn't check that the src was from
* same block (since we can't propagate address register values
* across blocks currently)
*/
if (compiler->gpu_id < 600)
return false;
/* NOTE in the special try_swap_mad_two_srcs() case we can be
* called on a src that has already had an indirect load folded
* in, in which case ssa() returns NULL
*/
if (instr->regs[n+1]->flags & IR3_REG_SSA) {
struct ir3_instruction *src = ssa(instr->regs[n+1]);
if (src->address->block != instr->block)
return false;
}
}
switch (opc_cat(instr->opc)) {
case 1:
valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
break;
case 2:
valid_flags = ir3_cat2_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
if (ir3_cat2_int(instr->opc))
valid_flags |= IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
unsigned m = (n ^ 1) + 1;
/* cannot deal w/ const in both srcs:
* (note that some cat2 actually only have a single src)
*/
if (m < instr->regs_count) {
struct ir3_register *reg = instr->regs[m];
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
return false;
if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
return false;
}
}
break;
case 3:
valid_flags = ir3_cat3_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
/* cannot deal w/ const/relativ in 2nd src: */
if (n == 1)
return false;
}
break;
case 4:
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
return false;
break;
case 5:
/* no flags allowed */
if (flags)
return false;
break;
case 6:
valid_flags = IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & IR3_REG_IMMED) {
/* doesn't seem like we can have immediate src for store
* instructions:
*
* TODO this restriction could also apply to load instructions,
* but for load instructions this arg is the address (and not
* really sure any good way to test a hard-coded immed addr src)
*/
if (is_store(instr) && (n == 1))
return false;
if ((instr->opc == OPC_LDL) && (n == 0))
return false;
if ((instr->opc == OPC_STL) && (n != 2))
return false;
if (instr->opc == OPC_STLW && n == 0)
return false;
if (instr->opc == OPC_LDLW && n == 0)
return false;
/* disallow immediates in anything but the SSBO slot argument for
* cat6 instructions:
*/
if (is_atomic(instr->opc) && (n != 0))
return false;
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
return false;
if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
return false;
/* as with atomics, these cat6 instrs can only have an immediate
* for SSBO/IBO slot argument
*/
switch (instr->opc) {
case OPC_LDIB:
case OPC_LDC:
case OPC_RESINFO:
if (n != 0)
return false;
break;
default:
break;
}
}
break;
}
return true;
}

View File

@@ -620,6 +620,8 @@ void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
void ir3_fixup_src_type(struct ir3_instruction *instr);
bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
#include "util/set.h"
#define foreach_ssa_use(__use, __instr) \
for (struct ir3_instruction *__use = (void *)~0; \

View File

@@ -100,174 +100,6 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
return false;
}
static unsigned cp_flags(unsigned flags)
{
/* only considering these flags (at least for now): */
flags &= (IR3_REG_CONST | IR3_REG_IMMED |
IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_SNEG | IR3_REG_SABS |
IR3_REG_BNOT | IR3_REG_RELATIV);
return flags;
}
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
if ((flags & IR3_REG_HIGH) &&
(opc_cat(instr->opc) > 1) &&
(compiler->gpu_id >= 600))
return false;
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
* I don't think so..
*/
if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
(flags & IR3_REG_RELATIV))
return false;
if (flags & IR3_REG_RELATIV) {
/* TODO need to test on earlier gens.. pretty sure the earlier
* problem was just that we didn't check that the src was from
* same block (since we can't propagate address register values
* across blocks currently)
*/
if (compiler->gpu_id < 600)
return false;
/* NOTE in the special try_swap_mad_two_srcs() case we can be
* called on a src that has already had an indirect load folded
* in, in which case ssa() returns NULL
*/
if (instr->regs[n+1]->flags & IR3_REG_SSA) {
struct ir3_instruction *src = ssa(instr->regs[n+1]);
if (src->address->block != instr->block)
return false;
}
}
switch (opc_cat(instr->opc)) {
case 1:
valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
break;
case 2:
valid_flags = ir3_cat2_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
if (ir3_cat2_int(instr->opc))
valid_flags |= IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
unsigned m = (n ^ 1) + 1;
/* cannot deal w/ const in both srcs:
* (note that some cat2 actually only have a single src)
*/
if (m < instr->regs_count) {
struct ir3_register *reg = instr->regs[m];
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
return false;
if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
return false;
}
}
break;
case 3:
valid_flags = ir3_cat3_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
/* cannot deal w/ const/relativ in 2nd src: */
if (n == 1)
return false;
}
break;
case 4:
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
return false;
break;
case 5:
/* no flags allowed */
if (flags)
return false;
break;
case 6:
valid_flags = IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & IR3_REG_IMMED) {
/* doesn't seem like we can have immediate src for store
* instructions:
*
* TODO this restriction could also apply to load instructions,
* but for load instructions this arg is the address (and not
* really sure any good way to test a hard-coded immed addr src)
*/
if (is_store(instr) && (n == 1))
return false;
if ((instr->opc == OPC_LDL) && (n == 0))
return false;
if ((instr->opc == OPC_STL) && (n != 2))
return false;
if (instr->opc == OPC_STLW && n == 0)
return false;
if (instr->opc == OPC_LDLW && n == 0)
return false;
/* disallow immediates in anything but the SSBO slot argument for
* cat6 instructions:
*/
if (is_atomic(instr->opc) && (n != 0))
return false;
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
return false;
if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
return false;
/* as with atomics, these cat6 instrs can only have an immediate
* for SSBO/IBO slot argument
*/
switch (instr->opc) {
case OPC_LDIB:
case OPC_LDC:
case OPC_RESINFO:
if (n != 0)
return false;
break;
default:
break;
}
}
break;
}
return true;
}
/* propagate register flags from src to dst.. negates need special
* handling to cancel each other out.
*/
@@ -326,7 +158,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
if (!valid_flags(instr, n, new_flags))
if (!ir3_valid_flags(instr, n, new_flags))
return false;
unsigned swiz, idx, i;
@@ -455,9 +287,9 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
bool valid_swap =
/* can we propagate mov if we move 2nd src to first? */
valid_flags(instr, 0, new_flags) &&
ir3_valid_flags(instr, 0, new_flags) &&
/* and does first src fit in second slot? */
valid_flags(instr, 1, instr->regs[1 + 1]->flags);
ir3_valid_flags(instr, 1, instr->regs[1 + 1]->flags);
if (!valid_swap) {
/* put things back the way they were: */
@@ -487,7 +319,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
if (valid_flags(instr, n, new_flags)) {
if (ir3_valid_flags(instr, n, new_flags)) {
if (new_flags & IR3_REG_ARRAY) {
debug_assert(!(reg->flags & IR3_REG_ARRAY));
reg->array = src_reg->array;
@@ -512,7 +344,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
if (!valid_flags(instr, n, new_flags)) {
if (!ir3_valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
if (lower_immed(ctx, instr, n, src_reg, new_flags))
return true;
@@ -613,7 +445,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
iim_val = ~iim_val;
/* other than category 1 (mov) we can only encode up to 10 bits: */
if (valid_flags(instr, n, new_flags) &&
if (ir3_valid_flags(instr, n, new_flags) &&
((instr->opc == OPC_MOV) ||
!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);