panfrost: Clean up one-argument passing quirk

Most Midgard instructions take two arguments logically; there are always
two arguments at the assembly level. For the few instructions that take
only a single argument, the second argument slot is generally unused,
with a zero inline constant occupying the space. fmov/imov are the
exception: the first argument slot is filled with r24, and the logical
argument goes in the second slot.
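As an editorial sketch (not code from this commit; the struct and the
register numbers are placeholders), the three packings look like:

    /* Hypothetical illustration of Midgard's two argument slots */
    struct alu_args { unsigned src0, src1; };

    /* fadd r0, r1, r2: ordinary two-argument op, both slots are real */
    struct alu_args two_arg = { .src0 = 1, .src1 = 2 };

    /* frcp r0, r1: plain one-argument op, slot 1 holds a dummy #0 constant */
    struct alu_args one_arg = { .src0 = 1, .src1 = 0 /* inline #0 */ };

    /* fmov r0, r1: flipped op, dummy r24 in slot 0, the argument in slot 1 */
    struct alu_args flipped = { .src0 = 24 /* r24 */, .src1 = 1 };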

Previously, these constraints were handled by a delicate, buggy series
of hacks. This commit removes those hacks. Instead, we look at the
logical number of arguments (from NIR), switching between the
two-argument and one-argument-plus-zero styles accordingly. We then
introduce a quirk for the flipped style, which applies to fmov/imov.
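Concretely, the emitter can then resolve both slots in one spot. A
minimal sketch of the selection logic, assuming the names the diff below
introduces (the SSA_UNUSED_* values here are stand-in placeholders):

    #include <stdbool.h>

    #define SSA_UNUSED_0 0   /* placeholder: slot holds the inline #0 */
    #define SSA_UNUSED_1 -2  /* placeholder: slot holds the dummy r24 */

    static void
    pick_slots(bool quirk_flipped_r24, unsigned nr_inputs,
               int src0, int src1,
               int *slot0, int *slot1, bool *inline_constant)
    {
            /* Flipped ops (fmov/imov): dummy in slot 0, argument in slot 1 */
            *slot0 = quirk_flipped_r24 ? SSA_UNUSED_1 : src0;
            *slot1 = quirk_flipped_r24 ? src0 : src1;

            /* Plain one-input ops fill slot 1 with the inline #0 instead */
            *inline_constant = (nr_inputs == 1) && !quirk_flipped_r24;
    }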

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Author: Alyssa Rosenzweig
Date: 2019-02-07 03:39:25 +00:00
parent 49397a3c84
commit 97dcad8d3e
2 changed files with 107 additions and 109 deletions

File 1 of 2:

@@ -52,6 +52,14 @@
 #define OP_CHANNEL_COUNT(c) ((c - 1) << 0)
 #define GET_CHANNEL_COUNT(c) ((c & (0x3 << 0)) ? ((c & (0x3 << 0)) + 1) : 0)
 
+/* For instructions that take a single argument, normally the first argument
+ * slot is used for the argument and the second slot is a dummy #0 constant.
+ * However, there are exceptions: instructions like fmov store their argument
+ * in the _second_ slot and store a dummy r24 in the first slot, designated by
+ * QUIRK_FLIPPED_R24 */
+
+#define QUIRK_FLIPPED_R24 (1 << 2)
+
 /* Vector-independant shorthands for the above; these numbers are arbitrary and
  * not from the ISA. Convert to the above with unit_enum_to_midgard */
@@ -162,24 +170,24 @@ midgard_is_integer_op(int op)
 #define UNIT_SMUL ALU_ENAB_SCAL_MUL
 #define UNIT_VLUT ALU_ENAB_VEC_LUT
 
-/* Shorthands for usual combinations of units. LUT is intentionally excluded
- * since it's nutty. */
+/* Shorthands for usual combinations of units */
 
 #define UNITS_MUL (UNIT_VMUL | UNIT_SMUL)
 #define UNITS_ADD (UNIT_VADD | UNIT_SADD)
-#define UNITS_ALL (UNITS_MUL | UNITS_ADD)
+#define UNITS_MOST (UNITS_MUL | UNITS_ADD)
+#define UNITS_ALL (UNITS_MOST | UNIT_VLUT)
 #define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL)
 #define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
 #define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
 
-static int alu_opcode_props[256] = {
+static unsigned alu_opcode_props[256] = {
 [midgard_alu_op_fadd] = UNITS_ADD,
 [midgard_alu_op_fmul] = UNITS_MUL | UNIT_VLUT,
 [midgard_alu_op_fmin] = UNITS_MUL | UNITS_ADD,
 [midgard_alu_op_fmax] = UNITS_MUL | UNITS_ADD,
-[midgard_alu_op_imin] = UNITS_ALL,
-[midgard_alu_op_imax] = UNITS_ALL,
-[midgard_alu_op_fmov] = UNITS_ALL | UNIT_VLUT,
+[midgard_alu_op_imin] = UNITS_MOST,
+[midgard_alu_op_imax] = UNITS_MOST,
+[midgard_alu_op_fmov] = UNITS_ALL | QUIRK_FLIPPED_R24,
 [midgard_alu_op_ffloor] = UNITS_ADD,
 [midgard_alu_op_fceil] = UNITS_ADD,
@@ -188,19 +196,20 @@ static int alu_opcode_props[256] = {
 [midgard_alu_op_fdot3] = UNIT_VMUL | OP_CHANNEL_COUNT(3),
 [midgard_alu_op_fdot4] = UNIT_VMUL | OP_CHANNEL_COUNT(4),
 
-[midgard_alu_op_iadd] = UNITS_ADD,
-[midgard_alu_op_isub] = UNITS_ADD,
-[midgard_alu_op_imul] = UNITS_ALL,
-[midgard_alu_op_imov] = UNITS_ALL,
+/* Incredibly, iadd can run on vmul, etc */
+[midgard_alu_op_iadd] = UNITS_MOST,
+[midgard_alu_op_isub] = UNITS_MOST,
+[midgard_alu_op_imul] = UNITS_MOST,
+[midgard_alu_op_imov] = UNITS_MOST | QUIRK_FLIPPED_R24,
 
 /* For vector comparisons, use ball etc */
-[midgard_alu_op_feq] = UNITS_ALL,
-[midgard_alu_op_fne] = UNITS_ALL,
+[midgard_alu_op_feq] = UNITS_MOST,
+[midgard_alu_op_fne] = UNITS_MOST,
 [midgard_alu_op_flt] = UNIT_SADD,
-[midgard_alu_op_ieq] = UNITS_ALL,
-[midgard_alu_op_ine] = UNITS_ALL,
-[midgard_alu_op_ilt] = UNITS_ALL,
-[midgard_alu_op_ile] = UNITS_ALL,
+[midgard_alu_op_ieq] = UNITS_MOST,
+[midgard_alu_op_ine] = UNITS_MOST,
+[midgard_alu_op_ilt] = UNITS_MOST,
+[midgard_alu_op_ile] = UNITS_MOST,
 
 [midgard_alu_op_icsel] = UNITS_ADD,
 [midgard_alu_op_fcsel] = UNITS_ADD | UNIT_SMUL,
@@ -223,14 +232,14 @@ static int alu_opcode_props[256] = {
 [midgard_alu_op_iand] = UNITS_ADD, /* XXX: Test case where it's right on smul but not sadd */
 [midgard_alu_op_ior] = UNITS_ADD,
 [midgard_alu_op_ixor] = UNITS_ADD,
-[midgard_alu_op_inot] = UNITS_ALL,
+[midgard_alu_op_inot] = UNITS_MOST,
 [midgard_alu_op_ishl] = UNITS_ADD,
 [midgard_alu_op_iasr] = UNITS_ADD,
 [midgard_alu_op_ilsr] = UNITS_ADD,
 [midgard_alu_op_ilsr] = UNITS_ADD,
 
-[midgard_alu_op_fball_eq] = UNITS_ALL,
-[midgard_alu_op_fbany_neq] = UNITS_ALL,
-[midgard_alu_op_iball_eq] = UNITS_ALL,
-[midgard_alu_op_ibany_neq] = UNITS_ALL
+[midgard_alu_op_fball_eq] = UNITS_MOST,
+[midgard_alu_op_fbany_neq] = UNITS_MOST,
+[midgard_alu_op_iball_eq] = UNITS_MOST,
+[midgard_alu_op_ibany_neq] = UNITS_MOST
 };
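For reference, the second file below consumes these entries as a plain
bitmask: the quirk bit is tested directly, and since QUIRK_FLIPPED_R24
occupies a low bit that no UNIT_* enable uses (as the defines above
imply), masking with UNITS_ALL isolates the unit bits, so a pure-VLUT op
is still recognizable even with quirks set:

    unsigned opcode_props = alu_opcode_props[op];
    bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24;
    bool lut_only = (opcode_props & UNITS_ALL) == UNIT_VLUT;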

File 2 of 2:

@@ -888,18 +888,8 @@ emit_condition(compiler_context *ctx, nir_src *src, bool for_branch)
 emit_mir_instruction(ctx, ins);
 }
 
-/* Components: Number/style of arguments:
- * 3: One-argument op with r24 (i2f, f2i)
- * 2: Standard two argument op (fadd, fmul)
- * 1: Flipped one-argument op (fmov, imov)
- * 0: Standard one-argument op (frcp)
- * NIR: NIR instruction op.
- * Op: Midgard instruction op.
- */
-#define ALU_CASE(_components, nir, _op) \
+#define ALU_CASE(nir, _op) \
 case nir_op_##nir: \
-components = _components; \
 op = midgard_alu_op_##_op; \
 break;
@@ -910,6 +900,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 unsigned dest = nir_dest_index(&instr->dest.dest);
 unsigned nr_components = is_ssa ? instr->dest.dest.ssa.num_components : instr->dest.dest.reg.reg->num_components;
+unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
 
 /* Most Midgard ALU ops have a 1:1 correspondance to NIR ops; these are
  * supported. A few do not and are commented for now. Also, there are a
@@ -918,70 +909,66 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
  * convention of the Midgard instruction; actual packing is done in
  * emit_alu below */
 
-unsigned op, components;
+unsigned op;
 
 switch (instr->op) {
-ALU_CASE(2, fadd, fadd);
-ALU_CASE(2, fmul, fmul);
-ALU_CASE(2, fmin, fmin);
-ALU_CASE(2, fmax, fmax);
-ALU_CASE(2, imin, imin);
-ALU_CASE(2, imax, imax);
-ALU_CASE(1, fmov, fmov);
-ALU_CASE(0, ffloor, ffloor);
-ALU_CASE(0, fceil, fceil);
-ALU_CASE(2, fdot3, fdot3);
-//ALU_CASE(2, fdot3r);
-ALU_CASE(2, fdot4, fdot4);
-//ALU_CASE(2, freduce);
-ALU_CASE(2, iadd, iadd);
-ALU_CASE(2, isub, isub);
-ALU_CASE(2, imul, imul);
+ALU_CASE(fadd, fadd);
+ALU_CASE(fmul, fmul);
+ALU_CASE(fmin, fmin);
+ALU_CASE(fmax, fmax);
+ALU_CASE(imin, imin);
+ALU_CASE(imax, imax);
+ALU_CASE(fmov, fmov);
+ALU_CASE(ffloor, ffloor);
+ALU_CASE(fceil, fceil);
+ALU_CASE(fdot3, fdot3);
+ALU_CASE(fdot4, fdot4);
+ALU_CASE(iadd, iadd);
+ALU_CASE(isub, isub);
+ALU_CASE(imul, imul);
 
 /* XXX: Use fmov, not imov, since imov was causing major
  * issues with texture precision? XXX research */
-ALU_CASE(1, imov, fmov);
+ALU_CASE(imov, fmov);
 
-ALU_CASE(2, feq, feq);
-ALU_CASE(2, fne, fne);
-ALU_CASE(2, flt, flt);
-ALU_CASE(2, ieq, ieq);
-ALU_CASE(2, ine, ine);
-ALU_CASE(2, ilt, ilt);
-//ALU_CASE(2, icsel, icsel);
+ALU_CASE(feq, feq);
+ALU_CASE(fne, fne);
+ALU_CASE(flt, flt);
+ALU_CASE(ieq, ieq);
+ALU_CASE(ine, ine);
+ALU_CASE(ilt, ilt);
 
-ALU_CASE(0, frcp, frcp);
-ALU_CASE(0, frsq, frsqrt);
-ALU_CASE(0, fsqrt, fsqrt);
-ALU_CASE(0, fexp2, fexp2);
-ALU_CASE(0, flog2, flog2);
+ALU_CASE(frcp, frcp);
+ALU_CASE(frsq, frsqrt);
+ALU_CASE(fsqrt, fsqrt);
+ALU_CASE(fexp2, fexp2);
+ALU_CASE(flog2, flog2);
 
-ALU_CASE(3, f2i32, f2i);
-ALU_CASE(3, f2u32, f2u);
-ALU_CASE(3, i2f32, i2f);
-ALU_CASE(3, u2f32, u2f);
+ALU_CASE(f2i32, f2i);
+ALU_CASE(f2u32, f2u);
+ALU_CASE(i2f32, i2f);
+ALU_CASE(u2f32, u2f);
 
-ALU_CASE(0, fsin, fsin);
-ALU_CASE(0, fcos, fcos);
+ALU_CASE(fsin, fsin);
+ALU_CASE(fcos, fcos);
 
-ALU_CASE(2, iand, iand);
-ALU_CASE(2, ior, ior);
-ALU_CASE(2, ixor, ixor);
-ALU_CASE(0, inot, inot);
-ALU_CASE(2, ishl, ishl);
-ALU_CASE(2, ishr, iasr);
-ALU_CASE(2, ushr, ilsr);
-//ALU_CASE(2, ilsr, ilsr);
+ALU_CASE(iand, iand);
+ALU_CASE(ior, ior);
+ALU_CASE(ixor, ixor);
+ALU_CASE(inot, inot);
+ALU_CASE(ishl, ishl);
+ALU_CASE(ishr, iasr);
+ALU_CASE(ushr, ilsr);
 
-ALU_CASE(2, ball_fequal4, fball_eq);
-ALU_CASE(2, bany_fnequal4, fbany_neq);
-ALU_CASE(2, ball_iequal4, iball_eq);
-ALU_CASE(2, bany_inequal4, ibany_neq);
+ALU_CASE(ball_fequal4, fball_eq);
+ALU_CASE(bany_fnequal4, fbany_neq);
+ALU_CASE(ball_iequal4, iball_eq);
+ALU_CASE(bany_inequal4, ibany_neq);
 
 /* For greater-or-equal, we use less-or-equal and flip the
  * arguments */
 
 case nir_op_ige: {
-components = 2;
 op = midgard_alu_op_ile;
 
 /* Swap via temporary */
@@ -993,9 +980,11 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 }
 
 case nir_op_bcsel: {
-components = 2;
 op = midgard_alu_op_fcsel;
 
+/* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */
+nr_inputs = 2;
+
 emit_condition(ctx, &instr->src[0].src, false);
 
 /* The condition is the first argument; move the other
@@ -1016,7 +1005,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 case nir_op_b2f32: {
 op = midgard_alu_op_iand;
-components = 0;
 break;
 }
@@ -1026,7 +1014,9 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 return;
 }
 
-int _unit = alu_opcode_props[op];
+/* Fetch unit, quirks, etc information */
+unsigned opcode_props = alu_opcode_props[op];
+bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24;
 
 /* Initialise fields common between scalar/vector instructions */
 midgard_outmod outmod = instr->dest.saturate ? midgard_outmod_sat : midgard_outmod_none;
@@ -1036,7 +1026,7 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
  * needs it, or else we may segfault. */
 
 unsigned src0 = nir_alu_src_index(&instr->src[0]);
-unsigned src1 = components == 2 ? nir_alu_src_index(&instr->src[1]) : SSA_UNUSED_0;
+unsigned src1 = nr_inputs == 2 ? nir_alu_src_index(&instr->src[1]) : SSA_UNUSED_0;
 
 /* Rather than use the instruction generation helpers, we do it
  * ourselves here to avoid the mess */
@@ -1044,23 +1034,22 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 midgard_instruction ins = {
 .type = TAG_ALU_4,
 .ssa_args = {
-.src0 = components == 3 || components == 2 || components == 0 ? src0 : SSA_UNUSED_1,
-.src1 = components == 2 ? src1 : components == 1 ? src0 : components == 0 ? SSA_UNUSED_0 : SSA_UNUSED_1,
+.src0 = quirk_flipped_r24 ? SSA_UNUSED_1 : src0,
+.src1 = quirk_flipped_r24 ? src0 : src1,
 .dest = dest,
-.inline_constant = components == 0
+.inline_constant = (nr_inputs == 1) && !quirk_flipped_r24
 }
 };
 
-nir_alu_src *nirmod0 = NULL;
-nir_alu_src *nirmod1 = NULL;
+nir_alu_src *nirmods[2] = { NULL };
 
-if (components == 2) {
-nirmod0 = &instr->src[0];
-nirmod1 = &instr->src[1];
-} else if (components == 1) {
-nirmod1 = &instr->src[0];
-} else if (components == 0) {
-nirmod0 = &instr->src[0];
+if (nr_inputs == 2) {
+nirmods[0] = &instr->src[0];
+nirmods[1] = &instr->src[1];
+} else if (nr_inputs == 1) {
+nirmods[quirk_flipped_r24] = &instr->src[0];
+} else {
+assert(0);
 }
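One subtlety in the hunk above (editor's note): nirmods[quirk_flipped_r24]
leans on C's guarantee that a bool converts to 0 or 1, so a plain one-input
op records its source modifiers in nirmods[0] while a flipped op records
them in nirmods[1], the slot its argument actually occupies. The
equivalent long-hand form:

    if (quirk_flipped_r24)
            nirmods[1] = &instr->src[0]; /* flipped: argument lives in slot 1 */
    else
            nirmods[0] = &instr->src[0]; /* normal: argument lives in slot 0 */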
midgard_vector_alu alu = { midgard_vector_alu alu = {
@@ -1072,8 +1061,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 /* Writemask only valid for non-SSA NIR */
 .mask = expand_writemask((1 << nr_components) - 1),
 
-.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod0)),
-.src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod1)),
+.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0])),
+.src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[1])),
 };
 
 /* Apply writemask if non-SSA, keeping in mind that we can't write to components that don't exist */
@@ -1097,23 +1086,21 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 ins.constants[0] = 1.0;
 }
 
-if (_unit == UNIT_VLUT) {
-/* To avoid duplicating the LUTs (we think?), LUT instructions can only
- * operate as if they were scalars. Lower them here by changing the
- * component. */
-
-assert(components == 0);
+if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
+/* To avoid duplicating the lookup tables (probably), true LUT
+ * instructions can only operate as if they were scalars. Lower
+ * them here by changing the component. */
 
 uint8_t original_swizzle[4];
-memcpy(original_swizzle, nirmod0->swizzle, sizeof(nirmod0->swizzle));
+memcpy(original_swizzle, nirmods[0]->swizzle, sizeof(nirmods[0]->swizzle));
 
 for (int i = 0; i < nr_components; ++i) {
 ins.alu.mask = (0x3) << (2 * i); /* Mask the associated component */
 
 for (int j = 0; j < 4; ++j)
-nirmod0->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */
+nirmods[0]->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */
 
-ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod0));
+ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0]));
 emit_mir_instruction(ctx, ins);
 }
 } else {
@@ -1121,6 +1108,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
 }
 }
 
+#undef ALU_CASE
+
 static void
 emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
 {