v3d: Switch to using the new SFU instructions on V3D 4.x.

These instructions let us write directly to the physical register file,
instead of just R4.  That saves us the move out of R4 that was previously
needed to keep a result from being clobbered by other SFU results or by
thread switches.

There is still an extra instruction of latency, which is not represented
in the scheduler at the moment.  If you use the result before it's ready,
the QPU will just stall, unlike the magic R4 mode where you'd read the
previous value.  That means that the following shader-db results aren't
quite representative (since we now cause some stalls instead of emitting
nops), but they're impressive enough that I'm happy with the change.

total instructions in shared programs: 95669 -> 91275 (-4.59%)
instructions in affected programs:     82590 -> 78196 (-5.32%)
This commit is contained in:
Eric Anholt
2018-07-20 13:06:50 -07:00
parent 58c1d3860f
commit e7ae900341
8 changed files with 118 additions and 24 deletions

View File

@@ -867,6 +867,33 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
a, b)); \
}
/*
 * VIR_SFU(name) generates two emit helpers for a single-source SFU op:
 *
 *   vir_<name>(c, a)            - emits the op and returns the result qreg.
 *   vir_<name>_dest(c, dest, a) - emits the op writing to dest and returns
 *                                 the emitted qinst.
 *
 * When c->devinfo->ver >= 41 the op is emitted as a single V3D_QPU_A_<name>
 * instruction.  Otherwise the source is FMOVed into the V3D_QPU_WADDR_<name>
 * magic register and the result is then FMOVed out of the magic R4 register.
 */
#define VIR_SFU(name)                                                         \
static inline struct qreg                                                     \
vir_##name(struct v3d_compile *c, struct qreg a)                              \
{                                                                             \
        if (c->devinfo->ver < 41) {                                           \
                struct qreg sfu_in = vir_reg(QFILE_MAGIC,                     \
                                             V3D_QPU_WADDR_##name);           \
                vir_FMOV_dest(c, sfu_in, a);                                  \
                                                                              \
                struct qreg sfu_out = vir_reg(QFILE_MAGIC,                    \
                                              V3D_QPU_WADDR_R4);              \
                return vir_FMOV(c, sfu_out);                                  \
        }                                                                     \
                                                                              \
        return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name,                 \
                                            c->undef, a, c->undef));          \
}                                                                             \
static inline struct qinst *                                                  \
vir_##name##_dest(struct v3d_compile *c, struct qreg dest,                    \
                  struct qreg a)                                              \
{                                                                             \
        if (c->devinfo->ver < 41) {                                           \
                struct qreg sfu_in = vir_reg(QFILE_MAGIC,                     \
                                             V3D_QPU_WADDR_##name);           \
                vir_FMOV_dest(c, sfu_in, a);                                  \
                                                                              \
                struct qreg sfu_out = vir_reg(QFILE_MAGIC,                    \
                                              V3D_QPU_WADDR_R4);              \
                return vir_FMOV_dest(c, dest, sfu_out);                       \
        }                                                                     \
                                                                              \
        return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name,              \
                                               dest, a, c->undef));           \
}
/*
 * Shorthand wrappers around the generic VIR_ALU2 / VIR_ALU1 generators.
 * The _A_ variants pass vir_add_inst and a V3D_QPU_A_<name> opcode; the
 * _M_ variants pass vir_mul_inst and a V3D_QPU_M_<name> opcode.
 */
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
#define VIR_M_ALU2(name) VIR_ALU2(name, vir_mul_inst, V3D_QPU_M_##name)
#define VIR_A_ALU1(name) VIR_ALU1(name, vir_add_inst, V3D_QPU_A_##name)
@@ -948,6 +975,13 @@ VIR_M_NODST_2(MULTOP)
VIR_M_ALU1(MOV)
VIR_M_ALU1(FMOV)
/*
 * SFU operations: each VIR_SFU(X) line defines vir_X() and vir_X_dest().
 * These come after VIR_M_ALU1(FMOV) because the helpers VIR_SFU generates
 * call vir_FMOV() and vir_FMOV_dest().
 */
VIR_SFU(RECIP)
VIR_SFU(RSQRT)
VIR_SFU(EXP)
VIR_SFU(LOG)
VIR_SFU(SIN)
VIR_SFU(RSQRT2)
static inline struct qinst *
vir_MOV_cond(struct v3d_compile *c, enum v3d_qpu_cond cond,
struct qreg dest, struct qreg src)