broadcom/vc5: Use the new LDVPM/STVPM opcodes on V3D 4.1.

Now, instead of a magic write register for VPM stores we have an
instruction to do them (which means no packing of other ALU ops into it),
with the ability to reorder the VPM stores due to the offset being baked
into the instruction.

VPM loads also gain the ability to be reordered by packing the row into
the A argument.  They also no longer write to the r3 accumulator, and
instead must be stored to a physical register.
This commit is contained in:
Eric Anholt
2018-01-04 15:35:28 -08:00
parent 55f8a01aca
commit 22a02f3e34
9 changed files with 197 additions and 51 deletions

View File

@@ -29,6 +29,7 @@
#include "util/hash_table.h" #include "util/hash_table.h"
#include "compiler/nir/nir.h" #include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_builder.h"
#include "common/v3d_device_info.h"
#include "v3d_compiler.h" #include "v3d_compiler.h"
/* We don't do any address packing. */ /* We don't do any address packing. */
@@ -1224,7 +1225,21 @@ emit_frag_end(struct v3d_compile *c)
} }
static void static void
emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w) vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index)
{
if (c->devinfo->ver >= 40) {
vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val);
*vpm_index = *vpm_index + 1;
} else {
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
}
c->num_vpm_writes++;
}
static void
emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w,
uint32_t *vpm_index)
{ {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
struct qreg coord = c->outputs[c->output_position_index + i]; struct qreg coord = c->outputs[c->output_position_index + i];
@@ -1232,34 +1247,32 @@ emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w)
vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i,
0)); 0));
coord = vir_FMUL(c, coord, rcp_w); coord = vir_FMUL(c, coord, rcp_w);
vir_FTOIN_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index);
coord);
} }
} }
static void static void
emit_zs_write(struct v3d_compile *c, struct qreg rcp_w) emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
{ {
struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0); struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0); struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
vir_FADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), struct qreg z = c->outputs[c->output_position_index + 2];
vir_FMUL(c, vir_FMUL(c, z = vir_FMUL(c, z, zscale);
c->outputs[c->output_position_index + 2], z = vir_FMUL(c, z, rcp_w);
zscale), z = vir_FADD(c, z, zoffset);
rcp_w), vir_VPM_WRITE(c, z, vpm_index);
zoffset);
} }
static void static void
emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w) emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
{ {
vir_VPM_WRITE(c, rcp_w); vir_VPM_WRITE(c, rcp_w, vpm_index);
} }
static void static void
emit_point_size_write(struct v3d_compile *c) emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index)
{ {
struct qreg point_size; struct qreg point_size;
@@ -1273,12 +1286,15 @@ emit_point_size_write(struct v3d_compile *c)
*/ */
point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125)); point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125));
vir_VPM_WRITE(c, point_size); vir_VPM_WRITE(c, point_size, vpm_index);
} }
static void static void
emit_vpm_write_setup(struct v3d_compile *c) emit_vpm_write_setup(struct v3d_compile *c)
{ {
if (c->devinfo->ver >= 40)
return;
uint32_t packed; uint32_t packed;
struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = { struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header, V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
@@ -1300,6 +1316,7 @@ emit_vpm_write_setup(struct v3d_compile *c)
static void static void
emit_vert_end(struct v3d_compile *c) emit_vert_end(struct v3d_compile *c)
{ {
uint32_t vpm_index = 0;
struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP, struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
c->outputs[c->output_position_index + 3]); c->outputs[c->output_position_index + 3]);
@@ -1307,21 +1324,22 @@ emit_vert_end(struct v3d_compile *c)
if (c->vs_key->is_coord) { if (c->vs_key->is_coord) {
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
vir_VPM_WRITE(c, c->outputs[c->output_position_index + i]); vir_VPM_WRITE(c, c->outputs[c->output_position_index + i],
emit_scaled_viewport_write(c, rcp_w); &vpm_index);
emit_scaled_viewport_write(c, rcp_w, &vpm_index);
if (c->vs_key->per_vertex_point_size) { if (c->vs_key->per_vertex_point_size) {
emit_point_size_write(c); emit_point_size_write(c, &vpm_index);
/* emit_rcp_wc_write(c, rcp_w); */ /* emit_rcp_wc_write(c, rcp_w); */
} }
/* XXX: Z-only rendering */ /* XXX: Z-only rendering */
if (0) if (0)
emit_zs_write(c, rcp_w); emit_zs_write(c, rcp_w, &vpm_index);
} else { } else {
emit_scaled_viewport_write(c, rcp_w); emit_scaled_viewport_write(c, rcp_w, &vpm_index);
emit_zs_write(c, rcp_w); emit_zs_write(c, rcp_w, &vpm_index);
emit_rcp_wc_write(c, rcp_w); emit_rcp_wc_write(c, rcp_w, &vpm_index);
if (c->vs_key->per_vertex_point_size) if (c->vs_key->per_vertex_point_size)
emit_point_size_write(c); emit_point_size_write(c, &vpm_index);
} }
for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
@@ -1332,7 +1350,8 @@ emit_vert_end(struct v3d_compile *c)
struct v3d_varying_slot output = c->output_slots[j]; struct v3d_varying_slot output = c->output_slots[j];
if (!memcmp(&input, &output, sizeof(input))) { if (!memcmp(&input, &output, sizeof(input))) {
vir_VPM_WRITE(c, c->outputs[j]); vir_VPM_WRITE(c, c->outputs[j],
&vpm_index);
break; break;
} }
} }
@@ -1340,7 +1359,8 @@ emit_vert_end(struct v3d_compile *c)
* this FS input. * this FS input.
*/ */
if (j == c->num_outputs) if (j == c->num_outputs)
vir_VPM_WRITE(c, vir_uniform_f(c, 0.0)); vir_VPM_WRITE(c, vir_uniform_f(c, 0.0),
&vpm_index);
} }
} }
@@ -1384,6 +1404,12 @@ ntq_emit_vpm_read(struct v3d_compile *c,
{ {
struct qreg vpm = vir_reg(QFILE_VPM, vpm_index); struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
if (c->devinfo->ver >= 40 ) {
return vir_LDVPMV_IN(c,
vir_uniform_ui(c,
(*num_components_queued)++));
}
if (*num_components_queued != 0) { if (*num_components_queued != 0) {
(*num_components_queued)--; (*num_components_queued)--;
c->num_inputs++; c->num_inputs++;
@@ -1501,8 +1527,12 @@ ntq_setup_inputs(struct v3d_compile *c)
} }
if (c->s->info.stage == MESA_SHADER_VERTEX) { if (c->s->info.stage == MESA_SHADER_VERTEX) {
assert(vpm_components_queued == 0); if (c->devinfo->ver >= 40) {
assert(num_components == 0); assert(vpm_components_queued == num_components);
} else {
assert(vpm_components_queued == 0);
assert(num_components == 0);
}
} }
} }

View File

@@ -594,6 +594,9 @@ qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr)
static bool static bool
qpu_accesses_peripheral(const struct v3d_qpu_instr *inst) qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
{ {
if (v3d_qpu_uses_vpm(inst))
return true;
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP && if (inst->alu.add.op != V3D_QPU_A_NOP &&
inst->alu.add.magic_write && inst->alu.add.magic_write &&
@@ -601,9 +604,6 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
return true; return true;
} }
if (inst->alu.add.op == V3D_QPU_A_VPMSETUP)
return true;
if (inst->alu.mul.op != V3D_QPU_M_NOP && if (inst->alu.mul.op != V3D_QPU_M_NOP &&
inst->alu.mul.magic_write && inst->alu.mul.magic_write &&
qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) { qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {

View File

@@ -791,6 +791,7 @@ VIR_A_ALU2(OR)
VIR_A_ALU2(XOR) VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD) VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB) VIR_A_ALU2(VSUB)
VIR_A_ALU2(STVPMV)
VIR_A_ALU1(NOT) VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG) VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH) VIR_A_ALU1(FLAPUSH)
@@ -800,6 +801,8 @@ VIR_A_ALU1(SETMSF)
VIR_A_ALU1(SETREVF) VIR_A_ALU1(SETREVF)
VIR_A_ALU1(TIDX) VIR_A_ALU1(TIDX)
VIR_A_ALU1(EIDX) VIR_A_ALU1(EIDX)
VIR_A_ALU1(LDVPMV_IN)
VIR_A_ALU1(LDVPMV_OUT)
VIR_A_ALU0(FXCD) VIR_A_ALU0(FXCD)
VIR_A_ALU0(XCD) VIR_A_ALU0(XCD)
@@ -854,12 +857,6 @@ vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond,
return t; return t;
} }
static inline void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
{
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
}
static inline struct qinst * static inline struct qinst *
vir_NOP(struct v3d_compile *c) vir_NOP(struct v3d_compile *c)
{ {

View File

@@ -92,6 +92,9 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
case V3D_QPU_A_SETREVF: case V3D_QPU_A_SETREVF:
case V3D_QPU_A_SETMSF: case V3D_QPU_A_SETMSF:
case V3D_QPU_A_VPMSETUP: case V3D_QPU_A_VPMSETUP:
case V3D_QPU_A_STVPMV:
case V3D_QPU_A_STVPMD:
case V3D_QPU_A_STVPMP:
return true; return true;
default: default:
break; break;
@@ -412,10 +415,6 @@ static void
vir_emit(struct v3d_compile *c, struct qinst *inst) vir_emit(struct v3d_compile *c, struct qinst *inst)
{ {
list_addtail(&inst->link, &c->cur_block->instructions); list_addtail(&inst->link, &c->cur_block->instructions);
if (inst->dst.file == QFILE_MAGIC &&
inst->dst.index == V3D_QPU_WADDR_VPM)
c->num_vpm_writes++;
} }
/* Updates inst to write to a new temporary, emits it, and notes the def. */ /* Updates inst to write to a new temporary, emits it, and notes the def. */

View File

@@ -160,6 +160,28 @@ v3d_register_allocate(struct v3d_compile *c)
} }
} }
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
switch (inst->qpu.alu.add.op) {
case V3D_QPU_A_LDVPMV_IN:
case V3D_QPU_A_LDVPMV_OUT:
case V3D_QPU_A_LDVPMD_IN:
case V3D_QPU_A_LDVPMD_OUT:
case V3D_QPU_A_LDVPMP:
case V3D_QPU_A_LDVPMG_IN:
case V3D_QPU_A_LDVPMG_OUT:
/* LDVPMs only store to temps (the MA flag
* decides whether the LDVPM is in or out)
*/
assert(inst->dst.file == QFILE_TEMP);
class_bits[temp_to_node[inst->dst.index]] &=
CLASS_BIT_PHYS;
break;
default:
break;
}
}
if (inst->src[0].file == QFILE_REG) { if (inst->src[0].file == QFILE_REG) {
switch (inst->src[0].index) { switch (inst->src[0].index) {
case 0: case 0:

View File

@@ -113,10 +113,13 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
[V3D_QPU_A_TMUWT] = "tmuwt", [V3D_QPU_A_TMUWT] = "tmuwt",
[V3D_QPU_A_VPMSETUP] = "vpmsetup", [V3D_QPU_A_VPMSETUP] = "vpmsetup",
[V3D_QPU_A_VPMWT] = "vpmwt", [V3D_QPU_A_VPMWT] = "vpmwt",
[V3D_QPU_A_LDVPMV] = "ldvpmv", [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
[V3D_QPU_A_LDVPMD] = "ldvpmd", [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
[V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
[V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
[V3D_QPU_A_LDVPMP] = "ldvpmp", [V3D_QPU_A_LDVPMP] = "ldvpmp",
[V3D_QPU_A_LDVPMG] = "ldvpmg", [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
[V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
[V3D_QPU_A_FCMP] = "fcmp", [V3D_QPU_A_FCMP] = "fcmp",
[V3D_QPU_A_VFMAX] = "vfmax", [V3D_QPU_A_VFMAX] = "vfmax",
[V3D_QPU_A_FROUND] = "fround", [V3D_QPU_A_FROUND] = "fround",
@@ -376,10 +379,13 @@ static const uint8_t add_op_args[] = {
[V3D_QPU_A_VPMSETUP] = D | A, [V3D_QPU_A_VPMSETUP] = D | A,
[V3D_QPU_A_LDVPMV] = D | A, [V3D_QPU_A_LDVPMV_IN] = D | A,
[V3D_QPU_A_LDVPMD] = D | A, [V3D_QPU_A_LDVPMV_OUT] = D | A,
[V3D_QPU_A_LDVPMD_IN] = D | A,
[V3D_QPU_A_LDVPMD_OUT] = D | A,
[V3D_QPU_A_LDVPMP] = D | A, [V3D_QPU_A_LDVPMP] = D | A,
[V3D_QPU_A_LDVPMG] = D | A | B, [V3D_QPU_A_LDVPMG_IN] = D | A | B,
[V3D_QPU_A_LDVPMG_OUT] = D | A | B,
/* FIXME: MOVABSNEG */ /* FIXME: MOVABSNEG */
@@ -516,6 +522,49 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
waddr == V3D_QPU_WADDR_SYNCU); waddr == V3D_QPU_WADDR_SYNCU);
} }
/**
 * Classifies an add-ALU opcode: returns true when it is one of the VPM
 * opcodes listed below (VPMSETUP, VPMWT, the LDVPM family or the STVPM
 * family), false for every other opcode.
 */
static bool
v3d_qpu_add_op_uses_vpm(enum v3d_qpu_add_op op)
{
        bool is_vpm_op = false;

        switch (op) {
        /* VPMSETUP / VPMWT */
        case V3D_QPU_A_VPMSETUP:
        case V3D_QPU_A_VPMWT:
        /* LDVPM family */
        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
        case V3D_QPU_A_LDVPMG_OUT:
        /* STVPM family */
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                is_vpm_op = true;
                break;
        default:
                break;
        }

        return is_vpm_op;
}
/**
 * Returns true when an ALU instruction touches the VPM, either because its
 * add op is a VPM opcode or because the add or mul half does a magic write
 * to a VPM write address.  Non-ALU instructions always yield false.
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        /* Only ALU instructions are inspected. */
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
                return false;

        if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op))
                return true;

        /* Add half is checked first, then mul; short-circuiting keeps the
         * same evaluation order as testing them one after the other.
         */
        return (inst->alu.add.magic_write &&
                v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) ||
               (inst->alu.mul.magic_write &&
                v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr));
}
bool bool
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst) const struct v3d_qpu_instr *inst)

View File

@@ -173,10 +173,13 @@ enum v3d_qpu_add_op {
V3D_QPU_A_TMUWT, V3D_QPU_A_TMUWT,
V3D_QPU_A_VPMSETUP, V3D_QPU_A_VPMSETUP,
V3D_QPU_A_VPMWT, V3D_QPU_A_VPMWT,
V3D_QPU_A_LDVPMV, V3D_QPU_A_LDVPMV_IN,
V3D_QPU_A_LDVPMD, V3D_QPU_A_LDVPMV_OUT,
V3D_QPU_A_LDVPMD_IN,
V3D_QPU_A_LDVPMD_OUT,
V3D_QPU_A_LDVPMP, V3D_QPU_A_LDVPMP,
V3D_QPU_A_LDVPMG, V3D_QPU_A_LDVPMG_IN,
V3D_QPU_A_LDVPMG_OUT,
V3D_QPU_A_FCMP, V3D_QPU_A_FCMP,
V3D_QPU_A_VFMAX, V3D_QPU_A_VFMAX,
V3D_QPU_A_FROUND, V3D_QPU_A_FROUND,
@@ -425,6 +428,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst);
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;

View File

@@ -515,7 +515,11 @@ static const struct opcode_desc add_ops[] = {
{ 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
{ 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
{ 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP }, { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
{ 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
{ 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
{ 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
{ 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
/* FIXME: MORE COMPLICATED */ /* FIXME: MORE COMPLICATED */
/* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
@@ -823,7 +827,24 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.add.a = mux_a; instr->alu.add.a = mux_a;
instr->alu.add.b = mux_b; instr->alu.add.b = mux_b;
instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;
instr->alu.add.magic_write = false;
if (packed_inst & VC5_QPU_MA) {
switch (instr->alu.add.op) {
case V3D_QPU_A_LDVPMV_IN:
instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
break;
case V3D_QPU_A_LDVPMD_IN:
instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
break;
case V3D_QPU_A_LDVPMG_IN:
instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
break;
default:
instr->alu.add.magic_write = true;
break;
}
}
return true; return true;
} }
@@ -930,16 +951,36 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
if (nsrc < 1) if (nsrc < 1)
mux_a = ffs(desc->mux_a_mask) - 1; mux_a = ffs(desc->mux_a_mask) - 1;
bool no_magic_write = false;
switch (instr->alu.add.op) { switch (instr->alu.add.op) {
case V3D_QPU_A_STVPMV: case V3D_QPU_A_STVPMV:
waddr = 0; waddr = 0;
no_magic_write = true;
break; break;
case V3D_QPU_A_STVPMD: case V3D_QPU_A_STVPMD:
waddr = 1; waddr = 1;
no_magic_write = true;
break; break;
case V3D_QPU_A_STVPMP: case V3D_QPU_A_STVPMP:
waddr = 2; waddr = 2;
no_magic_write = true;
break; break;
case V3D_QPU_A_LDVPMV_IN:
case V3D_QPU_A_LDVPMD_IN:
case V3D_QPU_A_LDVPMP:
case V3D_QPU_A_LDVPMG_IN:
assert(!instr->alu.add.magic_write);
break;
case V3D_QPU_A_LDVPMV_OUT:
case V3D_QPU_A_LDVPMD_OUT:
case V3D_QPU_A_LDVPMG_OUT:
assert(!instr->alu.add.magic_write);
*packed_instr |= VC5_QPU_MA;
break;
default: default:
break; break;
} }
@@ -1065,7 +1106,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
*packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
*packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD); *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
*packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
if (instr->alu.add.magic_write) if (instr->alu.add.magic_write && !no_magic_write)
*packed_instr |= VC5_QPU_MA; *packed_instr |= VC5_QPU_MA;
return true; return true;

View File

@@ -76,6 +76,10 @@ static const struct {
{ 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" }, { 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" },
{ 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" }, { 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
{ 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" }, { 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" },
/* v4.1 opcodes */
{ 41, 0x3de020c7bdfd200dull, "ldvpmg_in rf7, r2, r2; mov r3, 13" },
{ 41, 0x3de02040f8ff7201ull, "stvpmv 1, rf8 ; mov r1, 1" },
}; };
static void static void