broadcom/vc5: Add support for V3Dv4 signal bits.

The WRTMUC replaces the implicit uniform loads in the first two texture
instructions.  LDVPM disappears in favor of an ALU op.  LDVARY, LDTMU,
LDTLB, and LDUNIF*RF now write to arbitrary registers, which required
passing the devinfo through to a few more functions.
This commit is contained in:
Eric Anholt
2018-01-03 21:42:33 -08:00
parent 81ec2ba229
commit dfee62eed3
12 changed files with 322 additions and 45 deletions

View File

@@ -78,6 +78,7 @@ struct schedule_node_child {
enum direction { F, R };
struct schedule_state {
const struct v3d_device_info *devinfo;
struct schedule_node *last_r[6];
struct schedule_node *last_rf[64];
struct schedule_node *last_sf;
@@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n,
static void
calculate_deps(struct schedule_state *state, struct schedule_node *n)
{
const struct v3d_device_info *devinfo = state->devinfo;
struct qinst *qinst = n->inst;
struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
process_waddr_deps(state, n, inst->alu.mul.waddr,
inst->alu.mul.magic_write);
}
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
process_waddr_deps(state, n, inst->sig_addr,
inst->sig_magic);
}
if (v3d_qpu_writes_r3(inst))
if (v3d_qpu_writes_r3(devinfo, inst))
add_write_dep(state, &state->last_r[3], n);
if (v3d_qpu_writes_r4(inst))
if (v3d_qpu_writes_r4(devinfo, inst))
add_write_dep(state, &state->last_r[4], n);
if (v3d_qpu_writes_r5(inst))
if (v3d_qpu_writes_r5(devinfo, inst))
add_write_dep(state, &state->last_r[5], n);
if (inst->sig.thrsw) {
@@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
state.devinfo = c->devinfo;
state.dir = F;
list_for_each_entry(struct schedule_node, node, schedule_list, link)
@@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
struct schedule_state state;
memset(&state, 0, sizeof(state));
state.devinfo = c->devinfo;
state.dir = R;
for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
@@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
}
static bool
writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
writes_too_soon_after_write(const struct v3d_device_info *devinfo,
struct choose_scoreboard *scoreboard,
struct qinst *qinst)
{
const struct v3d_qpu_instr *inst = &qinst->qpu;
@@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
* occur if a dead SFU computation makes it to scheduling.
*/
if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
v3d_qpu_writes_r4(inst))
v3d_qpu_writes_r4(devinfo, inst))
return true;
return false;
@@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
return (inst->sig.ldvpm ||
inst->sig.ldtmu ||
inst->sig.ldtlb ||
inst->sig.ldtlbu);
inst->sig.ldtlbu ||
inst->sig.wrtmuc);
}
static bool
@@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
return false;
}
/* Can't do more than one peripheral access in an instruction. */
/* Can't do more than one peripheral access in an instruction.
*
* XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
* WRTMUC with a TMU magic register write (other than tmuc).
*/
if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
return false;
@@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.thrsw |= b->sig.thrsw;
merge.sig.ldunif |= b->sig.ldunif;
merge.sig.ldunifrf |= b->sig.ldunifrf;
merge.sig.ldunifa |= b->sig.ldunifa;
merge.sig.ldunifarf |= b->sig.ldunifarf;
merge.sig.ldtmu |= b->sig.ldtmu;
merge.sig.ldvary |= b->sig.ldvary;
merge.sig.ldvpm |= b->sig.ldvpm;
@@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
merge.sig.rotate |= b->sig.rotate;
merge.sig.wrtmuc |= b->sig.wrtmuc;
if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
v3d_qpu_sig_writes_address(devinfo, &b->sig))
return false;
merge.sig_addr |= b->sig_addr;
merge.sig_magic |= b->sig_magic;
uint64_t packed;
bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
@@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
if (reads_too_soon_after_write(scoreboard, n->inst))
continue;
if (writes_too_soon_after_write(scoreboard, n->inst))
if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
continue;
/* "A scoreboard wait must not occur in the first two
@@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
* otherwise get scheduled so ldunif and ldvary try to update
* r5 in the same tick.
*/
if (inst->sig.ldunif &&
if ((inst->sig.ldunif || inst->sig.ldunifa) &&
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
continue;
}

View File

@@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
static void
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
{
const struct v3d_device_info *devinfo = state->c->devinfo;
const struct v3d_qpu_instr *inst = &qinst->qpu;
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
@@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
* r5 one instruction later, which is illegal to have
* together.
*/
if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
if (state->last && state->last->sig.ldvary &&
(inst->sig.ldunif || inst->sig.ldunifa)) {
fail_instr(state, "LDUNIF after a LDVARY");
}
@@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
fail_instr(state, "R4 read too soon after SFU");
if (v3d_qpu_writes_r4(inst))
if (v3d_qpu_writes_r4(devinfo, inst))
fail_instr(state, "R4 write too soon after SFU");
if (sfu_writes)

View File

@@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst);
bool vir_is_mul(struct qinst *inst);
bool vir_is_float_input(struct qinst *inst);
bool vir_depends_on_flags(struct qinst *inst);
bool vir_writes_r3(struct qinst *inst);
bool vir_writes_r4(struct qinst *inst);
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
uint8_t vir_channels_written(struct qinst *inst);

View File

@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
int
@@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst)
}
bool
vir_writes_r3(struct qinst *inst)
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
{
for (int i = 0; i < vir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
@@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst)
}
}
if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
inst->qpu.sig.ldtlb ||
inst->qpu.sig.ldtlbu ||
inst->qpu.sig.ldvpm)) {
return true;
}
return false;
}
bool
vir_writes_r4(struct qinst *inst)
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
{
switch (inst->dst.file) {
case QFILE_MAGIC:
@@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst)
break;
}
if (inst->qpu.sig.ldtmu)
if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
return true;
return false;

View File

@@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*/
#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
static void
@@ -145,6 +146,24 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg)
}
}
/**
 * Prints to stderr the destination suffix of a signal that carries its own
 * write address.
 *
 * Nothing is printed when devinfo->ver is below 41.  Otherwise the suffix is
 * ".rfN" for a non-magic address, or ".<name>" for a magic address, falling
 * back to ".UNKNOWNN" when v3d_qpu_magic_waddr_name() has no name for it.
 */
static void
vir_dump_sig_addr(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *instr)
{
        if (devinfo->ver < 41)
                return;

        /* Non-magic destination: print the sig_addr value as an rf index. */
        if (!instr->sig_magic) {
                fprintf(stderr, ".rf%d", instr->sig_addr);
                return;
        }

        const char *magic_name = v3d_qpu_magic_waddr_name(instr->sig_addr);
        if (!magic_name) {
                fprintf(stderr, ".UNKNOWN%d", instr->sig_addr);
                return;
        }

        fprintf(stderr, ".%s", magic_name);
}
static void
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
{
@@ -152,14 +171,36 @@ vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
if (sig->thrsw)
fprintf(stderr, "; thrsw");
if (sig->ldvary)
if (sig->ldvary) {
fprintf(stderr, "; ldvary");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->ldvpm)
fprintf(stderr, "; ldvpm");
if (sig->ldtmu)
if (sig->ldtmu) {
fprintf(stderr, "; ldtmu");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->ldtlb) {
fprintf(stderr, "; ldtlb");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->ldtlbu) {
fprintf(stderr, "; ldtlbu");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->ldunif)
fprintf(stderr, "; ldunif");
if (sig->ldunifrf) {
fprintf(stderr, "; ldunifrf");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->ldunifa)
fprintf(stderr, "; ldunifa");
if (sig->ldunifarf) {
fprintf(stderr, "; ldunifarf");
vir_dump_sig_addr(c->devinfo, &inst->qpu);
}
if (sig->wrtmuc)
fprintf(stderr, "; wrtmuc");
}

View File

@@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c)
* result to a temp), nothing else can be stored in r3/r4 across
* it.
*/
if (vir_writes_r3(inst)) {
if (vir_writes_r3(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {
@@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c)
}
}
}
if (vir_writes_r4(inst)) {
if (vir_writes_r4(c->devinfo, inst)) {
for (int i = 0; i < c->num_temps; i++) {
if (c->temp_start[i] < ip &&
c->temp_end[i] > ip) {

View File

@@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c,
}
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
if (v3d_qpu_sig_writes_address(c->devinfo,
&qinst->qpu.sig)) {
assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
qinst->qpu.sig_addr = dst.index;
qinst->qpu.sig_magic = dst.magic;
} else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
if (nsrc >= 1) {
set_src(&qinst->qpu,

View File

@@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op);
append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf));
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf));
@@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
append(disasm, "; ");
append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf));
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf));
@@ -161,6 +163,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
}
}
/**
 * Appends to the disassembly output the destination suffix of a signal that
 * carries its own write address.
 *
 * Nothing is appended when disasm->devinfo->ver is below 41.  Otherwise the
 * suffix is ".rfN" for a non-magic address, or ".<name>" for a magic
 * address, falling back to ".UNKNOWNN" when v3d_qpu_magic_waddr_name() has
 * no name for it.
 */
static void
v3d_qpu_disasm_sig_addr(struct disasm_state *disasm,
                        const struct v3d_qpu_instr *instr)
{
        if (disasm->devinfo->ver < 41)
                return;

        /* Non-magic destination: print the sig_addr value as an rf index. */
        if (!instr->sig_magic) {
                append(disasm, ".rf%d", instr->sig_addr);
                return;
        }

        const char *magic_name = v3d_qpu_magic_waddr_name(instr->sig_addr);
        if (!magic_name) {
                append(disasm, ".UNKNOWN%d", instr->sig_addr);
                return;
        }

        append(disasm, ".%s", magic_name);
}
static void
v3d_qpu_disasm_sig(struct disasm_state *disasm,
const struct v3d_qpu_instr *instr)
@@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
!sig->ldvpm &&
!sig->ldtmu &&
!sig->ldunif &&
!sig->ldunifrf &&
!sig->ldunifa &&
!sig->ldunifarf &&
!sig->wrtmuc) {
return;
}
@@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
if (sig->thrsw)
append(disasm, "; thrsw");
if (sig->ldvary)
if (sig->ldvary) {
append(disasm, "; ldvary");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->ldvpm)
append(disasm, "; ldvpm");
if (sig->ldtmu)
if (sig->ldtmu) {
append(disasm, "; ldtmu");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->ldtlb) {
append(disasm, "; ldtlb");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->ldtlbu) {
append(disasm, "; ldtlbu");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->ldunif)
append(disasm, "; ldunif");
if (sig->ldunifrf) {
append(disasm, "; ldunifrf");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->ldunifa)
append(disasm, "; ldunifa");
if (sig->ldunifarf) {
append(disasm, "; ldunifarf");
v3d_qpu_disasm_sig_addr(disasm, instr);
}
if (sig->wrtmuc)
append(disasm, "; wrtmuc");
}

View File

@@ -23,6 +23,7 @@
#include <stdlib.h>
#include "util/macros.h"
#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"
#ifndef QPU_MASK
@@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
}
bool
v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
{
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.magic_write &&
@@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
}
}
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
return true;
}
return inst->sig.ldvary || inst->sig.ldvpm;
}
bool
v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
{
if (inst->sig.ldtmu)
return true;
@@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
}
}
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) {
return true;
}
return false;
}
bool
v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
{
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.magic_write &&
@@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
}
}
return inst->sig.ldvary || inst->sig.ldunif;
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
return true;
}
return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
}
bool
@@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
(mul_nsrc > 0 && inst->alu.mul.a == mux) ||
(mul_nsrc > 1 && inst->alu.mul.b == mux));
}
/**
 * Reports whether the given signal set includes a signal that writes to an
 * explicitly encoded destination address.
 *
 * Always false when devinfo->ver is below 41.  From 41 on, true if any of
 * ldunifrf, ldunifarf, ldvary, ldtmu, ldtlb or ldtlbu is set.
 */
bool
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                           const struct v3d_qpu_sig *sig)
{
        if (devinfo->ver >= 41) {
                const bool uniform_to_addr = sig->ldunifrf ||
                                             sig->ldunifarf;
                const bool load_to_addr = sig->ldvary ||
                                          sig->ldtmu ||
                                          sig->ldtlb ||
                                          sig->ldtlbu;

                return uniform_to_addr || load_to_addr;
        }

        return false;
}

View File

@@ -42,6 +42,9 @@ struct v3d_device_info;
struct v3d_qpu_sig {
bool thrsw:1;
bool ldunif:1;
bool ldunifa:1;
bool ldunifrf:1;
bool ldunifarf:1;
bool ldtmu:1;
bool ldvary:1;
bool ldvpm:1;
@@ -347,6 +350,8 @@ struct v3d_qpu_instr {
enum v3d_qpu_instr_type type;
struct v3d_qpu_sig sig;
uint8_t sig_addr;
bool sig_magic; /* If the signal writes to a magic address */
uint8_t raddr_a;
uint8_t raddr_b;
struct v3d_qpu_flags flags;
@@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
#endif

View File

@@ -55,11 +55,7 @@
#define VC5_QPU_COND_SHIFT 46
#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
#define VC5_QPU_COND_IFA 0
#define VC5_QPU_COND_IFB 1
#define VC5_QPU_COND_IFNA 2
#define VC5_QPU_COND_IFNB 3
#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
#define VC5_QPU_MM QPU_MASK(45, 45)
#define VC5_QPU_MA QPU_MASK(44, 44)
@@ -113,6 +109,9 @@
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
@@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
[31] = { SMIMM, },
};
/* Signal table used by v3d_qpu_sig_unpack() and v3d_qpu_sig_pack() when
 * devinfo->ver == 40.  The array index is the packed signal value; the
 * entry is the set of signal bits that value stands for.  Indices without
 * an initializer are zero-filled, which the unpack path documents as
 * reserved (apart from element 0, the "no signals" encoding).
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
/* MISC R3 R4 R5 */
[0] = { },
[1] = { THRSW, },
[2] = { LDUNIF },
[3] = { THRSW, LDUNIF },
[4] = { LDTMU, },
[5] = { THRSW, LDTMU, },
[6] = { LDTMU, LDUNIF },
[7] = { THRSW, LDTMU, LDUNIF },
[8] = { LDVARY, },
[9] = { THRSW, LDVARY, },
[10] = { LDVARY, LDUNIF },
[11] = { THRSW, LDVARY, LDUNIF },
/* 12-13 reserved */
[14] = { SMIMM, LDVARY, },
[15] = { SMIMM, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
[19] = { THRSW, WRTMUC },
[20] = { LDVARY, WRTMUC },
[21] = { THRSW, LDVARY, WRTMUC },
[22] = { UCB, },
[23] = { ROT, },
/* 24-30 reserved */
[31] = { SMIMM, LDTMU, },
};
/* Signal table used by v3d_qpu_sig_unpack() and v3d_qpu_sig_pack() when
 * devinfo->ver >= 41.  The array index is the packed signal value; the
 * entry is the set of signal bits that value stands for.  Indices without
 * an initializer are zero-filled, which the unpack path documents as
 * reserved (apart from element 0, the "no signals" encoding).
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
/* MISC phys R5 */
[0] = { },
[1] = { THRSW, },
[2] = { LDUNIF },
[3] = { THRSW, LDUNIF },
[4] = { LDTMU, },
[5] = { THRSW, LDTMU, },
[6] = { LDTMU, LDUNIF },
[7] = { THRSW, LDTMU, LDUNIF },
[8] = { LDVARY, },
[9] = { THRSW, LDVARY, },
[10] = { LDVARY, LDUNIF },
[11] = { THRSW, LDVARY, LDUNIF },
[12] = { LDUNIFRF },
[13] = { THRSW, LDUNIFRF },
[14] = { SMIMM, LDVARY, },
[15] = { SMIMM, },
[16] = { LDTLB, },
[17] = { LDTLBU, },
[18] = { WRTMUC },
[19] = { THRSW, WRTMUC },
[20] = { LDVARY, WRTMUC },
[21] = { THRSW, LDVARY, WRTMUC },
[22] = { UCB, },
[23] = { ROT, },
/* 24-25 are the LDUNIFA/LDUNIFARF signals below; 26-30 reserved */
[24] = { LDUNIFA},
[25] = { LDUNIFARF },
[31] = { SMIMM, LDTMU, },
};
bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
uint32_t packed_sig,
@@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
return false;
*sig = v33_sig_map[packed_sig];
if (devinfo->ver >= 41)
*sig = v41_sig_map[packed_sig];
else if (devinfo->ver == 40)
*sig = v40_sig_map[packed_sig];
else
*sig = v33_sig_map[packed_sig];
/* Signals with zeroed unpacked contents after element 0 are reserved. */
return (packed_sig == 0 ||
@@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
{
static const struct v3d_qpu_sig *map;
map = v33_sig_map;
if (devinfo->ver >= 41)
map = v41_sig_map;
else if (devinfo->ver == 40)
map = v40_sig_map;
else
map = v33_sig_map;
for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
@@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
&instr->sig))
return false;
if (!v3d_qpu_flags_unpack(devinfo,
QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
&instr->flags))
return false;
uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
instr->flags.ac = V3D_QPU_COND_NONE;
instr->flags.mc = V3D_QPU_COND_NONE;
instr->flags.apf = V3D_QPU_PF_NONE;
instr->flags.mpf = V3D_QPU_PF_NONE;
instr->flags.auf = V3D_QPU_UF_NONE;
instr->flags.muf = V3D_QPU_UF_NONE;
} else {
if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
return false;
}
instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
@@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
return false;
uint32_t flags;
if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
return false;
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
if (instr->flags.ac != V3D_QPU_COND_NONE ||
instr->flags.mc != V3D_QPU_COND_NONE ||
instr->flags.apf != V3D_QPU_PF_NONE ||
instr->flags.mpf != V3D_QPU_PF_NONE ||
instr->flags.auf != V3D_QPU_UF_NONE ||
instr->flags.muf != V3D_QPU_UF_NONE) {
return false;
}
flags = instr->sig_addr;
if (instr->sig_magic)
flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
} else {
if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
return false;
}
*packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
} else {
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
return false;
}
return true;

View File

@@ -63,6 +63,13 @@ static const struct {
{ 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" },
{ 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" },
/* v4.1 signals */
{ 41, 0x1f010520cf60a000ull, "fcmp.andz rf32, r2.h, r1.h; vfmul rf20, r0.hh, r3; ldunifa" },
{ 41, 0x932045e6c16ea000ull, "fcmp rf38, r2.abs, r5; fmul rf23.l, r3, r3.abs; ldunifarf.rf1" },
{ 41, 0xd72f0434e43ae5c0ull, "fcmp rf52.h, rf23, r5.abs; fmul rf16.h, rf23, r1; ldunifarf.rf60" },
{ 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" },
{ 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
{ 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" },
};
static void