broadcom/vc5: Add support for V3Dv4 signal bits.
The WRTMUC signal replaces the implicit uniform loads in the first two texture instructions. The LDVPM signal disappears in favor of an ALU op. LDVARY, LDTMU, LDTLB, and LDUNIF*RF (LDUNIFRF and LDUNIFARF) now write to arbitrary registers, which required passing the devinfo through to a few more functions.
This commit is contained in:
@@ -78,6 +78,7 @@ struct schedule_node_child {
|
||||
enum direction { F, R };
|
||||
|
||||
struct schedule_state {
|
||||
const struct v3d_device_info *devinfo;
|
||||
struct schedule_node *last_r[6];
|
||||
struct schedule_node *last_rf[64];
|
||||
struct schedule_node *last_sf;
|
||||
@@ -265,6 +266,7 @@ process_uf_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
static void
|
||||
calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
{
|
||||
const struct v3d_device_info *devinfo = state->devinfo;
|
||||
struct qinst *qinst = n->inst;
|
||||
struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
|
||||
@@ -356,12 +358,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
||||
process_waddr_deps(state, n, inst->alu.mul.waddr,
|
||||
inst->alu.mul.magic_write);
|
||||
}
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
|
||||
process_waddr_deps(state, n, inst->sig_addr,
|
||||
inst->sig_magic);
|
||||
}
|
||||
|
||||
if (v3d_qpu_writes_r3(inst))
|
||||
if (v3d_qpu_writes_r3(devinfo, inst))
|
||||
add_write_dep(state, &state->last_r[3], n);
|
||||
if (v3d_qpu_writes_r4(inst))
|
||||
if (v3d_qpu_writes_r4(devinfo, inst))
|
||||
add_write_dep(state, &state->last_r[4], n);
|
||||
if (v3d_qpu_writes_r5(inst))
|
||||
if (v3d_qpu_writes_r5(devinfo, inst))
|
||||
add_write_dep(state, &state->last_r[5], n);
|
||||
|
||||
if (inst->sig.thrsw) {
|
||||
@@ -410,6 +416,7 @@ calculate_forward_deps(struct v3d_compile *c, struct list_head *schedule_list)
|
||||
struct schedule_state state;
|
||||
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.devinfo = c->devinfo;
|
||||
state.dir = F;
|
||||
|
||||
list_for_each_entry(struct schedule_node, node, schedule_list, link)
|
||||
@@ -423,6 +430,7 @@ calculate_reverse_deps(struct v3d_compile *c, struct list_head *schedule_list)
|
||||
struct schedule_state state;
|
||||
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.devinfo = c->devinfo;
|
||||
state.dir = R;
|
||||
|
||||
for (node = schedule_list->prev; schedule_list != node; node = node->prev) {
|
||||
@@ -514,7 +522,8 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
}
|
||||
|
||||
static bool
|
||||
writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
writes_too_soon_after_write(const struct v3d_device_info *devinfo,
|
||||
struct choose_scoreboard *scoreboard,
|
||||
struct qinst *qinst)
|
||||
{
|
||||
const struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
@@ -524,7 +533,7 @@ writes_too_soon_after_write(struct choose_scoreboard *scoreboard,
|
||||
* occur if a dead SFU computation makes it to scheduling.
|
||||
*/
|
||||
if (scoreboard->tick - scoreboard->last_sfu_write_tick < 2 &&
|
||||
v3d_qpu_writes_r4(inst))
|
||||
v3d_qpu_writes_r4(devinfo, inst))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -605,7 +614,8 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
|
||||
return (inst->sig.ldvpm ||
|
||||
inst->sig.ldtmu ||
|
||||
inst->sig.ldtlb ||
|
||||
inst->sig.ldtlbu);
|
||||
inst->sig.ldtlbu ||
|
||||
inst->sig.wrtmuc);
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -619,7 +629,11 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Can't do more than one peripheral access in an instruction. */
|
||||
/* Can't do more than one peripheral access in an instruction.
|
||||
*
|
||||
* XXX: V3D 4.1 allows TMU read along with a VPM read or write, and
|
||||
* WRTMUC with a TMU magic register write (other than tmuc).
|
||||
*/
|
||||
if (qpu_accesses_peripheral(a) && qpu_accesses_peripheral(b))
|
||||
return false;
|
||||
|
||||
@@ -663,6 +677,9 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
|
||||
merge.sig.thrsw |= b->sig.thrsw;
|
||||
merge.sig.ldunif |= b->sig.ldunif;
|
||||
merge.sig.ldunifrf |= b->sig.ldunifrf;
|
||||
merge.sig.ldunifa |= b->sig.ldunifa;
|
||||
merge.sig.ldunifarf |= b->sig.ldunifarf;
|
||||
merge.sig.ldtmu |= b->sig.ldtmu;
|
||||
merge.sig.ldvary |= b->sig.ldvary;
|
||||
merge.sig.ldvpm |= b->sig.ldvpm;
|
||||
@@ -673,6 +690,12 @@ qpu_merge_inst(const struct v3d_device_info *devinfo,
|
||||
merge.sig.rotate |= b->sig.rotate;
|
||||
merge.sig.wrtmuc |= b->sig.wrtmuc;
|
||||
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &a->sig) &&
|
||||
v3d_qpu_sig_writes_address(devinfo, &b->sig))
|
||||
return false;
|
||||
merge.sig_addr |= b->sig_addr;
|
||||
merge.sig_magic |= b->sig_magic;
|
||||
|
||||
uint64_t packed;
|
||||
bool ok = v3d_qpu_instr_pack(devinfo, &merge, &packed);
|
||||
|
||||
@@ -719,7 +742,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
|
||||
if (reads_too_soon_after_write(scoreboard, n->inst))
|
||||
continue;
|
||||
|
||||
if (writes_too_soon_after_write(scoreboard, n->inst))
|
||||
if (writes_too_soon_after_write(devinfo, scoreboard, n->inst))
|
||||
continue;
|
||||
|
||||
/* "A scoreboard wait must not occur in the first two
|
||||
@@ -735,7 +758,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
|
||||
* otherwise get scheduled so ldunif and ldvary try to update
|
||||
* r5 in the same tick.
|
||||
*/
|
||||
if (inst->sig.ldunif &&
|
||||
if ((inst->sig.ldunif || inst->sig.ldunifa) &&
|
||||
scoreboard->tick == scoreboard->last_ldvary_tick + 1) {
|
||||
continue;
|
||||
}
|
||||
|
@@ -85,6 +85,7 @@ qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
|
||||
static void
|
||||
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
{
|
||||
const struct v3d_device_info *devinfo = state->c->devinfo;
|
||||
const struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
@@ -94,7 +95,8 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
* r5 one instruction later, which is illegal to have
|
||||
* together.
|
||||
*/
|
||||
if (state->last && state->last->sig.ldvary && inst->sig.ldunif) {
|
||||
if (state->last && state->last->sig.ldvary &&
|
||||
(inst->sig.ldunif || inst->sig.ldunifa)) {
|
||||
fail_instr(state, "LDUNIF after a LDVARY");
|
||||
}
|
||||
|
||||
@@ -143,7 +145,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
|
||||
fail_instr(state, "R4 read too soon after SFU");
|
||||
|
||||
if (v3d_qpu_writes_r4(inst))
|
||||
if (v3d_qpu_writes_r4(devinfo, inst))
|
||||
fail_instr(state, "R4 write too soon after SFU");
|
||||
|
||||
if (sfu_writes)
|
||||
|
@@ -650,8 +650,8 @@ bool vir_is_add(struct qinst *inst);
|
||||
bool vir_is_mul(struct qinst *inst);
|
||||
bool vir_is_float_input(struct qinst *inst);
|
||||
bool vir_depends_on_flags(struct qinst *inst);
|
||||
bool vir_writes_r3(struct qinst *inst);
|
||||
bool vir_writes_r4(struct qinst *inst);
|
||||
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
||||
uint8_t vir_channels_written(struct qinst *inst);
|
||||
|
||||
|
@@ -21,6 +21,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "broadcom/common/v3d_device_info.h"
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
int
|
||||
@@ -198,7 +199,7 @@ vir_depends_on_flags(struct qinst *inst)
|
||||
}
|
||||
|
||||
bool
|
||||
vir_writes_r3(struct qinst *inst)
|
||||
vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
{
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
switch (inst->src[i].file) {
|
||||
@@ -210,11 +211,18 @@ vir_writes_r3(struct qinst *inst)
|
||||
}
|
||||
}
|
||||
|
||||
if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
|
||||
inst->qpu.sig.ldtlb ||
|
||||
inst->qpu.sig.ldtlbu ||
|
||||
inst->qpu.sig.ldvpm)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
vir_writes_r4(struct qinst *inst)
|
||||
vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst)
|
||||
{
|
||||
switch (inst->dst.file) {
|
||||
case QFILE_MAGIC:
|
||||
@@ -231,7 +239,7 @@ vir_writes_r4(struct qinst *inst)
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->qpu.sig.ldtmu)
|
||||
if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@@ -21,6 +21,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "broadcom/common/v3d_device_info.h"
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
static void
|
||||
@@ -145,6 +146,24 @@ vir_print_reg(struct v3d_compile *c, struct qreg reg)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vir_dump_sig_addr(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr)
|
||||
{
|
||||
if (devinfo->ver < 41)
|
||||
return;
|
||||
|
||||
if (!instr->sig_magic)
|
||||
fprintf(stderr, ".rf%d", instr->sig_addr);
|
||||
else {
|
||||
const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
|
||||
if (name)
|
||||
fprintf(stderr, ".%s", name);
|
||||
else
|
||||
fprintf(stderr, ".UNKNOWN%d", instr->sig_addr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
@@ -152,14 +171,36 @@ vir_dump_sig(struct v3d_compile *c, struct qinst *inst)
|
||||
|
||||
if (sig->thrsw)
|
||||
fprintf(stderr, "; thrsw");
|
||||
if (sig->ldvary)
|
||||
if (sig->ldvary) {
|
||||
fprintf(stderr, "; ldvary");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->ldvpm)
|
||||
fprintf(stderr, "; ldvpm");
|
||||
if (sig->ldtmu)
|
||||
if (sig->ldtmu) {
|
||||
fprintf(stderr, "; ldtmu");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->ldtlb) {
|
||||
fprintf(stderr, "; ldtlb");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->ldtlbu) {
|
||||
fprintf(stderr, "; ldtlbu");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->ldunif)
|
||||
fprintf(stderr, "; ldunif");
|
||||
if (sig->ldunifrf) {
|
||||
fprintf(stderr, "; ldunifrf");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->ldunifa)
|
||||
fprintf(stderr, "; ldunifa");
|
||||
if (sig->ldunifarf) {
|
||||
fprintf(stderr, "; ldunifarf");
|
||||
vir_dump_sig_addr(c->devinfo, &inst->qpu);
|
||||
}
|
||||
if (sig->wrtmuc)
|
||||
fprintf(stderr, "; wrtmuc");
|
||||
}
|
||||
|
@@ -139,7 +139,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* result to a temp), nothing else can be stored in r3/r4 across
|
||||
* it.
|
||||
*/
|
||||
if (vir_writes_r3(inst)) {
|
||||
if (vir_writes_r3(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip &&
|
||||
c->temp_end[i] > ip) {
|
||||
@@ -149,7 +149,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
}
|
||||
}
|
||||
}
|
||||
if (vir_writes_r4(inst)) {
|
||||
if (vir_writes_r4(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip &&
|
||||
c->temp_end[i] > ip) {
|
||||
|
@@ -264,7 +264,14 @@ v3d_generate_code_block(struct v3d_compile *c,
|
||||
}
|
||||
|
||||
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
|
||||
if (v3d_qpu_sig_writes_address(c->devinfo,
|
||||
&qinst->qpu.sig)) {
|
||||
assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
|
||||
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
|
||||
|
||||
qinst->qpu.sig_addr = dst.index;
|
||||
qinst->qpu.sig_magic = dst.magic;
|
||||
} else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {
|
||||
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
|
||||
if (nsrc >= 1) {
|
||||
set_src(&qinst->qpu,
|
||||
|
@@ -91,7 +91,8 @@ v3d_qpu_disasm_add(struct disasm_state *disasm,
|
||||
int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op);
|
||||
|
||||
append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op));
|
||||
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
|
||||
if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
|
||||
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac));
|
||||
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf));
|
||||
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf));
|
||||
|
||||
@@ -130,7 +131,8 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
|
||||
append(disasm, "; ");
|
||||
|
||||
append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op));
|
||||
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
|
||||
if (!v3d_qpu_sig_writes_address(disasm->devinfo, &instr->sig))
|
||||
append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc));
|
||||
append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf));
|
||||
append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf));
|
||||
|
||||
@@ -161,6 +163,24 @@ v3d_qpu_disasm_mul(struct disasm_state *disasm,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
v3d_qpu_disasm_sig_addr(struct disasm_state *disasm,
|
||||
const struct v3d_qpu_instr *instr)
|
||||
{
|
||||
if (disasm->devinfo->ver < 41)
|
||||
return;
|
||||
|
||||
if (!instr->sig_magic)
|
||||
append(disasm, ".rf%d", instr->sig_addr);
|
||||
else {
|
||||
const char *name = v3d_qpu_magic_waddr_name(instr->sig_addr);
|
||||
if (name)
|
||||
append(disasm, ".%s", name);
|
||||
else
|
||||
append(disasm, ".UNKNOWN%d", instr->sig_addr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
v3d_qpu_disasm_sig(struct disasm_state *disasm,
|
||||
const struct v3d_qpu_instr *instr)
|
||||
@@ -172,6 +192,9 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
|
||||
!sig->ldvpm &&
|
||||
!sig->ldtmu &&
|
||||
!sig->ldunif &&
|
||||
!sig->ldunifrf &&
|
||||
!sig->ldunifa &&
|
||||
!sig->ldunifarf &&
|
||||
!sig->wrtmuc) {
|
||||
return;
|
||||
}
|
||||
@@ -180,14 +203,36 @@ v3d_qpu_disasm_sig(struct disasm_state *disasm,
|
||||
|
||||
if (sig->thrsw)
|
||||
append(disasm, "; thrsw");
|
||||
if (sig->ldvary)
|
||||
if (sig->ldvary) {
|
||||
append(disasm, "; ldvary");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->ldvpm)
|
||||
append(disasm, "; ldvpm");
|
||||
if (sig->ldtmu)
|
||||
if (sig->ldtmu) {
|
||||
append(disasm, "; ldtmu");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->ldtlb) {
|
||||
append(disasm, "; ldtlb");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->ldtlbu) {
|
||||
append(disasm, "; ldtlbu");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->ldunif)
|
||||
append(disasm, "; ldunif");
|
||||
if (sig->ldunifrf) {
|
||||
append(disasm, "; ldunifrf");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->ldunifa)
|
||||
append(disasm, "; ldunifa");
|
||||
if (sig->ldunifarf) {
|
||||
append(disasm, "; ldunifarf");
|
||||
v3d_qpu_disasm_sig_addr(disasm, instr);
|
||||
}
|
||||
if (sig->wrtmuc)
|
||||
append(disasm, "; wrtmuc");
|
||||
}
|
||||
|
@@ -23,6 +23,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "util/macros.h"
|
||||
#include "broadcom/common/v3d_device_info.h"
|
||||
#include "qpu_instr.h"
|
||||
|
||||
#ifndef QPU_MASK
|
||||
@@ -600,7 +601,8 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
|
||||
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.magic_write &&
|
||||
@@ -614,11 +616,17 @@ v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst)
|
||||
}
|
||||
}
|
||||
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R3) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return inst->sig.ldvary || inst->sig.ldvpm;
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
|
||||
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (inst->sig.ldtmu)
|
||||
return true;
|
||||
@@ -637,11 +645,17 @@ v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst)
|
||||
}
|
||||
}
|
||||
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
|
||||
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.magic_write &&
|
||||
@@ -655,7 +669,12 @@ v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst)
|
||||
}
|
||||
}
|
||||
|
||||
return inst->sig.ldvary || inst->sig.ldunif;
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R5) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -669,3 +688,18 @@ v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
|
||||
(mul_nsrc > 0 && inst->alu.mul.a == mux) ||
|
||||
(mul_nsrc > 1 && inst->alu.mul.b == mux));
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_sig *sig)
|
||||
{
|
||||
if (devinfo->ver < 41)
|
||||
return false;
|
||||
|
||||
return (sig->ldunifrf ||
|
||||
sig->ldunifarf ||
|
||||
sig->ldvary ||
|
||||
sig->ldtmu ||
|
||||
sig->ldtlb ||
|
||||
sig->ldtlbu);
|
||||
}
|
||||
|
@@ -42,6 +42,9 @@ struct v3d_device_info;
|
||||
struct v3d_qpu_sig {
|
||||
bool thrsw:1;
|
||||
bool ldunif:1;
|
||||
bool ldunifa:1;
|
||||
bool ldunifrf:1;
|
||||
bool ldunifarf:1;
|
||||
bool ldtmu:1;
|
||||
bool ldvary:1;
|
||||
bool ldvpm:1;
|
||||
@@ -347,6 +350,8 @@ struct v3d_qpu_instr {
|
||||
enum v3d_qpu_instr_type type;
|
||||
|
||||
struct v3d_qpu_sig sig;
|
||||
uint8_t sig_addr;
|
||||
bool sig_magic; /* If the signal writes to a magic address */
|
||||
uint8_t raddr_a;
|
||||
uint8_t raddr_b;
|
||||
struct v3d_qpu_flags flags;
|
||||
@@ -403,9 +408,14 @@ bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
|
||||
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
|
||||
|
||||
#endif
|
||||
|
@@ -55,11 +55,7 @@
|
||||
|
||||
#define VC5_QPU_COND_SHIFT 46
|
||||
#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
|
||||
|
||||
#define VC5_QPU_COND_IFA 0
|
||||
#define VC5_QPU_COND_IFB 1
|
||||
#define VC5_QPU_COND_IFNA 2
|
||||
#define VC5_QPU_COND_IFNB 3
|
||||
#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
|
||||
|
||||
#define VC5_QPU_MM QPU_MASK(45, 45)
|
||||
#define VC5_QPU_MA QPU_MASK(44, 44)
|
||||
@@ -113,6 +109,9 @@
|
||||
|
||||
#define THRSW .thrsw = true
|
||||
#define LDUNIF .ldunif = true
|
||||
#define LDUNIFRF .ldunifrf = true
|
||||
#define LDUNIFA .ldunifa = true
|
||||
#define LDUNIFARF .ldunifarf = true
|
||||
#define LDTMU .ldtmu = true
|
||||
#define LDVARY .ldvary = true
|
||||
#define LDVPM .ldvpm = true
|
||||
@@ -156,6 +155,67 @@ static const struct v3d_qpu_sig v33_sig_map[] = {
|
||||
[31] = { SMIMM, },
|
||||
};
|
||||
|
||||
/* Signal table used by v3d_qpu_sig_unpack()/v3d_qpu_sig_pack() when
 * devinfo->ver == 40.  The array index is the packed signal value; indices
 * with no entry are the reserved encodings noted inline.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
|
||||
/* MISC R3 R4 R5 */
|
||||
[0] = { },
|
||||
[1] = { THRSW, },
|
||||
[2] = { LDUNIF },
|
||||
[3] = { THRSW, LDUNIF },
|
||||
[4] = { LDTMU, },
|
||||
[5] = { THRSW, LDTMU, },
|
||||
[6] = { LDTMU, LDUNIF },
|
||||
[7] = { THRSW, LDTMU, LDUNIF },
|
||||
[8] = { LDVARY, },
|
||||
[9] = { THRSW, LDVARY, },
|
||||
[10] = { LDVARY, LDUNIF },
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
/* 12-13 reserved */
|
||||
[14] = { SMIMM, LDVARY, },
|
||||
[15] = { SMIMM, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
[19] = { THRSW, WRTMUC },
|
||||
[20] = { LDVARY, WRTMUC },
|
||||
[21] = { THRSW, LDVARY, WRTMUC },
|
||||
[22] = { UCB, },
|
||||
[23] = { ROT, },
|
||||
/* 24-30 reserved */
|
||||
[31] = { SMIMM, LDTMU, },
|
||||
};
|
||||
|
||||
/* Signal table used by v3d_qpu_sig_unpack()/v3d_qpu_sig_pack() when
 * devinfo->ver >= 41.  The array index is the packed signal value; indices
 * with no entry are the reserved encodings noted inline.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
|
||||
/* MISC phys R5 */
|
||||
[0] = { },
|
||||
[1] = { THRSW, },
|
||||
[2] = { LDUNIF },
|
||||
[3] = { THRSW, LDUNIF },
|
||||
[4] = { LDTMU, },
|
||||
[5] = { THRSW, LDTMU, },
|
||||
[6] = { LDTMU, LDUNIF },
|
||||
[7] = { THRSW, LDTMU, LDUNIF },
|
||||
[8] = { LDVARY, },
|
||||
[9] = { THRSW, LDVARY, },
|
||||
[10] = { LDVARY, LDUNIF },
|
||||
[11] = { THRSW, LDVARY, LDUNIF },
|
||||
[12] = { LDUNIFRF },
|
||||
[13] = { THRSW, LDUNIFRF },
|
||||
[14] = { SMIMM, LDVARY, },
|
||||
[15] = { SMIMM, },
|
||||
[16] = { LDTLB, },
|
||||
[17] = { LDTLBU, },
|
||||
[18] = { WRTMUC },
|
||||
[19] = { THRSW, WRTMUC },
|
||||
[20] = { LDVARY, WRTMUC },
|
||||
[21] = { THRSW, LDVARY, WRTMUC },
|
||||
[22] = { UCB, },
|
||||
[23] = { ROT, },
|
||||
/* 26-30 reserved (24 and 25 are assigned just below) */
|
||||
[24] = { LDUNIFA},
|
||||
[25] = { LDUNIFARF },
|
||||
[31] = { SMIMM, LDTMU, },
|
||||
};
|
||||
|
||||
bool
|
||||
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
uint32_t packed_sig,
|
||||
@@ -164,7 +224,12 @@ v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
|
||||
if (packed_sig >= ARRAY_SIZE(v33_sig_map))
|
||||
return false;
|
||||
|
||||
*sig = v33_sig_map[packed_sig];
|
||||
if (devinfo->ver >= 41)
|
||||
*sig = v41_sig_map[packed_sig];
|
||||
else if (devinfo->ver == 40)
|
||||
*sig = v40_sig_map[packed_sig];
|
||||
else
|
||||
*sig = v33_sig_map[packed_sig];
|
||||
|
||||
/* Signals with zeroed unpacked contents after element 0 are reserved. */
|
||||
return (packed_sig == 0 ||
|
||||
@@ -178,7 +243,12 @@ v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
|
||||
{
|
||||
static const struct v3d_qpu_sig *map;
|
||||
|
||||
map = v33_sig_map;
|
||||
if (devinfo->ver >= 41)
|
||||
map = v41_sig_map;
|
||||
else if (devinfo->ver == 40)
|
||||
map = v40_sig_map;
|
||||
else
|
||||
map = v33_sig_map;
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
|
||||
if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
|
||||
@@ -1063,10 +1133,21 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
|
||||
&instr->sig))
|
||||
return false;
|
||||
|
||||
if (!v3d_qpu_flags_unpack(devinfo,
|
||||
QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
|
||||
&instr->flags))
|
||||
return false;
|
||||
uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
|
||||
instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
|
||||
instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
|
||||
|
||||
instr->flags.ac = V3D_QPU_COND_NONE;
|
||||
instr->flags.mc = V3D_QPU_COND_NONE;
|
||||
instr->flags.apf = V3D_QPU_PF_NONE;
|
||||
instr->flags.mpf = V3D_QPU_PF_NONE;
|
||||
instr->flags.auf = V3D_QPU_UF_NONE;
|
||||
instr->flags.muf = V3D_QPU_UF_NONE;
|
||||
} else {
|
||||
if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
|
||||
return false;
|
||||
}
|
||||
|
||||
instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
|
||||
instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
|
||||
@@ -1164,9 +1245,28 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
|
||||
return false;
|
||||
|
||||
uint32_t flags;
|
||||
if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
|
||||
return false;
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
|
||||
if (instr->flags.ac != V3D_QPU_COND_NONE ||
|
||||
instr->flags.mc != V3D_QPU_COND_NONE ||
|
||||
instr->flags.apf != V3D_QPU_PF_NONE ||
|
||||
instr->flags.mpf != V3D_QPU_PF_NONE ||
|
||||
instr->flags.auf != V3D_QPU_UF_NONE ||
|
||||
instr->flags.muf != V3D_QPU_UF_NONE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
flags = instr->sig_addr;
|
||||
if (instr->sig_magic)
|
||||
flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
|
||||
} else {
|
||||
if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
|
||||
return false;
|
||||
}
|
||||
|
||||
*packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
|
||||
} else {
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@@ -63,6 +63,13 @@ static const struct {
|
||||
{ 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" },
|
||||
{ 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" },
|
||||
|
||||
/* v4.1 signals */
|
||||
{ 41, 0x1f010520cf60a000ull, "fcmp.andz rf32, r2.h, r1.h; vfmul rf20, r0.hh, r3; ldunifa" },
|
||||
{ 41, 0x932045e6c16ea000ull, "fcmp rf38, r2.abs, r5; fmul rf23.l, r3, r3.abs; ldunifarf.rf1" },
|
||||
{ 41, 0xd72f0434e43ae5c0ull, "fcmp rf52.h, rf23, r5.abs; fmul rf16.h, rf23, r1; ldunifarf.rf60" },
|
||||
{ 41, 0xdb3048eb9d533780ull, "fmax rf43.l, r3.h, rf30; fmul rf35.h, r4, r2.l; ldunifarf.r1" },
|
||||
{ 41, 0x733620471e6ce700ull, "faddnf rf7.l, rf28.h, r1.l; fmul r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
|
||||
{ 41, 0x9c094adef634b000ull, "ffloor.ifb rf30.l, r3; fmul.pushz rf43.l, r5, r1.h" },
|
||||
};
|
||||
|
||||
static void
|
||||
|
Reference in New Issue
Block a user