broadcom/compiler: pass a devinfo to check if an instruction writes to TMU
V3D 3.x has V3D_QPU_WADDR_TMU, which in V3D 4.x is V3D_QPU_WADDR_UNIFA (not a TMU write address). This change passes a devinfo to the functions that need to do these checks so we can account for the target V3D version correctly.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8980>
commit f85fcaa494
parent 449af48f42
committed by Marge Bot
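
Conceptually, the TMU write-address check now takes the device info and picks the write-address range for the target V3D version, and every caller threads a devinfo through to it. A minimal standalone sketch of that check, condensed from the qpu_instr.c hunk below (the enum values and the ver >= 40 cutoff come from the patch; the include path is an assumption, and the real helper is v3d_qpu_magic_waddr_is_tmu()):

    #include <stdbool.h>
    #include "qpu_instr.h" /* assumed to provide enum v3d_qpu_waddr and struct v3d_device_info */

    /* Sketch: on V3D 4.x the old TMU write address is reused as UNIFA, so the
     * generic TMU range starts at TMUD instead of TMU.
     */
    static bool
    magic_waddr_is_tmu_sketch(const struct v3d_device_info *devinfo,
                              enum v3d_qpu_waddr waddr)
    {
            if (devinfo->ver >= 40) {
                    return (waddr >= V3D_QPU_WADDR_TMUD &&
                            waddr <= V3D_QPU_WADDR_TMUAU) ||
                           (waddr >= V3D_QPU_WADDR_TMUC &&
                            waddr <= V3D_QPU_WADDR_TMUHSLOD);
            }
            return (waddr >= V3D_QPU_WADDR_TMU &&
                    waddr <= V3D_QPU_WADDR_TMUAU) ||
                   (waddr >= V3D_QPU_WADDR_TMUC &&
                    waddr <= V3D_QPU_WADDR_TMUHSLOD);
    }

Callers such as v3d_qpu_writes_tmu() and the scheduler helpers simply gain a devinfo parameter and forward it, as the hunks below show.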
src/broadcom/compiler/qpu_schedule.c
@@ -174,7 +174,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
 {
         if (!magic) {
                 add_write_dep(state, &state->last_rf[waddr], n);
-        } else if (v3d_qpu_magic_waddr_is_tmu(waddr)) {
+        } else if (v3d_qpu_magic_waddr_is_tmu(state->devinfo, waddr)) {
                 /* XXX perf: For V3D 4.x, we could reorder TMU writes other
                  * than the TMUS/TMUD/TMUA to improve scheduling flexibility.
                  */
@@ -568,7 +568,8 @@ mux_read_stalls(struct choose_scoreboard *scoreboard,
 #define MAX_SCHEDULE_PRIORITY 16
 
 static int
-get_instruction_priority(const struct v3d_qpu_instr *inst)
+get_instruction_priority(const struct v3d_device_info *devinfo,
+                         const struct v3d_qpu_instr *inst)
 {
         uint32_t baseline_score;
         uint32_t next_score = 0;
@@ -590,7 +591,7 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
         next_score++;
 
         /* Schedule texture read setup early to hide their latency better. */
-        if (v3d_qpu_writes_tmu(inst))
+        if (v3d_qpu_writes_tmu(devinfo, inst))
                 return next_score;
         next_score++;
 
@@ -601,9 +602,10 @@ get_instruction_priority(const struct v3d_qpu_instr *inst)
 }
 
 static bool
-qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr)
+qpu_magic_waddr_is_periph(const struct v3d_device_info *devinfo,
+                          enum v3d_qpu_waddr waddr)
 {
-        return (v3d_qpu_magic_waddr_is_tmu(waddr) ||
+        return (v3d_qpu_magic_waddr_is_tmu(devinfo, waddr) ||
                 v3d_qpu_magic_waddr_is_sfu(waddr) ||
                 v3d_qpu_magic_waddr_is_tlb(waddr) ||
                 v3d_qpu_magic_waddr_is_vpm(waddr) ||
@@ -611,7 +613,8 @@ qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr)
 }
 
 static bool
-qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
+qpu_accesses_peripheral(const struct v3d_device_info *devinfo,
+                        const struct v3d_qpu_instr *inst)
 {
         if (v3d_qpu_uses_vpm(inst))
                 return true;
@@ -621,7 +624,7 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
                     inst->alu.add.magic_write &&
-                    qpu_magic_waddr_is_periph(inst->alu.add.waddr)) {
+                    qpu_magic_waddr_is_periph(devinfo, inst->alu.add.waddr)) {
                         return true;
                 }
 
@@ -630,7 +633,7 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
 
                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                     inst->alu.mul.magic_write &&
-                    qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {
+                    qpu_magic_waddr_is_periph(devinfo, inst->alu.mul.waddr)) {
                         return true;
                 }
         }
@@ -647,8 +650,8 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
                                  const struct v3d_qpu_instr *a,
                                  const struct v3d_qpu_instr *b)
 {
-        const bool a_uses_peripheral = qpu_accesses_peripheral(a);
-        const bool b_uses_peripheral = qpu_accesses_peripheral(b);
+        const bool a_uses_peripheral = qpu_accesses_peripheral(devinfo, a);
+        const bool b_uses_peripheral = qpu_accesses_peripheral(devinfo, b);
 
         /* We can always do one peripheral access per instruction. */
         if (!a_uses_peripheral || !b_uses_peripheral)
@@ -665,8 +668,8 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
                 return true;
         }
 
-        if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(b)) ||
-            (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(a))) {
+        if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
+            (b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, a))) {
                 return true;
         }
 
@@ -849,7 +852,7 @@ choose_instruction_to_schedule(const struct v3d_device_info *devinfo,
                         }
                 }
 
-                int prio = get_instruction_priority(inst);
+                int prio = get_instruction_priority(devinfo, inst);
 
                 if (mux_read_stalls(scoreboard, inst)) {
                         /* Don't merge an instruction that stalls */
@@ -910,7 +913,8 @@ update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
 
 static void
 update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
-                             const struct v3d_qpu_instr *inst)
+                             const struct v3d_qpu_instr *inst,
+                             const struct v3d_device_info *devinfo)
 {
         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH)
                 return;
@@ -920,7 +924,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
         if (inst->alu.add.op != V3D_QPU_A_NOP) {
                 if (inst->alu.add.magic_write) {
                         update_scoreboard_for_magic_waddr(scoreboard,
-                                                          inst->alu.add.waddr);
+                                                          inst->alu.add.waddr,
+                                                          devinfo);
                 } else {
                         update_scoreboard_for_sfu_stall_waddr(scoreboard,
                                                               inst);
@@ -930,7 +935,8 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
                 if (inst->alu.mul.magic_write) {
                         update_scoreboard_for_magic_waddr(scoreboard,
-                                                          inst->alu.mul.waddr);
+                                                          inst->alu.mul.waddr,
+                                                          devinfo);
                 }
         }
 
@@ -964,7 +970,8 @@ dump_state(const struct v3d_device_info *devinfo, struct dag *dag)
         }
 }
 
-static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr,
+static uint32_t magic_waddr_latency(const struct v3d_device_info *devinfo,
+                                    enum v3d_qpu_waddr waddr,
                                     const struct v3d_qpu_instr *after)
 {
         /* Apply some huge latency between texture fetch requests and getting
@@ -990,8 +997,10 @@ static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr,
          *
          * because we associate the first load_tmu0 with the *second* tmu0_s.
          */
-        if (v3d_qpu_magic_waddr_is_tmu(waddr) && v3d_qpu_waits_on_tmu(after))
+        if (v3d_qpu_magic_waddr_is_tmu(devinfo, waddr) &&
+            v3d_qpu_waits_on_tmu(after)) {
                 return 100;
+        }
 
         /* Assume that anything depending on us is consuming the SFU result. */
         if (v3d_qpu_magic_waddr_is_sfu(waddr))
@@ -1001,7 +1010,8 @@ static uint32_t magic_waddr_latency(enum v3d_qpu_waddr waddr,
 }
 
 static uint32_t
-instruction_latency(struct schedule_node *before, struct schedule_node *after)
+instruction_latency(const struct v3d_device_info *devinfo,
+                    struct schedule_node *before, struct schedule_node *after)
 {
         const struct v3d_qpu_instr *before_inst = &before->inst->qpu;
         const struct v3d_qpu_instr *after_inst = &after->inst->qpu;
@@ -1013,13 +1023,15 @@ instruction_latency(struct schedule_node *before, struct schedule_node *after)
 
         if (before_inst->alu.add.magic_write) {
                 latency = MAX2(latency,
-                               magic_waddr_latency(before_inst->alu.add.waddr,
+                               magic_waddr_latency(devinfo,
+                                                   before_inst->alu.add.waddr,
                                                    after_inst));
         }
 
         if (before_inst->alu.mul.magic_write) {
                 latency = MAX2(latency,
-                               magic_waddr_latency(before_inst->alu.mul.waddr,
+                               magic_waddr_latency(devinfo,
+                                                   before_inst->alu.mul.waddr,
                                                    after_inst));
         }
 
@@ -1034,6 +1046,7 @@ static void
 compute_delay(struct dag_node *node, void *state)
 {
         struct schedule_node *n = (struct schedule_node *)node;
+        struct v3d_compile *c = (struct v3d_compile *) state;
 
         n->delay = 1;
 
@@ -1042,7 +1055,8 @@ compute_delay(struct dag_node *node, void *state)
                         (struct schedule_node *)edge->child;
 
                 n->delay = MAX2(n->delay, (child->delay +
-                                           instruction_latency(n, child)));
+                                           instruction_latency(c->devinfo, n,
+                                                               child)));
         }
 }
 
@@ -1061,7 +1075,8 @@ pre_remove_head(struct dag *dag, struct schedule_node *n)
 }
 
 static void
-mark_instruction_scheduled(struct dag *dag,
+mark_instruction_scheduled(const struct v3d_device_info *devinfo,
+                           struct dag *dag,
                            uint32_t time,
                            struct schedule_node *node)
 {
@@ -1075,7 +1090,7 @@ mark_instruction_scheduled(struct dag *dag,
                 if (!child)
                         continue;
 
-                uint32_t latency = instruction_latency(node, child);
+                uint32_t latency = instruction_latency(devinfo, node, child);
 
                 child->unblocked_time = MAX2(child->unblocked_time,
                                              time + latency);
@@ -1091,7 +1106,7 @@ insert_scheduled_instruction(struct v3d_compile *c,
 {
         list_addtail(&inst->link, &block->instructions);
 
-        update_scoreboard_for_chosen(scoreboard, &inst->qpu);
+        update_scoreboard_for_chosen(scoreboard, &inst->qpu, c->devinfo);
         c->qpu_inst_count++;
         scoreboard->tick++;
 }
@@ -1390,10 +1405,10 @@ schedule_instructions(struct v3d_compile *c,
                  * be scheduled. Update the children's unblocked time for this
                  * DAG edge as we do so.
                  */
-                mark_instruction_scheduled(scoreboard->dag, time, chosen);
+                mark_instruction_scheduled(devinfo, scoreboard->dag, time, chosen);
                 list_for_each_entry(struct schedule_node, merge, &merged_list,
                                     link) {
-                        mark_instruction_scheduled(scoreboard->dag, time, merge);
+                        mark_instruction_scheduled(devinfo, scoreboard->dag, time, merge);
 
                         /* The merged VIR instruction doesn't get re-added to the
                          * block, so free it now.
@@ -1456,7 +1471,7 @@ qpu_schedule_instructions_block(struct v3d_compile *c,
         calculate_forward_deps(c, scoreboard->dag, &setup_list);
         calculate_reverse_deps(c, scoreboard->dag, &setup_list);
 
-        dag_traverse_bottom_up(scoreboard->dag, compute_delay, NULL);
+        dag_traverse_bottom_up(scoreboard->dag, compute_delay, c);
 
         uint32_t cycles = schedule_instructions(c, scoreboard, block,
                                                 orig_uniform_contents,
src/broadcom/compiler/qpu_validate.c
@@ -145,8 +145,10 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
 
                 if (inst->alu.add.op != V3D_QPU_A_NOP) {
                         if (inst->alu.add.magic_write) {
-                                if (v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr))
+                                if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
+                                                               inst->alu.add.waddr)) {
                                         tmu_writes++;
+                                }
                                 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
                                         sfu_writes++;
                                 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
@@ -160,8 +162,10 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
 
                 if (inst->alu.mul.op != V3D_QPU_M_NOP) {
                         if (inst->alu.mul.magic_write) {
-                                if (v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))
+                                if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
+                                                               inst->alu.mul.waddr)) {
                                         tmu_writes++;
+                                }
                                 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
                                         sfu_writes++;
                                 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
src/broadcom/compiler/v3d_compiler.h
@@ -950,7 +950,7 @@ bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
 bool vir_get_add_op(struct qinst *inst, enum v3d_qpu_add_op *op);
 bool vir_get_mul_op(struct qinst *inst, enum v3d_qpu_mul_op *op);
 bool vir_is_raw_mov(struct qinst *inst);
-bool vir_is_tex(struct qinst *inst);
+bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
 bool vir_is_add(struct qinst *inst);
 bool vir_is_mul(struct qinst *inst);
 bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
src/broadcom/compiler/vir.c
@@ -130,10 +130,10 @@ vir_is_mul(struct qinst *inst)
 }
 
 bool
-vir_is_tex(struct qinst *inst)
+vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
 {
         if (inst->dst.file == QFILE_MAGIC)
-                return v3d_qpu_magic_waddr_is_tmu(inst->dst.index);
+                return v3d_qpu_magic_waddr_is_tmu(devinfo, inst->dst.index);
 
         if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
             inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
src/broadcom/compiler/vir_register_allocate.c
@@ -34,15 +34,17 @@
 #define PHYS_COUNT 64
 
 static inline bool
-qinst_writes_tmu(struct qinst *inst)
+qinst_writes_tmu(const struct v3d_device_info *devinfo,
+                 struct qinst *inst)
 {
         return (inst->dst.file == QFILE_MAGIC &&
-                v3d_qpu_magic_waddr_is_tmu(inst->dst.index)) ||
+                v3d_qpu_magic_waddr_is_tmu(devinfo, inst->dst.index)) ||
                 inst->qpu.sig.wrtmuc;
 }
 
 static bool
-is_end_of_tmu_sequence(struct qinst *inst, struct qblock *block)
+is_end_of_tmu_sequence(const struct v3d_device_info *devinfo,
+                       struct qinst *inst, struct qblock *block)
 {
         if (!inst->qpu.sig.ldtmu &&
             !(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
@@ -58,7 +60,7 @@ is_end_of_tmu_sequence(struct qinst *inst, struct qblock *block)
                         return false;
                 }
 
-                if (qinst_writes_tmu(scan_inst))
+                if (qinst_writes_tmu(devinfo, scan_inst))
                         return true;
         }
 
@@ -149,10 +151,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                          * final LDTMU or TMUWT from that TMU setup. We
                          * penalize spills during that time.
                          */
-                        if (is_end_of_tmu_sequence(inst, block))
+                        if (is_end_of_tmu_sequence(c->devinfo, inst, block))
                                 in_tmu_operation = false;
 
-                        if (qinst_writes_tmu(inst))
+                        if (qinst_writes_tmu(c->devinfo, inst))
                                 in_tmu_operation = true;
                 }
         }
@@ -268,7 +270,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                          * move the fill up to not intrude in the middle of the TMU
                          * sequence.
                          */
-                        if (is_end_of_tmu_sequence(inst, block)) {
+                        if (is_end_of_tmu_sequence(c->devinfo, inst, block)) {
                                 if (postponed_spill) {
                                         v3d_emit_tmu_spill(c, postponed_spill,
                                                            inst, spill_offset);
@@ -278,8 +280,10 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                                 postponed_spill = NULL;
                         }
 
-                        if (!start_of_tmu_sequence && qinst_writes_tmu(inst))
+                        if (!start_of_tmu_sequence &&
+                            qinst_writes_tmu(c->devinfo, inst)) {
                                 start_of_tmu_sequence = inst;
+                        }
 
                         /* fills */
                         for (int i = 0; i < vir_get_nsrc(inst); i++) {
src/broadcom/qpu/qpu_instr.c
@@ -533,13 +533,20 @@ v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
 }
 
 bool
-v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr)
+v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
+                           enum v3d_qpu_waddr waddr)
 {
-        /* XXX: WADDR_TMU changed to UNIFA on 4.x */
+        if (devinfo->ver >= 40) {
+                return ((waddr >= V3D_QPU_WADDR_TMUD &&
+                         waddr <= V3D_QPU_WADDR_TMUAU) ||
+                        (waddr >= V3D_QPU_WADDR_TMUC &&
+                         waddr <= V3D_QPU_WADDR_TMUHSLOD));
+        } else {
                 return ((waddr >= V3D_QPU_WADDR_TMU &&
                          waddr <= V3D_QPU_WADDR_TMUAU) ||
                         (waddr >= V3D_QPU_WADDR_TMUC &&
                          waddr <= V3D_QPU_WADDR_TMUHSLOD));
+        }
 }
 
 bool
@@ -681,19 +688,21 @@ v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
 }
 
 bool
-v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
+                   const struct v3d_qpu_instr *inst)
 {
         return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
                 ((inst->alu.add.magic_write &&
-                  v3d_qpu_magic_waddr_is_tmu(inst->alu.add.waddr)) ||
+                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
                  (inst->alu.mul.magic_write &&
-                  v3d_qpu_magic_waddr_is_tmu(inst->alu.mul.waddr))));
+                  v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
 }
 
 bool
-v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst)
+v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
+                            const struct v3d_qpu_instr *inst)
 {
-        return v3d_qpu_writes_tmu(inst) &&
+        return v3d_qpu_writes_tmu(devinfo, inst) &&
                (!inst->alu.add.magic_write ||
                 inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
                (!inst->alu.mul.magic_write ||
src/broadcom/qpu/qpu_instr.h
@@ -442,7 +442,8 @@ v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                           struct v3d_qpu_instr *instr);
 
 bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
-bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
+bool v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
+                                enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
@@ -450,8 +451,10 @@ bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
-bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
+                        const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
+                                 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,