broadcom/compiler: only handle accumulator classes if present

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
This commit is contained in:
Iago Toral Quiroga
2021-09-29 12:10:31 +02:00
committed by Marge Bot
parent b1548b18d3
commit 03594b3dca

View File

@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo)
else else
return CLASS_BITS_PHYS; return CLASS_BITS_PHYS;
} }
/**
 * Filters a register class bitmask according to the device's capabilities.
 *
 * When the device reports accumulators, the mask is returned untouched.
 * Otherwise the caller must have included CLASS_BITS_PHYS in the request
 * (checked with an assert) and the result is exactly CLASS_BITS_PHYS, with
 * every other class bit dropped.
 *
 * @param devinfo     Device description; only has_accumulators is read.
 * @param class_bits  Requested mask of CLASS_BITS_* flags.
 * @return            The class mask to use on this device.
 */
static uint8_t
filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
{
        /* Devices with accumulators accept the requested mask as is. */
        if (devinfo->has_accumulators)
                return class_bits;

        /* Without accumulators, the request must already allow the
         * CLASS_BITS_PHYS class, which is the only one handed back.
         */
        assert(class_bits & CLASS_BITS_PHYS);
        return CLASS_BITS_PHYS;
}
static inline uint32_t static inline uint32_t
temp_to_node(struct v3d_compile *c, uint32_t temp) temp_to_node(struct v3d_compile *c, uint32_t temp)
{ {
@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c)
*/ */
if (c->spilling) { if (c->spilling) {
int temp_class = CLASS_BITS_PHYS; int temp_class = CLASS_BITS_PHYS;
if (i != c->spill_base.index) if (c->devinfo->has_accumulators &&
i != c->spill_base.index) {
temp_class |= CLASS_BITS_ACC; temp_class |= CLASS_BITS_ACC;
}
add_node(c, i, temp_class); add_node(c, i, temp_class);
} }
} }
@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
* temp will be used immediately so just like the uniform above we * temp will be used immediately so just like the uniform above we
* can allow accumulators. * can allow accumulators.
*/ */
int temp_class =
filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
if (!fill_dst) { if (!fill_dst) {
struct qreg dst = vir_TMUWT(c); struct qreg dst = vir_TMUWT(c);
assert(dst.file == QFILE_TEMP); assert(dst.file == QFILE_TEMP);
add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC); add_node(c, dst.index, temp_class);
} else { } else {
*fill_dst = vir_LDTMU(c); *fill_dst = vir_LDTMU(c);
assert(fill_dst->file == QFILE_TEMP); assert(fill_dst->file == QFILE_TEMP);
add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC); add_node(c, fill_dst->index, temp_class);
} }
/* Temps across the thread switch we injected can't be assigned to /* Temps across the thread switch we injected can't be assigned to
@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
* instruction immediately after so we * instruction immediately after so we
* can use ACC. * can use ACC.
*/ */
add_node(c, temp.index, CLASS_BITS_PHYS | int temp_class =
CLASS_BITS_ACC); filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
CLASS_BITS_ACC);
add_node(c, temp.index, temp_class);
} else { } else {
/* If we have a postponed spill, we /* If we have a postponed spill, we
* don't need a fill as the temp would * don't need a fill as the temp would
@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
compiler->reg_class_phys[threads] = compiler->reg_class_phys[threads] =
ra_alloc_contig_reg_class(compiler->regs, 1); ra_alloc_contig_reg_class(compiler->regs, 1);
/* Init physical regs */
for (int i = phys_index; for (int i = phys_index;
i < phys_index + (PHYS_COUNT >> threads); i++) { i < phys_index + (PHYS_COUNT >> threads); i++) {
if (compiler->devinfo->has_accumulators) if (compiler->devinfo->has_accumulators)
@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
ra_class_add_reg(compiler->reg_class_any[threads], i); ra_class_add_reg(compiler->reg_class_any[threads], i);
} }
/* Init accumulator regs */
if (compiler->devinfo->has_accumulators) { if (compiler->devinfo->has_accumulators) {
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) { for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
ra_class_add_reg(compiler->reg_class_any[threads], i); ra_class_add_reg(compiler->reg_class_any[threads], i);
} }
} /* r5 can only store a single 32-bit value, so not much can
/* r5 can only store a single 32-bit value, so not much can * use it.
* use it. */
*/
if (compiler->devinfo->has_accumulators) {
ra_class_add_reg(compiler->reg_class_r5[threads], ra_class_add_reg(compiler->reg_class_r5[threads],
ACC_INDEX + 5); ACC_INDEX + 5);
ra_class_add_reg(compiler->reg_class_any[threads], ra_class_add_reg(compiler->reg_class_any[threads],
@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
* because ldunif has usually a shorter lifespan, allowing for * because ldunif has usually a shorter lifespan, allowing for
* more accumulator reuse and QPU merges. * more accumulator reuse and QPU merges.
*/ */
if (!inst->qpu.sig.ldunif) { if (c->devinfo->has_accumulators) {
uint8_t class_bits = if (!inst->qpu.sig.ldunif) {
get_temp_class_bits(c, inst->dst.index) & uint8_t class_bits =
~CLASS_BITS_R5; get_temp_class_bits(c, inst->dst.index) &
set_temp_class_bits(c, inst->dst.index, ~CLASS_BITS_R5;
class_bits);
} else {
/* Until V3D 4.x, we could only load a uniform
* to r5, so we'll need to spill if uniform
* loads interfere with each other.
*/
if (c->devinfo->ver < 40) {
set_temp_class_bits(c, inst->dst.index, set_temp_class_bits(c, inst->dst.index,
CLASS_BITS_R5); class_bits);
} else {
/* Until V3D 4.x, we could only load a uniform
* to r5, so we'll need to spill if uniform
* loads interfere with each other.
*/
if (c->devinfo->ver < 40) {
set_temp_class_bits(c, inst->dst.index,
CLASS_BITS_R5);
}
} }
} }
} }
@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c)
c->thread_index--; c->thread_index--;
} }
c->g = ra_alloc_interference_graph(c->compiler->regs, unsigned num_ra_nodes = c->num_temps;
c->num_temps + ARRAY_SIZE(acc_nodes)); if (c->devinfo->has_accumulators)
num_ra_nodes += ARRAY_SIZE(acc_nodes);
c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data); ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
/* Make some fixed nodes for the accumulators, which we will need to /* Make some fixed nodes for the accumulators, which we will need to
@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c)
* live in, but the classes take up a lot of memory to set up, so we * live in, but the classes take up a lot of memory to set up, so we
* don't want to make too many. * don't want to make too many.
*/ */
for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) { for (uint32_t i = 0; i < num_ra_nodes; i++) {
if (i < ACC_COUNT) { if (c->devinfo->has_accumulators && i < ACC_COUNT) {
acc_nodes[i] = i; acc_nodes[i] = i;
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i); ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
c->nodes.info[i].priority = 0; c->nodes.info[i].priority = 0;