broadcom/compiler: only handle accumulator classes if present
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
This commit is contained in:

committed by
Marge Bot

parent
b1548b18d3
commit
03594b3dca
@@ -53,6 +53,17 @@ get_class_bit_any(const struct v3d_device_info *devinfo)
|
||||
else
|
||||
return CLASS_BITS_PHYS;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
|
||||
{
|
||||
if (!devinfo->has_accumulators) {
|
||||
assert(class_bits & CLASS_BITS_PHYS);
|
||||
class_bits = CLASS_BITS_PHYS;
|
||||
}
|
||||
return class_bits;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
temp_to_node(struct v3d_compile *c, uint32_t temp)
|
||||
{
|
||||
@@ -413,8 +424,10 @@ v3d_setup_spill_base(struct v3d_compile *c)
|
||||
*/
|
||||
if (c->spilling) {
|
||||
int temp_class = CLASS_BITS_PHYS;
|
||||
if (i != c->spill_base.index)
|
||||
if (c->devinfo->has_accumulators &&
|
||||
i != c->spill_base.index) {
|
||||
temp_class |= CLASS_BITS_ACC;
|
||||
}
|
||||
add_node(c, i, temp_class);
|
||||
}
|
||||
}
|
||||
@@ -473,14 +486,16 @@ v3d_emit_spill_tmua(struct v3d_compile *c,
|
||||
* temp will be used immediately so just like the uniform above we
|
||||
* can allow accumulators.
|
||||
*/
|
||||
int temp_class =
|
||||
filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
if (!fill_dst) {
|
||||
struct qreg dst = vir_TMUWT(c);
|
||||
assert(dst.file == QFILE_TEMP);
|
||||
add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
add_node(c, dst.index, temp_class);
|
||||
} else {
|
||||
*fill_dst = vir_LDTMU(c);
|
||||
assert(fill_dst->file == QFILE_TEMP);
|
||||
add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
|
||||
add_node(c, fill_dst->index, temp_class);
|
||||
}
|
||||
|
||||
/* Temps across the thread switch we injected can't be assigned to
|
||||
@@ -662,8 +677,10 @@ v3d_spill_reg(struct v3d_compile *c, int *acc_nodes, int spill_temp)
|
||||
* instruction immediately after so we
|
||||
* can use ACC.
|
||||
*/
|
||||
add_node(c, temp.index, CLASS_BITS_PHYS |
|
||||
CLASS_BITS_ACC);
|
||||
int temp_class =
|
||||
filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
|
||||
CLASS_BITS_ACC);
|
||||
add_node(c, temp.index, temp_class);
|
||||
} else {
|
||||
/* If we have a postponed spill, we
|
||||
* don't need a fill as the temp would
|
||||
@@ -941,6 +958,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
compiler->reg_class_phys[threads] =
|
||||
ra_alloc_contig_reg_class(compiler->regs, 1);
|
||||
|
||||
/* Init physical regs */
|
||||
for (int i = phys_index;
|
||||
i < phys_index + (PHYS_COUNT >> threads); i++) {
|
||||
if (compiler->devinfo->has_accumulators)
|
||||
@@ -949,16 +967,15 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
}
|
||||
|
||||
/* Init accumulator regs */
|
||||
if (compiler->devinfo->has_accumulators) {
|
||||
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
|
||||
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads], i);
|
||||
}
|
||||
}
|
||||
/* r5 can only store a single 32-bit value, so not much can
|
||||
* use it.
|
||||
*/
|
||||
if (compiler->devinfo->has_accumulators) {
|
||||
/* r5 can only store a single 32-bit value, so not much can
|
||||
* use it.
|
||||
*/
|
||||
ra_class_add_reg(compiler->reg_class_r5[threads],
|
||||
ACC_INDEX + 5);
|
||||
ra_class_add_reg(compiler->reg_class_any[threads],
|
||||
@@ -1081,21 +1098,23 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
|
||||
* because ldunif has usually a shorter lifespan, allowing for
|
||||
* more accumulator reuse and QPU merges.
|
||||
*/
|
||||
if (!inst->qpu.sig.ldunif) {
|
||||
uint8_t class_bits =
|
||||
get_temp_class_bits(c, inst->dst.index) &
|
||||
~CLASS_BITS_R5;
|
||||
set_temp_class_bits(c, inst->dst.index,
|
||||
class_bits);
|
||||
|
||||
} else {
|
||||
/* Until V3D 4.x, we could only load a uniform
|
||||
* to r5, so we'll need to spill if uniform
|
||||
* loads interfere with each other.
|
||||
*/
|
||||
if (c->devinfo->ver < 40) {
|
||||
if (c->devinfo->has_accumulators) {
|
||||
if (!inst->qpu.sig.ldunif) {
|
||||
uint8_t class_bits =
|
||||
get_temp_class_bits(c, inst->dst.index) &
|
||||
~CLASS_BITS_R5;
|
||||
set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_R5);
|
||||
class_bits);
|
||||
|
||||
} else {
|
||||
/* Until V3D 4.x, we could only load a uniform
|
||||
* to r5, so we'll need to spill if uniform
|
||||
* loads interfere with each other.
|
||||
*/
|
||||
if (c->devinfo->ver < 40) {
|
||||
set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_R5);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1152,8 +1171,10 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
c->thread_index--;
|
||||
}
|
||||
|
||||
c->g = ra_alloc_interference_graph(c->compiler->regs,
|
||||
c->num_temps + ARRAY_SIZE(acc_nodes));
|
||||
unsigned num_ra_nodes = c->num_temps;
|
||||
if (c->devinfo->has_accumulators)
|
||||
num_ra_nodes += ARRAY_SIZE(acc_nodes);
|
||||
c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
|
||||
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
|
||||
|
||||
/* Make some fixed nodes for the accumulators, which we will need to
|
||||
@@ -1162,8 +1183,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
||||
* live in, but the classes take up a lot of memory to set up, so we
|
||||
* don't want to make too many.
|
||||
*/
|
||||
for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) {
|
||||
if (i < ACC_COUNT) {
|
||||
for (uint32_t i = 0; i < num_ra_nodes; i++) {
|
||||
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
|
||||
acc_nodes[i] = i;
|
||||
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
|
||||
c->nodes.info[i].priority = 0;
|
||||
|
Reference in New Issue
Block a user