i965/vs: Implement proper register allocation instead of 1:1 mapping.

Fixes vs-atan-* and several others.  This is not the real solution we
eventually want, which will pack floats, vec2s, and vec3s into vec4
registers, but this code should provide the framework for that.
Eric Anholt authored on 2011-08-16 15:28:53 -07:00
parent 8174945d33
commit f4db75547f
2 changed files with 155 additions and 1 deletion
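For orientation before the diff: the register_allocate interface used here follows a fixed shape. You describe a register set and its classes once, then per compile you build an interference graph over the virtual registers, color it, and read back the chosen registers. Below is a minimal sketch of that flow using only the calls that appear in this commit; the function name ra_flow_sketch, the counts, the single class, and the one interference edge are made up for illustration, and the includes are abridged. The real per-class setup and conflict wiring follow in the diff.

extern "C" {
#include "program/register_allocate.h"
}

static void
ra_flow_sketch(void)
{
   const int reg_count = 8, node_count = 3;

   /* Once per register-set configuration: declare the registers,
    * group them into a class, and finalize the set.
    */
   struct ra_regs *regs = ra_alloc_reg_set(reg_count);
   int class1 = ra_alloc_reg_class(regs);
   for (int r = 0; r < reg_count; r++)
      ra_class_add_reg(regs, class1, r);
   ra_set_finalize(regs);

   /* Once per compile: one graph node per virtual register, with an
    * interference edge for each pair that is live at the same time.
    */
   struct ra_graph *g = ra_alloc_interference_graph(regs, node_count);
   for (int n = 0; n < node_count; n++)
      ra_set_node_class(g, n, class1);
   ra_add_node_interference(g, 0, 1); /* nodes 0 and 1 are live at once */

   if (ra_allocate_no_spills(g)) {
      for (int n = 0; n < node_count; n++) {
         int hw_reg = ra_get_node_reg(g, n); /* chosen register for node n */
         (void)hw_reg;
      }
   }
   ralloc_free(g);
}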

src/mesa/drivers/dri/i965/brw_context.h

@@ -685,6 +685,23 @@ struct brw_context
      uint32_t push_const_offset; /* Offset in the batchbuffer */
      int push_const_size; /* in 256-bit register increments */

      /** @{ register allocator */

      struct ra_regs *regs;

      /**
       * Array of the ra classes for the unaligned contiguous register
       * block sizes used.
       */
      int *classes;

      /**
       * Mapping for register-allocated objects in *regs to the first
       * GRF for that object.
       */
      uint8_t *ra_reg_to_grf;

      /** @} */
   } vs;

   struct {

src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp

@@ -21,6 +21,11 @@
 * IN THE SOFTWARE.
 */

extern "C" {
#include "main/macros.h"
#include "program/register_allocate.h"
} /* extern "C" */

#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"
@@ -88,10 +93,142 @@ vec4_visitor::reg_allocate_trivial()
   }
}
static void
brw_alloc_reg_set_for_classes(struct brw_context *brw,
                              int *class_sizes,
                              int class_count,
                              int base_reg_count)
{
   /* Compute the total number of registers across all classes. */
   int ra_reg_count = 0;
   for (int i = 0; i < class_count; i++) {
      ra_reg_count += base_reg_count - (class_sizes[i] - 1);
   }
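   /* Each class of size s contributes base_reg_count - (s - 1) ra
    * registers, one for every GRF at which a contiguous block of size
    * s can start: with base_reg_count == 96, for example, a size-1
    * class yields 96 registers and a size-2 class yields 95.
    */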
   ralloc_free(brw->vs.ra_reg_to_grf);
   brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
   ralloc_free(brw->vs.regs);
   brw->vs.regs = ra_alloc_reg_set(ra_reg_count);
   ralloc_free(brw->vs.classes);
   brw->vs.classes = ralloc_array(brw, int, class_count + 1);

   /* Now, add the registers to their classes, and add the conflicts
    * between them and the base GRF registers (and also each other).
    */
   int reg = 0;
   for (int i = 0; i < class_count; i++) {
      int class_reg_count = base_reg_count - (class_sizes[i] - 1);

      brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);

      for (int j = 0; j < class_reg_count; j++) {
         ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);

         brw->vs.ra_reg_to_grf[reg] = j;

         for (int base_reg = j;
              base_reg < j + class_sizes[i];
              base_reg++) {
            ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
         }

         reg++;
      }
   }
   assert(reg == ra_reg_count);

   ra_set_finalize(brw->vs.regs);
}
void
vec4_visitor::reg_allocate()
{
   int hw_reg_mapping[virtual_grf_count];
   int first_assigned_grf = this->first_non_payload_grf;
   int base_reg_count = BRW_MAX_GRF - first_assigned_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;

   /* Using the trivial allocator can be useful in debugging undefined
    * register access as a result of broken optimization passes.
    */
   if (0) {
      reg_allocate_trivial();
      return;
   }
   calculate_live_intervals();

   /* Set up the register classes.
    *
    * The base registers store a vec4.  However, we'll need larger
    * storage for arrays, structures, and matrices, which will be sets
    * of contiguous registers.
    */
   class_sizes[class_count++] = 1;
   for (int r = 0; r < virtual_grf_count; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
         if (class_sizes[i] == this->virtual_grf_sizes[r])
            break;
      }
      if (i == class_count) {
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
            fail("Object too large to register allocate.\n");
         }

         class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }
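   /* class_sizes now holds one entry per distinct virtual GRF size seen
    * in this shader.  A shader with only vec4 temporaries uses just the
    * size-1 class; a mat4 temporary, stored as four contiguous
    * registers, would add a size-4 class as well.
    */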
   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);

   struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
                                                    virtual_grf_count);

   for (int i = 0; i < virtual_grf_count; i++) {
      for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
            ra_set_node_class(g, i, brw->vs.classes[c]);
            break;
         }
      }

      for (int j = 0; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }
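   /* With classes and pairwise interference (from the live intervals
    * computed above) in place, coloring either succeeds outright or the
    * compile fails, since nothing can be spilled yet.
    */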
   if (!ra_allocate_no_spills(g)) {
      ralloc_free(g);
      fail("No register spilling support yet\n");
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   prog_data->total_grf = first_assigned_grf;
   for (int i = 0; i < virtual_grf_count; i++) {
      int reg = ra_get_node_reg(g, i);

      hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
      prog_data->total_grf = MAX2(prog_data->total_grf,
                                  hw_reg_mapping[i] + 1);
   }
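   /* At this point prog_data->total_grf is one past the highest GRF
    * assigned, so it also accounts for the payload registers below
    * first_assigned_grf.
    */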
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      assign(hw_reg_mapping, &inst->dst);
      assign(hw_reg_mapping, &inst->src[0]);
      assign(hw_reg_mapping, &inst->src[1]);
      assign(hw_reg_mapping, &inst->src[2]);
   }

   ralloc_free(g);
}

} /* namespace brw */