broadcom/vc5: Add support for register spilling.

Our register spilling support is nice to have since vc4 couldn't at all,
but we're still very restricted due to needing to not spill during a TMU
operation, or during the last segment of the program (which would be nice
to spill a value of, when there's a long-lived value being passed through
with little modification from the start to the end).

We could do better by emitting unspills for the last-segment values just
before the last thrsw, since the last segment is probably not the maximum
interference area.

Fixes GTF uniform_buffer_object_arrays_of_all_valid_basic_types and 3
others.
This commit is contained in:
Eric Anholt
2018-03-13 15:13:00 -07:00
parent 271fc58ba1
commit facc3c6f58
7 changed files with 306 additions and 11 deletions

View File

@@ -248,6 +248,12 @@ enum quniform_contents {
QUNIFORM_ALPHA_REF,
QUNIFORM_SAMPLE_MASK,
/**
* Returns the the offset of the scratch buffer for register spilling.
*/
QUNIFORM_SPILL_OFFSET,
QUNIFORM_SPILL_SIZE_PER_THREAD,
};
struct v3d_varying_slot {
@@ -506,6 +512,20 @@ struct v3d_compile {
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS];
uint32_t num_vpm_writes;
/* Size in bytes of registers that have been spilled. This is how much
* space needs to be available in the spill BO per thread per QPU.
*/
uint32_t spill_size;
/* Shader-db stats for register spilling. */
uint32_t spills, fills;
/**
* Register spilling's per-thread base address, shared between each
* spill/fill's addressing calculations.
*/
struct qreg spill_base;
/* Bit vector of which temps may be spilled */
BITSET_WORD *spillable;
/**
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
*
@@ -600,6 +620,7 @@ struct v3d_prog_data {
struct v3d_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
uint32_t spill_size;
uint8_t num_inputs;
uint8_t threads;
@@ -697,6 +718,7 @@ void vir_set_unpack(struct qinst *inst, int src,
enum v3d_qpu_input_unpack unpack);
struct qreg vir_get_temp(struct v3d_compile *c);
void vir_emit_last_thrsw(struct v3d_compile *c);
void vir_calculate_live_intervals(struct v3d_compile *c);
bool vir_has_implicit_uniform(struct qinst *inst);
int vir_get_implicit_uniform_src(struct qinst *inst);
@@ -746,7 +768,7 @@ void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
void qpu_validate(struct v3d_compile *c);
struct qpu_reg *v3d_register_allocate(struct v3d_compile *c);
struct qpu_reg *v3d_register_allocate(struct v3d_compile *c, bool *spilled);
bool vir_init_reg_sets(struct v3d_compiler *compiler);
void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);