lima/ppir: rework liveness data structures to bitset

The liveness code in ppir can be a bottleneck in complicated shaders
with multiple spills, and the use of the mesa set data structure is one
of the main reasons it is expensive to run.
With some changes, it can be adapted to use bitsets, which makes it run
substantially faster.
ppir liveness can't run with a regular bitset for registers since we
need to track individual component masks for non-ssa registers, but we
can switch to using a separate packed bit array just for the masks,
rather than a full-blown hash set. This also makes operations such as
liveness propagation much more straightforward.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9745>
This commit is contained in:
Erico Nunes
2021-03-17 01:47:20 +01:00
committed by Marge Bot
parent 2b83d99538
commit ab71c0ba44
3 changed files with 111 additions and 123 deletions

View File

@@ -28,53 +28,30 @@
* union between sets. */
static void
ppir_liveness_propagate(ppir_compiler *comp,
struct ppir_liveness *dest, struct ppir_liveness *src,
struct set *dest_set, struct set *src_set)
BITSET_WORD *dest_set, BITSET_WORD *src_set,
uint8_t *dest_mask, uint8_t *src_mask)
{
set_foreach(src_set, entry_src) {
const struct ppir_liveness *s = entry_src->key;
assert(s);
for (int i = 0; i < BITSET_WORDS(comp->reg_num); i++)
dest_set[i] |= src_set[i];
unsigned int regalloc_index = s->reg->regalloc_index;
dest[regalloc_index].reg = src[regalloc_index].reg;
dest[regalloc_index].mask |= src[regalloc_index].mask;
_mesa_set_add(dest_set, &dest[regalloc_index]);
}
for (int i = 0; i < reg_mask_size(comp->reg_num); i++)
dest_mask[i] |= src_mask[i];
}
/* Check whether two liveness sets are equal. */
static bool
ppir_liveness_set_equal(ppir_compiler *comp,
struct ppir_liveness *l1, struct ppir_liveness *l2,
struct set *set1, struct set *set2)
BITSET_WORD *set1, BITSET_WORD *set2,
uint8_t *mask1, uint8_t *mask2)
{
set_foreach(set1, entry1) {
const struct ppir_liveness *k1 = entry1->key;
unsigned int regalloc_index = k1->reg->regalloc_index;
struct set_entry *entry2 = _mesa_set_search(set2, &l2[regalloc_index]);
if (!entry2)
for (int i = 0; i < BITSET_WORDS(comp->reg_num); i++)
if (set1[i] != set2[i])
return false;
const struct ppir_liveness *k2 = entry2->key;
if (k1->mask != k2->mask)
return false;
}
set_foreach(set2, entry2) {
const struct ppir_liveness *k2 = entry2->key;
unsigned int regalloc_index = k2->reg->regalloc_index;
struct set_entry *entry1 = _mesa_set_search(set1, &l1[regalloc_index]);
if (!entry1)
for (int i = 0; i < reg_mask_size(comp->reg_num); i++)
if (mask1[i] != mask2[i])
return false;
const struct ppir_liveness *k1 = entry1->key;
if (k2->mask != k1->mask)
return false;
}
return true;
}
@@ -105,38 +82,36 @@ ppir_liveness_instr_srcs(ppir_compiler *comp, ppir_instr *instr)
if (!reg || reg->undef)
continue;
unsigned int index = reg->regalloc_index;
/* if some other op on this same instruction is writing,
* we just need to reserve a register for this particular
* instruction. */
if (src->node && src->node->instr == instr) {
instr->live_internal[reg->regalloc_index].reg = reg;
_mesa_set_add(instr->live_internal_set, &instr->live_internal[reg->regalloc_index]);
BITSET_SET(instr->live_internal, index);
continue;
}
struct set_entry *live = _mesa_set_search(instr->live_in_set,
&instr->live_in[reg->regalloc_index]);
bool live = BITSET_TEST(instr->live_set, index);
if (src->type == ppir_target_ssa) {
/* reg is read, needs to be live before instr */
if (live)
continue;
instr->live_in[reg->regalloc_index].reg = reg;
_mesa_set_add(instr->live_in_set, &instr->live_in[reg->regalloc_index]);
BITSET_SET(instr->live_set, index);
}
else {
unsigned int mask = ppir_src_get_mask(src);
uint8_t live_mask = get_reg_mask(instr->live_mask, index);
/* read reg is type register, need to check if this sets
* any additional bits in the current mask */
if (live && (instr->live_in[reg->regalloc_index].mask ==
(instr->live_in[reg->regalloc_index].mask | mask)))
if (live && (live_mask == (live_mask | mask)))
continue;
/* some new components */
instr->live_in[reg->regalloc_index].reg = reg;
instr->live_in[reg->regalloc_index].mask |= mask;
_mesa_set_add(instr->live_in_set, &instr->live_in[reg->regalloc_index]);
set_reg_mask(instr->live_mask, index, (live_mask | mask));
BITSET_SET(instr->live_set, index);
}
}
}
@@ -168,34 +143,33 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
if (!reg || reg->undef)
continue;
struct set_entry *live = _mesa_set_search(instr->live_in_set,
&instr->live_in[reg->regalloc_index]);
unsigned int index = reg->regalloc_index;
bool live = BITSET_TEST(instr->live_set, index);
/* If a register is written but wasn't read in a later instruction, it is
* either dead code or a bug. For now, assign an interference to it to
* ensure it doesn't get assigned a live register and overwrites it. */
if (!live) {
instr->live_internal[reg->regalloc_index].reg = reg;
_mesa_set_add(instr->live_internal_set, &instr->live_internal[reg->regalloc_index]);
BITSET_SET(instr->live_internal, index);
continue;
}
if (dest->type == ppir_target_ssa) {
/* reg is written and ssa, is not live before instr */
_mesa_set_remove_key(instr->live_in_set, &instr->live_in[reg->regalloc_index]);
BITSET_CLEAR(instr->live_set, index);
}
else {
unsigned int mask = dest->write_mask;
uint8_t live_mask = get_reg_mask(instr->live_mask, index);
/* written reg is type register, need to check if this clears
* the remaining mask to remove it from the live set */
if (instr->live_in[reg->regalloc_index].mask ==
(instr->live_in[reg->regalloc_index].mask & ~mask))
if (live_mask == (live_mask & ~mask))
continue;
instr->live_in[reg->regalloc_index].mask &= ~mask;
set_reg_mask(instr->live_mask, index, (live_mask & ~mask));
/* unset reg if all remaining bits were cleared */
if (!instr->live_in[reg->regalloc_index].mask) {
_mesa_set_remove_key(instr->live_in_set, &instr->live_in[reg->regalloc_index]);
if ((live_mask & ~mask) == 0) {
BITSET_CLEAR(instr->live_set, index);
}
}
}
@@ -206,6 +180,8 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
static bool
ppir_liveness_compute_live_sets(ppir_compiler *comp)
{
uint8_t temp_live_mask[reg_mask_size(comp->reg_num)];
BITSET_DECLARE(temp_live_set, comp->reg_num);
bool cont = false;
list_for_each_entry_rev(ppir_block, block, &comp->block_list, list) {
if (list_is_empty(&block->instr_list))
@@ -216,18 +192,17 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
list_for_each_entry_rev(ppir_instr, instr, &block->instr_list, list) {
/* initial copy to check for changes */
struct ppir_liveness temp_live_in[comp->reg_num];
memset(temp_live_in, 0, sizeof(temp_live_in));
struct set *temp_live_in_set = _mesa_set_create(comp,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
ppir_liveness_propagate(comp, temp_live_in, instr->live_in,
temp_live_in_set, instr->live_in_set);
memset(temp_live_mask, 0, sizeof(temp_live_mask));
memset(temp_live_set, 0, sizeof(temp_live_set));
ppir_liveness_propagate(comp,
temp_live_set, instr->live_set,
temp_live_mask, instr->live_mask);
/* inherit (or-) live variables from next instr or block */
if (instr == last) {
ppir_instr *next_instr;
/* inherit liveness from the first instruction in the next block */
/* inherit liveness from the first instruction in the next blocks */
for (int i = 0; i < 2; i++) {
ppir_block *succ = block->successors[i];
if (!succ)
@@ -243,22 +218,24 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
next_instr = list_first_entry(&succ->instr_list, ppir_instr, list);
assert(next_instr);
ppir_liveness_propagate(comp, instr->live_in, next_instr->live_in,
instr->live_in_set, next_instr->live_in_set);
ppir_liveness_propagate(comp,
instr->live_set, next_instr->live_set,
instr->live_mask, next_instr->live_mask);
}
}
else {
ppir_instr *next_instr = LIST_ENTRY(ppir_instr, instr->list.next, list);
ppir_liveness_propagate(comp,
instr->live_in, next_instr->live_in,
instr->live_in_set, next_instr->live_in_set);
instr->live_set, next_instr->live_set,
instr->live_mask, next_instr->live_mask);
}
ppir_liveness_instr_dest(comp, instr);
ppir_liveness_instr_srcs(comp, instr);
cont |= !ppir_liveness_set_equal(comp, temp_live_in, instr->live_in,
temp_live_in_set, instr->live_in_set);
cont |= !ppir_liveness_set_equal(comp,
temp_live_set, instr->live_set,
temp_live_mask, instr->live_mask);
}
}
@@ -267,31 +244,36 @@ ppir_liveness_compute_live_sets(ppir_compiler *comp)
/*
* Liveness analysis is based on https://en.wikipedia.org/wiki/Live_variable_analysis
* This implementation calculates liveness before each
* instruction.
* This implementation calculates liveness for each instruction.
* The liveness set in this implementation is defined as the set of
* registers live before the instruction executes.
* Blocks/instructions/ops are iterated backwards so register reads are
* propagated up to the instruction that writes it.
*
* 1) Before computing liveness for each instruction, propagate liveness
* 1) Before computing liveness for an instruction, propagate liveness
* from the next instruction. If it is the last instruction in a
* block, propagate liveness from all possible next instructions
* 2) Calculate live_in for the each instruction. The initial live_in is
* a copy of the liveness from the next instructions so registers who
* aren't touched by this instruction are kept intact.
* block, propagate liveness from all possible next instructions in
* the successor blocks.
* 2) Calculate the live set for the instruction. The initial live set
* is a propagated set of the live set from the next instructions.
* - Registers which aren't touched by this instruction are kept
* intact.
* - If a register is written by this instruction, it no longer needs
* to be live before the instruction, so it is removed from live_in.
* to be live before the instruction, so it is removed from the live
* set of that instruction.
* - If a register is read by this instruction, it needs to be live
* before its execution, so add it to live_in.
* before its execution, so add it to its live set.
* - Non-ssa registers are a special case. For this, the algorithm
* keeps and updates the mask of live components following the same
* logic as above. The register is only removed from the live set
* when no live components are left.
* logic as above. The register is only removed from the live set of
* the instruction when no live components are left.
* - If a non-ssa register is written and read in the same
* instruction, it stays in live_in.
* - Another special case is a ssa register that is written by an
* early op in the instruction, and read by a later op. In this case,
* the algorithm adds it to the live_internal set so that the register
* allocator properly assigns an interference for it.
* instruction, it stays in the live set.
* - Another special case is when a register is only written and read
* within a single instruction. In this case a register needs to be
* reserved but not propagated. The algorithm adds it to the
* live_internal set so that the register allocator properly assigns
* an interference for it.
* 3) The algorithm must run over the entire program until it converges,
* i.e. a full run happens without changes. This is because blocks
* are updated sequentially and updates in a block may need to be

View File

@@ -300,11 +300,6 @@ enum ppir_instr_slot {
PPIR_INSTR_SLOT_ALU_END = PPIR_INSTR_SLOT_ALU_COMBINE,
};
struct ppir_liveness {
ppir_reg *reg;
unsigned mask : 4;
};
typedef struct ppir_instr {
struct list_head list;
int index;
@@ -326,12 +321,11 @@ typedef struct ppir_instr {
int encode_size;
/* for liveness analysis */
struct ppir_liveness *live_in;
BITSET_WORD *live_set;
uint8_t *live_mask; /* mask for non-ssa registers */
/* live_internal is to mark registers only live within an
* instruction, without propagation */
struct ppir_liveness *live_internal;
struct set *live_in_set;
struct set *live_internal_set;
BITSET_WORD *live_internal;
} ppir_instr;
typedef struct ppir_block {
@@ -693,4 +687,26 @@ bool ppir_regalloc_prog(ppir_compiler *comp);
bool ppir_codegen_prog(ppir_compiler *comp);
void ppir_liveness_analysis(ppir_compiler *comp);
/* Size in bytes of a packed component-mask array for num_reg registers.
 * Each register takes one 4-bit mask, so two registers share a byte and
 * an odd register count is rounded up to a whole byte. */
static inline unsigned int reg_mask_size(unsigned int num_reg)
{
   unsigned int padded = num_reg + 1;

   return padded >> 1;
}
/* Read the 4-bit mask stored for register `index` in the packed array
 * `set`. Byte index/2 holds the pair of registers: an even index lives in
 * the low nibble, an odd index in the high nibble. The result is always
 * in the range 0x0..0xf. */
static inline uint8_t get_reg_mask(uint8_t *set, unsigned index)
{
   uint8_t packed = set[index >> 1];

   /* odd registers are stored in the upper half of the byte */
   if (index & 1)
      packed >>= 4;

   return packed & 0x0f;
}
/* Store the 4-bit mask `bits` for register `index` in the packed array
 * `set`. Byte index/2 holds the pair of registers: an even index lives in
 * the low nibble, an odd index in the high nibble.
 *
 * Only the low four bits of `bits` are stored. Without that masking a
 * value above 0xf written to an even index would spill into the nibble
 * of the neighbouring (odd) register. */
static inline void set_reg_mask(uint8_t *set, unsigned int index, uint8_t bits)
{
   unsigned int i = index / 2;
   unsigned int shift = (index % 2) ? 4 : 0;
   uint8_t mask = (uint8_t)(0x0f << shift);

   /* clear this register's nibble, then merge in the new value while
    * leaving the other register sharing the byte untouched */
   set[i] = (uint8_t)((set[i] & ~mask) | ((bits & 0x0f) << shift));
}
#endif

View File

@@ -567,42 +567,31 @@ static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
if (instr->live_in)
ralloc_free(instr->live_in);
instr->live_in = rzalloc_array(comp,
struct ppir_liveness, comp->reg_num);
if (instr->live_mask)
ralloc_free(instr->live_mask);
instr->live_mask = rzalloc_array(comp, uint8_t,
reg_mask_size(comp->reg_num));
if (instr->live_in_set)
_mesa_set_destroy(instr->live_in_set, NULL);
instr->live_in_set = _mesa_set_create(comp,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
if (instr->live_set)
ralloc_free(instr->live_set);
instr->live_set = rzalloc_array(comp, BITSET_WORD, comp->reg_num);
if (instr->live_internal)
ralloc_free(instr->live_internal);
instr->live_internal = rzalloc_array(comp,
struct ppir_liveness, comp->reg_num);
if (instr->live_internal_set)
_mesa_set_destroy(instr->live_internal_set, NULL);
instr->live_internal_set = _mesa_set_create(comp,
_mesa_hash_pointer,
_mesa_key_pointer_equal);
instr->live_internal = rzalloc_array(comp, BITSET_WORD, comp->reg_num);
}
}
}
static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
struct set *liveness)
BITSET_WORD *liveness)
{
set_foreach(liveness, entry1) {
set_foreach(liveness, entry2) {
const struct ppir_liveness *r1 = entry1->key;
const struct ppir_liveness *r2 = entry2->key;
ra_add_node_interference(g, r1->reg->regalloc_index,
r2->reg->regalloc_index);
int i, j;
BITSET_FOREACH_SET(i, liveness, comp->reg_num) {
BITSET_FOREACH_SET(j, liveness, comp->reg_num) {
ra_add_node_interference(g, i, j);
}
_mesa_set_remove(liveness, entry1);
BITSET_CLEAR(liveness, i);
}
}
@@ -627,10 +616,11 @@ static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
set_foreach(instr->live_internal_set, entry) {
_mesa_set_add(instr->live_in_set, entry->key);
int i;
BITSET_FOREACH_SET(i, instr->live_internal, comp->reg_num) {
BITSET_SET(instr->live_set, i);
}
ppir_all_interference(comp, g, instr->live_in_set);
ppir_all_interference(comp, g, instr->live_set);
}
}