nir/from_ssa: Support register intrinsics
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23089>
This commit is contained in:

committed by
Marge Bot

parent
29b2ace184
commit
ae0408be1b
@@ -2627,8 +2627,13 @@ nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
|
||||
|
||||
typedef struct {
|
||||
struct exec_node node;
|
||||
bool src_is_reg;
|
||||
bool dest_is_reg;
|
||||
nir_src src;
|
||||
nir_dest dest;
|
||||
union {
|
||||
nir_dest dest;
|
||||
nir_src reg;
|
||||
} dest;
|
||||
} nir_parallel_copy_entry;
|
||||
|
||||
#define nir_foreach_parallel_copy_entry(entry, pcopy) \
|
||||
|
@@ -37,6 +37,7 @@ struct from_ssa_state {
|
||||
void *dead_ctx;
|
||||
struct exec_list dead_instrs;
|
||||
bool phi_webs_only;
|
||||
bool reg_intrinsics;
|
||||
struct hash_table *merge_node_table;
|
||||
nir_instr *instr;
|
||||
bool progress;
|
||||
@@ -122,7 +123,10 @@ typedef struct merge_set {
|
||||
struct exec_list nodes;
|
||||
unsigned size;
|
||||
bool divergent;
|
||||
nir_register *reg;
|
||||
union {
|
||||
nir_register *reg;
|
||||
nir_ssa_def *decl;
|
||||
} reg;
|
||||
} merge_set;
|
||||
|
||||
#if 0
|
||||
@@ -154,11 +158,10 @@ get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
|
||||
if (entry)
|
||||
return entry->data;
|
||||
|
||||
merge_set *set = ralloc(state->dead_ctx, merge_set);
|
||||
merge_set *set = rzalloc(state->dead_ctx, merge_set);
|
||||
exec_list_make_empty(&set->nodes);
|
||||
set->size = 1;
|
||||
set->divergent = def->divergent;
|
||||
set->reg = NULL;
|
||||
|
||||
merge_node *node = ralloc(state->dead_ctx, merge_node);
|
||||
node->set = set;
|
||||
@@ -401,28 +404,32 @@ isolate_phi_nodes_block(nir_shader *shader, nir_block *block, void *dead_ctx)
|
||||
|
||||
nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
|
||||
nir_parallel_copy_entry);
|
||||
nir_ssa_dest_init(&pcopy->instr, &entry->dest,
|
||||
entry->src_is_reg = false;
|
||||
entry->dest_is_reg = false;
|
||||
nir_ssa_dest_init(&pcopy->instr, &entry->dest.dest,
|
||||
phi->dest.ssa.num_components,
|
||||
phi->dest.ssa.bit_size);
|
||||
entry->dest.ssa.divergent = nir_src_is_divergent(src->src);
|
||||
entry->dest.dest.ssa.divergent = nir_src_is_divergent(src->src);
|
||||
exec_list_push_tail(&pcopy->entries, &entry->node);
|
||||
|
||||
assert(src->src.is_ssa);
|
||||
nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);
|
||||
|
||||
nir_instr_rewrite_src(&phi->instr, &src->src,
|
||||
nir_src_for_ssa(&entry->dest.ssa));
|
||||
nir_src_for_ssa(&entry->dest.dest.ssa));
|
||||
}
|
||||
|
||||
nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
|
||||
nir_parallel_copy_entry);
|
||||
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
|
||||
entry->src_is_reg = false;
|
||||
entry->dest_is_reg = false;
|
||||
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest.dest,
|
||||
phi->dest.ssa.num_components, phi->dest.ssa.bit_size);
|
||||
entry->dest.ssa.divergent = phi->dest.ssa.divergent;
|
||||
entry->dest.dest.ssa.divergent = phi->dest.ssa.divergent;
|
||||
exec_list_push_tail(&block_pcopy->entries, &entry->node);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
|
||||
&entry->dest.ssa);
|
||||
&entry->dest.dest.ssa);
|
||||
|
||||
nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
|
||||
nir_src_for_ssa(&phi->dest.ssa));
|
||||
@@ -457,9 +464,12 @@ aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
struct from_ssa_state *state)
|
||||
{
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
assert(!entry->src_is_reg);
|
||||
assert(entry->src.is_ssa);
|
||||
assert(entry->dest.is_ssa);
|
||||
assert(entry->dest.ssa.num_components == entry->src.ssa->num_components);
|
||||
assert(!entry->dest_is_reg);
|
||||
assert(entry->dest.dest.is_ssa);
|
||||
assert(entry->dest.dest.ssa.num_components ==
|
||||
entry->src.ssa->num_components);
|
||||
|
||||
/* Since load_const instructions are SSA only, we can't replace their
|
||||
* destinations with registers and, therefore, can't coalesce them.
|
||||
@@ -468,7 +478,7 @@ aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
continue;
|
||||
|
||||
merge_node *src_node = get_merge_node(entry->src.ssa, state);
|
||||
merge_node *dest_node = get_merge_node(&entry->dest.ssa, state);
|
||||
merge_node *dest_node = get_merge_node(&entry->dest.dest.ssa, state);
|
||||
|
||||
if (src_node->set == dest_node->set)
|
||||
continue;
|
||||
@@ -529,6 +539,13 @@ decl_reg_for_ssa_def(nir_builder *b, nir_ssa_def *def)
|
||||
return nir_decl_reg(b, def->num_components, def->bit_size, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
set_reg_divergent(nir_ssa_def *reg, bool divergent)
|
||||
{
|
||||
nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
|
||||
nir_intrinsic_set_divergent(decl, divergent);
|
||||
}
|
||||
|
||||
void
|
||||
nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
|
||||
nir_ssa_def *reg)
|
||||
@@ -536,6 +553,19 @@ nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
|
||||
nir_foreach_use_including_if_safe(use, old) {
|
||||
b->cursor = nir_before_src(use);
|
||||
|
||||
/* If this is a parallel copy, it can just take the register directly */
|
||||
if (!use->is_if &&
|
||||
use->parent_instr->type == nir_instr_type_parallel_copy) {
|
||||
|
||||
nir_parallel_copy_entry *copy_entry =
|
||||
list_entry(use, nir_parallel_copy_entry, src);
|
||||
|
||||
assert(!copy_entry->src_is_reg);
|
||||
copy_entry->src_is_reg = true;
|
||||
nir_src_rewrite_ssa(©_entry->src, reg);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If the immediate preceding instruction is a load_reg from the same
|
||||
* register, use it instead of creating a new load_reg. This helps when
|
||||
* a register is referenced in multiple sources in the same instruction,
|
||||
@@ -562,7 +592,7 @@ nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
|
||||
}
|
||||
|
||||
static bool
|
||||
rewrite_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
rewrite_ssa_def_legacy_reg(nir_ssa_def *def, void *void_state)
|
||||
{
|
||||
struct from_ssa_state *state = void_state;
|
||||
nir_register *reg;
|
||||
@@ -577,12 +607,12 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
* the things in the merge set should be the same so it doesn't
|
||||
* matter which node's definition we use.
|
||||
*/
|
||||
if (node->set->reg == NULL) {
|
||||
node->set->reg = create_reg_for_ssa_def(def, state->builder.impl);
|
||||
node->set->reg->divergent = node->set->divergent;
|
||||
if (node->set->reg.reg == NULL) {
|
||||
node->set->reg.reg = create_reg_for_ssa_def(def, state->builder.impl);
|
||||
node->set->reg.reg->divergent = node->set->divergent;
|
||||
}
|
||||
|
||||
reg = node->set->reg;
|
||||
reg = node->set->reg.reg;
|
||||
} else {
|
||||
if (state->phi_webs_only)
|
||||
return true;
|
||||
@@ -626,11 +656,12 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
* remove phi nodes.
|
||||
*/
|
||||
static void
|
||||
resolve_registers_block(nir_block *block, struct from_ssa_state *state)
|
||||
resolve_registers_block_legacy_reg(nir_block *block,
|
||||
struct from_ssa_state *state)
|
||||
{
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
state->instr = instr;
|
||||
nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
|
||||
nir_foreach_ssa_def(instr, rewrite_ssa_def_legacy_reg, state);
|
||||
|
||||
if (instr->type == nir_instr_type_phi) {
|
||||
nir_instr_remove(instr);
|
||||
@@ -641,6 +672,189 @@ resolve_registers_block(nir_block *block, struct from_ssa_state *state)
|
||||
state->instr = NULL;
|
||||
}
|
||||
|
||||
static bool
|
||||
dest_replace_ssa_with_reg(nir_dest *dest, nir_function_impl *impl)
|
||||
{
|
||||
if (!dest->is_ssa)
|
||||
return false;
|
||||
|
||||
nir_builder b = nir_builder_create(impl);
|
||||
|
||||
nir_ssa_def *reg = decl_reg_for_ssa_def(&b, &dest->ssa);
|
||||
nir_rewrite_uses_to_load_reg(&b, &dest->ssa, reg);
|
||||
|
||||
b.cursor = nir_after_instr(dest->ssa.parent_instr);
|
||||
nir_store_reg(&b, &dest->ssa, reg);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Returns the register assigned to the merge set (phi web) that contains
 * the given SSA def, declaring it on first request.
 *
 * Returns NULL when the def has no entry in the merge node table; this is
 * only expected when state->phi_webs_only is set.
 */
static nir_ssa_def *
reg_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
{
   struct hash_entry *he =
      _mesa_hash_table_search(state->merge_node_table, def);

   if (he == NULL) {
      /* Not part of any phi web. */
      assert(state->phi_webs_only);
      return NULL;
   }

   /* We're part of a phi web, so use the web's register. */
   merge_set *set = ((merge_node *)he->data)->set;

   /* Lazily declare the register.  Everything in a merge set should be the
    * same, so it doesn't matter which member's definition is used to size
    * the declaration.
    */
   if (set->reg.decl == NULL) {
      set->reg.decl = decl_reg_for_ssa_def(&state->builder, def);
      set_reg_divergent(set->reg.decl, set->divergent);
   }

   return set->reg.decl;
}
|
||||
|
||||
static void
|
||||
remove_no_op_phi(nir_instr *instr, struct from_ssa_state *state)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
|
||||
assert(phi->dest.is_ssa);
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(state->merge_node_table, &phi->dest.ssa);
|
||||
assert(entry != NULL);
|
||||
merge_node *node = (merge_node *)entry->data;
|
||||
|
||||
nir_foreach_phi_src(src, phi) {
|
||||
if (nir_src_is_undef(src->src))
|
||||
continue;
|
||||
|
||||
assert(src->src.is_ssa);
|
||||
entry = _mesa_hash_table_search(state->merge_node_table, src->src.ssa);
|
||||
assert(entry != NULL);
|
||||
merge_node *src_node = (merge_node *)entry->data;
|
||||
assert(src_node->set == node->set);
|
||||
}
|
||||
#endif
|
||||
|
||||
nir_instr_remove(instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
rewrite_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
{
|
||||
struct from_ssa_state *state = void_state;
|
||||
|
||||
nir_ssa_def *reg = reg_for_ssa_def(def, state);
|
||||
if (reg == NULL)
|
||||
return true;
|
||||
|
||||
assert(nir_ssa_def_is_unused(def));
|
||||
|
||||
/* At this point we know a priori that this SSA def is part of a
|
||||
* nir_dest. We can use exec_node_data to get the dest pointer.
|
||||
*/
|
||||
assert(def->parent_instr->type != nir_instr_type_load_const);
|
||||
nir_store_reg(&state->builder, def, reg);
|
||||
|
||||
state->progress = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
rewrite_src(nir_src *src, void *void_state)
|
||||
{
|
||||
struct from_ssa_state *state = void_state;
|
||||
|
||||
assert(src->is_ssa);
|
||||
nir_ssa_def *reg = reg_for_ssa_def(src->ssa, state);
|
||||
if (reg == NULL)
|
||||
return true;
|
||||
|
||||
nir_src_rewrite_ssa(src, nir_load_reg(&state->builder, reg));
|
||||
|
||||
state->progress = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Resolves ssa definitions to registers. While we're at it, we also
|
||||
* remove phi nodes.
|
||||
*/
|
||||
static void
|
||||
resolve_registers_impl(nir_function_impl *impl, struct from_ssa_state *state)
|
||||
{
|
||||
nir_foreach_block_reverse(block, impl) {
|
||||
/* Remove successor phis in case there's a back edge. */
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
nir_block *succ = block->successors[i];
|
||||
if (succ == NULL)
|
||||
continue;
|
||||
|
||||
nir_foreach_instr_safe(instr, succ) {
|
||||
if (instr->type != nir_instr_type_phi)
|
||||
break;
|
||||
|
||||
remove_no_op_phi(instr, state);
|
||||
}
|
||||
}
|
||||
|
||||
/* The following if is right after the block, handle its condition as the
|
||||
* last source "in" the block.
|
||||
*/
|
||||
nir_if *nif = nir_block_get_following_if(block);
|
||||
if (nif) {
|
||||
state->builder.cursor = nir_before_src(&nif->condition);
|
||||
rewrite_src(&nif->condition, state);
|
||||
}
|
||||
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_phi:
|
||||
remove_no_op_phi(instr, state);
|
||||
break;
|
||||
|
||||
case nir_instr_type_parallel_copy: {
|
||||
nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
|
||||
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
assert(!entry->dest_is_reg);
|
||||
assert(entry->dest.dest.is_ssa);
|
||||
assert(nir_ssa_def_is_unused(&entry->dest.dest.ssa));
|
||||
|
||||
/* Parallel copy destinations will always be registers */
|
||||
nir_ssa_def *reg = reg_for_ssa_def(&entry->dest.dest.ssa, state);
|
||||
assert(reg != NULL);
|
||||
|
||||
entry->dest_is_reg = true;
|
||||
entry->dest.reg = NIR_SRC_INIT;
|
||||
nir_instr_rewrite_src(&pcopy->instr, &entry->dest.reg,
|
||||
nir_src_for_ssa(reg));
|
||||
}
|
||||
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
assert(!entry->src_is_reg);
|
||||
assert(entry->src.is_ssa);
|
||||
nir_ssa_def *reg = reg_for_ssa_def(entry->src.ssa, state);
|
||||
if (reg == NULL)
|
||||
continue;
|
||||
|
||||
entry->src_is_reg = true;
|
||||
nir_instr_rewrite_src(&pcopy->instr, &entry->src,
|
||||
nir_src_for_ssa(reg));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
state->builder.cursor = nir_after_instr(instr);
|
||||
nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
|
||||
state->builder.cursor = nir_before_instr(instr);
|
||||
nir_foreach_src(instr, rewrite_src, state);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_copy(nir_builder *b, nir_src src, nir_src dest_src)
|
||||
{
|
||||
@@ -686,13 +900,15 @@ emit_copy(nir_builder *b, nir_src src, nir_src dest_src)
|
||||
* can continue with the above steps.
|
||||
*/
|
||||
static void
|
||||
resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
struct from_ssa_state *state)
|
||||
resolve_parallel_copy_legacy_reg(nir_parallel_copy_instr *pcopy,
|
||||
struct from_ssa_state *state)
|
||||
{
|
||||
unsigned num_copies = 0;
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
/* Sources may be SSA */
|
||||
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
|
||||
/* Sources may be SSA but destinations are always registers */
|
||||
assert(!entry->src_is_reg);
|
||||
assert(!entry->dest_is_reg && !entry->dest.dest.is_ssa);
|
||||
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.dest.reg.reg)
|
||||
continue;
|
||||
|
||||
num_copies++;
|
||||
@@ -727,8 +943,8 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
*/
|
||||
int num_vals = 0;
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
/* Sources may be SSA */
|
||||
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
|
||||
/* Sources may be SSA but destinations are always registers */
|
||||
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.dest.reg.reg)
|
||||
continue;
|
||||
|
||||
int src_idx = -1;
|
||||
@@ -741,7 +957,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
values[src_idx] = entry->src;
|
||||
}
|
||||
|
||||
nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
|
||||
nir_src dest_src = nir_src_for_reg(entry->dest.dest.reg.reg);
|
||||
|
||||
int dest_idx = -1;
|
||||
for (int i = 0; i < num_vals; ++i) {
|
||||
@@ -851,6 +1067,229 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
|
||||
}
|
||||
|
||||
struct copy_value {
|
||||
bool is_reg;
|
||||
nir_ssa_def *ssa;
|
||||
};
|
||||
|
||||
static bool
|
||||
copy_values_equal(struct copy_value a, struct copy_value b)
|
||||
{
|
||||
return a.is_reg == b.is_reg && a.ssa == b.ssa;
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_value_is_divergent(struct copy_value v)
|
||||
{
|
||||
if (!v.is_reg)
|
||||
return v.ssa->divergent;
|
||||
|
||||
nir_intrinsic_instr *decl = nir_reg_get_decl(v.ssa);
|
||||
return nir_intrinsic_divergent(decl);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_values(nir_builder *b, struct copy_value dest, struct copy_value src)
|
||||
{
|
||||
nir_ssa_def *val = src.is_reg ? nir_load_reg(b, src.ssa) : src.ssa;
|
||||
|
||||
assert(!copy_value_is_divergent(src) || copy_value_is_divergent(dest));
|
||||
|
||||
assert(dest.is_reg);
|
||||
nir_store_reg(b, val, dest.ssa);
|
||||
}
|
||||
|
||||
static void
|
||||
resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
||||
struct from_ssa_state *state)
|
||||
{
|
||||
if (!state->reg_intrinsics) {
|
||||
resolve_parallel_copy_legacy_reg(pcopy, state);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned num_copies = 0;
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
/* Sources may be SSA but destinations are always registers */
|
||||
assert(entry->src.is_ssa);
|
||||
assert(entry->dest_is_reg && entry->dest.dest.is_ssa);
|
||||
if (entry->src_is_reg && entry->src.ssa == entry->dest.reg.ssa)
|
||||
continue;
|
||||
|
||||
num_copies++;
|
||||
}
|
||||
|
||||
if (num_copies == 0) {
|
||||
/* Hooray, we don't need any copies! */
|
||||
nir_instr_remove(&pcopy->instr);
|
||||
exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
|
||||
return;
|
||||
}
|
||||
|
||||
/* The register/source corresponding to the given index */
|
||||
NIR_VLA_ZERO(struct copy_value, values, num_copies * 2);
|
||||
|
||||
/* The current location of a given piece of data. We will use -1 for "null" */
|
||||
NIR_VLA_FILL(int, loc, num_copies * 2, -1);
|
||||
|
||||
/* The piece of data that the given piece of data is to be copied from. We will use -1 for "null" */
|
||||
NIR_VLA_FILL(int, pred, num_copies * 2, -1);
|
||||
|
||||
/* The destinations we have yet to properly fill */
|
||||
NIR_VLA(int, to_do, num_copies * 2);
|
||||
int to_do_idx = -1;
|
||||
|
||||
state->builder.cursor = nir_before_instr(&pcopy->instr);
|
||||
|
||||
/* Now we set everything up:
|
||||
* - All values get assigned a temporary index
|
||||
* - Current locations are set from sources
|
||||
* - Predecessors are recorded from sources and destinations
|
||||
*/
|
||||
int num_vals = 0;
|
||||
nir_foreach_parallel_copy_entry(entry, pcopy) {
|
||||
/* Sources may be SSA but destinations are always registers */
|
||||
if (entry->src_is_reg && entry->src.ssa == entry->dest.reg.ssa)
|
||||
continue;
|
||||
|
||||
assert(entry->src.is_ssa);
|
||||
struct copy_value src_value = {
|
||||
.is_reg = entry->src_is_reg,
|
||||
.ssa = entry->src.ssa,
|
||||
};
|
||||
|
||||
int src_idx = -1;
|
||||
for (int i = 0; i < num_vals; ++i) {
|
||||
if (copy_values_equal(values[i], src_value))
|
||||
src_idx = i;
|
||||
}
|
||||
if (src_idx < 0) {
|
||||
src_idx = num_vals++;
|
||||
values[src_idx] = src_value;
|
||||
}
|
||||
|
||||
assert(entry->dest_is_reg && entry->dest.dest.is_ssa);
|
||||
struct copy_value dest_value = {
|
||||
.is_reg = true,
|
||||
.ssa = entry->dest.reg.ssa,
|
||||
};
|
||||
|
||||
int dest_idx = -1;
|
||||
for (int i = 0; i < num_vals; ++i) {
|
||||
if (copy_values_equal(values[i], dest_value)) {
|
||||
/* Each destination of a parallel copy instruction should be
|
||||
* unique. A destination may get used as a source, so we still
|
||||
* have to walk the list. However, the predecessor should not,
|
||||
* at this point, be set yet, so we should have -1 here.
|
||||
*/
|
||||
assert(pred[i] == -1);
|
||||
dest_idx = i;
|
||||
}
|
||||
}
|
||||
if (dest_idx < 0) {
|
||||
dest_idx = num_vals++;
|
||||
values[dest_idx] = dest_value;
|
||||
}
|
||||
|
||||
loc[src_idx] = src_idx;
|
||||
pred[dest_idx] = src_idx;
|
||||
|
||||
to_do[++to_do_idx] = dest_idx;
|
||||
}
|
||||
|
||||
/* Currently empty destinations we can go ahead and fill */
|
||||
NIR_VLA(int, ready, num_copies * 2);
|
||||
int ready_idx = -1;
|
||||
|
||||
/* Mark the ones that are ready for copying. We know an index is a
|
||||
* destination if it has a predecessor and it's ready for copying if
|
||||
* it's not marked as containing data.
|
||||
*/
|
||||
for (int i = 0; i < num_vals; i++) {
|
||||
if (pred[i] != -1 && loc[i] == -1)
|
||||
ready[++ready_idx] = i;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
while (ready_idx >= 0) {
|
||||
int b = ready[ready_idx--];
|
||||
int a = pred[b];
|
||||
copy_values(&state->builder, values[b], values[loc[a]]);
|
||||
|
||||
/* b has been filled, mark it as not needing to be copied */
|
||||
pred[b] = -1;
|
||||
|
||||
/* The next bit only applies if the source and destination have the
|
||||
* same divergence. If they differ (it must be convergent ->
|
||||
* divergent), then we can't guarantee we won't need the convergent
|
||||
* version of it again.
|
||||
*/
|
||||
if (copy_value_is_divergent(values[a]) ==
|
||||
copy_value_is_divergent(values[b])) {
|
||||
/* If a needs to be filled... */
|
||||
if (pred[a] != -1) {
|
||||
/* If any other copies want a they can find it at b */
|
||||
loc[a] = b;
|
||||
|
||||
/* It's ready for copying now */
|
||||
ready[++ready_idx] = a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(ready_idx < 0);
|
||||
if (to_do_idx < 0)
|
||||
break;
|
||||
|
||||
int b = to_do[to_do_idx--];
|
||||
if (pred[b] == -1)
|
||||
continue;
|
||||
|
||||
/* If we got here, then we don't have any more trivial copies that we
|
||||
* can do. We have to break a cycle, so we create a new temporary
|
||||
* register for that purpose. Normally, if going out of SSA after
|
||||
* register allocation, you would want to avoid creating temporary
|
||||
* registers. However, we are going out of SSA before register
|
||||
* allocation, so we would rather not create extra register
|
||||
* dependencies for the backend to deal with. If it wants, the
|
||||
* backend can coalesce the (possibly multiple) temporaries.
|
||||
*
|
||||
* We can also get here in the case where there is no cycle but our
|
||||
* source value is convergent, is also used as a destination by another
|
||||
* element of the parallel copy, and all the destinations of the
|
||||
* parallel copy which copy from it are divergent. In this case, the
|
||||
* above loop cannot detect that the value has moved due to all the
|
||||
* divergent destinations and we'll end up emitting a copy to a
|
||||
* temporary which never gets used. We can avoid this with additional
|
||||
* tracking or we can just trust the back-end to dead-code the unused
|
||||
* temporary (which is trivial).
|
||||
*/
|
||||
assert(num_vals < num_copies * 2);
|
||||
nir_ssa_def *reg;
|
||||
if (values[b].is_reg) {
|
||||
nir_intrinsic_instr *decl = nir_reg_get_decl(values[b].ssa);
|
||||
uint8_t num_components = nir_intrinsic_num_components(decl);
|
||||
uint8_t bit_size = nir_intrinsic_bit_size(decl);
|
||||
reg = nir_decl_reg(&state->builder, num_components, bit_size, 0);
|
||||
} else {
|
||||
reg = decl_reg_for_ssa_def(&state->builder, values[b].ssa);
|
||||
}
|
||||
set_reg_divergent(reg, copy_value_is_divergent(values[b]));
|
||||
|
||||
values[num_vals] = (struct copy_value) {
|
||||
.is_reg = true,
|
||||
.ssa = reg,
|
||||
};
|
||||
copy_values(&state->builder, values[num_vals], values[b]);
|
||||
loc[b] = num_vals;
|
||||
ready[++ready_idx] = b;
|
||||
num_vals++;
|
||||
}
|
||||
|
||||
nir_instr_remove(&pcopy->instr);
|
||||
exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
|
||||
}
|
||||
|
||||
/* Resolves the parallel copies in a block. Each block can have at most
|
||||
* two: One at the beginning, right after all the phi nodes, and one at
|
||||
* the end (or right before the final jump if it exists).
|
||||
@@ -866,27 +1305,40 @@ resolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
|
||||
if (first_instr == NULL)
|
||||
return true; /* Empty, nothing to do. */
|
||||
|
||||
if (first_instr->type == nir_instr_type_parallel_copy) {
|
||||
nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
|
||||
if (state->reg_intrinsics) {
|
||||
/* There can be load_reg in the way of the copies... don't be clever. */
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type == nir_instr_type_parallel_copy) {
|
||||
nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
|
||||
|
||||
resolve_parallel_copy(pcopy, state);
|
||||
resolve_parallel_copy(pcopy, state);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (first_instr->type == nir_instr_type_parallel_copy) {
|
||||
nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
|
||||
|
||||
resolve_parallel_copy(pcopy, state);
|
||||
}
|
||||
|
||||
/* It's possible that the above code already cleaned up the end parallel
|
||||
* copy. However, doing so removed it from the instructions list so we
|
||||
* won't find it here. Therefore, it's safe to go ahead and just look
|
||||
* for one and clean it up if it exists.
|
||||
*/
|
||||
nir_parallel_copy_instr *end_pcopy =
|
||||
get_parallel_copy_at_end_of_block(block);
|
||||
if (end_pcopy)
|
||||
resolve_parallel_copy(end_pcopy, state);
|
||||
}
|
||||
|
||||
/* It's possible that the above code already cleaned up the end parallel
|
||||
* copy. However, doing so removed it from the instructions list so we
|
||||
* won't find it here. Therefore, it's safe to go ahead and just look
|
||||
* for one and clean it up if it exists.
|
||||
*/
|
||||
nir_parallel_copy_instr *end_pcopy =
|
||||
get_parallel_copy_at_end_of_block(block);
|
||||
if (end_pcopy)
|
||||
resolve_parallel_copy(end_pcopy, state);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
|
||||
nir_convert_from_ssa_impl(nir_function_impl *impl,
|
||||
bool phi_webs_only,
|
||||
bool reg_intrinsics)
|
||||
{
|
||||
nir_shader *shader = impl->function->shader;
|
||||
|
||||
@@ -895,6 +1347,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
|
||||
state.builder = nir_builder_create(impl);
|
||||
state.dead_ctx = ralloc_context(NULL);
|
||||
state.phi_webs_only = phi_webs_only;
|
||||
state.reg_intrinsics = reg_intrinsics;
|
||||
state.merge_node_table = _mesa_pointer_hash_table_create(NULL);
|
||||
state.progress = false;
|
||||
exec_list_make_empty(&state.dead_instrs);
|
||||
@@ -915,6 +1368,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
|
||||
nir_metadata_live_ssa_defs |
|
||||
nir_metadata_dominance);
|
||||
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
coalesce_phi_nodes_block(block, &state);
|
||||
}
|
||||
@@ -923,8 +1377,12 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
|
||||
aggressive_coalesce_block(block, &state);
|
||||
}
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
resolve_registers_block(block, &state);
|
||||
if (reg_intrinsics) {
|
||||
resolve_registers_impl(impl, &state);
|
||||
} else {
|
||||
nir_foreach_block(block, impl) {
|
||||
resolve_registers_block_legacy_reg(block, &state);
|
||||
}
|
||||
}
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
@@ -949,7 +1407,8 @@ nir_convert_from_ssa(nir_shader *shader,
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
progress |= nir_convert_from_ssa_impl(impl, phi_webs_only);
|
||||
progress |= nir_convert_from_ssa_impl(impl, phi_webs_only,
|
||||
reg_intrinsics);
|
||||
}
|
||||
|
||||
return progress;
|
||||
@@ -1059,24 +1518,10 @@ struct ssa_def_to_reg_state {
|
||||
};
|
||||
|
||||
static bool
|
||||
dest_replace_ssa_with_reg(nir_dest *dest, void *void_state)
|
||||
dest_replace_ssa_with_reg_state(nir_dest *dest, void *void_state)
|
||||
{
|
||||
struct ssa_def_to_reg_state *state = void_state;
|
||||
|
||||
if (!dest->is_ssa)
|
||||
return true;
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, state->impl);
|
||||
|
||||
nir_ssa_def *reg = decl_reg_for_ssa_def(&b, &dest->ssa);
|
||||
nir_rewrite_uses_to_load_reg(&b, &dest->ssa, reg);
|
||||
|
||||
b.cursor = nir_after_instr(dest->ssa.parent_instr);
|
||||
nir_store_reg(&b, &dest->ssa, reg);
|
||||
|
||||
state->progress = true;
|
||||
|
||||
state->progress |= dest_replace_ssa_with_reg(dest, state->impl);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1159,7 +1604,7 @@ nir_lower_ssa_defs_to_regs_block(nir_block *block)
|
||||
* don't have a reason to convert it to a register.
|
||||
*/
|
||||
} else {
|
||||
nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
|
||||
nir_foreach_dest(instr, dest_replace_ssa_with_reg_state, &state);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -21,7 +21,7 @@ _nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
|
||||
return cb(&nir_instr_as_phi(instr)->dest, state);
|
||||
case nir_instr_type_parallel_copy: {
|
||||
nir_foreach_parallel_copy_entry(entry, nir_instr_as_parallel_copy(instr)) {
|
||||
if (!cb(&entry->dest, state))
|
||||
if (!entry->dest_is_reg && !cb(&entry->dest.dest, state))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -137,6 +137,8 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
|
||||
nir_foreach_parallel_copy_entry(entry, pc) {
|
||||
if (!_nir_visit_src(&entry->src, cb, state))
|
||||
return false;
|
||||
if (entry->dest_is_reg && !_nir_visit_src(&entry->dest.reg, cb, state))
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@@ -91,7 +91,7 @@ is_live(BITSET_WORD *defs_live, nir_instr *instr)
|
||||
case nir_instr_type_parallel_copy: {
|
||||
nir_parallel_copy_instr *pc = nir_instr_as_parallel_copy(instr);
|
||||
nir_foreach_parallel_copy_entry(entry, pc) {
|
||||
if (is_dest_live(&entry->dest, defs_live))
|
||||
if (entry->dest_is_reg || is_dest_live(&entry->dest.dest, defs_live))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@@ -1764,8 +1764,16 @@ print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
|
||||
if (&entry->node != exec_list_get_head(&instr->entries))
|
||||
fprintf(fp, "; ");
|
||||
|
||||
print_dest(&entry->dest, state);
|
||||
if (entry->dest_is_reg) {
|
||||
fprintf(fp, "*");
|
||||
print_src(&entry->dest.reg, state, nir_type_invalid);
|
||||
} else {
|
||||
print_dest(&entry->dest.dest, state);
|
||||
}
|
||||
fprintf(fp, " = ");
|
||||
|
||||
if (entry->src_is_reg)
|
||||
fprintf(fp, "*");
|
||||
print_src(&entry->src, state, nir_type_invalid);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user