diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 4ae2410c33e..1e919d5ca6e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2627,8 +2627,13 @@ nir_phi_get_src_from_block(nir_phi_instr *phi, struct nir_block *block)
 
 typedef struct {
    struct exec_node node;
+   bool src_is_reg;
+   bool dest_is_reg;
    nir_src src;
-   nir_dest dest;
+   union {
+      nir_dest dest;
+      nir_src reg;
+   } dest;
 } nir_parallel_copy_entry;
 
 #define nir_foreach_parallel_copy_entry(entry, pcopy) \
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 9375802463e..dc9f594f7b9 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -37,6 +37,7 @@ struct from_ssa_state {
    void *dead_ctx;
    struct exec_list dead_instrs;
    bool phi_webs_only;
+   bool reg_intrinsics;
    struct hash_table *merge_node_table;
    nir_instr *instr;
    bool progress;
@@ -122,7 +123,10 @@ typedef struct merge_set {
    struct exec_list nodes;
    unsigned size;
    bool divergent;
-   nir_register *reg;
+   union {
+      nir_register *reg;
+      nir_ssa_def *decl;
+   } reg;
 } merge_set;
 
 #if 0
@@ -154,11 +158,10 @@ get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
    if (entry)
       return entry->data;
 
-   merge_set *set = ralloc(state->dead_ctx, merge_set);
+   merge_set *set = rzalloc(state->dead_ctx, merge_set);
    exec_list_make_empty(&set->nodes);
    set->size = 1;
    set->divergent = def->divergent;
-   set->reg = NULL;
 
    merge_node *node = ralloc(state->dead_ctx, merge_node);
    node->set = set;
@@ -401,28 +404,32 @@ isolate_phi_nodes_block(nir_shader *shader, nir_block *block, void *dead_ctx)
 
          nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
                                                   nir_parallel_copy_entry);
-         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
+         entry->src_is_reg = false;
+         entry->dest_is_reg = false;
+         nir_ssa_dest_init(&pcopy->instr, &entry->dest.dest,
                            phi->dest.ssa.num_components,
                            phi->dest.ssa.bit_size);
-         entry->dest.ssa.divergent = nir_src_is_divergent(src->src);
+         entry->dest.dest.ssa.divergent = nir_src_is_divergent(src->src);
         exec_list_push_tail(&pcopy->entries, &entry->node);
 
          assert(src->src.is_ssa);
          nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);
 
          nir_instr_rewrite_src(&phi->instr, &src->src,
-                               nir_src_for_ssa(&entry->dest.ssa));
+                               nir_src_for_ssa(&entry->dest.dest.ssa));
       }
 
       nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
                                                nir_parallel_copy_entry);
-      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
+      entry->src_is_reg = false;
+      entry->dest_is_reg = false;
+      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest.dest,
                         phi->dest.ssa.num_components, phi->dest.ssa.bit_size);
-      entry->dest.ssa.divergent = phi->dest.ssa.divergent;
+      entry->dest.dest.ssa.divergent = phi->dest.ssa.divergent;
       exec_list_push_tail(&block_pcopy->entries, &entry->node);
 
       nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               &entry->dest.ssa);
+                               &entry->dest.dest.ssa);
 
       nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
                             nir_src_for_ssa(&phi->dest.ssa));
@@ -457,9 +464,12 @@ aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
                                   struct from_ssa_state *state)
 {
    nir_foreach_parallel_copy_entry(entry, pcopy) {
+      assert(!entry->src_is_reg);
       assert(entry->src.is_ssa);
-      assert(entry->dest.is_ssa);
-      assert(entry->dest.ssa.num_components == entry->src.ssa->num_components);
+      assert(!entry->dest_is_reg);
+      assert(entry->dest.dest.is_ssa);
+      assert(entry->dest.dest.ssa.num_components ==
+             entry->src.ssa->num_components);
 
       /* Since load_const instructions are SSA only, we can't replace their
       * destinations with registers and, therefore, can't coalesce them.
@@ -468,7 +478,7 @@ aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
          continue;
 
       merge_node *src_node = get_merge_node(entry->src.ssa, state);
-      merge_node *dest_node = get_merge_node(&entry->dest.ssa, state);
+      merge_node *dest_node = get_merge_node(&entry->dest.dest.ssa, state);
 
       if (src_node->set == dest_node->set)
          continue;
@@ -529,6 +539,13 @@ decl_reg_for_ssa_def(nir_builder *b, nir_ssa_def *def)
    return nir_decl_reg(b, def->num_components, def->bit_size, 0);
 }
 
+static void
+set_reg_divergent(nir_ssa_def *reg, bool divergent)
+{
+   nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
+   nir_intrinsic_set_divergent(decl, divergent);
+}
+
 void
 nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
                              nir_ssa_def *reg)
@@ -536,6 +553,19 @@ nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
    nir_foreach_use_including_if_safe(use, old) {
       b->cursor = nir_before_src(use);
 
+      /* If this is a parallel copy, it can just take the register directly */
+      if (!use->is_if &&
+          use->parent_instr->type == nir_instr_type_parallel_copy) {
+
+         nir_parallel_copy_entry *copy_entry =
+            list_entry(use, nir_parallel_copy_entry, src);
+
+         assert(!copy_entry->src_is_reg);
+         copy_entry->src_is_reg = true;
+         nir_src_rewrite_ssa(&copy_entry->src, reg);
+         continue;
+      }
+
       /* If the immediate preceding instruction is a load_reg from the same
        * register, use it instead of creating a new load_reg. This helps when
        * a register is referenced in multiple sources in the same instruction,
@@ -562,7 +592,7 @@ nir_rewrite_uses_to_load_reg(nir_builder *b, nir_ssa_def *old,
 }
 
 static bool
-rewrite_ssa_def(nir_ssa_def *def, void *void_state)
+rewrite_ssa_def_legacy_reg(nir_ssa_def *def, void *void_state)
 {
    struct from_ssa_state *state = void_state;
    nir_register *reg;
@@ -577,12 +607,12 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
        * the things in the merge set should be the same so it doesn't
        * matter which node's definition we use.
        */
-      if (node->set->reg == NULL) {
-         node->set->reg = create_reg_for_ssa_def(def, state->builder.impl);
-         node->set->reg->divergent = node->set->divergent;
+      if (node->set->reg.reg == NULL) {
+         node->set->reg.reg = create_reg_for_ssa_def(def, state->builder.impl);
+         node->set->reg.reg->divergent = node->set->divergent;
       }
 
-      reg = node->set->reg;
+      reg = node->set->reg.reg;
    } else {
       if (state->phi_webs_only)
         return true;
@@ -626,11 +656,12 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
  * remove phi nodes.
  */
 static void
-resolve_registers_block(nir_block *block, struct from_ssa_state *state)
+resolve_registers_block_legacy_reg(nir_block *block,
+                                   struct from_ssa_state *state)
 {
    nir_foreach_instr_safe(instr, block) {
       state->instr = instr;
-      nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
+      nir_foreach_ssa_def(instr, rewrite_ssa_def_legacy_reg, state);
 
       if (instr->type == nir_instr_type_phi) {
          nir_instr_remove(instr);
@@ -641,6 +672,189 @@ resolve_registers_block(nir_block *block, struct from_ssa_state *state)
    state->instr = NULL;
 }
 
+static bool
+dest_replace_ssa_with_reg(nir_dest *dest, nir_function_impl *impl)
+{
+   if (!dest->is_ssa)
+      return false;
+
+   nir_builder b = nir_builder_create(impl);
+
+   nir_ssa_def *reg = decl_reg_for_ssa_def(&b, &dest->ssa);
+   nir_rewrite_uses_to_load_reg(&b, &dest->ssa, reg);
+
+   b.cursor = nir_after_instr(dest->ssa.parent_instr);
+   nir_store_reg(&b, &dest->ssa, reg);
+
+   return true;
+}
+
+static nir_ssa_def *
+reg_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search(state->merge_node_table, def);
+   if (entry) {
+      /* In this case, we're part of a phi web. Use the web's register. */
+      merge_node *node = (merge_node *)entry->data;
+
+      /* If it doesn't have a register yet, create one. Note that all of
+       * the things in the merge set should be the same so it doesn't
+       * matter which node's definition we use.
+       */
+      if (node->set->reg.decl == NULL) {
+         node->set->reg.decl = decl_reg_for_ssa_def(&state->builder, def);
+         set_reg_divergent(node->set->reg.decl, node->set->divergent);
+      }
+
+      return node->set->reg.decl;
+   } else {
+      assert(state->phi_webs_only);
+      return NULL;
+   }
+}
+
+static void
+remove_no_op_phi(nir_instr *instr, struct from_ssa_state *state)
+{
+#ifndef NDEBUG
+   nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+   assert(phi->dest.is_ssa);
+   struct hash_entry *entry =
+      _mesa_hash_table_search(state->merge_node_table, &phi->dest.ssa);
+   assert(entry != NULL);
+   merge_node *node = (merge_node *)entry->data;
+
+   nir_foreach_phi_src(src, phi) {
+      if (nir_src_is_undef(src->src))
+         continue;
+
+      assert(src->src.is_ssa);
+      entry = _mesa_hash_table_search(state->merge_node_table, src->src.ssa);
+      assert(entry != NULL);
+      merge_node *src_node = (merge_node *)entry->data;
+      assert(src_node->set == node->set);
+   }
+#endif
+
+   nir_instr_remove(instr);
+}
+
+static bool
+rewrite_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_ssa_def *reg = reg_for_ssa_def(def, state);
+   if (reg == NULL)
+      return true;
+
+   assert(nir_ssa_def_is_unused(def));
+
+   /* At this point we know a priori that this SSA def is part of a
+    * nir_dest; load_const instructions are SSA-only and can never end
+    * up in a phi web, so we never have to store one to a register.
+    */
+   assert(def->parent_instr->type != nir_instr_type_load_const);
+   nir_store_reg(&state->builder, def, reg);
+
+   state->progress = true;
+   return true;
+}
+
+static bool
+rewrite_src(nir_src *src, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   assert(src->is_ssa);
+   nir_ssa_def *reg = reg_for_ssa_def(src->ssa, state);
+   if (reg == NULL)
+      return true;
+
+   nir_src_rewrite_ssa(src, nir_load_reg(&state->builder, reg));
+
+   state->progress = true;
+   return true;
+}
+
+/* Resolves ssa definitions to registers. While we're at it, we also
+ * remove phi nodes.
+ */
+static void
+resolve_registers_impl(nir_function_impl *impl, struct from_ssa_state *state)
+{
+   nir_foreach_block_reverse(block, impl) {
+      /* Remove successor phis in case there's a back edge. */
+      for (unsigned i = 0; i < 2; i++) {
+         nir_block *succ = block->successors[i];
+         if (succ == NULL)
+            continue;
+
+         nir_foreach_instr_safe(instr, succ) {
+            if (instr->type != nir_instr_type_phi)
+               break;
+
+            remove_no_op_phi(instr, state);
+         }
+      }
+
+      /* The following if comes right after the block. Handle its condition
+       * as the last source "in" the block.
+       */
+      nir_if *nif = nir_block_get_following_if(block);
+      if (nif) {
+         state->builder.cursor = nir_before_src(&nif->condition);
+         rewrite_src(&nif->condition, state);
+      }
+
+      nir_foreach_instr_reverse_safe(instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_phi:
+            remove_no_op_phi(instr, state);
+            break;
+
+         case nir_instr_type_parallel_copy: {
+            nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
+
+            nir_foreach_parallel_copy_entry(entry, pcopy) {
+               assert(!entry->dest_is_reg);
+               assert(entry->dest.dest.is_ssa);
+               assert(nir_ssa_def_is_unused(&entry->dest.dest.ssa));
+
+               /* Parallel copy destinations will always be registers */
+               nir_ssa_def *reg = reg_for_ssa_def(&entry->dest.dest.ssa, state);
+               assert(reg != NULL);
+
+               entry->dest_is_reg = true;
+               entry->dest.reg = NIR_SRC_INIT;
+               nir_instr_rewrite_src(&pcopy->instr, &entry->dest.reg,
+                                     nir_src_for_ssa(reg));
+            }
+
+            nir_foreach_parallel_copy_entry(entry, pcopy) {
+               assert(!entry->src_is_reg);
+               assert(entry->src.is_ssa);
+               nir_ssa_def *reg = reg_for_ssa_def(entry->src.ssa, state);
+               if (reg == NULL)
+                  continue;
+
+               entry->src_is_reg = true;
+               nir_instr_rewrite_src(&pcopy->instr, &entry->src,
+                                     nir_src_for_ssa(reg));
+            }
+            break;
+         }
+
+         default:
+            state->builder.cursor = nir_after_instr(instr);
+            nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
+            state->builder.cursor = nir_before_instr(instr);
+            nir_foreach_src(instr, rewrite_src, state);
+         }
+      }
+   }
+}
+
 static void
 emit_copy(nir_builder *b, nir_src src, nir_src dest_src)
 {
@@ -686,13 +900,15 @@
  * can continue with the above steps.
  */
 static void
-resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
-                      struct from_ssa_state *state)
+resolve_parallel_copy_legacy_reg(nir_parallel_copy_instr *pcopy,
+                                 struct from_ssa_state *state)
 {
    unsigned num_copies = 0;
    nir_foreach_parallel_copy_entry(entry, pcopy) {
-      /* Sources may be SSA */
-      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+      /* Sources may be SSA but destinations are always registers */
+      assert(!entry->src_is_reg);
+      assert(!entry->dest_is_reg && !entry->dest.dest.is_ssa);
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.dest.reg.reg)
         continue;
 
       num_copies++;
@@ -727,8 +943,8 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
     */
    int num_vals = 0;
    nir_foreach_parallel_copy_entry(entry, pcopy) {
-      /* Sources may be SSA */
-      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+      /* Sources may be SSA but destinations are always registers */
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.dest.reg.reg)
         continue;
 
       int src_idx = -1;
@@ -741,7 +957,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
          values[src_idx] = entry->src;
       }
 
-      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
+      nir_src dest_src = nir_src_for_reg(entry->dest.dest.reg.reg);
 
       int dest_idx = -1;
       for (int i = 0; i < num_vals; ++i) {
@@ -851,6 +1067,229 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
    exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
 }
 
+struct copy_value {
+   bool is_reg;
+   nir_ssa_def *ssa;
+};
+
+static bool
+copy_values_equal(struct copy_value a, struct copy_value b)
+{
+   return a.is_reg == b.is_reg && a.ssa == b.ssa;
+}
+
+static bool
+copy_value_is_divergent(struct copy_value v)
+{
+   if (!v.is_reg)
+      return v.ssa->divergent;
+
+   nir_intrinsic_instr *decl = nir_reg_get_decl(v.ssa);
+   return nir_intrinsic_divergent(decl);
+}
+
+static void
+copy_values(nir_builder *b, struct copy_value dest, struct copy_value src)
+{
+   nir_ssa_def *val = src.is_reg ? nir_load_reg(b, src.ssa) : src.ssa;
+
+   assert(!copy_value_is_divergent(src) || copy_value_is_divergent(dest));
+
+   assert(dest.is_reg);
+   nir_store_reg(b, val, dest.ssa);
+}
+
+static void
+resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
+                      struct from_ssa_state *state)
+{
+   if (!state->reg_intrinsics) {
+      resolve_parallel_copy_legacy_reg(pcopy, state);
+      return;
+   }
+
+   unsigned num_copies = 0;
+   nir_foreach_parallel_copy_entry(entry, pcopy) {
+      /* Sources may be SSA but destinations are always registers */
+      assert(entry->src.is_ssa);
+      assert(entry->dest_is_reg && entry->dest.reg.is_ssa);
+      if (entry->src_is_reg && entry->src.ssa == entry->dest.reg.ssa)
+         continue;
+
+      num_copies++;
+   }
+
+   if (num_copies == 0) {
+      /* Hooray, we don't need any copies! */
+      nir_instr_remove(&pcopy->instr);
+      exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
+      return;
+   }
+
+   /* The register/source corresponding to the given index */
+   NIR_VLA_ZERO(struct copy_value, values, num_copies * 2);
+
+   /* The current location of a given piece of data. We will use -1 for "null" */
+   NIR_VLA_FILL(int, loc, num_copies * 2, -1);
+
+   /* The piece of data that the given piece of data is to be copied from.
+    * We will use -1 for "null" */
+   NIR_VLA_FILL(int, pred, num_copies * 2, -1);
+
+   /* The destinations we have yet to properly fill */
+   NIR_VLA(int, to_do, num_copies * 2);
+   int to_do_idx = -1;
+
+   state->builder.cursor = nir_before_instr(&pcopy->instr);
+
+   /* Now we set everything up:
+    *  - All values get assigned a temporary index
+    *  - Current locations are set from sources
+    *  - Predecessors are recorded from sources and destinations
+    */
+   int num_vals = 0;
+   nir_foreach_parallel_copy_entry(entry, pcopy) {
+      /* Sources may be SSA but destinations are always registers */
+      if (entry->src_is_reg && entry->src.ssa == entry->dest.reg.ssa)
+         continue;
+
+      assert(entry->src.is_ssa);
+      struct copy_value src_value = {
+         .is_reg = entry->src_is_reg,
+         .ssa = entry->src.ssa,
+      };
+
+      int src_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (copy_values_equal(values[i], src_value))
+            src_idx = i;
+      }
+      if (src_idx < 0) {
+         src_idx = num_vals++;
+         values[src_idx] = src_value;
+      }
+
+      assert(entry->dest_is_reg && entry->dest.reg.is_ssa);
+      struct copy_value dest_value = {
+         .is_reg = true,
+         .ssa = entry->dest.reg.ssa,
+      };
+
+      int dest_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (copy_values_equal(values[i], dest_value)) {
+            /* Each destination of a parallel copy instruction should be
+             * unique. A destination may get used as a source, so we still
+             * have to walk the list. However, the predecessor should not,
+             * at this point, be set yet, so we should have -1 here.
+             */
+            assert(pred[i] == -1);
+            dest_idx = i;
+         }
+      }
+      if (dest_idx < 0) {
+         dest_idx = num_vals++;
+         values[dest_idx] = dest_value;
+      }
+
+      loc[src_idx] = src_idx;
+      pred[dest_idx] = src_idx;
+
+      to_do[++to_do_idx] = dest_idx;
+   }
+
+   /* Currently empty destinations we can go ahead and fill */
+   NIR_VLA(int, ready, num_copies * 2);
+   int ready_idx = -1;
+
+   /* Mark the ones that are ready for copying. We know an index is a
+    * destination if it has a predecessor and it's ready for copying if
+    * it's not marked as containing data.
+    */
+   for (int i = 0; i < num_vals; i++) {
+      if (pred[i] != -1 && loc[i] == -1)
+         ready[++ready_idx] = i;
+   }
+
+   while (1) {
+      while (ready_idx >= 0) {
+         int b = ready[ready_idx--];
+         int a = pred[b];
+         copy_values(&state->builder, values[b], values[loc[a]]);
+
+         /* b has been filled, mark it as not needing to be copied */
+         pred[b] = -1;
+
+         /* The next bit only applies if the source and destination have the
+          * same divergence. If they differ (it must be convergent ->
+          * divergent), then we can't guarantee we won't need the convergent
+          * version of it again.
+          */
+         if (copy_value_is_divergent(values[a]) ==
+             copy_value_is_divergent(values[b])) {
+            /* If a needs to be filled... */
+            if (pred[a] != -1) {
+               /* If any other copies want a they can find it at b */
+               loc[a] = b;
+
+               /* It's ready for copying now */
+               ready[++ready_idx] = a;
+            }
+         }
+      }
+
+      assert(ready_idx < 0);
+      if (to_do_idx < 0)
+         break;
+
+      int b = to_do[to_do_idx--];
+      if (pred[b] == -1)
+         continue;
+
+      /* If we got here, then we don't have any more trivial copies that we
+       * can do. We have to break a cycle, so we create a new temporary
+       * register for that purpose. Normally, if going out of SSA after
+       * register allocation, you would want to avoid creating temporary
+       * registers. However, we are going out of SSA before register
+       * allocation, so we would rather not create extra register
+       * dependencies for the backend to deal with. If it wants, the
+       * backend can coalesce the (possibly multiple) temporaries.
+       *
+       * We can also get here in the case where there is no cycle but our
+       * source value is convergent, is also used as a destination by another
+       * element of the parallel copy, and all the destinations of the
+       * parallel copy which copy from it are divergent. In this case, the
+       * above loop cannot detect that the value has moved due to all the
+       * divergent destinations and we'll end up emitting a copy to a
+       * temporary which never gets used. We can avoid this with additional
+       * tracking or we can just trust the back-end to dead-code the unused
+       * temporary (which is trivial).
+       */
+      assert(num_vals < num_copies * 2);
+      nir_ssa_def *reg;
+      if (values[b].is_reg) {
+         nir_intrinsic_instr *decl = nir_reg_get_decl(values[b].ssa);
+         uint8_t num_components = nir_intrinsic_num_components(decl);
+         uint8_t bit_size = nir_intrinsic_bit_size(decl);
+         reg = nir_decl_reg(&state->builder, num_components, bit_size, 0);
+      } else {
+         reg = decl_reg_for_ssa_def(&state->builder, values[b].ssa);
+      }
+      set_reg_divergent(reg, copy_value_is_divergent(values[b]));
+
+      values[num_vals] = (struct copy_value) {
+         .is_reg = true,
+         .ssa = reg,
+      };
+      copy_values(&state->builder, values[num_vals], values[b]);
+      loc[b] = num_vals;
+      ready[++ready_idx] = b;
+      num_vals++;
+   }
+
+   nir_instr_remove(&pcopy->instr);
+   exec_list_push_tail(&state->dead_instrs, &pcopy->instr.node);
+}
+
 /* Resolves the parallel copies in a block. Each block can have at most
  * two: One at the beginning, right after all the phi noces, and one at
  * the end (or right before the final jump if it exists).
  */
 static bool
 resolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
@@ -866,27 +1305,40 @@ resolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
    if (first_instr == NULL)
       return true; /* Empty, nothing to do. */
 
-   if (first_instr->type == nir_instr_type_parallel_copy) {
-      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
+   if (state->reg_intrinsics) {
+      /* There can be load_reg in the way of the copies... don't be clever. */
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type == nir_instr_type_parallel_copy) {
+            nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(instr);
 
-      resolve_parallel_copy(pcopy, state);
+            resolve_parallel_copy(pcopy, state);
+         }
+      }
+   } else {
+      if (first_instr->type == nir_instr_type_parallel_copy) {
+         nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
+
+         resolve_parallel_copy(pcopy, state);
+      }
+
+      /* It's possible that the above code already cleaned up the end parallel
+       * copy. However, doing so removed it from the instructions list so we
+       * won't find it here. Therefore, it's safe to go ahead and just look
+       * for one and clean it up if it exists.
+       */
+      nir_parallel_copy_instr *end_pcopy =
+         get_parallel_copy_at_end_of_block(block);
+      if (end_pcopy)
+         resolve_parallel_copy(end_pcopy, state);
    }
 
-   /* It's possible that the above code already cleaned up the end parallel
-    * copy. However, doing so removed it form the instructions list so we
-    * won't find it here. Therefore, it's safe to go ahead and just look
-    * for one and clean it up if it exists.
-    */
-   nir_parallel_copy_instr *end_pcopy =
-      get_parallel_copy_at_end_of_block(block);
-   if (end_pcopy)
-      resolve_parallel_copy(end_pcopy, state);
-
    return true;
 }
 
 static bool
-nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
+nir_convert_from_ssa_impl(nir_function_impl *impl,
+                          bool phi_webs_only,
+                          bool reg_intrinsics)
 {
    nir_shader *shader = impl->function->shader;
 
@@ -895,6 +1347,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
    state.builder = nir_builder_create(impl);
    state.dead_ctx = ralloc_context(NULL);
    state.phi_webs_only = phi_webs_only;
+   state.reg_intrinsics = reg_intrinsics;
    state.merge_node_table = _mesa_pointer_hash_table_create(NULL);
    state.progress = false;
    exec_list_make_empty(&state.dead_instrs);
@@ -915,6 +1368,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
                                nir_metadata_live_ssa_defs |
                                nir_metadata_dominance);
 
+
    nir_foreach_block(block, impl) {
       coalesce_phi_nodes_block(block, &state);
    }
@@ -923,8 +1377,12 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
       aggressive_coalesce_block(block, &state);
    }
 
-   nir_foreach_block(block, impl) {
-      resolve_registers_block(block, &state);
+   if (reg_intrinsics) {
+      resolve_registers_impl(impl, &state);
+   } else {
+      nir_foreach_block(block, impl) {
+         resolve_registers_block_legacy_reg(block, &state);
+      }
    }
 
    nir_foreach_block(block, impl) {
@@ -949,7 +1407,8 @@ nir_convert_from_ssa(nir_shader *shader,
    bool progress = false;
 
    nir_foreach_function_impl(impl, shader) {
-      progress |= nir_convert_from_ssa_impl(impl, phi_webs_only);
+      progress |= nir_convert_from_ssa_impl(impl, phi_webs_only,
+                                            reg_intrinsics);
    }
 
    return progress;
@@ -1059,24 +1518,10 @@ struct ssa_def_to_reg_state {
 };
 
 static bool
-dest_replace_ssa_with_reg(nir_dest *dest, void *void_state)
+dest_replace_ssa_with_reg_state(nir_dest *dest, void *void_state)
 {
    struct ssa_def_to_reg_state *state = void_state;
-
-   if (!dest->is_ssa)
-      return true;
-
-   nir_builder b;
-   nir_builder_init(&b, state->impl);
-
-   nir_ssa_def *reg = decl_reg_for_ssa_def(&b, &dest->ssa);
-   nir_rewrite_uses_to_load_reg(&b, &dest->ssa, reg);
-
-   b.cursor = nir_after_instr(dest->ssa.parent_instr);
-   nir_store_reg(&b, &dest->ssa, reg);
-
-   state->progress = true;
-
+   state->progress |= dest_replace_ssa_with_reg(dest, state->impl);
    return true;
 }
 
@@ -1159,7 +1604,7 @@ nir_lower_ssa_defs_to_regs_block(nir_block *block)
          * don't have a reason to convert it to a register.
          */
      } else {
-         nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
+         nir_foreach_dest(instr, dest_replace_ssa_with_reg_state, &state);
      }
   }
 
diff --git a/src/compiler/nir/nir_inline_helpers.h b/src/compiler/nir/nir_inline_helpers.h
index 125dd8a537c..21d552e8b6e 100644
--- a/src/compiler/nir/nir_inline_helpers.h
+++ b/src/compiler/nir/nir_inline_helpers.h
@@ -21,7 +21,7 @@ _nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
       return cb(&nir_instr_as_phi(instr)->dest, state);
    case nir_instr_type_parallel_copy: {
       nir_foreach_parallel_copy_entry(entry, nir_instr_as_parallel_copy(instr)) {
-         if (!cb(&entry->dest, state))
+         if (!entry->dest_is_reg && !cb(&entry->dest.dest, state))
            return false;
      }
      return true;
@@ -137,6 +137,8 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
      nir_foreach_parallel_copy_entry(entry, pc) {
         if (!_nir_visit_src(&entry->src, cb, state))
            return false;
+         if (entry->dest_is_reg && !_nir_visit_src(&entry->dest.reg, cb, state))
+            return false;
      }
      break;
   }
diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index b8da60ed787..eff84d565ac 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -91,7 +91,7 @@ is_live(BITSET_WORD *defs_live, nir_instr *instr)
    case nir_instr_type_parallel_copy: {
       nir_parallel_copy_instr *pc = nir_instr_as_parallel_copy(instr);
       nir_foreach_parallel_copy_entry(entry, pc) {
-         if (is_dest_live(&entry->dest, defs_live))
+         if (entry->dest_is_reg || is_dest_live(&entry->dest.dest, defs_live))
            return true;
      }
      return false;
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 3fb9007ad14..bbaa45fa1c3 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -1764,8 +1764,16 @@ print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state)
      if (&entry->node != exec_list_get_head(&instr->entries))
         fprintf(fp, "; ");
 
-      print_dest(&entry->dest, state);
+      if (entry->dest_is_reg) {
+         fprintf(fp, "*");
+         print_src(&entry->dest.reg, state, nir_type_invalid);
+      } else {
+         print_dest(&entry->dest.dest, state);
+      }
      fprintf(fp, " = ");
+
+      if (entry->src_is_reg)
+         fprintf(fp, "*");
      print_src(&entry->src, state, nir_type_invalid);
   }
 }
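---

Reviewer note: a minimal caller sketch for the new mode, placed here in the
trailer so it is not part of the patch itself. nir_convert_from_ssa and
NIR_PASS_V are the real entry points; the backend function name and the
surrounding flow are hypothetical:

   /* Go out of SSA using decl_reg/load_reg/store_reg intrinsics instead of
    * legacy nir_register: phi webs collapse onto a shared register and any
    * remaining parallel copies become load_reg/store_reg pairs, so the
    * backend never sees nir_register at all.
    */
   static void
   my_backend_out_of_ssa(nir_shader *shader)
   {
      NIR_PASS_V(shader, nir_convert_from_ssa,
                 false /* phi_webs_only */,
                 true  /* reg_intrinsics */);
   }

For a swap such as {a := b; b := a}, resolve_parallel_copy() finds no entry
it can fill immediately, so it breaks the cycle with a fresh temporary
register: t := b, then b := a, then a := t. The temporary's divergence is
matched via set_reg_divergent() and any redundant temporary is left for the
backend to coalesce or dead-code.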