ir3: switch from nir_register to the NIR register intrinsics.

Summary of what the hunks below do:
  * struct ir3_array now remembers the nir_ssa_def of the declaring
    decl_reg intrinsic instead of a nir_register pointer, and
    ir3_declare_array()/ir3_get_array() are re-typed to match.
  * emit_intrinsic() gains cases for decl_reg, load_reg(_indirect) and
    store_reg(_indirect); the array load/store emission that used to sit
    in ir3_get_src()/ir3_put_dst() moves there.
  * ir3_get_dst(), ir3_get_src() and ir3_put_dst() assert SSA and drop
    their non-SSA branches.
  * ir3_create_array_store() loses its "write straight into the array
    without a mov" path, so a mov is always created.
  * nir_lower_locals_to_regs is replaced by
    nir_lower_locals_to_reg_intrinsics.

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 0f7096a5e18..9cf28f6b06a 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -580,7 +580,7 @@ struct ir3_array {
    unsigned length;
    unsigned id;
 
-   struct nir_register *r;
+   struct nir_ssa_def *r;
 
    /* To avoid array write's from getting DCE'd, keep track of the
     * most recent write. Any array access depends on the most
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 4305682f54b..f810ad92852 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1992,6 +1992,63 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    const unsigned primitive_map = const_state->offsets.primitive_map * 4;
 
    switch (intr->intrinsic) {
+   case nir_intrinsic_decl_reg:
+      /* There's logically nothing to do, but this has a destination in NIR so
+       * plug in something... It will get DCE'd.
+       */
+      dst[0] = create_immed(ctx->block, 0);
+      break;
+
+   case nir_intrinsic_load_reg:
+   case nir_intrinsic_load_reg_indirect: {
+      struct ir3_array *arr = ir3_get_array(ctx, intr->src[0].ssa);
+      struct ir3_instruction *addr = NULL;
+
+      if (intr->intrinsic == nir_intrinsic_load_reg_indirect) {
+         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[1])[0],
+                              dest_components);
+      }
+
+      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa);
+      assert(dest_components == nir_intrinsic_num_components(decl));
+
+      for (unsigned i = 0; i < dest_components; i++) {
+         unsigned n = nir_intrinsic_base(intr) * dest_components + i;
+         compile_assert(ctx, n < arr->length);
+         dst[i] = ir3_create_array_load(ctx, arr, n, addr);
+      }
+
+      break;
+   }
+
+   case nir_intrinsic_store_reg:
+   case nir_intrinsic_store_reg_indirect: {
+      struct ir3_array *arr = ir3_get_array(ctx, intr->src[1].ssa);
+      unsigned num_components = nir_src_num_components(intr->src[0]);
+      struct ir3_instruction *addr = NULL;
+
+      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa);
+      assert(num_components == nir_intrinsic_num_components(decl));
+
+      struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[0]);
+
+      if (intr->intrinsic == nir_intrinsic_store_reg_indirect) {
+         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, &intr->src[2])[0],
+                              num_components);
+      }
+
+      u_foreach_bit(i, nir_intrinsic_write_mask(intr)) {
+         assert(i < num_components);
+
+         unsigned n = nir_intrinsic_base(intr) * num_components + i;
+         compile_assert(ctx, n < arr->length);
+         if (value[i])
+            ir3_create_array_store(ctx, arr, n, value[i], addr);
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_uniform:
       idx = nir_intrinsic_base(intr);
       if (nir_src_is_const(intr->src[0])) {
@@ -4377,8 +4434,8 @@ emit_instructions(struct ir3_context *ctx)
    ctx->so->shared_size = ctx->s->info.shared_size;
 
    /* NOTE: need to do something more clever when we support >1 fxn */
-   nir_foreach_register (reg, &fxn->registers) {
-      ir3_declare_array(ctx, reg);
+   nir_foreach_reg_decl (decl, fxn) {
+      ir3_declare_array(ctx, decl);
    }
 
    if (ctx->so->type == MESA_SHADER_TESS_CTRL &&
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 6f8f7384484..3e995a10ecf 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -29,6 +29,8 @@
 #include "ir3_image.h"
 #include "ir3_nir.h"
 #include "ir3_shader.h"
+#include "nir.h"
+#include "nir_intrinsics_indices.h"
 
 struct ir3_context *
 ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
@@ -87,9 +89,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
     */
    bool progress = false;
    bool needs_late_alg = false;
-   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
+   NIR_PASS(progress, ctx->s, nir_lower_locals_to_reg_intrinsics, 1);
 
-   /* we could need cleanup after lower_locals_to_regs */
+   /* we could need cleanup after lower_locals_to_reg_intrinsics */
    while (progress) {
       progress = false;
       NIR_PASS(progress, ctx->s, nir_opt_algebraic);
@@ -98,9 +100,9 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
    }
 
    /* We want to lower nir_op_imul as late as possible, to catch also
-    * those generated by earlier passes (e.g, nir_lower_locals_to_regs).
-    * However, we want a final swing of a few passes to have a chance
-    * at optimizing the result.
+    * those generated by earlier passes (e.g,
+    * nir_lower_locals_to_reg_intrinsics). However, we want a final swing of a
+    * few passes to have a chance at optimizing the result.
     */
    progress = false;
    NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
@@ -201,17 +203,9 @@ ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
 struct ir3_instruction **
 ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
 {
-   struct ir3_instruction **value;
+   assert(dst->is_ssa);
+   struct ir3_instruction **value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
 
-   if (dst->is_ssa) {
-      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
-   } else {
-      value = ralloc_array(ctx, struct ir3_instruction *, n);
-   }
-
-   /* NOTE: in non-ssa case, we don't really need to store last_dst
-    * but this helps us catch cases where put_dst() call is forgotten
-    */
    compile_assert(ctx, !ctx->last_dst);
    ctx->last_dst = value;
    ctx->last_dst_n = n;
@@ -222,31 +216,11 @@ ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
 struct ir3_instruction *const *
 ir3_get_src(struct ir3_context *ctx, nir_src *src)
 {
-   if (src->is_ssa) {
-      struct hash_entry *entry;
-      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
-      compile_assert(ctx, entry);
-      return entry->data;
-   } else {
-      nir_register *reg = src->reg.reg;
-      struct ir3_array *arr = ir3_get_array(ctx, reg);
-      unsigned num_components = arr->r->num_components;
-      struct ir3_instruction *addr = NULL;
-      struct ir3_instruction **value =
-         ralloc_array(ctx, struct ir3_instruction *, num_components);
-
-      if (src->reg.indirect)
-         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
-                              reg->num_components);
-
-      for (unsigned i = 0; i < num_components; i++) {
-         unsigned n = src->reg.base_offset * reg->num_components + i;
-         compile_assert(ctx, n < arr->length);
-         value[i] = ir3_create_array_load(ctx, arr, n, addr);
-      }
-
-      return value;
-   }
+   assert(src->is_ssa);
+   struct hash_entry *entry;
+   entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
+   compile_assert(ctx, entry);
+   return entry->data;
 }
 
 void
@@ -279,27 +253,7 @@ ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
       }
    }
 
-   if (!dst->is_ssa) {
-      nir_register *reg = dst->reg.reg;
-      struct ir3_array *arr = ir3_get_array(ctx, reg);
-      unsigned num_components = ctx->last_dst_n;
-      struct ir3_instruction *addr = NULL;
-
-      if (dst->reg.indirect)
-         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
-                              reg->num_components);
-
-      for (unsigned i = 0; i < num_components; i++) {
-         unsigned n = dst->reg.base_offset * reg->num_components + i;
-         compile_assert(ctx, n < arr->length);
-         if (!ctx->last_dst[i])
-            continue;
-         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
-      }
-
-      ralloc_free(ctx->last_dst);
-   }
-
+   assert(dst->is_ssa);
    ctx->last_dst = NULL;
    ctx->last_dst_n = 0;
 }
@@ -543,7 +497,7 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
  */
 
 void
-ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
+ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl)
 {
    struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
    arr->id = ++ctx->num_arrays;
@@ -554,15 +508,17 @@ ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
     * It would be nice if there was a nir pass to convert arrays of
     * length 1 to ssa.
     */
-   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
+   arr->length = nir_intrinsic_num_components(decl) *
+                 MAX2(1, nir_intrinsic_num_array_elems(decl));
+
    compile_assert(ctx, arr->length > 0);
-   arr->r = reg;
-   arr->half = ir3_bitsize(ctx, reg->bit_size) <= 16;
+   arr->r = &decl->dest.ssa;
+   arr->half = ir3_bitsize(ctx, nir_intrinsic_bit_size(decl)) <= 16;
    list_addtail(&arr->node, &ctx->ir->array_list);
 }
 
 struct ir3_array *
-ir3_get_array(struct ir3_context *ctx, nir_register *reg)
+ir3_get_array(struct ir3_context *ctx, nir_ssa_def *reg)
 {
    foreach_array (arr, &ctx->ir->array_list) {
       if (arr->r == reg)
@@ -622,34 +578,6 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
    struct ir3_register *dst;
    unsigned flags = 0;
 
-   /* if not relative store, don't create an extra mov, since that
-    * ends up being difficult for cp to remove.
-    *
-    * Also, don't skip the mov if the src is meta (like fanout/split),
-    * since that creates a situation that RA can't really handle properly.
-    */
-   if (!address && !is_meta(src)) {
-      dst = src->dsts[0];
-
-      src->barrier_class |= IR3_BARRIER_ARRAY_W;
-      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
-
-      dst->flags |= IR3_REG_ARRAY;
-      dst->size = arr->length;
-      dst->array.id = arr->id;
-      dst->array.offset = n;
-      dst->array.base = INVALID_REG;
-
-      if (arr->last_write && arr->last_write->instr->block == src->block)
-         ir3_reg_set_last_array(src, dst, arr->last_write);
-
-      arr->last_write = dst;
-
-      array_insert(block, block->keeps, src);
-
-      return;
-   }
-
    mov = ir3_instr_create(block, OPC_MOV, 1, 1);
    if (arr->half) {
       mov->cat1.src_type = TYPE_U16;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 22c7b1b3cdf..ecd8e9bc19e 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -243,8 +243,8 @@ struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
 struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
                                           struct ir3_instruction *src);
 
-void ir3_declare_array(struct ir3_context *ctx, nir_register *reg);
-struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_register *reg);
+void ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl);
+struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_ssa_def *reg);
 struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
                                               struct ir3_array *arr, int n,
                                               struct ir3_instruction *address);