freedreno/ir3: Add intrinsics that map to LDLW/STLW
These intrinsics let us do all of the offset calculations in NIR, which is nicer to work with and lets nir_opt_algebraic eat it all up.

Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
@@ -771,6 +771,14 @@ intrinsic("ssbo_atomic_xor_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
|
|||||||
intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
|
intrinsic("ssbo_atomic_exchange_ir3", src_comp=[1, 1, 1, 1], dest_comp=1)
|
||||||
intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1)
|
intrinsic("ssbo_atomic_comp_swap_ir3", src_comp=[1, 1, 1, 1, 1], dest_comp=1)
|
||||||
|
|
||||||
|
# IR3-specific load/store intrinsics. These access a buffer used to pass data
# between geometry stages - perhaps it's explicit access to the vertex cache.
# They lower to the LDLW/STLW hardware instructions in the ir3 backend.

# Store to the geometry-stage buffer.
# src[] = { value, offset }.
store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])

# Load from the geometry-stage buffer.
# src[] = { offset }.
load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||||
|
|
||||||
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
|
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
|
||||||
# within a blend shader to read/write the raw value from the tile buffer,
|
# within a blend shader to read/write the raw value from the tile buffer,
|
||||||
# without applying any format conversion in the process. If the shader needs
|
# without applying any format conversion in the process. If the shader needs
|
||||||
|
@@ -843,6 +843,75 @@ emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* src[] = { offset }. const_index[] = { base } */
|
||||||
|
static void
|
||||||
|
emit_intrinsic_load_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||||
|
struct ir3_instruction **dst)
|
||||||
|
{
|
||||||
|
struct ir3_block *b = ctx->block;
|
||||||
|
struct ir3_instruction *load, *offset;
|
||||||
|
unsigned base;
|
||||||
|
|
||||||
|
offset = ir3_get_src(ctx, &intr->src[0])[0];
|
||||||
|
base = nir_intrinsic_base(intr);
|
||||||
|
|
||||||
|
load = ir3_LDLW(b, offset, 0,
|
||||||
|
create_immed(b, intr->num_components), 0,
|
||||||
|
create_immed(b, base), 0);
|
||||||
|
|
||||||
|
load->cat6.type = utype_dst(intr->dest);
|
||||||
|
load->regs[0]->wrmask = MASK(intr->num_components);
|
||||||
|
|
||||||
|
load->barrier_class = IR3_BARRIER_SHARED_R;
|
||||||
|
load->barrier_conflict = IR3_BARRIER_SHARED_W;
|
||||||
|
|
||||||
|
ir3_split_dest(b, dst, load, 0, intr->num_components);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* src[] = { value, offset }. const_index[] = { base, write_mask } */
|
||||||
|
static void
|
||||||
|
emit_intrinsic_store_shared_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||||
|
{
|
||||||
|
struct ir3_block *b = ctx->block;
|
||||||
|
struct ir3_instruction *store, *offset;
|
||||||
|
struct ir3_instruction * const *value;
|
||||||
|
unsigned base, wrmask;
|
||||||
|
|
||||||
|
value = ir3_get_src(ctx, &intr->src[0]);
|
||||||
|
offset = ir3_get_src(ctx, &intr->src[1])[0];
|
||||||
|
|
||||||
|
base = nir_intrinsic_base(intr);
|
||||||
|
wrmask = nir_intrinsic_write_mask(intr);
|
||||||
|
|
||||||
|
/* Combine groups of consecutive enabled channels in one write
|
||||||
|
* message. We use ffs to find the first enabled channel and then ffs on
|
||||||
|
* the bit-inverse, down-shifted writemask to determine the length of
|
||||||
|
* the block of enabled bits.
|
||||||
|
*
|
||||||
|
* (trick stolen from i965's fs_visitor::nir_emit_cs_intrinsic())
|
||||||
|
*/
|
||||||
|
while (wrmask) {
|
||||||
|
unsigned first_component = ffs(wrmask) - 1;
|
||||||
|
unsigned length = ffs(~(wrmask >> first_component)) - 1;
|
||||||
|
|
||||||
|
store = ir3_STLW(b, offset, 0,
|
||||||
|
ir3_create_collect(ctx, &value[first_component], length), 0,
|
||||||
|
create_immed(b, length), 0);
|
||||||
|
|
||||||
|
store->cat6.dst_offset = first_component + base;
|
||||||
|
store->cat6.type = utype_src(intr->src[0]);
|
||||||
|
store->barrier_class = IR3_BARRIER_SHARED_W;
|
||||||
|
store->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
|
||||||
|
|
||||||
|
array_insert(b, b->keeps, store);
|
||||||
|
|
||||||
|
/* Clear the bits in the writemask that we just wrote, then try
|
||||||
|
* again to see if more channels are left.
|
||||||
|
*/
|
||||||
|
wrmask &= (15 << (first_component + length));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CS shared variable atomic intrinsics
|
* CS shared variable atomic intrinsics
|
||||||
*
|
*
|
||||||
@@ -1582,6 +1651,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case nir_intrinsic_load_shared_ir3:
|
||||||
|
emit_intrinsic_load_shared_ir3(ctx, intr, dst);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_store_shared_ir3:
|
||||||
|
emit_intrinsic_store_shared_ir3(ctx, intr);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
|
||||||
nir_intrinsic_infos[intr->intrinsic].name);
|
nir_intrinsic_infos[intr->intrinsic].name);
|
||||||
|
Reference in New Issue
Block a user