freedreno/ir3: Add load and store intrinsics for global io
These intrinsics take an ivec2 for the 64-bit base address and an integer offset. Signed-off-by: Kristian H. Kristensen <hoegsberg@google.com> Acked-by: Eric Anholt <eric@anholt.net> Reviewed-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
@@ -800,6 +800,17 @@ store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
|
||||
# src[] = { offset }.
|
||||
load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
|
||||
|
||||
# IR3-specific load/store global intrinsics. They take a 64-bit base address
|
||||
# and a 32-bit offset. The hardware will add the base and the offset, which
|
||||
# saves us from doing 64-bit math on the base address.
|
||||
|
||||
# src[] = { value, address(vec2 of hi+lo uint32_t), offset }.
|
||||
# const_index[] = { write_mask, access, align_mul, align_offset }
|
||||
intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
|
||||
# src[] = { address(vec2 of hi+lo uint32_t), offset }.
|
||||
# const_index[] = { access, align_mul, align_offset }
|
||||
intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE])
|
||||
|
||||
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
|
||||
# within a blend shader to read/write the raw value from the tile buffer,
|
||||
# without applying any format conversion in the process. If the shader needs
|
||||
|
@@ -1376,6 +1376,55 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
dst[0] = ctx->primitive_id;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_global_ir3: {
|
||||
struct ir3_instruction *value, *addr, *offset;
|
||||
|
||||
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
|
||||
ir3_get_src(ctx, &intr->src[1])[0],
|
||||
ir3_get_src(ctx, &intr->src[1])[1]
|
||||
}, 2);
|
||||
|
||||
offset = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
|
||||
value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]),
|
||||
intr->num_components);
|
||||
|
||||
struct ir3_instruction *stg =
|
||||
ir3_STG_G(ctx->block, addr, 0, value, 0,
|
||||
create_immed(ctx->block, intr->num_components), 0, offset, 0);
|
||||
stg->cat6.type = TYPE_U32;
|
||||
stg->cat6.iim_val = 1;
|
||||
|
||||
array_insert(b, b->keeps, stg);
|
||||
|
||||
stg->barrier_class = IR3_BARRIER_BUFFER_W;
|
||||
stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_global_ir3: {
|
||||
struct ir3_instruction *addr, *offset;
|
||||
|
||||
addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
|
||||
ir3_get_src(ctx, &intr->src[0])[0],
|
||||
ir3_get_src(ctx, &intr->src[0])[1]
|
||||
}, 2);
|
||||
|
||||
offset = ir3_get_src(ctx, &intr->src[1])[0];
|
||||
|
||||
struct ir3_instruction *load =
|
||||
ir3_LDG(b, addr, 0, create_immed(ctx->block, intr->num_components),
|
||||
0, offset, 0);
|
||||
load->cat6.type = TYPE_U32;
|
||||
load->regs[0]->wrmask = MASK(intr->num_components);
|
||||
|
||||
load->barrier_class = IR3_BARRIER_BUFFER_R;
|
||||
load->barrier_conflict = IR3_BARRIER_BUFFER_W;
|
||||
|
||||
ir3_split_dest(b, dst, load, 0, intr->num_components);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_ubo:
|
||||
emit_intrinsic_load_ubo(ctx, intr, dst);
|
||||
break;
|
||||
|
Reference in New Issue
Block a user