freedreno/ir3: SSBO/atomic support
TODO cwabbott pointed out a write-after-read hazard, which affects both this and arrays. A write needs to depend on *all* reads since the last write, not just the last read. Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
@@ -475,6 +475,13 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
|
|||||||
struct ir3_register *dst, *src1, *src2;
|
struct ir3_register *dst, *src1, *src2;
|
||||||
instr_cat6_t *cat6 = ptr;
|
instr_cat6_t *cat6 = ptr;
|
||||||
|
|
||||||
|
cat6->type = instr->cat6.type;
|
||||||
|
cat6->opc = instr->opc;
|
||||||
|
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
|
||||||
|
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
|
||||||
|
cat6->g = !!(instr->flags & IR3_INSTR_G);
|
||||||
|
cat6->opc_cat = 6;
|
||||||
|
|
||||||
/* the "dst" for a store instruction is (from the perspective
|
/* the "dst" for a store instruction is (from the perspective
|
||||||
* of data flow in the shader, ie. register use/def, etc) in
|
* of data flow in the shader, ie. register use/def, etc) in
|
||||||
* fact a register that is read by the instruction, rather
|
* fact a register that is read by the instruction, rather
|
||||||
@@ -500,7 +507,65 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
|
|||||||
* indicate to use the src_off encoding even if offset is zero
|
* indicate to use the src_off encoding even if offset is zero
|
||||||
* (but then what to do about dst_off?)
|
* (but then what to do about dst_off?)
|
||||||
*/
|
*/
|
||||||
if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
|
if ((instr->opc == OPC_LDGB) || is_atomic(instr->opc)) {
|
||||||
|
struct ir3_register *src3 = instr->regs[3];
|
||||||
|
instr_cat6ldgb_t *ldgb = ptr;
|
||||||
|
|
||||||
|
/* maybe these two bits both determine the instruction encoding? */
|
||||||
|
cat6->src_off = false;
|
||||||
|
|
||||||
|
ldgb->d = 4 - 1; /* always .4d ? */
|
||||||
|
ldgb->typed = false; /* TODO true for images */
|
||||||
|
ldgb->type_size = instr->cat6.iim_val - 1;
|
||||||
|
|
||||||
|
ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
|
||||||
|
|
||||||
|
/* first src is src_ssbo: */
|
||||||
|
iassert(src1->flags & IR3_REG_IMMED);
|
||||||
|
ldgb->src_ssbo = src1->uim_val;
|
||||||
|
|
||||||
|
/* then next two are src1/src2: */
|
||||||
|
ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
|
||||||
|
ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED);
|
||||||
|
ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
|
||||||
|
ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED);
|
||||||
|
|
||||||
|
if (is_atomic(instr->opc)) {
|
||||||
|
struct ir3_register *src4 = instr->regs[4];
|
||||||
|
ldgb->src3 = reg(src4, info, instr->repeat, 0);
|
||||||
|
ldgb->pad0 = 0x1;
|
||||||
|
ldgb->pad3 = 0x3;
|
||||||
|
} else {
|
||||||
|
ldgb->pad0 = 0x0;
|
||||||
|
ldgb->pad3 = 0x2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
} else if (instr->opc == OPC_STGB) {
|
||||||
|
struct ir3_register *src3 = instr->regs[4];
|
||||||
|
instr_cat6stgb_t *stgb = ptr;
|
||||||
|
|
||||||
|
/* maybe these two bits both determine the instruction encoding? */
|
||||||
|
cat6->src_off = true;
|
||||||
|
stgb->pad3 = 0x2;
|
||||||
|
|
||||||
|
stgb->d = 4 - 1; /* always .4d ? */
|
||||||
|
stgb->typed = false;
|
||||||
|
stgb->type_size = instr->cat6.iim_val - 1;
|
||||||
|
|
||||||
|
/* first src is dst_ssbo: */
|
||||||
|
iassert(dst->flags & IR3_REG_IMMED);
|
||||||
|
stgb->dst_ssbo = dst->uim_val;
|
||||||
|
|
||||||
|
/* then src1/src2/src3: */
|
||||||
|
stgb->src1 = reg(src1, info, instr->repeat, 0);
|
||||||
|
stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
|
||||||
|
stgb->src2_im = !!(src2->flags & IR3_REG_IMMED);
|
||||||
|
stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED);
|
||||||
|
stgb->src3_im = !!(src3->flags & IR3_REG_IMMED);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
} else if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
|
||||||
instr_cat6a_t *cat6a = ptr;
|
instr_cat6a_t *cat6a = ptr;
|
||||||
|
|
||||||
cat6->src_off = true;
|
cat6->src_off = true;
|
||||||
@@ -536,13 +601,6 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
|
|||||||
cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
|
cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
|
||||||
}
|
}
|
||||||
|
|
||||||
cat6->type = instr->cat6.type;
|
|
||||||
cat6->opc = instr->opc;
|
|
||||||
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
|
|
||||||
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
|
|
||||||
cat6->g = !!(instr->flags & IR3_INSTR_G);
|
|
||||||
cat6->opc_cat = 6;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -226,7 +226,7 @@ struct ir3_instruction {
|
|||||||
type_t type;
|
type_t type;
|
||||||
int src_offset;
|
int src_offset;
|
||||||
int dst_offset;
|
int dst_offset;
|
||||||
int iim_val;
|
int iim_val; /* for ldgb/stgb, # of components */
|
||||||
} cat6;
|
} cat6;
|
||||||
/* for meta-instructions, just used to hold extra data
|
/* for meta-instructions, just used to hold extra data
|
||||||
* before instruction scheduling, etc
|
* before instruction scheduling, etc
|
||||||
@@ -602,6 +602,7 @@ is_store(struct ir3_instruction *instr)
|
|||||||
*/
|
*/
|
||||||
switch (instr->opc) {
|
switch (instr->opc) {
|
||||||
case OPC_STG:
|
case OPC_STG:
|
||||||
|
case OPC_STGB:
|
||||||
case OPC_STP:
|
case OPC_STP:
|
||||||
case OPC_STL:
|
case OPC_STL:
|
||||||
case OPC_STLW:
|
case OPC_STLW:
|
||||||
@@ -617,6 +618,7 @@ static inline bool is_load(struct ir3_instruction *instr)
|
|||||||
{
|
{
|
||||||
switch (instr->opc) {
|
switch (instr->opc) {
|
||||||
case OPC_LDG:
|
case OPC_LDG:
|
||||||
|
case OPC_LDGB:
|
||||||
case OPC_LDL:
|
case OPC_LDL:
|
||||||
case OPC_LDP:
|
case OPC_LDP:
|
||||||
case OPC_L2G:
|
case OPC_L2G:
|
||||||
@@ -931,7 +933,7 @@ int ir3_ra(struct ir3 *ir3, enum shader_t type,
|
|||||||
bool frag_coord, bool frag_face);
|
bool frag_coord, bool frag_face);
|
||||||
|
|
||||||
/* legalize: */
|
/* legalize: */
|
||||||
void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
|
void ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary);
|
||||||
|
|
||||||
/* ************************************************************************* */
|
/* ************************************************************************* */
|
||||||
/* instruction helpers */
|
/* instruction helpers */
|
||||||
@@ -1025,6 +1027,24 @@ ir3_##name(struct ir3_block *block, \
|
|||||||
return instr; \
|
return instr; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INSTR4(name) \
|
||||||
|
static inline struct ir3_instruction * \
|
||||||
|
ir3_##name(struct ir3_block *block, \
|
||||||
|
struct ir3_instruction *a, unsigned aflags, \
|
||||||
|
struct ir3_instruction *b, unsigned bflags, \
|
||||||
|
struct ir3_instruction *c, unsigned cflags, \
|
||||||
|
struct ir3_instruction *d, unsigned dflags) \
|
||||||
|
{ \
|
||||||
|
struct ir3_instruction *instr = \
|
||||||
|
ir3_instr_create2(block, OPC_##name, 5); \
|
||||||
|
ir3_reg_create(instr, 0, 0); /* dst */ \
|
||||||
|
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
|
||||||
|
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
|
||||||
|
ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
|
||||||
|
ir3_reg_create(instr, 0, IR3_REG_SSA | dflags)->instr = d; \
|
||||||
|
return instr; \
|
||||||
|
}
|
||||||
|
|
||||||
/* cat0 instructions: */
|
/* cat0 instructions: */
|
||||||
INSTR0(BR);
|
INSTR0(BR);
|
||||||
INSTR0(JUMP);
|
INSTR0(JUMP);
|
||||||
@@ -1142,6 +1162,19 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
|
|||||||
INSTR2(LDLV)
|
INSTR2(LDLV)
|
||||||
INSTR2(LDG)
|
INSTR2(LDG)
|
||||||
INSTR3(STG)
|
INSTR3(STG)
|
||||||
|
INSTR3(LDGB);
|
||||||
|
INSTR4(STGB);
|
||||||
|
INSTR4(ATOMIC_ADD);
|
||||||
|
INSTR4(ATOMIC_SUB);
|
||||||
|
INSTR4(ATOMIC_XCHG);
|
||||||
|
INSTR4(ATOMIC_INC);
|
||||||
|
INSTR4(ATOMIC_DEC);
|
||||||
|
INSTR4(ATOMIC_CMPXCHG);
|
||||||
|
INSTR4(ATOMIC_MIN);
|
||||||
|
INSTR4(ATOMIC_MAX);
|
||||||
|
INSTR4(ATOMIC_AND);
|
||||||
|
INSTR4(ATOMIC_OR);
|
||||||
|
INSTR4(ATOMIC_XOR);
|
||||||
|
|
||||||
/* ************************************************************************* */
|
/* ************************************************************************* */
|
||||||
/* split this out or find some helper to use.. like main/bitset.h.. */
|
/* split this out or find some helper to use.. like main/bitset.h.. */
|
||||||
|
@@ -71,6 +71,20 @@ struct ir3_compile {
|
|||||||
/* For vertex shaders, keep track of the system values sources */
|
/* For vertex shaders, keep track of the system values sources */
|
||||||
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
|
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
|
||||||
|
|
||||||
|
/* For SSBO's and atomics, we need to preserve order, such
|
||||||
|
* that reads don't overtake writes, and the order of writes
|
||||||
|
* is preserved. Atomics are considered as a write.
|
||||||
|
*
|
||||||
|
* To do this, we track last write and last access, in a
|
||||||
|
* similar way to ir3_array. But since we don't know whether
|
||||||
|
* the same SSBO is bound to multiple slots, we simply
|
||||||
|
* track this globally rather than per-SSBO.
|
||||||
|
*
|
||||||
|
* TODO should we track this per block instead? I guess it
|
||||||
|
* shouldn't matter much?
|
||||||
|
*/
|
||||||
|
struct ir3_instruction *last_write, *last_access;
|
||||||
|
|
||||||
/* mapping from nir_register to defining instruction: */
|
/* mapping from nir_register to defining instruction: */
|
||||||
struct hash_table *def_ht;
|
struct hash_table *def_ht;
|
||||||
|
|
||||||
@@ -430,7 +444,7 @@ create_uniform_indirect(struct ir3_compile *ctx, int n,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static struct ir3_instruction *
|
static struct ir3_instruction *
|
||||||
create_collect(struct ir3_block *block, struct ir3_instruction **arr,
|
create_collect(struct ir3_block *block, struct ir3_instruction *const *arr,
|
||||||
unsigned arrsz)
|
unsigned arrsz)
|
||||||
{
|
{
|
||||||
struct ir3_instruction *collect;
|
struct ir3_instruction *collect;
|
||||||
@@ -1136,6 +1150,165 @@ emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mark_ssbo_read(struct ir3_compile *ctx, struct ir3_instruction *instr)
|
||||||
|
{
|
||||||
|
instr->regs[0]->instr = ctx->last_write;
|
||||||
|
instr->regs[0]->flags |= IR3_REG_SSA;
|
||||||
|
ctx->last_access = instr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mark_ssbo_write(struct ir3_compile *ctx, struct ir3_instruction *instr)
|
||||||
|
{
|
||||||
|
instr->regs[0]->instr = ctx->last_access;
|
||||||
|
instr->regs[0]->flags |= IR3_REG_SSA;
|
||||||
|
ctx->last_write = ctx->last_access = instr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_intrinsic_load_ssbo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
||||||
|
struct ir3_instruction **dst)
|
||||||
|
{
|
||||||
|
struct ir3_block *b = ctx->block;
|
||||||
|
struct ir3_instruction *ldgb, *src0, *src1, *offset;
|
||||||
|
nir_const_value *const_offset;
|
||||||
|
|
||||||
|
/* can this be non-const buffer_index? how do we handle that? */
|
||||||
|
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||||
|
compile_assert(ctx, const_offset);
|
||||||
|
|
||||||
|
offset = get_src(ctx, &intr->src[1])[0];
|
||||||
|
|
||||||
|
/* src0 is uvec2(offset*4, 0), src1 is offset.. nir already *= 4: */
|
||||||
|
src0 = create_collect(b, (struct ir3_instruction*[]){
|
||||||
|
offset,
|
||||||
|
create_immed(b, 0),
|
||||||
|
}, 2);
|
||||||
|
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
|
||||||
|
|
||||||
|
ldgb = ir3_LDGB(b, create_immed(b, const_offset->u32[0]), 0,
|
||||||
|
src0, 0, src1, 0);
|
||||||
|
ldgb->regs[0]->wrmask = (1 << intr->num_components) - 1;
|
||||||
|
ldgb->cat6.iim_val = intr->num_components;
|
||||||
|
ldgb->cat6.type = TYPE_U32;
|
||||||
|
mark_ssbo_read(ctx, ldgb);
|
||||||
|
|
||||||
|
split_dest(b, dst, ldgb, 0, intr->num_components);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
|
||||||
|
static void
|
||||||
|
emit_intrinsic_store_ssbo(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||||
|
{
|
||||||
|
struct ir3_block *b = ctx->block;
|
||||||
|
struct ir3_instruction *stgb, *src0, *src1, *src2, *offset;
|
||||||
|
nir_const_value *const_offset;
|
||||||
|
unsigned ncomp = ffs(~intr->const_index[0]) - 1;
|
||||||
|
|
||||||
|
/* can this be non-const buffer_index? how do we handle that? */
|
||||||
|
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||||
|
compile_assert(ctx, const_offset);
|
||||||
|
|
||||||
|
offset = get_src(ctx, &intr->src[2])[0];
|
||||||
|
|
||||||
|
/* src0 is value, src1 is offset, src2 is uvec2(offset*4, 0)..
|
||||||
|
* nir already *= 4:
|
||||||
|
*/
|
||||||
|
src0 = create_collect(b, get_src(ctx, &intr->src[0]), ncomp);
|
||||||
|
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
|
||||||
|
src2 = create_collect(b, (struct ir3_instruction*[]){
|
||||||
|
offset,
|
||||||
|
create_immed(b, 0),
|
||||||
|
}, 2);
|
||||||
|
|
||||||
|
stgb = ir3_STGB(b, create_immed(b, const_offset->u32[0]), 0,
|
||||||
|
src0, 0, src1, 0, src2, 0);
|
||||||
|
stgb->cat6.iim_val = ncomp;
|
||||||
|
stgb->cat6.type = TYPE_U32;
|
||||||
|
mark_ssbo_write(ctx, stgb);
|
||||||
|
|
||||||
|
array_insert(b, b->keeps, stgb);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_intrinsic_atomic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||||
|
{
|
||||||
|
struct ir3_block *b = ctx->block;
|
||||||
|
struct ir3_instruction *atomic, *ssbo, *src0, *src1, *src2, *offset;
|
||||||
|
nir_const_value *const_offset;
|
||||||
|
type_t type = TYPE_U32;
|
||||||
|
|
||||||
|
/* can this be non-const buffer_index? how do we handle that? */
|
||||||
|
const_offset = nir_src_as_const_value(intr->src[0]);
|
||||||
|
compile_assert(ctx, const_offset);
|
||||||
|
ssbo = create_immed(b, const_offset->u32[0]);
|
||||||
|
|
||||||
|
offset = get_src(ctx, &intr->src[1])[0];
|
||||||
|
|
||||||
|
/* src0 is data (or uvec2(data, compare))
|
||||||
|
* src1 is offset
|
||||||
|
* src2 is uvec2(offset*4, 0)
|
||||||
|
*
|
||||||
|
* Note that nir already multiplies the offset by four
|
||||||
|
*/
|
||||||
|
src0 = get_src(ctx, &intr->src[2])[0];
|
||||||
|
src1 = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
|
||||||
|
src2 = create_collect(b, (struct ir3_instruction*[]){
|
||||||
|
offset,
|
||||||
|
create_immed(b, 0),
|
||||||
|
}, 2);
|
||||||
|
|
||||||
|
switch (intr->intrinsic) {
|
||||||
|
case nir_intrinsic_ssbo_atomic_add:
|
||||||
|
atomic = ir3_ATOMIC_ADD(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_imin:
|
||||||
|
atomic = ir3_ATOMIC_MIN(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
type = TYPE_S32;
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_umin:
|
||||||
|
atomic = ir3_ATOMIC_MIN(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_imax:
|
||||||
|
atomic = ir3_ATOMIC_MAX(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
type = TYPE_S32;
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_umax:
|
||||||
|
atomic = ir3_ATOMIC_MAX(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_and:
|
||||||
|
atomic = ir3_ATOMIC_AND(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_or:
|
||||||
|
atomic = ir3_ATOMIC_OR(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_xor:
|
||||||
|
atomic = ir3_ATOMIC_XOR(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_exchange:
|
||||||
|
atomic = ir3_ATOMIC_XCHG(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||||
|
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
|
||||||
|
src0 = create_collect(b, (struct ir3_instruction*[]){
|
||||||
|
src0,
|
||||||
|
get_src(ctx, &intr->src[3])[0],
|
||||||
|
}, 2);
|
||||||
|
atomic = ir3_ATOMIC_CMPXCHG(b, ssbo, 0, src0, 0, src1, 0, src2, 0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("boo");
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic->cat6.iim_val = 1;
|
||||||
|
atomic->cat6.type = type;
|
||||||
|
mark_ssbo_write(ctx, atomic);
|
||||||
|
|
||||||
|
/* even if nothing consumes the result, we can't DCE the instruction: */
|
||||||
|
array_insert(b, b->keeps, atomic);
|
||||||
|
}
|
||||||
|
|
||||||
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
||||||
struct ir3_instruction *instr)
|
struct ir3_instruction *instr)
|
||||||
{
|
{
|
||||||
@@ -1225,6 +1398,24 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||||||
case nir_intrinsic_store_var:
|
case nir_intrinsic_store_var:
|
||||||
emit_intrinsic_store_var(ctx, intr);
|
emit_intrinsic_store_var(ctx, intr);
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_load_ssbo:
|
||||||
|
emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_store_ssbo:
|
||||||
|
emit_intrinsic_store_ssbo(ctx, intr);
|
||||||
|
break;
|
||||||
|
case nir_intrinsic_ssbo_atomic_add:
|
||||||
|
case nir_intrinsic_ssbo_atomic_imin:
|
||||||
|
case nir_intrinsic_ssbo_atomic_umin:
|
||||||
|
case nir_intrinsic_ssbo_atomic_imax:
|
||||||
|
case nir_intrinsic_ssbo_atomic_umax:
|
||||||
|
case nir_intrinsic_ssbo_atomic_and:
|
||||||
|
case nir_intrinsic_ssbo_atomic_or:
|
||||||
|
case nir_intrinsic_ssbo_atomic_xor:
|
||||||
|
case nir_intrinsic_ssbo_atomic_exchange:
|
||||||
|
case nir_intrinsic_ssbo_atomic_comp_swap:
|
||||||
|
emit_intrinsic_atomic(ctx, intr);
|
||||||
|
break;
|
||||||
case nir_intrinsic_store_output:
|
case nir_intrinsic_store_output:
|
||||||
idx = nir_intrinsic_base(intr);
|
idx = nir_intrinsic_base(intr);
|
||||||
const_offset = nir_src_as_const_value(intr->src[1]);
|
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||||
@@ -2541,7 +2732,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||||||
/* We need to do legalize after (for frag shader's) the "bary.f"
|
/* We need to do legalize after (for frag shader's) the "bary.f"
|
||||||
* offsets (inloc) have been assigned.
|
* offsets (inloc) have been assigned.
|
||||||
*/
|
*/
|
||||||
ir3_legalize(ir, &so->has_samp, &max_bary);
|
ir3_legalize(ir, &so->has_samp, &so->has_ssbo, &max_bary);
|
||||||
|
|
||||||
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
|
||||||
printf("AFTER LEGALIZE:\n");
|
printf("AFTER LEGALIZE:\n");
|
||||||
|
@@ -193,6 +193,12 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
|
|||||||
*/
|
*/
|
||||||
if (is_store(instr) && (n == 1))
|
if (is_store(instr) && (n == 1))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* disallow CP into anything but the SSBO slot argument for
|
||||||
|
* atomics:
|
||||||
|
*/
|
||||||
|
if (is_atomic(instr->opc) && (n != 0))
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@@ -43,6 +43,7 @@
|
|||||||
|
|
||||||
struct ir3_legalize_ctx {
|
struct ir3_legalize_ctx {
|
||||||
bool has_samp;
|
bool has_samp;
|
||||||
|
bool has_ssbo;
|
||||||
int max_bary;
|
int max_bary;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -192,6 +193,9 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||||||
regmask_set(&needs_sy, n->regs[0]);
|
regmask_set(&needs_sy, n->regs[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((n->opc == OPC_LDGB) || (n->opc == OPC_STGB) || is_atomic(n->opc))
|
||||||
|
ctx->has_ssbo = true;
|
||||||
|
|
||||||
/* both tex/sfu appear to not always immediately consume
|
/* both tex/sfu appear to not always immediately consume
|
||||||
* their src register(s):
|
* their src register(s):
|
||||||
*/
|
*/
|
||||||
@@ -388,7 +392,7 @@ mark_convergence_points(struct ir3 *ir)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary)
|
ir3_legalize(struct ir3 *ir, bool *has_samp, bool *has_ssbo, int *max_bary)
|
||||||
{
|
{
|
||||||
struct ir3_legalize_ctx ctx = {
|
struct ir3_legalize_ctx ctx = {
|
||||||
.max_bary = -1,
|
.max_bary = -1,
|
||||||
@@ -399,6 +403,7 @@ ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary)
|
|||||||
}
|
}
|
||||||
|
|
||||||
*has_samp = ctx.has_samp;
|
*has_samp = ctx.has_samp;
|
||||||
|
*has_ssbo = ctx.has_ssbo;
|
||||||
*max_bary = ctx.max_bary;
|
*max_bary = ctx.max_bary;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
@@ -249,6 +249,9 @@ struct ir3_shader_variant {
|
|||||||
/* do we have one or more texture sample instructions: */
|
/* do we have one or more texture sample instructions: */
|
||||||
bool has_samp;
|
bool has_samp;
|
||||||
|
|
||||||
|
/* do we have one or more SSBO instructions: */
|
||||||
|
bool has_ssbo;
|
||||||
|
|
||||||
/* do we have kill instructions: */
|
/* do we have kill instructions: */
|
||||||
bool has_kill;
|
bool has_kill;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user