ac: rework ac_build_waitcnt for gfx10
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -2882,13 +2882,49 @@ LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
|||||||
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
|
LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
|
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
|
||||||
{
|
{
|
||||||
|
if (!wait_flags)
|
||||||
|
return;
|
||||||
|
|
||||||
|
unsigned lgkmcnt = 63;
|
||||||
|
unsigned expcnt = 7;
|
||||||
|
unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
|
||||||
|
unsigned vscnt = 63;
|
||||||
|
|
||||||
|
if (wait_flags & AC_WAIT_LGKM)
|
||||||
|
lgkmcnt = 0;
|
||||||
|
if (wait_flags & AC_WAIT_EXP)
|
||||||
|
expcnt = 0;
|
||||||
|
if (wait_flags & AC_WAIT_VLOAD)
|
||||||
|
vmcnt = 0;
|
||||||
|
|
||||||
|
if (wait_flags & AC_WAIT_VSTORE) {
|
||||||
|
if (ctx->chip_class >= GFX10)
|
||||||
|
vscnt = 0;
|
||||||
|
else
|
||||||
|
vmcnt = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned simm16 = (lgkmcnt << 8) |
|
||||||
|
(expcnt << 4) |
|
||||||
|
(vmcnt & 0xf) |
|
||||||
|
((vmcnt >> 4) << 14);
|
||||||
|
|
||||||
LLVMValueRef args[1] = {
|
LLVMValueRef args[1] = {
|
||||||
LLVMConstInt(ctx->i32, simm16, false),
|
LLVMConstInt(ctx->i32, simm16, false),
|
||||||
};
|
};
|
||||||
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
|
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
|
||||||
ctx->voidt, args, 1, 0);
|
ctx->voidt, args, 1, 0);
|
||||||
|
|
||||||
|
/* TODO: add llvm.amdgcn.s.waitcnt.vscnt into LLVM: */
|
||||||
|
if (0 && ctx->chip_class >= GFX10 && vscnt == 0) {
|
||||||
|
LLVMValueRef args[1] = {
|
||||||
|
LLVMConstInt(ctx->i32, vscnt, false),
|
||||||
|
};
|
||||||
|
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt.vscnt",
|
||||||
|
ctx->voidt, args, 1, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
||||||
|
@@ -43,11 +43,10 @@ enum {
|
|||||||
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
|
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Combine these with & instead of |. */
|
#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
|
||||||
#define NOOP_WAITCNT 0xcf7f
|
#define AC_WAIT_EXP (1 << 1) /* exports */
|
||||||
#define LGKM_CNT 0xc07f
|
#define AC_WAIT_VLOAD (1 << 2) /* VMEM load/sample instructions */
|
||||||
#define EXP_CNT 0xcf0f
|
#define AC_WAIT_VSTORE (1 << 3) /* VMEM store instructions */
|
||||||
#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
|
|
||||||
|
|
||||||
struct ac_llvm_flow;
|
struct ac_llvm_flow;
|
||||||
|
|
||||||
@@ -575,7 +574,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
|||||||
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
|
||||||
LLVMValueRef s1, LLVMValueRef s2);
|
LLVMValueRef s1, LLVMValueRef s2);
|
||||||
|
|
||||||
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
|
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
|
||||||
|
|
||||||
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
|
||||||
unsigned bitsize);
|
unsigned bitsize);
|
||||||
|
@@ -2741,26 +2741,26 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
|
|||||||
static void emit_membar(struct ac_llvm_context *ac,
|
static void emit_membar(struct ac_llvm_context *ac,
|
||||||
const nir_intrinsic_instr *instr)
|
const nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
unsigned waitcnt = NOOP_WAITCNT;
|
unsigned wait_flags = 0;
|
||||||
|
|
||||||
switch (instr->intrinsic) {
|
switch (instr->intrinsic) {
|
||||||
case nir_intrinsic_memory_barrier:
|
case nir_intrinsic_memory_barrier:
|
||||||
case nir_intrinsic_group_memory_barrier:
|
case nir_intrinsic_group_memory_barrier:
|
||||||
waitcnt &= VM_CNT & LGKM_CNT;
|
wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_memory_barrier_atomic_counter:
|
case nir_intrinsic_memory_barrier_atomic_counter:
|
||||||
case nir_intrinsic_memory_barrier_buffer:
|
case nir_intrinsic_memory_barrier_buffer:
|
||||||
case nir_intrinsic_memory_barrier_image:
|
case nir_intrinsic_memory_barrier_image:
|
||||||
waitcnt &= VM_CNT;
|
wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_memory_barrier_shared:
|
case nir_intrinsic_memory_barrier_shared:
|
||||||
waitcnt &= LGKM_CNT;
|
wait_flags = AC_WAIT_LGKM;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (waitcnt != NOOP_WAITCNT)
|
|
||||||
ac_build_waitcnt(ac, waitcnt);
|
ac_build_waitcnt(ac, wait_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
|
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
|
||||||
@@ -2770,7 +2770,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
|
|||||||
* always fits into a single wave.
|
* always fits into a single wave.
|
||||||
*/
|
*/
|
||||||
if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
|
if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
|
||||||
ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
|
ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ac_build_s_barrier(ac);
|
ac_build_s_barrier(ac);
|
||||||
|
@@ -3908,21 +3908,20 @@ static void membar_emit(
|
|||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
|
LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
|
||||||
unsigned flags = LLVMConstIntGetZExtValue(src0);
|
unsigned flags = LLVMConstIntGetZExtValue(src0);
|
||||||
unsigned waitcnt = NOOP_WAITCNT;
|
unsigned wait_flags = 0;
|
||||||
|
|
||||||
if (flags & TGSI_MEMBAR_THREAD_GROUP)
|
if (flags & TGSI_MEMBAR_THREAD_GROUP)
|
||||||
waitcnt &= VM_CNT & LGKM_CNT;
|
wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
|
||||||
|
|
||||||
if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
|
if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
|
||||||
TGSI_MEMBAR_SHADER_BUFFER |
|
TGSI_MEMBAR_SHADER_BUFFER |
|
||||||
TGSI_MEMBAR_SHADER_IMAGE))
|
TGSI_MEMBAR_SHADER_IMAGE))
|
||||||
waitcnt &= VM_CNT;
|
wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
|
||||||
|
|
||||||
if (flags & TGSI_MEMBAR_SHARED)
|
if (flags & TGSI_MEMBAR_SHARED)
|
||||||
waitcnt &= LGKM_CNT;
|
wait_flags |= AC_WAIT_LGKM;
|
||||||
|
|
||||||
if (waitcnt != NOOP_WAITCNT)
|
ac_build_waitcnt(&ctx->ac, wait_flags);
|
||||||
ac_build_waitcnt(&ctx->ac, waitcnt);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clock_emit(
|
static void clock_emit(
|
||||||
@@ -4372,7 +4371,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
|
|||||||
*/
|
*/
|
||||||
if (ctx->screen->info.chip_class == GFX6 &&
|
if (ctx->screen->info.chip_class == GFX6 &&
|
||||||
ctx->type == PIPE_SHADER_TESS_CTRL) {
|
ctx->type == PIPE_SHADER_TESS_CTRL) {
|
||||||
ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
|
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -520,7 +520,7 @@ static void load_emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
||||||
ac_build_waitcnt(&ctx->ac, VM_CNT);
|
ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
|
||||||
|
|
||||||
can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
|
can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
|
||||||
is_oneway_access_only(inst, info,
|
is_oneway_access_only(inst, info,
|
||||||
@@ -709,7 +709,7 @@ static void store_emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
|
||||||
ac_build_waitcnt(&ctx->ac, VM_CNT);
|
ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
|
||||||
|
|
||||||
bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
|
bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
|
||||||
args.cache_policy = get_cache_policy(ctx, inst,
|
args.cache_policy = get_cache_policy(ctx, inst,
|
||||||
|
Reference in New Issue
Block a user