radeon/ac: use ds_swizzle for derivs on si/cik.
This looks like it has been supported since LLVM 3.9 at least, so switch radeonsi and radv over to using it; -pro also uses this.

We can now drop creating the LDS for these operations, as the ds_swizzle operation doesn't actually write to LDS at all.

Acked-by: Marek Olšák <marek.olsak@amd.com>
(stable requested due to fixing radv CIK conformance tests)
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
|||||||
bool has_ds_bpermute,
|
bool has_ds_bpermute,
|
||||||
uint32_t mask,
|
uint32_t mask,
|
||||||
int idx,
|
int idx,
|
||||||
LLVMValueRef lds,
|
|
||||||
LLVMValueRef val)
|
LLVMValueRef val)
|
||||||
{
|
{
|
||||||
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
|
LLVMValueRef tl, trbl, args[2];
|
||||||
LLVMValueRef result;
|
LLVMValueRef result;
|
||||||
|
|
||||||
thread_id = ac_get_thread_id(ctx);
|
|
||||||
|
|
||||||
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
|
||||||
LLVMConstInt(ctx->i32, mask, false), "");
|
|
||||||
|
|
||||||
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
|
||||||
LLVMConstInt(ctx->i32, idx, false), "");
|
|
||||||
|
|
||||||
if (has_ds_bpermute) {
|
if (has_ds_bpermute) {
|
||||||
|
LLVMValueRef thread_id, tl_tid, trbl_tid;
|
||||||
|
thread_id = ac_get_thread_id(ctx);
|
||||||
|
|
||||||
|
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
||||||
|
LLVMConstInt(ctx->i32, mask, false), "");
|
||||||
|
|
||||||
|
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
||||||
|
LLVMConstInt(ctx->i32, idx, false), "");
|
||||||
|
|
||||||
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
|
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
|
||||||
LLVMConstInt(ctx->i32, 4, false), "");
|
LLVMConstInt(ctx->i32, 4, false), "");
|
||||||
args[1] = val;
|
args[1] = val;
|
||||||
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
|||||||
AC_FUNC_ATTR_READNONE |
|
AC_FUNC_ATTR_READNONE |
|
||||||
AC_FUNC_ATTR_CONVERGENT);
|
AC_FUNC_ATTR_CONVERGENT);
|
||||||
} else {
|
} else {
|
||||||
LLVMValueRef store_ptr, load_ptr0, load_ptr1;
|
uint32_t masks[2];
|
||||||
|
|
||||||
store_ptr = ac_build_gep0(ctx, lds, thread_id);
|
switch (mask) {
|
||||||
load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
|
case AC_TID_MASK_TOP_LEFT:
|
||||||
load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
|
masks[0] = 0x8000;
|
||||||
|
if (idx == 1)
|
||||||
|
masks[1] = 0x8055;
|
||||||
|
else
|
||||||
|
masks[1] = 0x80aa;
|
||||||
|
|
||||||
LLVMBuildStore(ctx->builder, val, store_ptr);
|
break;
|
||||||
tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
|
case AC_TID_MASK_TOP:
|
||||||
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
|
masks[0] = 0x8044;
|
||||||
|
masks[1] = 0x80ee;
|
||||||
|
break;
|
||||||
|
case AC_TID_MASK_LEFT:
|
||||||
|
masks[0] = 0x80a0;
|
||||||
|
masks[1] = 0x80f5;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
args[0] = val;
|
||||||
|
args[1] = LLVMConstInt(ctx->i32, masks[0], false);
|
||||||
|
|
||||||
|
tl = ac_build_intrinsic(ctx,
|
||||||
|
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
||||||
|
args, 2,
|
||||||
|
AC_FUNC_ATTR_READNONE |
|
||||||
|
AC_FUNC_ATTR_CONVERGENT);
|
||||||
|
|
||||||
|
args[1] = LLVMConstInt(ctx->i32, masks[1], false);
|
||||||
|
trbl = ac_build_intrinsic(ctx,
|
||||||
|
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
||||||
|
args, 2,
|
||||||
|
AC_FUNC_ATTR_READNONE |
|
||||||
|
AC_FUNC_ATTR_CONVERGENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
||||||
|
@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
|||||||
bool has_ds_bpermute,
|
bool has_ds_bpermute,
|
||||||
uint32_t mask,
|
uint32_t mask,
|
||||||
int idx,
|
int idx,
|
||||||
LLVMValueRef lds,
|
|
||||||
LLVMValueRef val);
|
LLVMValueRef val);
|
||||||
|
|
||||||
#define AC_SENDMSG_GS 2
|
#define AC_SENDMSG_GS 2
|
||||||
|
@@ -68,8 +68,6 @@ struct ac_nir_context {
|
|||||||
int num_locals;
|
int num_locals;
|
||||||
LLVMValueRef *locals;
|
LLVMValueRef *locals;
|
||||||
|
|
||||||
LLVMValueRef ddxy_lds;
|
|
||||||
|
|
||||||
struct nir_to_llvm_context *nctx; /* TODO get rid of this */
|
struct nir_to_llvm_context *nctx; /* TODO get rid of this */
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1463,11 +1461,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
|
|||||||
LLVMValueRef result;
|
LLVMValueRef result;
|
||||||
bool has_ds_bpermute = ctx->abi->chip_class >= VI;
|
bool has_ds_bpermute = ctx->abi->chip_class >= VI;
|
||||||
|
|
||||||
if (!ctx->ddxy_lds && !has_ds_bpermute)
|
|
||||||
ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
|
|
||||||
LLVMArrayType(ctx->ac.i32, 64),
|
|
||||||
"ddxy_lds", LOCAL_ADDR_SPACE);
|
|
||||||
|
|
||||||
if (op == nir_op_fddx_fine || op == nir_op_fddx)
|
if (op == nir_op_fddx_fine || op == nir_op_fddx)
|
||||||
mask = AC_TID_MASK_LEFT;
|
mask = AC_TID_MASK_LEFT;
|
||||||
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
|
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
|
||||||
@@ -1484,7 +1477,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
|
|||||||
idx = 2;
|
idx = 2;
|
||||||
|
|
||||||
result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
|
result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
|
||||||
mask, idx, ctx->ddxy_lds,
|
mask, idx,
|
||||||
src0);
|
src0);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
|
|||||||
|
|
||||||
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
|
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
|
||||||
val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
|
val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
|
||||||
mask, idx, ctx->lds, val);
|
mask, idx, val);
|
||||||
emit_data->output[emit_data->chan] = val;
|
emit_data->output[emit_data->chan] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
|
|||||||
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
|
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
|
||||||
shader->info.num_input_vgprs -= num_prolog_vgprs;
|
shader->info.num_input_vgprs -= num_prolog_vgprs;
|
||||||
|
|
||||||
if (!ctx->screen->has_ds_bpermute &&
|
|
||||||
bld_base->info &&
|
|
||||||
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
|
|
||||||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
|
|
||||||
bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
|
|
||||||
bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
|
|
||||||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
|
|
||||||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
|
|
||||||
ctx->lds =
|
|
||||||
LLVMAddGlobalInAddressSpace(gallivm->module,
|
|
||||||
LLVMArrayType(ctx->i32, 64),
|
|
||||||
"ddxy_lds",
|
|
||||||
LOCAL_ADDR_SPACE);
|
|
||||||
|
|
||||||
if (shader->key.as_ls ||
|
if (shader->key.as_ls ||
|
||||||
ctx->type == PIPE_SHADER_TESS_CTRL ||
|
ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||||
/* GFX9 has the ESGS ring buffer in LDS. */
|
/* GFX9 has the ESGS ring buffer in LDS. */
|
||||||
|
Reference in New Issue
Block a user