radeon/ac: use ds_swizzle for derivs on si/cik.

ds_swizzle appears to have been supported since at least LLVM 3.9,
so switch radeonsi and radv over to using it; -pro also uses it.
We can now stop creating an LDS allocation for these operations,
as the ds_swizzle operation doesn't actually write to LDS at all.

Acked-by: Marek Olšák <marek.olsak@amd.com>
(stable requested due to fixing radv CIK conformance tests)
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie
2017-08-01 05:10:49 +01:00
parent 35338a242b
commit cb6f16dce9
4 changed files with 46 additions and 41 deletions

View File

@@ -796,12 +796,13 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
bool has_ds_bpermute, bool has_ds_bpermute,
uint32_t mask, uint32_t mask,
int idx, int idx,
LLVMValueRef lds,
LLVMValueRef val) LLVMValueRef val)
{ {
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; LLVMValueRef tl, trbl, args[2];
LLVMValueRef result; LLVMValueRef result;
if (has_ds_bpermute) {
LLVMValueRef thread_id, tl_tid, trbl_tid;
thread_id = ac_get_thread_id(ctx); thread_id = ac_get_thread_id(ctx);
tl_tid = LLVMBuildAnd(ctx->builder, thread_id, tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
@@ -810,7 +811,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, idx, false), ""); LLVMConstInt(ctx->i32, idx, false), "");
if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid, args[0] = LLVMBuildMul(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, 4, false), ""); LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val; args[1] = val;
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT); AC_FUNC_ATTR_CONVERGENT);
} else { } else {
LLVMValueRef store_ptr, load_ptr0, load_ptr1; uint32_t masks[2];
store_ptr = ac_build_gep0(ctx, lds, thread_id); switch (mask) {
load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); case AC_TID_MASK_TOP_LEFT:
load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); masks[0] = 0x8000;
if (idx == 1)
masks[1] = 0x8055;
else
masks[1] = 0x80aa;
LLVMBuildStore(ctx->builder, val, store_ptr); break;
tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); case AC_TID_MASK_TOP:
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); masks[0] = 0x8044;
masks[1] = 0x80ee;
break;
case AC_TID_MASK_LEFT:
masks[0] = 0x80a0;
masks[1] = 0x80f5;
break;
}
args[0] = val;
args[1] = LLVMConstInt(ctx->i32, masks[0], false);
tl = ac_build_intrinsic(ctx,
"llvm.amdgcn.ds.swizzle", ctx->i32,
args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
args[1] = LLVMConstInt(ctx->i32, masks[1], false);
trbl = ac_build_intrinsic(ctx,
"llvm.amdgcn.ds.swizzle", ctx->i32,
args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} }
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");

View File

@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
bool has_ds_bpermute, bool has_ds_bpermute,
uint32_t mask, uint32_t mask,
int idx, int idx,
LLVMValueRef lds,
LLVMValueRef val); LLVMValueRef val);
#define AC_SENDMSG_GS 2 #define AC_SENDMSG_GS 2

View File

@@ -68,8 +68,6 @@ struct ac_nir_context {
int num_locals; int num_locals;
LLVMValueRef *locals; LLVMValueRef *locals;
LLVMValueRef ddxy_lds;
struct nir_to_llvm_context *nctx; /* TODO get rid of this */ struct nir_to_llvm_context *nctx; /* TODO get rid of this */
}; };
@@ -1463,11 +1461,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
LLVMValueRef result; LLVMValueRef result;
bool has_ds_bpermute = ctx->abi->chip_class >= VI; bool has_ds_bpermute = ctx->abi->chip_class >= VI;
if (!ctx->ddxy_lds && !has_ds_bpermute)
ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
LLVMArrayType(ctx->ac.i32, 64),
"ddxy_lds", LOCAL_ADDR_SPACE);
if (op == nir_op_fddx_fine || op == nir_op_fddx) if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = AC_TID_MASK_LEFT; mask = AC_TID_MASK_LEFT;
else if (op == nir_op_fddy_fine || op == nir_op_fddy) else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -1484,7 +1477,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
idx = 2; idx = 2;
result = ac_build_ddxy(&ctx->ac, has_ds_bpermute, result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
mask, idx, ctx->ddxy_lds, mask, idx,
src0); src0);
return result; return result;
} }

View File

@@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, ""); val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute, val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
mask, idx, ctx->lds, val); mask, idx, val);
emit_data->output[emit_data->chan] = val; emit_data->output[emit_data->chan] = val;
} }
@@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
assert(shader->info.num_input_vgprs >= num_prolog_vgprs); assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs; shader->info.num_input_vgprs -= num_prolog_vgprs;
if (!ctx->screen->has_ds_bpermute &&
bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
ctx->lds =
LLVMAddGlobalInAddressSpace(gallivm->module,
LLVMArrayType(ctx->i32, 64),
"ddxy_lds",
LOCAL_ADDR_SPACE);
if (shader->key.as_ls || if (shader->key.as_ls ||
ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_TESS_CTRL ||
/* GFX9 has the ESGS ring buffer in LDS. */ /* GFX9 has the ESGS ring buffer in LDS. */