amd: remove support for LLVM 6.0
Users are encouraged to switch to LLVM 7.0, released in September 2018. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
@@ -107,8 +107,8 @@ dnl LLVM versions
|
|||||||
LLVM_REQUIRED_GALLIUM=3.3.0
|
LLVM_REQUIRED_GALLIUM=3.3.0
|
||||||
LLVM_REQUIRED_OPENCL=3.9.0
|
LLVM_REQUIRED_OPENCL=3.9.0
|
||||||
LLVM_REQUIRED_R600=3.9.0
|
LLVM_REQUIRED_R600=3.9.0
|
||||||
LLVM_REQUIRED_RADEONSI=6.0.0
|
LLVM_REQUIRED_RADEONSI=7.0.0
|
||||||
LLVM_REQUIRED_RADV=6.0.0
|
LLVM_REQUIRED_RADV=7.0.0
|
||||||
LLVM_REQUIRED_SWR=6.0.0
|
LLVM_REQUIRED_SWR=6.0.0
|
||||||
|
|
||||||
dnl Check for progs
|
dnl Check for progs
|
||||||
|
@@ -1176,7 +1176,7 @@ if with_gallium_opencl
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
if with_amd_vk or with_gallium_radeonsi
|
if with_amd_vk or with_gallium_radeonsi
|
||||||
_llvm_version = '>= 6.0.0'
|
_llvm_version = '>= 7.0.0'
|
||||||
elif with_gallium_swr
|
elif with_gallium_swr
|
||||||
_llvm_version = '>= 6.0.0'
|
_llvm_version = '>= 6.0.0'
|
||||||
elif with_gallium_opencl or with_gallium_r600
|
elif with_gallium_opencl or with_gallium_r600
|
||||||
|
@@ -75,7 +75,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
|
|||||||
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
|
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
|
||||||
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
|
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
|
||||||
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
|
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
|
||||||
ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
|
ctx->intptr = ctx->i32;
|
||||||
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
|
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
|
||||||
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
|
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
|
||||||
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
|
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
|
||||||
@@ -1403,99 +1403,28 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
|||||||
int idx,
|
int idx,
|
||||||
LLVMValueRef val)
|
LLVMValueRef val)
|
||||||
{
|
{
|
||||||
|
unsigned tl_lanes[4], trbl_lanes[4];
|
||||||
LLVMValueRef tl, trbl, args[2];
|
LLVMValueRef tl, trbl, args[2];
|
||||||
LLVMValueRef result;
|
LLVMValueRef result;
|
||||||
|
|
||||||
if (HAVE_LLVM >= 0x0700) {
|
for (unsigned i = 0; i < 4; ++i) {
|
||||||
unsigned tl_lanes[4], trbl_lanes[4];
|
tl_lanes[i] = i & mask;
|
||||||
|
trbl_lanes[i] = (i & mask) + idx;
|
||||||
for (unsigned i = 0; i < 4; ++i) {
|
|
||||||
tl_lanes[i] = i & mask;
|
|
||||||
trbl_lanes[i] = (i & mask) + idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
tl = ac_build_quad_swizzle(ctx, val,
|
|
||||||
tl_lanes[0], tl_lanes[1],
|
|
||||||
tl_lanes[2], tl_lanes[3]);
|
|
||||||
trbl = ac_build_quad_swizzle(ctx, val,
|
|
||||||
trbl_lanes[0], trbl_lanes[1],
|
|
||||||
trbl_lanes[2], trbl_lanes[3]);
|
|
||||||
} else if (ctx->chip_class >= VI) {
|
|
||||||
LLVMValueRef thread_id, tl_tid, trbl_tid;
|
|
||||||
thread_id = ac_get_thread_id(ctx);
|
|
||||||
|
|
||||||
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
|
||||||
LLVMConstInt(ctx->i32, mask, false), "");
|
|
||||||
|
|
||||||
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
|
||||||
LLVMConstInt(ctx->i32, idx, false), "");
|
|
||||||
|
|
||||||
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
|
|
||||||
LLVMConstInt(ctx->i32, 4, false), "");
|
|
||||||
args[1] = val;
|
|
||||||
tl = ac_build_intrinsic(ctx,
|
|
||||||
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
|
||||||
args, 2,
|
|
||||||
AC_FUNC_ATTR_READNONE |
|
|
||||||
AC_FUNC_ATTR_CONVERGENT);
|
|
||||||
|
|
||||||
args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
|
|
||||||
LLVMConstInt(ctx->i32, 4, false), "");
|
|
||||||
trbl = ac_build_intrinsic(ctx,
|
|
||||||
"llvm.amdgcn.ds.bpermute", ctx->i32,
|
|
||||||
args, 2,
|
|
||||||
AC_FUNC_ATTR_READNONE |
|
|
||||||
AC_FUNC_ATTR_CONVERGENT);
|
|
||||||
} else {
|
|
||||||
uint32_t masks[2] = {};
|
|
||||||
|
|
||||||
switch (mask) {
|
|
||||||
case AC_TID_MASK_TOP_LEFT:
|
|
||||||
masks[0] = 0x8000;
|
|
||||||
if (idx == 1)
|
|
||||||
masks[1] = 0x8055;
|
|
||||||
else
|
|
||||||
masks[1] = 0x80aa;
|
|
||||||
|
|
||||||
break;
|
|
||||||
case AC_TID_MASK_TOP:
|
|
||||||
masks[0] = 0x8044;
|
|
||||||
masks[1] = 0x80ee;
|
|
||||||
break;
|
|
||||||
case AC_TID_MASK_LEFT:
|
|
||||||
masks[0] = 0x80a0;
|
|
||||||
masks[1] = 0x80f5;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
args[0] = val;
|
|
||||||
args[1] = LLVMConstInt(ctx->i32, masks[0], false);
|
|
||||||
|
|
||||||
tl = ac_build_intrinsic(ctx,
|
|
||||||
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
|
||||||
args, 2,
|
|
||||||
AC_FUNC_ATTR_READNONE |
|
|
||||||
AC_FUNC_ATTR_CONVERGENT);
|
|
||||||
|
|
||||||
args[1] = LLVMConstInt(ctx->i32, masks[1], false);
|
|
||||||
trbl = ac_build_intrinsic(ctx,
|
|
||||||
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
|
||||||
args, 2,
|
|
||||||
AC_FUNC_ATTR_READNONE |
|
|
||||||
AC_FUNC_ATTR_CONVERGENT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tl = ac_build_quad_swizzle(ctx, val,
|
||||||
|
tl_lanes[0], tl_lanes[1],
|
||||||
|
tl_lanes[2], tl_lanes[3]);
|
||||||
|
trbl = ac_build_quad_swizzle(ctx, val,
|
||||||
|
trbl_lanes[0], trbl_lanes[1],
|
||||||
|
trbl_lanes[2], trbl_lanes[3]);
|
||||||
|
|
||||||
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
||||||
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
|
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
|
||||||
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
|
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
|
||||||
|
|
||||||
if (HAVE_LLVM >= 0x0700) {
|
result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32,
|
||||||
result = ac_build_intrinsic(ctx,
|
&result, 1, 0);
|
||||||
"llvm.amdgcn.wqm.f32", ctx->f32,
|
|
||||||
&result, 1, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -1740,171 +1669,6 @@ static const char *get_atomic_name(enum ac_atomic_op op)
|
|||||||
unreachable("bad atomic op");
|
unreachable("bad atomic op");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* LLVM 6 and older */
|
|
||||||
static LLVMValueRef ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx,
|
|
||||||
struct ac_image_args *a)
|
|
||||||
{
|
|
||||||
LLVMValueRef args[16];
|
|
||||||
LLVMTypeRef retty = ctx->v4f32;
|
|
||||||
const char *name = NULL;
|
|
||||||
const char *atomic_subop = "";
|
|
||||||
char intr_name[128], coords_type[64];
|
|
||||||
|
|
||||||
bool sample = a->opcode == ac_image_sample ||
|
|
||||||
a->opcode == ac_image_gather4 ||
|
|
||||||
a->opcode == ac_image_get_lod;
|
|
||||||
bool atomic = a->opcode == ac_image_atomic ||
|
|
||||||
a->opcode == ac_image_atomic_cmpswap;
|
|
||||||
bool da = a->dim == ac_image_cube ||
|
|
||||||
a->dim == ac_image_1darray ||
|
|
||||||
a->dim == ac_image_2darray ||
|
|
||||||
a->dim == ac_image_2darraymsaa;
|
|
||||||
if (a->opcode == ac_image_get_lod)
|
|
||||||
da = false;
|
|
||||||
|
|
||||||
unsigned num_coords =
|
|
||||||
a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
|
|
||||||
LLVMValueRef addr;
|
|
||||||
unsigned num_addr = 0;
|
|
||||||
|
|
||||||
if (a->opcode == ac_image_get_lod) {
|
|
||||||
switch (a->dim) {
|
|
||||||
case ac_image_1darray:
|
|
||||||
num_coords = 1;
|
|
||||||
break;
|
|
||||||
case ac_image_2darray:
|
|
||||||
case ac_image_cube:
|
|
||||||
num_coords = 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (a->offset)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->offset);
|
|
||||||
if (a->bias)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->bias);
|
|
||||||
if (a->compare)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->compare);
|
|
||||||
if (a->derivs[0]) {
|
|
||||||
unsigned num_derivs = ac_num_derivs(a->dim);
|
|
||||||
for (unsigned i = 0; i < num_derivs; ++i)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->derivs[i]);
|
|
||||||
}
|
|
||||||
for (unsigned i = 0; i < num_coords; ++i)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->coords[i]);
|
|
||||||
if (a->lod)
|
|
||||||
args[num_addr++] = ac_to_integer(ctx, a->lod);
|
|
||||||
|
|
||||||
unsigned pad_goal = util_next_power_of_two(num_addr);
|
|
||||||
while (num_addr < pad_goal)
|
|
||||||
args[num_addr++] = LLVMGetUndef(ctx->i32);
|
|
||||||
|
|
||||||
addr = ac_build_gather_values(ctx, args, num_addr);
|
|
||||||
|
|
||||||
unsigned num_args = 0;
|
|
||||||
if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
|
|
||||||
args[num_args++] = a->data[0];
|
|
||||||
if (a->opcode == ac_image_atomic_cmpswap)
|
|
||||||
args[num_args++] = a->data[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned coords_arg = num_args;
|
|
||||||
if (sample)
|
|
||||||
args[num_args++] = ac_to_float(ctx, addr);
|
|
||||||
else
|
|
||||||
args[num_args++] = ac_to_integer(ctx, addr);
|
|
||||||
|
|
||||||
args[num_args++] = a->resource;
|
|
||||||
if (sample)
|
|
||||||
args[num_args++] = a->sampler;
|
|
||||||
if (!atomic) {
|
|
||||||
args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
|
|
||||||
if (sample)
|
|
||||||
args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
|
|
||||||
args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false;
|
|
||||||
args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
|
|
||||||
args[num_args++] = ctx->i1false; /* lwe */
|
|
||||||
args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
|
|
||||||
} else {
|
|
||||||
args[num_args++] = ctx->i1false; /* r128 */
|
|
||||||
args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
|
|
||||||
args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (a->opcode) {
|
|
||||||
case ac_image_sample:
|
|
||||||
name = "llvm.amdgcn.image.sample";
|
|
||||||
break;
|
|
||||||
case ac_image_gather4:
|
|
||||||
name = "llvm.amdgcn.image.gather4";
|
|
||||||
break;
|
|
||||||
case ac_image_load:
|
|
||||||
name = "llvm.amdgcn.image.load";
|
|
||||||
break;
|
|
||||||
case ac_image_load_mip:
|
|
||||||
name = "llvm.amdgcn.image.load.mip";
|
|
||||||
break;
|
|
||||||
case ac_image_store:
|
|
||||||
name = "llvm.amdgcn.image.store";
|
|
||||||
retty = ctx->voidt;
|
|
||||||
break;
|
|
||||||
case ac_image_store_mip:
|
|
||||||
name = "llvm.amdgcn.image.store.mip";
|
|
||||||
retty = ctx->voidt;
|
|
||||||
break;
|
|
||||||
case ac_image_atomic:
|
|
||||||
case ac_image_atomic_cmpswap:
|
|
||||||
name = "llvm.amdgcn.image.atomic.";
|
|
||||||
retty = ctx->i32;
|
|
||||||
if (a->opcode == ac_image_atomic_cmpswap) {
|
|
||||||
atomic_subop = "cmpswap";
|
|
||||||
} else {
|
|
||||||
atomic_subop = get_atomic_name(a->atomic);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ac_image_get_lod:
|
|
||||||
name = "llvm.amdgcn.image.getlod";
|
|
||||||
break;
|
|
||||||
case ac_image_get_resinfo:
|
|
||||||
name = "llvm.amdgcn.image.getresinfo";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
unreachable("invalid image opcode");
|
|
||||||
}
|
|
||||||
|
|
||||||
ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type,
|
|
||||||
sizeof(coords_type));
|
|
||||||
|
|
||||||
if (atomic) {
|
|
||||||
snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s",
|
|
||||||
atomic_subop, coords_type);
|
|
||||||
} else {
|
|
||||||
bool lod_suffix =
|
|
||||||
a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
|
|
||||||
|
|
||||||
snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
|
|
||||||
name,
|
|
||||||
a->compare ? ".c" : "",
|
|
||||||
a->bias ? ".b" :
|
|
||||||
lod_suffix ? ".l" :
|
|
||||||
a->derivs[0] ? ".d" :
|
|
||||||
a->level_zero ? ".lz" : "",
|
|
||||||
a->offset ? ".o" : "",
|
|
||||||
coords_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMValueRef result =
|
|
||||||
ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
|
|
||||||
a->attributes);
|
|
||||||
if (!sample && retty == ctx->v4f32) {
|
|
||||||
result = LLVMBuildBitCast(ctx->builder, result,
|
|
||||||
ctx->v4i32, "");
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
|
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
|
||||||
struct ac_image_args *a)
|
struct ac_image_args *a)
|
||||||
{
|
{
|
||||||
@@ -1929,9 +1693,6 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
|
|||||||
(a->level_zero ? 1 : 0) +
|
(a->level_zero ? 1 : 0) +
|
||||||
(a->derivs[0] ? 1 : 0) <= 1);
|
(a->derivs[0] ? 1 : 0) <= 1);
|
||||||
|
|
||||||
if (HAVE_LLVM < 0x0700)
|
|
||||||
return ac_build_image_opcode_llvm6(ctx, a);
|
|
||||||
|
|
||||||
if (a->opcode == ac_image_get_lod) {
|
if (a->opcode == ac_image_get_lod) {
|
||||||
switch (dim) {
|
switch (dim) {
|
||||||
case ac_image_1darray:
|
case ac_image_1darray:
|
||||||
@@ -2720,9 +2481,6 @@ LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
|
|||||||
|
|
||||||
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
|
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
|
||||||
{
|
{
|
||||||
if (!HAVE_32BIT_POINTERS)
|
|
||||||
return ac_array_in_const_addr_space(elem_type);
|
|
||||||
|
|
||||||
return LLVMPointerType(LLVMArrayType(elem_type, 0),
|
return LLVMPointerType(LLVMArrayType(elem_type, 0),
|
||||||
AC_ADDR_SPACE_CONST_32BIT);
|
AC_ADDR_SPACE_CONST_32BIT);
|
||||||
}
|
}
|
||||||
|
@@ -34,14 +34,12 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
|
AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
|
||||||
AC_ADDR_SPACE_GLOBAL = 1,
|
AC_ADDR_SPACE_GLOBAL = 1,
|
||||||
AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
|
AC_ADDR_SPACE_GDS = 2,
|
||||||
AC_ADDR_SPACE_LDS = 3,
|
AC_ADDR_SPACE_LDS = 3,
|
||||||
AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
|
AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
|
||||||
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
|
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -39,9 +39,6 @@
|
|||||||
#include <llvm/Transforms/IPO.h>
|
#include <llvm/Transforms/IPO.h>
|
||||||
|
|
||||||
#include <llvm/IR/LegacyPassManager.h>
|
#include <llvm/IR/LegacyPassManager.h>
|
||||||
#if HAVE_LLVM < 0x0700
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
||||||
{
|
{
|
||||||
@@ -132,9 +129,7 @@ struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
|
|||||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
||||||
|
|
||||||
if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
|
if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
|
||||||
#if HAVE_LLVM >= 0x0700
|
|
||||||
nullptr,
|
nullptr,
|
||||||
#endif
|
|
||||||
llvm::TargetMachine::CGFT_ObjectFile)) {
|
llvm::TargetMachine::CGFT_ObjectFile)) {
|
||||||
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
||||||
delete p;
|
delete p;
|
||||||
@@ -170,7 +165,5 @@ void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
|
|||||||
|
|
||||||
void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
||||||
{
|
{
|
||||||
#if HAVE_LLVM >= 0x0700
|
|
||||||
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
|
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
@@ -30,9 +30,7 @@
|
|||||||
#include <llvm-c/Support.h>
|
#include <llvm-c/Support.h>
|
||||||
#include <llvm-c/Transforms/IPO.h>
|
#include <llvm-c/Transforms/IPO.h>
|
||||||
#include <llvm-c/Transforms/Scalar.h>
|
#include <llvm-c/Transforms/Scalar.h>
|
||||||
#if HAVE_LLVM >= 0x0700
|
|
||||||
#include <llvm-c/Transforms/Utils.h>
|
#include <llvm-c/Transforms/Utils.h>
|
||||||
#endif
|
|
||||||
#include "c11/threads.h"
|
#include "c11/threads.h"
|
||||||
#include "gallivm/lp_bld_misc.h"
|
#include "gallivm/lp_bld_misc.h"
|
||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
@@ -132,9 +130,9 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
|
|||||||
case CHIP_RAVEN:
|
case CHIP_RAVEN:
|
||||||
return "gfx902";
|
return "gfx902";
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
|
return "gfx904";
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
|
return "gfx906";
|
||||||
case CHIP_RAVEN2:
|
case CHIP_RAVEN2:
|
||||||
return "gfx902"; /* TODO: use gfx909 when it's available */
|
return "gfx902"; /* TODO: use gfx909 when it's available */
|
||||||
default:
|
default:
|
||||||
@@ -303,7 +301,6 @@ ac_count_scratch_private_memory(LLVMValueRef function)
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
||||||
bool okay_to_leak_target_library_info,
|
|
||||||
enum radeon_family family,
|
enum radeon_family family,
|
||||||
enum ac_target_machine_options tm_options)
|
enum ac_target_machine_options tm_options)
|
||||||
{
|
{
|
||||||
@@ -324,12 +321,10 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
|
compiler->target_library_info =
|
||||||
compiler->target_library_info =
|
ac_create_target_library_info(triple);
|
||||||
ac_create_target_library_info(triple);
|
if (!compiler->target_library_info)
|
||||||
if (!compiler->target_library_info)
|
goto fail;
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
|
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
|
||||||
tm_options & AC_TM_CHECK_IR);
|
tm_options & AC_TM_CHECK_IR);
|
||||||
@@ -347,11 +342,8 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
|||||||
{
|
{
|
||||||
if (compiler->passmgr)
|
if (compiler->passmgr)
|
||||||
LLVMDisposePassManager(compiler->passmgr);
|
LLVMDisposePassManager(compiler->passmgr);
|
||||||
#if HAVE_LLVM >= 0x0700
|
|
||||||
/* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */
|
|
||||||
if (compiler->target_library_info)
|
if (compiler->target_library_info)
|
||||||
ac_dispose_target_library_info(compiler->target_library_info);
|
ac_dispose_target_library_info(compiler->target_library_info);
|
||||||
#endif
|
|
||||||
if (compiler->low_opt_tm)
|
if (compiler->low_opt_tm)
|
||||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||||
if (compiler->tm)
|
if (compiler->tm)
|
||||||
|
@@ -134,7 +134,6 @@ void ac_init_llvm_once(void);
|
|||||||
|
|
||||||
|
|
||||||
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
||||||
bool okay_to_leak_target_library_info,
|
|
||||||
enum radeon_family family,
|
enum radeon_family family,
|
||||||
enum ac_target_machine_options tm_options);
|
enum ac_target_machine_options tm_options);
|
||||||
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
|
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
|
||||||
|
@@ -429,22 +429,16 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
|
|||||||
{
|
{
|
||||||
LLVMValueRef result;
|
LLVMValueRef result;
|
||||||
|
|
||||||
if (HAVE_LLVM < 0x0700) {
|
/* FIXME: LLVM 7+ returns incorrect result when count is 0.
|
||||||
LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
|
* https://bugs.freedesktop.org/show_bug.cgi?id=107276
|
||||||
result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
|
*/
|
||||||
result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
|
LLVMValueRef zero = ctx->i32_0;
|
||||||
} else {
|
LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
|
||||||
/* FIXME: LLVM 7 returns incorrect result when count is 0.
|
LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");
|
||||||
* https://bugs.freedesktop.org/show_bug.cgi?id=107276
|
|
||||||
*/
|
|
||||||
LLVMValueRef zero = ctx->i32_0;
|
|
||||||
LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
|
|
||||||
LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");
|
|
||||||
|
|
||||||
result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
|
result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
|
||||||
result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, "");
|
result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, "");
|
||||||
result = LLVMBuildSelect(ctx->builder, icond2, zero, result, "");
|
result = LLVMBuildSelect(ctx->builder, icond2, zero, result, "");
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@@ -594,7 +594,7 @@ radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
if (loc->sgpr_idx == -1)
|
if (loc->sgpr_idx == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
|
assert(loc->num_sgprs == 1);
|
||||||
assert(!loc->indirect);
|
assert(!loc->indirect);
|
||||||
|
|
||||||
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
|
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
|
||||||
@@ -624,14 +624,12 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
struct radv_userdata_info *loc = &locs->descriptor_sets[start];
|
struct radv_userdata_info *loc = &locs->descriptor_sets[start];
|
||||||
unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
|
unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
|
||||||
|
|
||||||
radv_emit_shader_pointer_head(cs, sh_offset, count,
|
radv_emit_shader_pointer_head(cs, sh_offset, count, true);
|
||||||
HAVE_32BIT_POINTERS);
|
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
struct radv_descriptor_set *set =
|
struct radv_descriptor_set *set =
|
||||||
descriptors_state->sets[start + i];
|
descriptors_state->sets[start + i];
|
||||||
|
|
||||||
radv_emit_shader_pointer_body(device, cs, set->va,
|
radv_emit_shader_pointer_body(device, cs, set->va, true);
|
||||||
HAVE_32BIT_POINTERS);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1740,8 +1738,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
{
|
{
|
||||||
struct radv_descriptor_state *descriptors_state =
|
struct radv_descriptor_state *descriptors_state =
|
||||||
radv_get_descriptors_state(cmd_buffer, bind_point);
|
radv_get_descriptors_state(cmd_buffer, bind_point);
|
||||||
uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2;
|
uint32_t size = MAX_SETS * 4;
|
||||||
uint32_t size = MAX_SETS * 4 * ptr_size;
|
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
void *ptr;
|
void *ptr;
|
||||||
|
|
||||||
@@ -1750,14 +1747,12 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
for (unsigned i = 0; i < MAX_SETS; i++) {
|
for (unsigned i = 0; i < MAX_SETS; i++) {
|
||||||
uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size;
|
uint32_t *uptr = ((uint32_t *)ptr) + i;
|
||||||
uint64_t set_va = 0;
|
uint64_t set_va = 0;
|
||||||
struct radv_descriptor_set *set = descriptors_state->sets[i];
|
struct radv_descriptor_set *set = descriptors_state->sets[i];
|
||||||
if (descriptors_state->valid & (1u << i))
|
if (descriptors_state->valid & (1u << i))
|
||||||
set_va = set->va;
|
set_va = set->va;
|
||||||
uptr[0] = set_va & 0xffffffff;
|
uptr[0] = set_va & 0xffffffff;
|
||||||
if (ptr_size == 2)
|
|
||||||
uptr[1] = set_va >> 32;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||||
|
@@ -747,7 +747,7 @@ void radv_GetPhysicalDeviceFeatures(
|
|||||||
.shaderCullDistance = true,
|
.shaderCullDistance = true,
|
||||||
.shaderFloat64 = true,
|
.shaderFloat64 = true,
|
||||||
.shaderInt64 = true,
|
.shaderInt64 = true,
|
||||||
.shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700,
|
.shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
|
||||||
.sparseBinding = true,
|
.sparseBinding = true,
|
||||||
.variableMultisampleRate = true,
|
.variableMultisampleRate = true,
|
||||||
.inheritedQueries = true,
|
.inheritedQueries = true,
|
||||||
@@ -789,7 +789,7 @@ void radv_GetPhysicalDeviceFeatures2(
|
|||||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
|
||||||
VkPhysicalDevice16BitStorageFeatures *features =
|
VkPhysicalDevice16BitStorageFeatures *features =
|
||||||
(VkPhysicalDevice16BitStorageFeatures*)ext;
|
(VkPhysicalDevice16BitStorageFeatures*)ext;
|
||||||
bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
|
bool enabled = pdevice->rad_info.chip_class >= VI;
|
||||||
features->storageBuffer16BitAccess = enabled;
|
features->storageBuffer16BitAccess = enabled;
|
||||||
features->uniformAndStorageBuffer16BitAccess = enabled;
|
features->uniformAndStorageBuffer16BitAccess = enabled;
|
||||||
features->storagePushConstant16 = enabled;
|
features->storagePushConstant16 = enabled;
|
||||||
|
@@ -51,7 +51,7 @@ class Extension:
|
|||||||
# and dEQP-VK.api.info.device fail due to the duplicated strings.
|
# and dEQP-VK.api.info.device fail due to the duplicated strings.
|
||||||
EXTENSIONS = [
|
EXTENSIONS = [
|
||||||
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
|
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
|
||||||
Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'),
|
Extension('VK_KHR_16bit_storage', 1, True),
|
||||||
Extension('VK_KHR_bind_memory2', 1, True),
|
Extension('VK_KHR_bind_memory2', 1, True),
|
||||||
Extension('VK_KHR_create_renderpass2', 1, True),
|
Extension('VK_KHR_create_renderpass2', 1, True),
|
||||||
Extension('VK_KHR_dedicated_allocation', 1, True),
|
Extension('VK_KHR_dedicated_allocation', 1, True),
|
||||||
|
@@ -40,7 +40,6 @@ public:
|
|||||||
bool init(void)
|
bool init(void)
|
||||||
{
|
{
|
||||||
if (!ac_init_llvm_compiler(&llvm_info,
|
if (!ac_init_llvm_compiler(&llvm_info,
|
||||||
true,
|
|
||||||
family,
|
family,
|
||||||
tm_options))
|
tm_options))
|
||||||
return false;
|
return false;
|
||||||
@@ -99,7 +98,6 @@ bool radv_compile_to_binary(struct ac_llvm_compiler *info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
||||||
bool okay_to_leak_target_library_info,
|
|
||||||
bool thread_compiler,
|
bool thread_compiler,
|
||||||
enum radeon_family family,
|
enum radeon_family family,
|
||||||
enum ac_target_machine_options tm_options)
|
enum ac_target_machine_options tm_options)
|
||||||
@@ -125,7 +123,6 @@ bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!ac_init_llvm_compiler(info,
|
if (!ac_init_llvm_compiler(info,
|
||||||
okay_to_leak_target_library_info,
|
|
||||||
family,
|
family,
|
||||||
tm_options))
|
tm_options))
|
||||||
return false;
|
return false;
|
||||||
|
@@ -33,9 +33,7 @@
|
|||||||
#include <llvm-c/Core.h>
|
#include <llvm-c/Core.h>
|
||||||
#include <llvm-c/TargetMachine.h>
|
#include <llvm-c/TargetMachine.h>
|
||||||
#include <llvm-c/Transforms/Scalar.h>
|
#include <llvm-c/Transforms/Scalar.h>
|
||||||
#if HAVE_LLVM >= 0x0700
|
|
||||||
#include <llvm-c/Transforms/Utils.h>
|
#include <llvm-c/Transforms/Utils.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
#include "gfx9d.h"
|
#include "gfx9d.h"
|
||||||
@@ -568,8 +566,7 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
|
|||||||
static void
|
static void
|
||||||
set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
|
set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
|
||||||
{
|
{
|
||||||
bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
|
bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
|
||||||
idx != AC_UD_SCRATCH_RING_OFFSETS;
|
|
||||||
|
|
||||||
set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
|
set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
|
||||||
}
|
}
|
||||||
@@ -583,7 +580,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
|
|||||||
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
|
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
|
||||||
assert(ud_info);
|
assert(ud_info);
|
||||||
|
|
||||||
set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect);
|
set_loc(ud_info, sgpr_idx, 1, indirect);
|
||||||
|
|
||||||
if (!indirect)
|
if (!indirect)
|
||||||
locs->descriptor_sets_enabled |= 1 << idx;
|
locs->descriptor_sets_enabled |= 1 << idx;
|
||||||
@@ -624,7 +621,7 @@ count_vs_user_sgprs(struct radv_shader_context *ctx)
|
|||||||
uint8_t count = 0;
|
uint8_t count = 0;
|
||||||
|
|
||||||
if (ctx->shader_info->info.vs.has_vertex_buffers)
|
if (ctx->shader_info->info.vs.has_vertex_buffers)
|
||||||
count += HAVE_32BIT_POINTERS ? 1 : 2;
|
count++;
|
||||||
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
|
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
@@ -693,14 +690,14 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
|
|||||||
user_sgpr_count++;
|
user_sgpr_count++;
|
||||||
|
|
||||||
if (ctx->shader_info->info.loads_push_constants)
|
if (ctx->shader_info->info.loads_push_constants)
|
||||||
user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
|
user_sgpr_count++;
|
||||||
|
|
||||||
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
|
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
|
||||||
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
|
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
|
||||||
uint32_t num_desc_set =
|
uint32_t num_desc_set =
|
||||||
util_bitcount(ctx->shader_info->info.desc_set_used_mask);
|
util_bitcount(ctx->shader_info->info.desc_set_used_mask);
|
||||||
|
|
||||||
if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) {
|
if (remaining_sgprs < num_desc_set) {
|
||||||
user_sgpr_info->indirect_all_descriptor_sets = true;
|
user_sgpr_info->indirect_all_descriptor_sets = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1243,7 +1243,7 @@ radv_emit_shader_pointer(struct radv_device *device,
|
|||||||
struct radeon_cmdbuf *cs,
|
struct radeon_cmdbuf *cs,
|
||||||
uint32_t sh_offset, uint64_t va, bool global)
|
uint32_t sh_offset, uint64_t va, bool global)
|
||||||
{
|
{
|
||||||
bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global;
|
bool use_32bit_pointers = !global;
|
||||||
|
|
||||||
radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
|
radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
|
||||||
radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
|
radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
|
||||||
|
@@ -600,7 +600,7 @@ shader_variant_create(struct radv_device *device,
|
|||||||
|
|
||||||
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
|
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
|
||||||
radv_init_llvm_once();
|
radv_init_llvm_once();
|
||||||
radv_init_llvm_compiler(&ac_llvm, false,
|
radv_init_llvm_compiler(&ac_llvm,
|
||||||
thread_compiler,
|
thread_compiler,
|
||||||
chip_family, tm_options);
|
chip_family, tm_options);
|
||||||
if (gs_copy_shader) {
|
if (gs_copy_shader) {
|
||||||
|
@@ -27,7 +27,6 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
||||||
bool okay_to_leak_target_library_info,
|
|
||||||
bool thread_compiler,
|
bool thread_compiler,
|
||||||
enum radeon_family family,
|
enum radeon_family family,
|
||||||
enum ac_target_machine_options tm_options);
|
enum ac_target_machine_options tm_options);
|
||||||
|
@@ -2055,7 +2055,7 @@ static void si_emit_shader_pointer_head(struct radeon_cmdbuf *cs,
|
|||||||
unsigned sh_offset,
|
unsigned sh_offset,
|
||||||
unsigned pointer_count)
|
unsigned pointer_count)
|
||||||
{
|
{
|
||||||
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 1 : 2), 0));
|
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0));
|
||||||
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
|
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2065,10 +2065,7 @@ static void si_emit_shader_pointer_body(struct si_screen *sscreen,
|
|||||||
{
|
{
|
||||||
radeon_emit(cs, va);
|
radeon_emit(cs, va);
|
||||||
|
|
||||||
if (HAVE_32BIT_POINTERS)
|
assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
|
||||||
assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
|
|
||||||
else
|
|
||||||
radeon_emit(cs, va >> 32);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_shader_pointer(struct si_context *sctx,
|
static void si_emit_shader_pointer(struct si_context *sctx,
|
||||||
@@ -2106,25 +2103,6 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
|
|
||||||
unsigned pointer_mask,
|
|
||||||
unsigned sh_base)
|
|
||||||
{
|
|
||||||
if (!sh_base)
|
|
||||||
return;
|
|
||||||
|
|
||||||
struct radeon_cmdbuf *cs = sctx->gfx_cs;
|
|
||||||
unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
|
|
||||||
|
|
||||||
while (mask) {
|
|
||||||
struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)];
|
|
||||||
unsigned sh_offset = sh_base + descs->shader_userdata_offset;
|
|
||||||
|
|
||||||
si_emit_shader_pointer_head(cs, sh_offset, 1);
|
|
||||||
si_emit_shader_pointer_body(sctx->screen, cs, descs->gpu_address);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void si_emit_global_shader_pointers(struct si_context *sctx,
|
static void si_emit_global_shader_pointers(struct si_context *sctx,
|
||||||
struct si_descriptors *descs)
|
struct si_descriptors *descs)
|
||||||
{
|
{
|
||||||
@@ -2164,17 +2142,10 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx)
|
|||||||
sh_base[PIPE_SHADER_TESS_EVAL]);
|
sh_base[PIPE_SHADER_TESS_EVAL]);
|
||||||
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
|
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
|
||||||
sh_base[PIPE_SHADER_FRAGMENT]);
|
sh_base[PIPE_SHADER_FRAGMENT]);
|
||||||
if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) {
|
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
|
||||||
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
|
sh_base[PIPE_SHADER_TESS_CTRL]);
|
||||||
sh_base[PIPE_SHADER_TESS_CTRL]);
|
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
|
||||||
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
|
sh_base[PIPE_SHADER_GEOMETRY]);
|
||||||
sh_base[PIPE_SHADER_GEOMETRY]);
|
|
||||||
} else {
|
|
||||||
si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
|
|
||||||
sh_base[PIPE_SHADER_TESS_CTRL]);
|
|
||||||
si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
|
|
||||||
sh_base[PIPE_SHADER_GEOMETRY]);
|
|
||||||
}
|
|
||||||
|
|
||||||
sctx->shader_pointers_dirty &=
|
sctx->shader_pointers_dirty &=
|
||||||
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
|
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
|
||||||
@@ -2665,10 +2636,6 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (i = 0; i < SI_NUM_SHADERS; i++) {
|
for (i = 0; i < SI_NUM_SHADERS; i++) {
|
||||||
bool is_2nd = sctx->chip_class >= GFX9 &&
|
bool is_2nd = sctx->chip_class >= GFX9 &&
|
||||||
(i == PIPE_SHADER_TESS_CTRL ||
|
(i == PIPE_SHADER_TESS_CTRL ||
|
||||||
@@ -2699,7 +2666,6 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||||||
desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
|
desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
|
||||||
|
|
||||||
if (is_2nd) {
|
if (is_2nd) {
|
||||||
#if HAVE_32BIT_POINTERS
|
|
||||||
if (i == PIPE_SHADER_TESS_CTRL) {
|
if (i == PIPE_SHADER_TESS_CTRL) {
|
||||||
rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
|
rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
|
||||||
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
|
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
|
||||||
@@ -2707,9 +2673,6 @@ void si_init_all_descriptors(struct si_context *sctx)
|
|||||||
rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
|
rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
|
||||||
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
|
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES;
|
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
|
rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
|
||||||
}
|
}
|
||||||
|
@@ -455,15 +455,6 @@ static int si_get_shader_param(struct pipe_screen* pscreen,
|
|||||||
!sscreen->llvm_has_working_vgpr_indexing)
|
!sscreen->llvm_has_working_vgpr_indexing)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Doing indirect indexing on GFX9 with LLVM 6.0 hangs.
|
|
||||||
* This means we don't support INTERP instructions with
|
|
||||||
* indirect indexing on inputs.
|
|
||||||
*/
|
|
||||||
if (shader == PIPE_SHADER_FRAGMENT &&
|
|
||||||
!sscreen->llvm_has_working_vgpr_indexing &&
|
|
||||||
HAVE_LLVM < 0x0700)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* TCS and TES load inputs directly from LDS or offchip
|
/* TCS and TES load inputs directly from LDS or offchip
|
||||||
* memory, so indirect indexing is always supported.
|
* memory, so indirect indexing is always supported.
|
||||||
* PS has to support indirect indexing, because we can't
|
* PS has to support indirect indexing, because we can't
|
||||||
|
@@ -127,7 +127,7 @@ static void si_init_compiler(struct si_screen *sscreen,
|
|||||||
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
|
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
|
||||||
|
|
||||||
ac_init_llvm_once();
|
ac_init_llvm_once();
|
||||||
ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
|
ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
|
||||||
compiler->passes = ac_create_llvm_passes(compiler->tm);
|
compiler->passes = ac_create_llvm_passes(compiler->tm);
|
||||||
|
|
||||||
if (compiler->low_opt_tm)
|
if (compiler->low_opt_tm)
|
||||||
|
@@ -2310,18 +2310,9 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c
|
|||||||
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
|
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
|
||||||
|
|
||||||
LLVMValueRef desc0, desc1;
|
LLVMValueRef desc0, desc1;
|
||||||
if (HAVE_32BIT_POINTERS) {
|
desc0 = ptr;
|
||||||
desc0 = ptr;
|
desc1 = LLVMConstInt(ctx->i32,
|
||||||
desc1 = LLVMConstInt(ctx->i32,
|
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
||||||
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
|
||||||
} else {
|
|
||||||
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
|
|
||||||
desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
|
|
||||||
desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
|
|
||||||
/* Mask out all bits except BASE_ADDRESS_HI. */
|
|
||||||
desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
|
|
||||||
LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMValueRef desc_elems[] = {
|
LLVMValueRef desc_elems[] = {
|
||||||
desc0,
|
desc0,
|
||||||
@@ -3265,19 +3256,9 @@ si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
|
|||||||
LLVMBuilderRef builder = ctx->ac.builder;
|
LLVMBuilderRef builder = ctx->ac.builder;
|
||||||
LLVMValueRef ptr, lo, hi;
|
LLVMValueRef ptr, lo, hi;
|
||||||
|
|
||||||
if (HAVE_32BIT_POINTERS) {
|
|
||||||
ptr = LLVMGetParam(ctx->main_fn, param);
|
|
||||||
ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
|
|
||||||
return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
ptr = LLVMGetParam(ctx->main_fn, param);
|
ptr = LLVMGetParam(ctx->main_fn, param);
|
||||||
ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
|
ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
|
||||||
ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
|
return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
|
||||||
lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
|
|
||||||
hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
|
|
||||||
ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
|
|
||||||
return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This only writes the tessellation factor levels. */
|
/* This only writes the tessellation factor levels. */
|
||||||
@@ -3378,8 +3359,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
|
|||||||
LLVMValueRef ret = ctx->return_value;
|
LLVMValueRef ret = ctx->return_value;
|
||||||
|
|
||||||
ret = si_insert_input_ptr(ctx, ret, 0, 0);
|
ret = si_insert_input_ptr(ctx, ret, 0, 0);
|
||||||
if (HAVE_32BIT_POINTERS)
|
ret = si_insert_input_ptr(ctx, ret, 1, 1);
|
||||||
ret = si_insert_input_ptr(ctx, ret, 1, 1);
|
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
|
||||||
@@ -3394,11 +3374,6 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
|
|||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
|
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
|
||||||
8 + SI_SGPR_VS_STATE_BITS);
|
8 + SI_SGPR_VS_STATE_BITS);
|
||||||
|
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
|
|
||||||
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
|
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
|
||||||
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
|
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
|
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
|
||||||
@@ -3422,8 +3397,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
|||||||
LLVMValueRef ret = ctx->return_value;
|
LLVMValueRef ret = ctx->return_value;
|
||||||
|
|
||||||
ret = si_insert_input_ptr(ctx, ret, 0, 0);
|
ret = si_insert_input_ptr(ctx, ret, 0, 0);
|
||||||
if (HAVE_32BIT_POINTERS)
|
ret = si_insert_input_ptr(ctx, ret, 1, 1);
|
||||||
ret = si_insert_input_ptr(ctx, ret, 1, 1);
|
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
|
||||||
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
|
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
|
||||||
@@ -3434,11 +3408,6 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
|||||||
ctx->param_bindless_samplers_and_images,
|
ctx->param_bindless_samplers_and_images,
|
||||||
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
||||||
|
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
|
|
||||||
8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unsigned vgpr;
|
unsigned vgpr;
|
||||||
if (ctx->type == PIPE_SHADER_VERTEX)
|
if (ctx->type == PIPE_SHADER_VERTEX)
|
||||||
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
|
||||||
@@ -4702,13 +4671,8 @@ static void create_function(struct si_shader_context *ctx)
|
|||||||
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
|
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
|
||||||
/* Merged stages have 8 system SGPRs at the beginning. */
|
/* Merged stages have 8 system SGPRs at the beginning. */
|
||||||
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
|
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
|
||||||
if (HAVE_32BIT_POINTERS) {
|
declare_per_stage_desc_pointers(ctx, &fninfo,
|
||||||
declare_per_stage_desc_pointers(ctx, &fninfo,
|
ctx->type == PIPE_SHADER_TESS_CTRL);
|
||||||
ctx->type == PIPE_SHADER_TESS_CTRL);
|
|
||||||
} else {
|
|
||||||
declare_const_and_shader_buffers(ctx, &fninfo,
|
|
||||||
ctx->type == PIPE_SHADER_TESS_CTRL);
|
|
||||||
}
|
|
||||||
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
@@ -4721,15 +4685,9 @@ static void create_function(struct si_shader_context *ctx)
|
|||||||
ctx->type == PIPE_SHADER_VERTEX);
|
ctx->type == PIPE_SHADER_VERTEX);
|
||||||
declare_vs_specific_input_sgprs(ctx, &fninfo);
|
declare_vs_specific_input_sgprs(ctx, &fninfo);
|
||||||
|
|
||||||
if (!HAVE_32BIT_POINTERS) {
|
|
||||||
declare_samplers_and_images(ctx, &fninfo,
|
|
||||||
ctx->type == PIPE_SHADER_TESS_CTRL);
|
|
||||||
}
|
|
||||||
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
|
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
|
|
||||||
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
|
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
|
||||||
ac_array_in_const32_addr_space(ctx->v4i32));
|
ac_array_in_const32_addr_space(ctx->v4i32));
|
||||||
|
|
||||||
@@ -4763,13 +4721,8 @@ static void create_function(struct si_shader_context *ctx)
|
|||||||
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
|
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
|
||||||
/* Merged stages have 8 system SGPRs at the beginning. */
|
/* Merged stages have 8 system SGPRs at the beginning. */
|
||||||
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
|
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
|
||||||
if (HAVE_32BIT_POINTERS) {
|
declare_per_stage_desc_pointers(ctx, &fninfo,
|
||||||
declare_per_stage_desc_pointers(ctx, &fninfo,
|
ctx->type == PIPE_SHADER_GEOMETRY);
|
||||||
ctx->type == PIPE_SHADER_GEOMETRY);
|
|
||||||
} else {
|
|
||||||
declare_const_and_shader_buffers(ctx, &fninfo,
|
|
||||||
ctx->type == PIPE_SHADER_GEOMETRY);
|
|
||||||
}
|
|
||||||
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
@@ -4788,14 +4741,8 @@ static void create_function(struct si_shader_context *ctx)
|
|||||||
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
/* Declare as many input SGPRs as the VS has. */
|
/* Declare as many input SGPRs as the VS has. */
|
||||||
if (!HAVE_32BIT_POINTERS)
|
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!HAVE_32BIT_POINTERS) {
|
|
||||||
declare_samplers_and_images(ctx, &fninfo,
|
|
||||||
ctx->type == PIPE_SHADER_GEOMETRY);
|
|
||||||
}
|
|
||||||
if (ctx->type == PIPE_SHADER_VERTEX) {
|
if (ctx->type == PIPE_SHADER_VERTEX) {
|
||||||
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
|
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
|
||||||
ac_array_in_const32_addr_space(ctx->v4i32));
|
ac_array_in_const32_addr_space(ctx->v4i32));
|
||||||
@@ -7157,20 +7104,9 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
|
|||||||
LLVMValueRef ptr[2], list;
|
LLVMValueRef ptr[2], list;
|
||||||
bool merged_shader = is_merged_shader(ctx);
|
bool merged_shader = is_merged_shader(ctx);
|
||||||
|
|
||||||
if (HAVE_32BIT_POINTERS) {
|
|
||||||
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
|
|
||||||
list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
|
|
||||||
ac_array_in_const32_addr_space(ctx->v4i32), "");
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get the pointer to rw buffers. */
|
|
||||||
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
|
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
|
||||||
ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1);
|
list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
|
||||||
list = ac_build_gather_values(&ctx->ac, ptr, 2);
|
ac_array_in_const32_addr_space(ctx->v4i32), "");
|
||||||
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
|
|
||||||
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
|
|
||||||
ac_array_in_const_addr_space(ctx->v4i32), "");
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7398,8 +7334,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
|
|||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
if (!HAVE_32BIT_POINTERS)
|
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
|
|
||||||
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||||
|
@@ -158,21 +158,9 @@ struct si_context;
|
|||||||
/* SGPR user data indices */
|
/* SGPR user data indices */
|
||||||
enum {
|
enum {
|
||||||
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
|
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
SI_SGPR_RW_BUFFERS_HI,
|
|
||||||
#endif
|
|
||||||
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
|
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
|
|
||||||
#endif
|
|
||||||
SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
|
SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
|
|
||||||
#endif
|
|
||||||
SI_SGPR_SAMPLERS_AND_IMAGES,
|
SI_SGPR_SAMPLERS_AND_IMAGES,
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
SI_SGPR_SAMPLERS_AND_IMAGES_HI,
|
|
||||||
#endif
|
|
||||||
SI_NUM_RESOURCE_SGPRS,
|
SI_NUM_RESOURCE_SGPRS,
|
||||||
|
|
||||||
/* API VS, TES without GS, GS copy shader */
|
/* API VS, TES without GS, GS copy shader */
|
||||||
@@ -200,35 +188,20 @@ enum {
|
|||||||
GFX6_TCS_NUM_USER_SGPR,
|
GFX6_TCS_NUM_USER_SGPR,
|
||||||
|
|
||||||
/* GFX9: Merged shaders. */
|
/* GFX9: Merged shaders. */
|
||||||
#if HAVE_32BIT_POINTERS
|
|
||||||
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
|
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
|
||||||
/* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
|
/* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
|
||||||
GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
|
GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
|
||||||
#else
|
|
||||||
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */
|
|
||||||
GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
|
|
||||||
GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
|
|
||||||
GFX9_MERGED_NUM_USER_SGPR,
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* GFX9: Merged LS-HS (VS-TCS) only. */
|
/* GFX9: Merged LS-HS (VS-TCS) only. */
|
||||||
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
|
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
|
||||||
GFX9_SGPR_TCS_OUT_OFFSETS,
|
GFX9_SGPR_TCS_OUT_OFFSETS,
|
||||||
GFX9_SGPR_TCS_OUT_LAYOUT,
|
GFX9_SGPR_TCS_OUT_LAYOUT,
|
||||||
#if !HAVE_32BIT_POINTERS
|
|
||||||
GFX9_SGPR_align_for_vb_pointer,
|
|
||||||
#endif
|
|
||||||
GFX9_TCS_NUM_USER_SGPR,
|
GFX9_TCS_NUM_USER_SGPR,
|
||||||
|
|
||||||
/* GS limits */
|
/* GS limits */
|
||||||
GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
|
GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
|
||||||
#if HAVE_32BIT_POINTERS
|
|
||||||
GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
|
GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
|
||||||
GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR,
|
GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR,
|
||||||
#else
|
|
||||||
GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
|
|
||||||
GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
|
|
||||||
#endif
|
|
||||||
SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
|
SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
|
||||||
|
|
||||||
/* PS only */
|
/* PS only */
|
||||||
|
@@ -496,36 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
|
|||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
|
|
||||||
if (HAVE_LLVM < 0x0700) {
|
/* FIXME: LLVM 7 returns incorrect result when count is 0.
|
||||||
LLVMValueRef bfe_sm5 =
|
* https://bugs.freedesktop.org/show_bug.cgi?id=107276
|
||||||
ac_build_bfe(&ctx->ac, emit_data->args[0],
|
*/
|
||||||
emit_data->args[1], emit_data->args[2],
|
LLVMValueRef zero = ctx->i32_0;
|
||||||
emit_data->info->opcode == TGSI_OPCODE_IBFE);
|
LLVMValueRef bfe_sm5 =
|
||||||
|
ac_build_bfe(&ctx->ac, emit_data->args[0],
|
||||||
|
emit_data->args[1], emit_data->args[2],
|
||||||
|
emit_data->info->opcode == TGSI_OPCODE_IBFE);
|
||||||
|
|
||||||
/* Correct for GLSL semantics. */
|
/* Correct for GLSL semantics. */
|
||||||
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
|
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
|
||||||
LLVMConstInt(ctx->i32, 32, 0), "");
|
LLVMConstInt(ctx->i32, 32, 0), "");
|
||||||
emit_data->output[emit_data->chan] =
|
LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
|
||||||
LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
|
zero, "");
|
||||||
} else {
|
bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
|
||||||
/* FIXME: LLVM 7 returns incorrect result when count is 0.
|
emit_data->output[emit_data->chan] =
|
||||||
* https://bugs.freedesktop.org/show_bug.cgi?id=107276
|
LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
|
||||||
*/
|
|
||||||
LLVMValueRef zero = ctx->i32_0;
|
|
||||||
LLVMValueRef bfe_sm5 =
|
|
||||||
ac_build_bfe(&ctx->ac, emit_data->args[0],
|
|
||||||
emit_data->args[1], emit_data->args[2],
|
|
||||||
emit_data->info->opcode == TGSI_OPCODE_IBFE);
|
|
||||||
|
|
||||||
/* Correct for GLSL semantics. */
|
|
||||||
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
|
|
||||||
LLVMConstInt(ctx->i32, 32, 0), "");
|
|
||||||
LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
|
|
||||||
zero, "");
|
|
||||||
bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
|
|
||||||
emit_data->output[emit_data->chan] =
|
|
||||||
LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this is ffs in C */
|
/* this is ffs in C */
|
||||||
|
@@ -464,12 +464,7 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
|
|||||||
static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
|
static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
|
||||||
{
|
{
|
||||||
/* Add the pointer to VBO descriptors. */
|
/* Add the pointer to VBO descriptors. */
|
||||||
if (HAVE_32BIT_POINTERS) {
|
return num_always_on_user_sgprs + 1;
|
||||||
return num_always_on_user_sgprs + 1;
|
|
||||||
} else {
|
|
||||||
assert(num_always_on_user_sgprs % 2 == 0);
|
|
||||||
return num_always_on_user_sgprs + 2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
|
static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
|
||||||
|
Reference in New Issue
Block a user