radeonsi: fix GPU hangs with bindless textures and LLVM 7.0
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
This commit is contained in:
@@ -835,6 +835,14 @@ ac_build_gep0(struct ac_llvm_context *ctx,
|
||||
return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
||||
LLVMValueRef index)
|
||||
{
|
||||
return LLVMBuildPointerCast(ctx->builder,
|
||||
ac_build_gep0(ctx, ptr, index),
|
||||
LLVMTypeOf(ptr), "");
|
||||
}
|
||||
|
||||
void
|
||||
ac_build_indexed_store(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index,
|
||||
@@ -853,14 +861,39 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
|
||||
* \param uniform Whether the base_ptr and index can be assumed to be
|
||||
* dynamically uniform (i.e. load to an SGPR)
|
||||
* \param invariant Whether the load is invariant (no other opcodes affect it)
|
||||
* \param no_unsigned_wraparound
|
||||
* For all possible re-associations and re-distributions of an expression
|
||||
* "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
|
||||
* without inbounds in base_ptr), this parameter is true if "addr + offset"
|
||||
* does not result in an unsigned integer wraparound. This is used for
|
||||
* optimal code generation of 32-bit pointer arithmetic.
|
||||
*
|
||||
* For example, a 32-bit immediate offset that causes a 32-bit unsigned
|
||||
* integer wraparound can't be an imm offset in s_load_dword, because
|
||||
* the instruction performs "addr + offset" in 64 bits.
|
||||
*
|
||||
* Expected usage for bindless textures by chaining GEPs:
|
||||
* // possible unsigned wraparound, don't use InBounds:
|
||||
* ptr1 = LLVMBuildGEP(base_ptr, index);
|
||||
* image = load(ptr1); // becomes "s_load ptr1, 0"
|
||||
*
|
||||
* ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
|
||||
* sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
|
||||
*/
|
||||
static LLVMValueRef
|
||||
ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
|
||||
LLVMValueRef index, bool uniform, bool invariant)
|
||||
LLVMValueRef index, bool uniform, bool invariant,
|
||||
bool no_unsigned_wraparound)
|
||||
{
|
||||
LLVMValueRef pointer, result;
|
||||
LLVMValueRef indices[2] = {ctx->i32_0, index};
|
||||
|
||||
if (no_unsigned_wraparound &&
|
||||
LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_CONST_32BIT_ADDR_SPACE)
|
||||
pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, "");
|
||||
else
|
||||
pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
|
||||
|
||||
pointer = ac_build_gep0(ctx, base_ptr, index);
|
||||
if (uniform)
|
||||
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
|
||||
result = LLVMBuildLoad(ctx->builder, pointer, "");
|
||||
@@ -872,19 +905,28 @@ ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
|
||||
LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
|
||||
LLVMValueRef index)
|
||||
{
|
||||
return ac_build_load_custom(ctx, base_ptr, index, false, false);
|
||||
return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index)
|
||||
{
|
||||
return ac_build_load_custom(ctx, base_ptr, index, false, true);
|
||||
return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
|
||||
}
|
||||
|
||||
/* This assumes that there is no unsigned integer wraparound during the address
|
||||
* computation, excluding all GEPs within base_ptr. */
|
||||
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index)
|
||||
{
|
||||
return ac_build_load_custom(ctx, base_ptr, index, true, true);
|
||||
return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
|
||||
}
|
||||
|
||||
/* See ac_build_load_custom() documentation. */
|
||||
LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index)
|
||||
{
|
||||
return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
|
||||
}
|
||||
|
||||
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
|
||||
|
@@ -203,6 +203,8 @@ LLVMValueRef
|
||||
ac_build_gep0(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef index);
|
||||
LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
||||
LLVMValueRef index);
|
||||
|
||||
void
|
||||
ac_build_indexed_store(struct ac_llvm_context *ctx,
|
||||
@@ -215,6 +217,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index);
|
||||
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index);
|
||||
LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef base_ptr, LLVMValueRef index);
|
||||
|
||||
void
|
||||
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||
|
@@ -305,7 +305,8 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
|
||||
enum ac_descriptor_type type);
|
||||
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
|
||||
LLVMValueRef list, LLVMValueRef index,
|
||||
enum ac_descriptor_type desc_type, bool dcc_off);
|
||||
enum ac_descriptor_type desc_type, bool dcc_off,
|
||||
bool bindless);
|
||||
|
||||
void si_load_system_value(struct si_shader_context *ctx,
|
||||
unsigned index,
|
||||
|
@@ -920,7 +920,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
|
||||
index, "");
|
||||
|
||||
/* TODO: be smarter about when we use dcc_off */
|
||||
return si_load_image_desc(ctx, list, index, desc_type, write);
|
||||
return si_load_image_desc(ctx, list, index, desc_type, write, bindless);
|
||||
}
|
||||
|
||||
assert(base_index + constant_index < ctx->num_samplers);
|
||||
@@ -931,6 +931,16 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
|
||||
index = LLVMBuildAdd(ctx->ac.builder, index,
|
||||
LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
|
||||
|
||||
if (bindless) {
|
||||
/* Since bindless handle arithmetic can contain an unsigned integer
|
||||
* wraparound and si_load_sampler_desc assumes there isn't any,
|
||||
* use GEP without "inbounds" (inside ac_build_pointer_add)
|
||||
* to prevent incorrect code generation and hangs.
|
||||
*/
|
||||
index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
list = ac_build_pointer_add(&ctx->ac, list, index);
|
||||
index = ctx->i32_0;
|
||||
}
|
||||
return si_load_sampler_desc(ctx, list, index, desc_type);
|
||||
}
|
||||
|
||||
|
@@ -176,7 +176,8 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
|
||||
|
||||
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
|
||||
LLVMValueRef list, LLVMValueRef index,
|
||||
enum ac_descriptor_type desc_type, bool dcc_off)
|
||||
enum ac_descriptor_type desc_type, bool dcc_off,
|
||||
bool bindless)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef rsrc;
|
||||
@@ -190,7 +191,11 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
|
||||
assert(desc_type == AC_DESC_IMAGE);
|
||||
}
|
||||
|
||||
if (bindless)
|
||||
rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
|
||||
else
|
||||
rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
|
||||
|
||||
if (desc_type == AC_DESC_IMAGE && dcc_off)
|
||||
rsrc = force_dcc_off(ctx, rsrc);
|
||||
return rsrc;
|
||||
@@ -240,6 +245,8 @@ image_fetch_rsrc(
|
||||
index, "");
|
||||
}
|
||||
|
||||
bool bindless = false;
|
||||
|
||||
if (image->Register.File != TGSI_FILE_IMAGE) {
|
||||
/* Bindless descriptors are accessible from a different pair of
|
||||
* user SGPR indices.
|
||||
@@ -254,11 +261,12 @@ image_fetch_rsrc(
|
||||
*/
|
||||
index = LLVMBuildMul(ctx->ac.builder, index,
|
||||
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
bindless = true;
|
||||
}
|
||||
|
||||
*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
|
||||
target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
|
||||
dcc_off);
|
||||
dcc_off, bindless);
|
||||
}
|
||||
|
||||
static void image_fetch_coords(
|
||||
@@ -1068,6 +1076,15 @@ static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
|
||||
ctx->param_bindless_samplers_and_images);
|
||||
index = lp_build_emit_fetch_src(bld_base, reg,
|
||||
TGSI_TYPE_UNSIGNED, 0);
|
||||
|
||||
/* Since bindless handle arithmetic can contain an unsigned integer
|
||||
* wraparound and si_load_sampler_desc assumes there isn't any,
|
||||
* use GEP without "inbounds" (inside ac_build_pointer_add)
|
||||
* to prevent incorrect code generation and hangs.
|
||||
*/
|
||||
index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
list = ac_build_pointer_add(&ctx->ac, list, index);
|
||||
index = ctx->i32_0;
|
||||
}
|
||||
|
||||
if (target == TGSI_TEXTURE_BUFFER)
|
||||
|
Reference in New Issue
Block a user