radeonsi: create load_const_buffer_desc_fast_path() helper
This will be shared by the TGSI and NIR backends. For simplicity we leave the SI LLVM 5.0 and lower work around only in the TGSI backend. Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
@@ -2319,6 +2319,49 @@ void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
|
||||
si_declare_compute_memory(ctx);
|
||||
}
|
||||
|
||||
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef ptr =
|
||||
LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
|
||||
/* Do the bounds checking with a descriptor, because
|
||||
* doing computation and manual bounds checking of 64-bit
|
||||
* addresses generates horrible VALU code with very high
|
||||
* VGPR usage and very low SIMD occupancy.
|
||||
*/
|
||||
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
|
||||
|
||||
LLVMValueRef desc0, desc1;
|
||||
if (HAVE_32BIT_POINTERS) {
|
||||
desc0 = ptr;
|
||||
desc1 = LLVMConstInt(ctx->i32,
|
||||
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
||||
} else {
|
||||
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
|
||||
desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
|
||||
desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
|
||||
/* Mask out all bits except BASE_ADDRESS_HI. */
|
||||
desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
|
||||
LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
|
||||
}
|
||||
|
||||
LLVMValueRef desc_elems[] = {
|
||||
desc0,
|
||||
desc1,
|
||||
LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
|
||||
LLVMConstInt(ctx->i32,
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
|
||||
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
|
||||
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
|
||||
};
|
||||
|
||||
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
|
||||
{
|
||||
LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
|
||||
@@ -2397,8 +2440,6 @@ static LLVMValueRef fetch_constant(
|
||||
/* Fast path when user data SGPRs point to constant buffer 0 directly. */
|
||||
if (sel->info.const_buffers_declared == 1 &&
|
||||
sel->info.shader_buffers_declared == 0) {
|
||||
LLVMValueRef ptr =
|
||||
LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
|
||||
|
||||
/* This enables use of s_load_dword and flat_load_dword for const buffer 0
|
||||
* loads, and up to x4 load opcode merging. However, it leads to horrible
|
||||
@@ -2413,48 +2454,17 @@ static LLVMValueRef fetch_constant(
|
||||
* s_buffer_load_dword (that we have to prevent) is when we use use
|
||||
* a literal offset where we don't need bounds checking.
|
||||
*/
|
||||
if (ctx->screen->info.chip_class == SI &&
|
||||
HAVE_LLVM < 0x0600 &&
|
||||
!reg->Register.Indirect) {
|
||||
if (ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
|
||||
!reg->Register.Indirect) {
|
||||
LLVMValueRef ptr =
|
||||
LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
|
||||
|
||||
addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
|
||||
return bitcast(bld_base, type, result);
|
||||
}
|
||||
|
||||
/* Do the bounds checking with a descriptor, because
|
||||
* doing computation and manual bounds checking of 64-bit
|
||||
* addresses generates horrible VALU code with very high
|
||||
* VGPR usage and very low SIMD occupancy.
|
||||
*/
|
||||
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
|
||||
|
||||
LLVMValueRef desc0, desc1;
|
||||
if (HAVE_32BIT_POINTERS) {
|
||||
desc0 = ptr;
|
||||
desc1 = LLVMConstInt(ctx->i32,
|
||||
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
||||
} else {
|
||||
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
|
||||
desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
|
||||
desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
|
||||
/* Mask out all bits except BASE_ADDRESS_HI. */
|
||||
desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
|
||||
LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
|
||||
}
|
||||
|
||||
LLVMValueRef desc_elems[] = {
|
||||
desc0,
|
||||
desc1,
|
||||
LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
|
||||
LLVMConstInt(ctx->i32,
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
|
||||
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
|
||||
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
|
||||
};
|
||||
LLVMValueRef desc = ac_build_gather_values(&ctx->ac, desc_elems, 4);
|
||||
LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
|
||||
LLVMValueRef result = buffer_load_const(ctx, desc, addr);
|
||||
return bitcast(bld_base, type, result);
|
||||
}
|
||||
|
Reference in New Issue
Block a user