radeonsi: pack the tcs_out_lds_offsets fields as offset / 4 instead of offset / 16

Both 16-bit fields of tcs_out_lds_offsets ([0:15] = TCS output patch0 offset,
[16:31] = TCS output patch0 offset for per-patch data) change units:

  * si_shader_internal.h: the field comment is updated to the new units and
    limits, and the old "not enough bits" TODOs are removed.
  * si_shader_llvm_tess.c: get_tcs_out_patch0_patch_data_offset() returns the
    unpacked [16:31] field directly; the former multiply by 4 is dropped.
  * si_state_draw.cpp: si_emit_derived_tess_state() packs both offsets with
    "/ 4" and range-asserts them against 16 bits using the same divisor.

NOTE(review): only the shader-side reader of bits [16:31] is touched here. If
a reader of bits [0:15] still scales the unpacked value by 4, it needs the
same change; that is not visible in this patch, so verify it in the tree.

NOTE(review): this patch was restored from a copy whose line breaks had been
collapsed. Blank context lines were re-inserted so that every hunk matches
its "@@" line counts, and the indentation (3 spaces, continuation alignment)
is reconstructed. Confirm it against the tree, or apply with a
whitespace-tolerant option, before relying on it.

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 348a20d06cc..7bfc8c8041f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -110,10 +110,9 @@ struct si_shader_context {
    struct ac_arg tcs_offchip_layout;
 
    /* API TCS */
-   /* Offsets where TCS outputs and TCS patch outputs live in LDS:
-    * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 = 64K (TODO: not enough bits)
-    * [16:31] = TCS output patch0 offset for per-patch / 16
-    *           max = (NUM_PATCHES + 1) * 32*32 = 66624 (TODO: not enough bits)
+   /* Offsets where TCS outputs and TCS patch outputs live in LDS (<= 16K):
+    * [0:15] = TCS output patch0 offset / 4, max = 16K / 4 = 4K
+    * [16:31] = TCS output patch0 offset for per-patch / 4, max = 16K / 4 = 4K
     */
    struct ac_arg tcs_out_lds_offsets;
    /* Layout of TCS outputs / TES inputs:
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index c87cd1dd009..34f5175cbea 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -81,8 +81,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
 
 static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
 {
-   return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
-                       LLVMConstInt(ctx->ac.i32, 4, 0), "");
+   return si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16);
 }
 
 static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 9fda376f57c..74692254694 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -763,8 +763,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
    assert(((output_vertex_size / 4) & ~0xff) == 0);
    assert(((input_patch_size / 4) & ~0x1fff) == 0);
    assert(((output_patch_size / 4) & ~0x1fff) == 0);
-   assert(((output_patch0_offset / 16) & ~0xffff) == 0);
-   assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
+   assert(((output_patch0_offset / 4) & ~0xffff) == 0);
+   assert(((perpatch_output_offset / 4) & ~0xffff) == 0);
    assert(num_tcs_input_cp <= 32);
    assert(num_tcs_output_cp <= 32);
    assert(*num_patches <= 64);
@@ -775,7 +775,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
    assert((ring_va & u_bit_consecutive(0, 19)) == 0);
 
    unsigned tcs_out_layout = (output_patch_size / 4) | (num_tcs_input_cp << 13) | ring_va;
-   unsigned tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16);
+   unsigned tcs_out_offsets = (output_patch0_offset / 4) | ((perpatch_output_offset / 4) << 16);
    unsigned offchip_layout = (*num_patches - 1) | ((num_tcs_output_cp - 1) << 6) |
                              ((pervertex_output_patch_size * *num_patches) << 11);
 