radeonsi: fix tcs_out_lds_offsets arg alignment

tcs_out_lds_offsets is not guaranteed to be 16-byte aligned; it is
calculated like this:

  num_patches * patch_vertices * lshs_vertex_stride

num_patches and patch_vertices are not guaranteed to be aligned to any
particular value, and lshs_vertex_stride has one extra dword added to
it, so the result is only 4-byte aligned.

This may cause problems even before the switch to the nir tess output
lowering, when the tess factors are written before the tail of the
input has been read. But it is more likely to cause problems after the
switch to the nir tess output lowering, because the main body does not
drop the low 4 bits of the offset but the epilog does, so they use
different offsets to read/write the tess factors.

Fixes: 7598bfd768 ("radeonsi: replace llvm tcs output with nir lower pass")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7083
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18174>
This commit is contained in:
Qiang Yu
2022-08-21 15:25:50 +08:00
committed by Marge Bot
parent bee2df64d2
commit ff7c59672f
3 changed files with 7 additions and 9 deletions

View File

@@ -110,10 +110,9 @@ struct si_shader_context {
struct ac_arg tcs_offchip_layout;
/* API TCS */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 = 64K (TODO: not enough bits)
* [16:31] = TCS output patch0 offset for per-patch / 16
* max = (NUM_PATCHES + 1) * 32*32 = 66624 (TODO: not enough bits)
/* Offsets where TCS outputs and TCS patch outputs live in LDS (<= 16K):
* [0:15] = TCS output patch0 offset / 4, max = 16K / 4 = 4K
* [16:31] = TCS output patch0 offset for per-patch / 4, max = 16K / 4 = 4K
*/
struct ac_arg tcs_out_lds_offsets;
/* Layout of TCS outputs / TES inputs:

View File

@@ -81,8 +81,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
{
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
LLVMConstInt(ctx->ac.i32, 4, 0), "");
return si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16);
}
static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)

View File

@@ -763,8 +763,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
assert(((output_vertex_size / 4) & ~0xff) == 0);
assert(((input_patch_size / 4) & ~0x1fff) == 0);
assert(((output_patch_size / 4) & ~0x1fff) == 0);
assert(((output_patch0_offset / 16) & ~0xffff) == 0);
assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
assert(((output_patch0_offset / 4) & ~0xffff) == 0);
assert(((perpatch_output_offset / 4) & ~0xffff) == 0);
assert(num_tcs_input_cp <= 32);
assert(num_tcs_output_cp <= 32);
assert(*num_patches <= 64);
@@ -775,7 +775,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
unsigned tcs_out_layout = (output_patch_size / 4) | (num_tcs_input_cp << 13) | ring_va;
unsigned tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16);
unsigned tcs_out_offsets = (output_patch0_offset / 4) | ((perpatch_output_offset / 4) << 16);
unsigned offchip_layout =
(*num_patches - 1) | ((num_tcs_output_cp - 1) << 6) |
((pervertex_output_patch_size * *num_patches) << 11);