radeonsi: fix tcs_out_lds_offsets arg alignment
tcs_out_lds_offsets is not guaranteed to be 16-byte aligned. It is calculated as num_patches * patch_vertices * lshs_vertex_stride. Neither num_patches nor patch_vertices is guaranteed to be aligned to any particular value, and lshs_vertex_stride has one extra dword added to it, so the result is only 4-byte aligned. This could cause problems even before the switch to the NIR tess output lowering, when the tess factors are written before the tail of the input is read. But it is more likely to cause problems after the switch to the NIR tess output lowering, because the main body does not drop the low 4 bits of the offset while the epilog does, so they use different offsets to read/write the tess factors. Fixes: 7598bfd768
("radeonsi: replace llvm tcs output with nir lower pass") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7083 Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18174> (cherry picked from commit ff7c59672f)
This commit is contained in:
@@ -11380,7 +11380,7 @@
|
||||
"description": "radeonsi: fix tcs_out_lds_offsets arg alignment",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "7598bfd768f02d1d77007ebc07990db9c83a6fb4"
|
||||
},
|
||||
|
@@ -110,10 +110,9 @@ struct si_shader_context {
|
||||
struct ac_arg tcs_offchip_layout;
|
||||
|
||||
/* API TCS */
|
||||
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
|
||||
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 = 64K (TODO: not enough bits)
|
||||
* [16:31] = TCS output patch0 offset for per-patch / 16
|
||||
* max = (NUM_PATCHES + 1) * 32*32 = 66624 (TODO: not enough bits)
|
||||
/* Offsets where TCS outputs and TCS patch outputs live in LDS (<= 16K):
|
||||
* [0:15] = TCS output patch0 offset / 4, max = 16K / 4 = 4K
|
||||
* [16:31] = TCS output patch0 offset for per-patch / 4, max = 16K / 4 = 4K
|
||||
*/
|
||||
struct ac_arg tcs_out_lds_offsets;
|
||||
/* Layout of TCS outputs / TES inputs:
|
||||
|
@@ -81,8 +81,7 @@ static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
|
||||
{
|
||||
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
|
||||
LLVMConstInt(ctx->ac.i32, 4, 0), "");
|
||||
return si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
|
||||
|
@@ -763,8 +763,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
assert(((output_vertex_size / 4) & ~0xff) == 0);
|
||||
assert(((input_patch_size / 4) & ~0x1fff) == 0);
|
||||
assert(((output_patch_size / 4) & ~0x1fff) == 0);
|
||||
assert(((output_patch0_offset / 16) & ~0xffff) == 0);
|
||||
assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
|
||||
assert(((output_patch0_offset / 4) & ~0xffff) == 0);
|
||||
assert(((perpatch_output_offset / 4) & ~0xffff) == 0);
|
||||
assert(num_tcs_input_cp <= 32);
|
||||
assert(num_tcs_output_cp <= 32);
|
||||
assert(*num_patches <= 64);
|
||||
@@ -775,7 +775,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
assert((ring_va & u_bit_consecutive(0, 19)) == 0);
|
||||
|
||||
unsigned tcs_out_layout = (output_patch_size / 4) | (num_tcs_input_cp << 13) | ring_va;
|
||||
unsigned tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16);
|
||||
unsigned tcs_out_offsets = (output_patch0_offset / 4) | ((perpatch_output_offset / 4) << 16);
|
||||
unsigned offchip_layout =
|
||||
(*num_patches - 1) | ((num_tcs_output_cp - 1) << 6) |
|
||||
((pervertex_output_patch_size * *num_patches) << 11);
|
||||
|
Reference in New Issue
Block a user