From 5c03cdbd02a69884ce759e0cbd0cf76dc212e2d3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 18 Jan 2024 17:54:26 +0100 Subject: [PATCH] radv: fix indirect dispatches on the compute queue on GFX7 GFX7 CP requires the indirect dispatch VA to be aligned to 32 bytes. This fixes dEQP-VK.api.command_buffers.many_indirect_disps_on_secondary, but it's unexpected that it uncovered this bug. Cc: mesa-stable Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_gpu_info.c | 5 +++++ src/amd/common/ac_gpu_info.h | 1 + src/amd/vulkan/radv_cmd_buffer.c | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 22f4eba18da..2e12a74f854 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1255,6 +1255,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND || info->family == CHIP_TONGA; + /* GFX7 CP requires 32 bytes alignment for the indirect buffer arguments on + * the compute queue. + */ + info->has_async_compute_align32_bug = info->gfx_level == GFX7; + /* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the * feature version wasn't bumped. 
*/ diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index d65d93fe1a8..f390e144f69 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -102,6 +102,7 @@ struct radeon_info { bool has_vgt_flush_ngg_legacy_bug; bool has_cs_regalloc_hang_bug; bool has_async_compute_threadgroup_bug; + bool has_async_compute_align32_bug; bool has_32bit_predication; bool has_3d_cube_border_color_mipmap; bool has_image_opcodes; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 4487423185d..cd461ef509f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -9691,11 +9691,39 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv } if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + uint64_t indirect_va = info->va; + radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, 4 /* DISPATCH_INDIRECT size */); + + if (cmd_buffer->device->physical_device->rad_info.has_async_compute_align32_bug && + cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32)) { + const uint64_t unaligned_va = indirect_va; + UNUSED void *ptr; + uint32_t offset; + + if (!radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, sizeof(VkDispatchIndirectCommand), 32, &offset, &ptr)) + return; + + indirect_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; + + for (uint32_t i = 0; i < 3; i++) { + const uint64_t src_va = unaligned_va + i * 4; + const uint64_t dst_va = indirect_va + i * 4; + + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, src_va); + radeon_emit(cs, src_va >> 32); + radeon_emit(cs, dst_va); + radeon_emit(cs, dst_va >> 32); + } + } + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, info->va); - 
radeon_emit(cs, info->va >> 32); + radeon_emit(cs, indirect_va); + radeon_emit(cs, indirect_va >> 32); radeon_emit(cs, dispatch_initiator); } else { radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1));