radeonsi: limit CP DMA to skip holes in sparse bo

CP DMA on gfx9 can't handle holes in a sparse buffer. The fix skips the
holes in a sparse bo so that arb_sparse_buffer-buffer-data and
arb_sparse_buffer-commit pass.

Signed-off-by: Flora Cui <flora.cui@amd.com>
Signed-off-by: Julia Zhang <julia.zhang@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24535>
This commit is contained in:
Flora Cui
2023-04-11 15:04:02 +08:00
committed by Marge Bot
parent 8849e1e3a6
commit e259f4050d
6 changed files with 97 additions and 6 deletions

View File

@@ -77,8 +77,6 @@ spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
spec@arb_sparse_buffer@buffer-data,Fail
spec@arb_sparse_buffer@commit,Fail
spec@egl 1.4@eglterminate then unbind context,Fail
spec@egl_chromium_sync_control@conformance,Fail
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail

View File

@@ -65,8 +65,6 @@ spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
spec@arb_sparse_buffer@buffer-data,Fail
spec@arb_sparse_buffer@commit,Fail
spec@egl_chromium_sync_control@conformance,Fail
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
spec@egl_ext_protected_content@conformance,Fail
1 # piglit failures
65 spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
66 spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
67 spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
spec@arb_sparse_buffer@buffer-data,Fail
spec@arb_sparse_buffer@commit,Fail
68 spec@egl_chromium_sync_control@conformance,Fail
69 spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
70 spec@egl_ext_protected_content@conformance,Fail

View File

@@ -82,8 +82,6 @@ spec@arb_shader_clock@execution@clock2x32,Fail
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
spec@arb_sparse_buffer@buffer-data,Fail
spec@arb_sparse_buffer@commit,Fail
spec@egl_ext_protected_content@conformance,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
1 # piglit failures
82 spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
83 spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
84 spec@arb_sparse_buffer@buffer-data,Fail spec@egl_ext_protected_content@conformance,Fail
spec@arb_sparse_buffer@commit,Fail
spec@egl_ext_protected_content@conformance,Fail
85 spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
86 spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
87 spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail

View File

@@ -32,6 +32,15 @@ static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
return max & ~(SI_CPDMA_ALIGNMENT - 1);
}
/* Sparse-buffer workaround check: returns true when CP DMA has to skip the
 * uncommitted holes of the given buffer, i.e. on GFX9 when the resource is
 * non-NULL and was created with RADEON_FLAG_SPARSE.
 */
static inline bool cp_dma_sparse_wa(struct si_context *sctx, struct si_resource *sdst)
{
   /* Only GFX9 needs the workaround, and a NULL resource has no holes. */
   if (sctx->gfx_level != GFX9 || !sdst)
      return false;

   return (sdst->flags & RADEON_FLAG_SPARSE) != 0;
}
/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
* a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
* clear value.
@@ -199,6 +208,17 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx));
unsigned dma_flags = CP_DMA_CLEAR | (sdst ? 0 : CP_DMA_DST_IS_GDS);
if (cp_dma_sparse_wa(sctx,sdst)) {
unsigned skip_count =
sctx->ws->buffer_find_next_committed_memory(sdst->buf,
va - sdst->gpu_address, &byte_count);
va += skip_count;
size -= skip_count;
}
if (!byte_count)
continue;
si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, user_flags, coher, &is_first,
&dma_flags);
@@ -344,6 +364,27 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx));
unsigned dma_flags = gds_flags;
if (cp_dma_sparse_wa(sctx, si_resource(dst))) {
unsigned skip_count =
sctx->ws->buffer_find_next_committed_memory(si_resource(dst)->buf,
main_dst_offset - si_resource(dst)->gpu_address, &byte_count);
main_dst_offset += skip_count;
main_src_offset += skip_count;
size -= skip_count;
}
if (cp_dma_sparse_wa(sctx, si_resource(src))) {
unsigned skip_count =
sctx->ws->buffer_find_next_committed_memory(si_resource(src)->buf,
main_src_offset - si_resource(src)->gpu_address, &byte_count);
main_dst_offset += skip_count;
main_src_offset += skip_count;
size -= skip_count;
}
if (!byte_count)
continue;
si_cp_dma_prepare(sctx, dst, src, byte_count, size + skipped_size + realign_size, user_flags,
coher, &is_first, &dma_flags);

View File

@@ -447,6 +447,13 @@ struct radeon_winsys {
bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer *buf,
uint64_t offset, uint64_t size, bool commit);
/**
 * Find the first committed part inside a byte range of the given sparse
 * buffer.
 * \note Only implemented by the amdgpu winsys.
 *
 * \param buf           The sparse buffer to inspect.
 * \param range_offset  Byte offset of the start of the range within the buffer.
 * \param range_size    In: size of the range in bytes.
 *                      Out: size in bytes of the first committed part found
 *                      in the range, or 0 if the range has none.
 * \return The number of bytes to skip from range_offset to reach that
 *         committed part (non-zero when range_offset falls into a hole).
 *         If nothing in the range is committed, the whole input range size
 *         is returned.
 */
unsigned (*buffer_find_next_committed_memory)(struct pb_buffer *buf,
uint64_t range_offset, unsigned *range_size);
/**
* Return the virtual address of a buffer.
*

View File

@@ -1281,6 +1281,54 @@ out:
return ok;
}
/**
 * Find the first committed (physically backed) span inside a byte range of a
 * sparse buffer.
 *
 * The range [range_offset, range_offset + *range_size) does not have to be
 * aligned to RADEON_SPARSE_PAGE_SIZE: every page the range touches is
 * examined and the result is clamped back to the requested range. All byte
 * arithmetic is done in 64 bits so offsets of 4 GiB and above do not wrap.
 *
 * NOTE(review): the range is assumed to lie inside the buffer; this function
 * does not check it against the number of commitment entries.
 *
 * \param buf           sparse buffer to inspect
 * \param range_offset  byte offset of the range start within the buffer
 * \param range_size    in: size of the range in bytes;
 *                      out: size in bytes of the first committed span inside
 *                      the range, or 0 if the range contains none
 * \return number of uncommitted bytes between range_offset and the start of
 *         the committed span; the whole input range size if nothing in the
 *         range is committed.
 */
static unsigned
amdgpu_bo_find_next_committed_memory(struct pb_buffer *buf,
                                     uint64_t range_offset, unsigned *range_size)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
   struct amdgpu_sparse_commitment *comm = bo->u.sparse.commitments;
   const uint64_t page_size = RADEON_SPARSE_PAGE_SIZE;
   const uint64_t range_end = range_offset + *range_size;
   uint64_t page, end_page, committed_first_page;
   uint64_t committed_start, committed_end;
   unsigned skip;

   /* Nothing to look at: no bytes to skip and no committed bytes. */
   if (!*range_size)
      return 0;

   /* Pages touched by the range: [page, end_page). The end is derived from
    * the end byte of the range so that a range starting in the middle of a
    * page and spilling into the next one still has its last page examined.
    */
   page = range_offset / page_size;
   end_page = DIV_ROUND_UP(range_end, page_size);

   simple_mtx_lock(&bo->lock);
   /* Lookup the first page with backing physical storage */
   while (page < end_page && !comm[page].backing)
      page++;
   committed_first_page = page;

   /* Lookup the first page without backing physical storage */
   while (page < end_page && comm[page].backing)
      page++;
   simple_mtx_unlock(&bo->lock);

   /* The whole range is a hole: skip all of it. */
   if (committed_first_page == end_page) {
      skip = *range_size;
      *range_size = 0;
      return skip;
   }

   /* Clamp the committed page span to the requested byte range. */
   committed_start = committed_first_page * page_size;
   if (committed_start < range_offset)
      committed_start = range_offset;

   committed_end = page * page_size;
   if (committed_end > range_end)
      committed_end = range_end;

   /* Both values are bounded by the input *range_size, so they fit. */
   skip = (unsigned)(committed_start - range_offset);
   *range_size = (unsigned)(committed_end - committed_start);
   return skip;
}
static void amdgpu_buffer_get_metadata(struct radeon_winsys *rws,
struct pb_buffer *_buf,
struct radeon_bo_metadata *md,
@@ -1755,6 +1803,7 @@ void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws)
ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
ws->base.buffer_get_handle = amdgpu_bo_get_handle;
ws->base.buffer_commit = amdgpu_bo_sparse_commit;
ws->base.buffer_find_next_committed_memory = amdgpu_bo_find_next_committed_memory;
ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
ws->base.buffer_get_flags = amdgpu_bo_get_flags;