radeonsi: limit CP DMA to skip holes in sparse bo
CP DMA on gfx9 can't handle holes in a sparse buffer. The fix skips sparse bo holes so that arb_sparse_buffer-buffer-data and arb_sparse_buffer-commit pass. Signed-off-by: Flora Cui <flora.cui@amd.com> Signed-off-by: Julia Zhang <julia.zhang@amd.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24535>
This commit is contained in:
@@ -77,8 +77,6 @@ spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei
|
||||
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
|
||||
spec@arb_sparse_buffer@buffer-data,Fail
|
||||
spec@arb_sparse_buffer@commit,Fail
|
||||
spec@egl 1.4@eglterminate then unbind context,Fail
|
||||
spec@egl_chromium_sync_control@conformance,Fail
|
||||
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
|
||||
|
@@ -65,8 +65,6 @@ spec@arb_program_interface_query@arb_program_interface_query-getprogramresourcei
|
||||
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
|
||||
spec@arb_sparse_buffer@buffer-data,Fail
|
||||
spec@arb_sparse_buffer@commit,Fail
|
||||
spec@egl_chromium_sync_control@conformance,Fail
|
||||
spec@egl_chromium_sync_control@conformance@eglGetSyncValuesCHROMIUM_msc_and_sbc_test,Fail
|
||||
spec@egl_ext_protected_content@conformance,Fail
|
||||
|
|
@@ -82,8 +82,6 @@ spec@arb_shader_clock@execution@clock2x32,Fail
|
||||
spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@fs-packhalf2x16,Fail
|
||||
spec@arb_shading_language_packing@execution@built-in-functions@vs-packhalf2x16,Fail
|
||||
spec@arb_sparse_buffer@buffer-data,Fail
|
||||
spec@arb_sparse_buffer@commit,Fail
|
||||
spec@egl_ext_protected_content@conformance,Fail
|
||||
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
||||
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
|
||||
|
|
@@ -32,6 +32,15 @@ static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
|
||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||
}
|
||||
|
||||
/* should cp dma skip the hole in sparse bo */
|
||||
static inline bool cp_dma_sparse_wa(struct si_context *sctx, struct si_resource *sdst)
|
||||
{
|
||||
if ((sctx->gfx_level == GFX9) && sdst && (sdst->flags & RADEON_FLAG_SPARSE))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
|
||||
* a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
|
||||
* clear value.
|
||||
@@ -199,6 +208,17 @@ void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
|
||||
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx));
|
||||
unsigned dma_flags = CP_DMA_CLEAR | (sdst ? 0 : CP_DMA_DST_IS_GDS);
|
||||
|
||||
if (cp_dma_sparse_wa(sctx,sdst)) {
|
||||
unsigned skip_count =
|
||||
sctx->ws->buffer_find_next_committed_memory(sdst->buf,
|
||||
va - sdst->gpu_address, &byte_count);
|
||||
va += skip_count;
|
||||
size -= skip_count;
|
||||
}
|
||||
|
||||
if (!byte_count)
|
||||
continue;
|
||||
|
||||
si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, user_flags, coher, &is_first,
|
||||
&dma_flags);
|
||||
|
||||
@@ -344,6 +364,27 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx));
|
||||
unsigned dma_flags = gds_flags;
|
||||
|
||||
if (cp_dma_sparse_wa(sctx, si_resource(dst))) {
|
||||
unsigned skip_count =
|
||||
sctx->ws->buffer_find_next_committed_memory(si_resource(dst)->buf,
|
||||
main_dst_offset - si_resource(dst)->gpu_address, &byte_count);
|
||||
main_dst_offset += skip_count;
|
||||
main_src_offset += skip_count;
|
||||
size -= skip_count;
|
||||
}
|
||||
|
||||
if (cp_dma_sparse_wa(sctx, si_resource(src))) {
|
||||
unsigned skip_count =
|
||||
sctx->ws->buffer_find_next_committed_memory(si_resource(src)->buf,
|
||||
main_src_offset - si_resource(src)->gpu_address, &byte_count);
|
||||
main_dst_offset += skip_count;
|
||||
main_src_offset += skip_count;
|
||||
size -= skip_count;
|
||||
}
|
||||
|
||||
if (!byte_count)
|
||||
continue;
|
||||
|
||||
si_cp_dma_prepare(sctx, dst, src, byte_count, size + skipped_size + realign_size, user_flags,
|
||||
coher, &is_first, &dma_flags);
|
||||
|
||||
|
@@ -447,6 +447,13 @@ struct radeon_winsys {
|
||||
bool (*buffer_commit)(struct radeon_winsys *ws, struct pb_buffer *buf,
|
||||
uint64_t offset, uint64_t size, bool commit);
|
||||
|
||||
/**
|
||||
* Calc size of the first committed part of the given sparse buffer.
|
||||
* \note Only implemented by the amdgpu winsys.
|
||||
* \return the skipped count if the range_offset falls into a hole.
|
||||
*/
|
||||
unsigned (*buffer_find_next_committed_memory)(struct pb_buffer *buf,
|
||||
uint64_t range_offset, unsigned *range_size);
|
||||
/**
|
||||
* Return the virtual address of a buffer.
|
||||
*
|
||||
|
@@ -1281,6 +1281,54 @@ out:
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* Find the first committed (physically backed) span inside a byte range of a
 * sparse buffer.
 *
 * \param buf           sparse bo (callers gate on RADEON_FLAG_SPARSE, so
 *                      u.sparse.commitments is valid)
 * \param range_offset  byte offset of the range relative to the bo start
 *                      (callers pass va - gpu_address)
 * \param range_size    in: size in bytes of the range to examine;
 *                      out: number of contiguous committed bytes that follow
 *                      the returned skip (0 if the whole range is a hole)
 * \return number of bytes to skip from range_offset to reach the first
 *         committed byte
 */
static unsigned
amdgpu_bo_find_next_committed_memory(struct pb_buffer *buf,
                        uint64_t range_offset, unsigned *range_size)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buf);
   struct amdgpu_sparse_commitment *comm;
   uint32_t va_page, end_va_page;
   uint32_t span_va_page, start_va_page;
   unsigned skip, skip_after;

   skip = skip_after = 0;
   comm = bo->u.sparse.commitments;
   /* Convert the byte range into a [start_va_page, end_va_page) page range. */
   start_va_page = va_page = range_offset / RADEON_SPARSE_PAGE_SIZE;
   end_va_page = va_page + DIV_ROUND_UP(*range_size, RADEON_SPARSE_PAGE_SIZE);

   /* Hold the bo lock while scanning the commitment array — presumably it is
    * mutated concurrently by sparse commit operations; verify against
    * buffer_commit. */
   simple_mtx_lock(&bo->lock);
   /* Lookup the first page with backing physical storage */
   while (va_page < end_va_page && !comm[va_page].backing)
      va_page++;
   span_va_page = va_page;

   /* Lookup the first page without backing physical storage */
   while (va_page < end_va_page && comm[va_page].backing)
      va_page++;
   simple_mtx_unlock(&bo->lock);

   /* No committed page starts before the end of the range: the whole range is
    * a hole, so report it all as skipped and leave nothing to transfer.
    * NOTE(review): span_va_page * RADEON_SPARSE_PAGE_SIZE is a 32-bit
    * multiplication compared against a 64-bit sum — presumably fine for the
    * sizes CP DMA sees, but worth confirming for bos >= 4 GiB. */
   if (span_va_page * RADEON_SPARSE_PAGE_SIZE >= range_offset + *range_size) {
      skip = *range_size;
      *range_size = 0;
      return skip;
   }

   /* Calc byte count that need to skip before committed range */
   if (span_va_page != start_va_page)
      skip = (span_va_page - start_va_page) * RADEON_SPARSE_PAGE_SIZE
               - range_offset % RADEON_SPARSE_PAGE_SIZE;

   /* The committed span ends before the end of the range: count the trailing
    * uncommitted bytes (whole pages from va_page onward, adjusted by the
    * partial last page of the range). */
   if (va_page != end_va_page) {
      skip_after = (end_va_page - va_page - 1) * RADEON_SPARSE_PAGE_SIZE
               + *range_size % RADEON_SPARSE_PAGE_SIZE;
      /* A range that is an exact multiple of the page size has no partial
       * last page, so the final page counts in full. */
      if (!(*range_size % RADEON_SPARSE_PAGE_SIZE))
         skip_after += RADEON_SPARSE_PAGE_SIZE;
   }

   /* Shrink the range to just the contiguous committed span. */
   *range_size = *range_size - skip_after - skip;
   return skip;
}
|
||||
|
||||
static void amdgpu_buffer_get_metadata(struct radeon_winsys *rws,
|
||||
struct pb_buffer *_buf,
|
||||
struct radeon_bo_metadata *md,
|
||||
@@ -1755,6 +1803,7 @@ void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *ws)
|
||||
ws->base.buffer_is_suballocated = amdgpu_bo_is_suballocated;
|
||||
ws->base.buffer_get_handle = amdgpu_bo_get_handle;
|
||||
ws->base.buffer_commit = amdgpu_bo_sparse_commit;
|
||||
ws->base.buffer_find_next_committed_memory = amdgpu_bo_find_next_committed_memory;
|
||||
ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
|
||||
ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
|
||||
ws->base.buffer_get_flags = amdgpu_bo_get_flags;
|
||||
|
Reference in New Issue
Block a user