diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index d5c7891de62..c6b659c9526 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -504,6 +504,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->vram_vis_size = vram_vis.heap_size; } + info->gart_size_kb = DIV_ROUND_UP(info->gart_size, 1024); + info->vram_size_kb = DIV_ROUND_UP(info->vram_size, 1024); + /* Add some margin of error, though this shouldn't be needed in theory. */ info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size; diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 873ce69e98a..17ea5e169c9 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -87,6 +87,8 @@ struct radeon_info { /* Memory info. */ uint32_t pte_fragment_size; uint32_t gart_page_size; + uint32_t gart_size_kb; + uint32_t vram_size_kb; uint64_t gart_size; uint64_t vram_size; uint64_t vram_vis_size; diff --git a/src/gallium/drivers/r600/r600_cs.h b/src/gallium/drivers/r600/r600_cs.h index 80f7162049c..71e606b9b3c 100644 --- a/src/gallium/drivers/r600/r600_cs.h +++ b/src/gallium/drivers/r600/r600_cs.h @@ -45,8 +45,8 @@ radeon_cs_memory_below_limit(struct r600_common_screen *screen, struct radeon_cmdbuf *cs, uint64_t vram, uint64_t gtt) { - vram += cs->used_vram; - gtt += cs->used_gart; + vram += (uint64_t)cs->used_vram_kb * 1024; + gtt += (uint64_t)cs->used_gart_kb * 1024; /* Anything that goes above the VRAM size should go to GTT. */ if (vram > screen->info.vram_size) diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index e020213db2c..9b57691af40 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -228,8 +228,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src) { - uint64_t vram = ctx->dma.cs.used_vram; - uint64_t gtt = ctx->dma.cs.used_gart; + uint64_t vram = (uint64_t)ctx->dma.cs.used_vram_kb * 1024; + uint64_t gtt = (uint64_t)ctx->dma.cs.used_gart_kb * 1024; if (dst) { vram += dst->vram_usage; @@ -264,7 +264,7 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, */ num_dw++; /* for emit_wait_idle below */ if (!ctx->ws->cs_check_space(&ctx->dma.cs, num_dw, false) || - ctx->dma.cs.used_vram + ctx->dma.cs.used_gart > 64 * 1024 * 1024 || + ctx->dma.cs.used_vram_kb + ctx->dma.cs.used_gart_kb > 64 * 1024 || !radeon_cs_memory_below_limit(ctx->screen, &ctx->dma.cs, vram, gtt)) { ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); assert((num_dw + ctx->dma.cs.current.cdw) <= ctx->dma.cs.current.max_dw); diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 3e4fa01c0cd..c2ebd0f69ff 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -203,8 +203,8 @@ struct radeon_cmdbuf { unsigned prev_dw; /* Total number of dwords in previous chunks. */ /* Memory usage of the buffer list. These are always 0 for preamble IBs. */ - uint64_t used_vram; - uint64_t used_gart; + uint32_t used_vram_kb; + uint32_t used_gart_kb; uint64_t gpu_address; /* Private winsys data. */ diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 7f769f4431f..33c3d6ad390 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -145,13 +145,13 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, res->flags |= RADEON_FLAG_UNCACHED; /* Set expected VRAM and GART usage for the buffer. */ - res->vram_usage = 0; - res->gart_usage = 0; + res->vram_usage_kb = 0; + res->gart_usage_kb = 0; res->max_forced_staging_uploads = 0; res->b.max_forced_staging_uploads = 0; if (res->domains & RADEON_DOMAIN_VRAM) { - res->vram_usage = size; + res->vram_usage_kb = MAX2(1, size / 1024); if (!sscreen->info.smart_access_memory) { /* We don't want to evict buffers from VRAM by mapping them for CPU access, @@ -168,7 +168,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, sscreen->info.has_dedicated_vram && size >= min_size ? max_staging_uploads : 0; } } else if (res->domains & RADEON_DOMAIN_GTT) { - res->gart_usage = size; + res->gart_usage_kb = MAX2(1, size / 1024); } } @@ -278,8 +278,8 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *d sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads; sdst->flags = ssrc->flags; - assert(sdst->vram_usage == ssrc->vram_usage); - assert(sdst->gart_usage == ssrc->gart_usage); + assert(sdst->vram_usage_kb == ssrc->vram_usage_kb); + assert(sdst->gart_usage_kb == ssrc->gart_usage_kb); assert(sdst->bo_size == ssrc->bo_size); assert(sdst->bo_alignment == ssrc->bo_alignment); assert(sdst->domains == ssrc->domains); @@ -647,8 +647,8 @@ static struct pipe_resource *si_buffer_from_user_memory(struct pipe_screen *scre } buf->gpu_address = ws->buffer_get_virtual_address(buf->buf); - buf->vram_usage = 0; - buf->gart_usage = templ->width0; + buf->vram_usage_kb = 0; + buf->gart_usage_kb = templ->width0 / 1024; return &buf->b.b; } @@ -671,9 +671,9 @@ struct pipe_resource *si_buffer_from_winsys_buffer(struct pipe_screen *screen, res->domains = sscreen->ws->buffer_get_initial_domain(res->buf); if (res->domains & RADEON_DOMAIN_VRAM) - res->vram_usage = res->bo_size; + res->vram_usage_kb = MAX2(1, res->bo_size / 1024); else if (res->domains & RADEON_DOMAIN_GTT) - res->gart_usage = res->bo_size; + res->gart_usage_kb = MAX2(1, res->bo_size / 1024); if (sscreen->ws->buffer_get_flags) res->flags = sscreen->ws->buffer_get_flags(res->buf); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index a12c42a875e..e01f0ae19bd 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -833,8 +833,8 @@ static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_ struct radeon_winsys *ws = sscreen->ws; unsigned vram_usage, gtt_usage; - info->total_device_memory = sscreen->info.vram_size / 1024; - info->total_staging_memory = sscreen->info.gart_size / 1024; + info->total_device_memory = sscreen->info.vram_size_kb; + info->total_staging_memory = sscreen->info.gart_size_kb; /* The real TTM memory usage is somewhat random, because: * diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index d28019aabb9..01bce3ffe28 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -38,14 +38,14 @@ void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws) * that have been added (cs_add_buffer) and two counters in the pipe * driver for those that haven't been added yet. */ - if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram, ctx->gtt))) { - ctx->gtt = 0; - ctx->vram = 0; + if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram_kb, ctx->gtt_kb))) { + ctx->gtt_kb = 0; + ctx->vram_kb = 0; si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); return; } - ctx->gtt = 0; - ctx->vram = 0; + ctx->gtt_kb = 0; + ctx->vram_kb = 0; unsigned need_dwords = si_get_minimum_num_gfx_cs_dwords(ctx, num_draws); if (!ctx->ws->cs_check_space(cs, need_dwords, false)) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 87ea96e7539..6c9d8130d73 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -284,8 +284,8 @@ struct si_resource { struct pb_buffer *buf; uint64_t gpu_address; /* Memory usage if the buffer placement is optimal. */ - uint64_t vram_usage; - uint64_t gart_usage; + uint32_t vram_usage_kb; + uint32_t gart_usage_kb; /* Resource properties. */ uint64_t bo_size; @@ -977,8 +977,8 @@ struct si_context { unsigned last_num_draw_calls; unsigned flags; /* flush flags */ /* Current unaccounted memory usage. */ - uint64_t vram; - uint64_t gtt; + uint32_t vram_kb; + uint32_t gtt_kb; /* Compute-based primitive discard. */ unsigned prim_discard_vertex_count_threshold; @@ -1635,8 +1635,8 @@ static inline void si_context_add_resource_size(struct si_context *sctx, struct { if (r) { /* Add memory usage for need_gfx_cs_space */ - sctx->vram += si_resource(r)->vram_usage; - sctx->gtt += si_resource(r)->gart_usage; + sctx->vram_kb += si_resource(r)->vram_usage_kb; + sctx->gtt_kb += si_resource(r)->gart_usage_kb; } } @@ -1864,17 +1864,17 @@ static inline bool util_rast_prim_is_triangles(unsigned prim) * \param gtt GTT memory size not added to the buffer list yet */ static inline bool radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_cmdbuf *cs, - uint64_t vram, uint64_t gtt) + uint32_t vram_kb, uint32_t gtt_kb) { - vram += cs->used_vram; - gtt += cs->used_gart; + vram_kb += cs->used_vram_kb; + gtt_kb += cs->used_gart_kb; /* Anything that goes above the VRAM size should go to GTT. */ - if (vram > screen->info.vram_size) - gtt += vram - screen->info.vram_size; + if (vram_kb > screen->info.vram_size_kb) + gtt_kb += vram_kb - screen->info.vram_size_kb; - /* Now we just need to check if we have enough GTT. */ - return gtt < screen->info.gart_size * 0.7; + /* Now we just need to check if we have enough GTT (the limit is 75% of max). */ + return gtt_kb < screen->info.gart_size_kb / 4 * 3; } /** @@ -1918,8 +1918,8 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc bool check_mem) { if (check_mem && - !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram + bo->vram_usage, - sctx->gtt + bo->gart_usage)) + !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram_kb + bo->vram_usage_kb, + sctx->gtt_kb + bo->gart_usage_kb)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage, priority); diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 1fa2e5234ef..eb0c132f421 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -432,8 +432,8 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex tex->buffer.b.b.bind = templ.bind; pb_reference(&tex->buffer.buf, new_tex->buffer.buf); tex->buffer.gpu_address = new_tex->buffer.gpu_address; - tex->buffer.vram_usage = new_tex->buffer.vram_usage; - tex->buffer.gart_usage = new_tex->buffer.gart_usage; + tex->buffer.vram_usage_kb = new_tex->buffer.vram_usage_kb; + tex->buffer.gart_usage_kb = new_tex->buffer.gart_usage_kb; tex->buffer.bo_size = new_tex->buffer.bo_size; tex->buffer.bo_alignment = new_tex->buffer.bo_alignment; tex->buffer.domains = new_tex->buffer.domains; @@ -974,8 +974,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_alignment = plane0->buffer.bo_alignment; resource->flags = plane0->buffer.flags; resource->domains = plane0->buffer.domains; - resource->vram_usage = plane0->buffer.vram_usage; - resource->gart_usage = plane0->buffer.gart_usage; + resource->vram_usage_kb = plane0->buffer.vram_usage_kb; + resource->gart_usage_kb = plane0->buffer.gart_usage_kb; pb_reference(&resource->buf, plane0->buffer.buf); resource->gpu_address = plane0->buffer.gpu_address; @@ -992,9 +992,9 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, resource->bo_alignment = imported_buf->alignment; resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf); if (resource->domains & RADEON_DOMAIN_VRAM) - resource->vram_usage = resource->bo_size; + resource->vram_usage_kb = MAX2(1, resource->bo_size / 1024); else if (resource->domains & RADEON_DOMAIN_GTT) - resource->gart_usage = resource->bo_size; + resource->gart_usage_kb = MAX2(1, resource->bo_size / 1024); if (sscreen->ws->buffer_get_flags) resource->flags = sscreen->ws->buffer_get_flags(resource->buf); } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index c22947b0779..4190eadc968 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -505,9 +505,9 @@ amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs *ac cs->buffer_indices_hashlist[hash] = idx; if (bo->base.placement & RADEON_DOMAIN_VRAM) - rcs->used_vram += bo->base.size; + rcs->used_vram_kb += bo->base.size / 1024; else if (bo->base.placement & RADEON_DOMAIN_GTT) - rcs->used_gart += bo->base.size; + rcs->used_gart_kb += bo->base.size / 1024; return idx; } @@ -610,9 +610,9 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs, list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) { if (bo->base.placement & RADEON_DOMAIN_VRAM) - rcs->used_vram += backing->bo->base.size; + rcs->used_vram_kb += backing->bo->base.size / 1024; else if (bo->base.placement & RADEON_DOMAIN_GTT) - rcs->used_gart += backing->bo->base.size; + rcs->used_gart_kb += backing->bo->base.size / 1024; } simple_mtx_unlock(&bo->lock); @@ -1878,8 +1878,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, RADEON_PRIO_IB1); } - rcs->used_gart = 0; - rcs->used_vram = 0; + rcs->used_gart_kb = 0; + rcs->used_vram_kb = 0; if (cs->ring_type == RING_GFX) ws->num_gfx_IBs++; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index c1c307283e6..31aded6db9c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -391,9 +391,9 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs, cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority; if (added_domains & RADEON_DOMAIN_VRAM) - rcs->used_vram += bo->base.size; + rcs->used_vram_kb += bo->base.size / 1024; else if (added_domains & RADEON_DOMAIN_GTT) - rcs->used_gart += bo->base.size; + rcs->used_gart_kb += bo->base.size / 1024; return index; } @@ -410,8 +410,8 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); bool status = - rcs->used_gart < cs->ws->info.gart_size * 0.8 && - rcs->used_vram < cs->ws->info.vram_size * 0.8; + rcs->used_gart_kb < cs->ws->info.gart_size_kb * 0.8 && + rcs->used_vram_kb < cs->ws->info.vram_size_kb * 0.8; if (status) { cs->csc->num_validated_relocs = cs->csc->num_relocs; @@ -433,8 +433,8 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs) RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); } else { radeon_cs_context_cleanup(cs->csc); - rcs->used_vram = 0; - rcs->used_gart = 0; + rcs->used_vram_kb = 0; + rcs->used_gart_kb = 0; assert(rcs->current.cdw == 0); if (rcs->current.cdw != 0) { @@ -712,8 +712,8 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, /* Prepare a new CS. */ rcs->current.buf = cs->csc->buf; rcs->current.cdw = 0; - rcs->used_vram = 0; - rcs->used_gart = 0; + rcs->used_vram_kb = 0; + rcs->used_gart_kb = 0; if (cs->ring_type == RING_GFX) cs->ws->num_gfx_IBs++; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 6b306a6ce7b..8f194944088 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -368,6 +368,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) if (ws->info.drm_minor < 49) ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); + ws->info.gart_size_kb = DIV_ROUND_UP(ws->info.gart_size, 1024); + ws->info.vram_size_kb = DIV_ROUND_UP(ws->info.vram_size, 1024); + /* Radeon allocates all buffers contiguously, which makes large allocations * unlikely to succeed. */ if (ws->info.has_dedicated_vram)