ac,radeonsi: track memory usage in KB to reduce types from uint64 to uint32
The motivation is to decrease the time spent in radeon_cs_memory_below_limit.

Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8794>
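To make the change concrete, here is a small standalone C sketch (not code from the patch; the EXAMPLE_MAX2 helper and all sizes are illustrative) showing why 32-bit kilobyte counters are sufficient and how byte sizes map onto KB-based accounting:

/* Standalone sketch, not code from the patch: the MAX2-style helper and all
 * sizes below are illustrative. It shows why tracking usage in KB lets the
 * counters shrink from uint64_t to uint32_t. */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   /* A uint32_t counter of kilobytes covers 2^32 KB = 4 TiB, far more than
    * any VRAM or GART heap, so 32-bit compares are safe. */
   printf("32-bit KB counter covers %llu GiB\n",
          (unsigned long long)(UINT32_MAX / (1024 * 1024)));

   /* Buffers smaller than 1 KB must still count as non-zero usage, hence the
    * MAX2(1, size / 1024) pattern used when converting byte sizes. */
   uint64_t bo_size = 512; /* bytes */
   uint32_t usage_kb = EXAMPLE_MAX2(1u, (uint32_t)(bo_size / 1024));
   printf("512-byte buffer accounted as %u KB\n", usage_kb);
   return 0;
}

The narrower type is what makes the hot comparisons in radeon_cs_memory_below_limit cheaper; the KB unit is what makes the narrower type safe.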
@@ -504,6 +504,9 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
       info->vram_vis_size = vram_vis.heap_size;
    }
 
+   info->gart_size_kb = DIV_ROUND_UP(info->gart_size, 1024);
+   info->vram_size_kb = DIV_ROUND_UP(info->vram_size, 1024);
+
    /* Add some margin of error, though this shouldn't be needed in theory. */
    info->all_vram_visible = info->vram_size * 0.9 < info->vram_vis_size;
 
@@ -87,6 +87,8 @@ struct radeon_info {
    /* Memory info. */
    uint32_t pte_fragment_size;
    uint32_t gart_page_size;
+   uint32_t gart_size_kb;
+   uint32_t vram_size_kb;
    uint64_t gart_size;
    uint64_t vram_size;
    uint64_t vram_vis_size;
@@ -45,8 +45,8 @@ radeon_cs_memory_below_limit(struct r600_common_screen *screen,
                              struct radeon_cmdbuf *cs,
                              uint64_t vram, uint64_t gtt)
 {
-   vram += cs->used_vram;
-   gtt += cs->used_gart;
+   vram += (uint64_t)cs->used_vram_kb * 1024;
+   gtt += (uint64_t)cs->used_gart_kb * 1024;
 
    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > screen->info.vram_size)
@@ -228,8 +228,8 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                          struct r600_resource *dst, struct r600_resource *src)
 {
-   uint64_t vram = ctx->dma.cs.used_vram;
-   uint64_t gtt = ctx->dma.cs.used_gart;
+   uint64_t vram = (uint64_t)ctx->dma.cs.used_vram_kb * 1024;
+   uint64_t gtt = (uint64_t)ctx->dma.cs.used_gart_kb * 1024;
 
    if (dst) {
       vram += dst->vram_usage;
@@ -264,7 +264,7 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
     */
    num_dw++; /* for emit_wait_idle below */
    if (!ctx->ws->cs_check_space(&ctx->dma.cs, num_dw, false) ||
-       ctx->dma.cs.used_vram + ctx->dma.cs.used_gart > 64 * 1024 * 1024 ||
+       ctx->dma.cs.used_vram_kb + ctx->dma.cs.used_gart_kb > 64 * 1024 ||
        !radeon_cs_memory_below_limit(ctx->screen, &ctx->dma.cs, vram, gtt)) {
       ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
       assert((num_dw + ctx->dma.cs.current.cdw) <= ctx->dma.cs.current.max_dw);
@@ -203,8 +203,8 @@ struct radeon_cmdbuf {
    unsigned prev_dw; /* Total number of dwords in previous chunks. */
 
    /* Memory usage of the buffer list. These are always 0 for preamble IBs. */
-   uint64_t used_vram;
-   uint64_t used_gart;
+   uint32_t used_vram_kb;
+   uint32_t used_gart_kb;
    uint64_t gpu_address;
 
    /* Private winsys data. */
@@ -145,13 +145,13 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
       res->flags |= RADEON_FLAG_UNCACHED;
 
    /* Set expected VRAM and GART usage for the buffer. */
-   res->vram_usage = 0;
-   res->gart_usage = 0;
+   res->vram_usage_kb = 0;
+   res->gart_usage_kb = 0;
    res->max_forced_staging_uploads = 0;
    res->b.max_forced_staging_uploads = 0;
 
    if (res->domains & RADEON_DOMAIN_VRAM) {
-      res->vram_usage = size;
+      res->vram_usage_kb = MAX2(1, size / 1024);
 
       if (!sscreen->info.smart_access_memory) {
         /* We don't want to evict buffers from VRAM by mapping them for CPU access,
@@ -168,7 +168,7 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
            sscreen->info.has_dedicated_vram && size >= min_size ? max_staging_uploads : 0;
       }
    } else if (res->domains & RADEON_DOMAIN_GTT) {
-      res->gart_usage = size;
+      res->gart_usage_kb = MAX2(1, size / 1024);
    }
 }
 
@@ -278,8 +278,8 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *d
    sdst->max_forced_staging_uploads = ssrc->max_forced_staging_uploads;
    sdst->flags = ssrc->flags;
 
-   assert(sdst->vram_usage == ssrc->vram_usage);
-   assert(sdst->gart_usage == ssrc->gart_usage);
+   assert(sdst->vram_usage_kb == ssrc->vram_usage_kb);
+   assert(sdst->gart_usage_kb == ssrc->gart_usage_kb);
    assert(sdst->bo_size == ssrc->bo_size);
    assert(sdst->bo_alignment == ssrc->bo_alignment);
    assert(sdst->domains == ssrc->domains);
@@ -647,8 +647,8 @@ static struct pipe_resource *si_buffer_from_user_memory(struct pipe_screen *scre
    }
 
    buf->gpu_address = ws->buffer_get_virtual_address(buf->buf);
-   buf->vram_usage = 0;
-   buf->gart_usage = templ->width0;
+   buf->vram_usage_kb = 0;
+   buf->gart_usage_kb = templ->width0 / 1024;
 
    return &buf->b.b;
 }
@@ -671,9 +671,9 @@ struct pipe_resource *si_buffer_from_winsys_buffer(struct pipe_screen *screen,
    res->domains = sscreen->ws->buffer_get_initial_domain(res->buf);
 
    if (res->domains & RADEON_DOMAIN_VRAM)
-      res->vram_usage = res->bo_size;
+      res->vram_usage_kb = MAX2(1, res->bo_size / 1024);
    else if (res->domains & RADEON_DOMAIN_GTT)
-      res->gart_usage = res->bo_size;
+      res->gart_usage_kb = MAX2(1, res->bo_size / 1024);
 
    if (sscreen->ws->buffer_get_flags)
      res->flags = sscreen->ws->buffer_get_flags(res->buf);
@@ -833,8 +833,8 @@ static void si_query_memory_info(struct pipe_screen *screen, struct pipe_memory_
    struct radeon_winsys *ws = sscreen->ws;
    unsigned vram_usage, gtt_usage;
 
-   info->total_device_memory = sscreen->info.vram_size / 1024;
-   info->total_staging_memory = sscreen->info.gart_size / 1024;
+   info->total_device_memory = sscreen->info.vram_size_kb;
+   info->total_staging_memory = sscreen->info.gart_size_kb;
 
    /* The real TTM memory usage is somewhat random, because:
     *
@@ -38,14 +38,14 @@ void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws)
    * that have been added (cs_add_buffer) and two counters in the pipe
    * driver for those that haven't been added yet.
    */
-   if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram, ctx->gtt))) {
-      ctx->gtt = 0;
-      ctx->vram = 0;
+   if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, &ctx->gfx_cs, ctx->vram_kb, ctx->gtt_kb))) {
+      ctx->gtt_kb = 0;
+      ctx->vram_kb = 0;
       si_flush_gfx_cs(ctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
       return;
    }
-   ctx->gtt = 0;
-   ctx->vram = 0;
+   ctx->gtt_kb = 0;
+   ctx->vram_kb = 0;
 
    unsigned need_dwords = si_get_minimum_num_gfx_cs_dwords(ctx, num_draws);
    if (!ctx->ws->cs_check_space(cs, need_dwords, false))
@@ -284,8 +284,8 @@ struct si_resource {
    struct pb_buffer *buf;
    uint64_t gpu_address;
    /* Memory usage if the buffer placement is optimal. */
-   uint64_t vram_usage;
-   uint64_t gart_usage;
+   uint32_t vram_usage_kb;
+   uint32_t gart_usage_kb;
 
    /* Resource properties. */
    uint64_t bo_size;
@@ -977,8 +977,8 @@ struct si_context {
    unsigned last_num_draw_calls;
    unsigned flags; /* flush flags */
    /* Current unaccounted memory usage. */
-   uint64_t vram;
-   uint64_t gtt;
+   uint32_t vram_kb;
+   uint32_t gtt_kb;
 
    /* Compute-based primitive discard. */
    unsigned prim_discard_vertex_count_threshold;
@@ -1635,8 +1635,8 @@ static inline void si_context_add_resource_size(struct si_context *sctx, struct
 {
    if (r) {
       /* Add memory usage for need_gfx_cs_space */
-      sctx->vram += si_resource(r)->vram_usage;
-      sctx->gtt += si_resource(r)->gart_usage;
+      sctx->vram_kb += si_resource(r)->vram_usage_kb;
+      sctx->gtt_kb += si_resource(r)->gart_usage_kb;
    }
 }
 
@@ -1864,17 +1864,17 @@ static inline bool util_rast_prim_is_triangles(unsigned prim)
  * \param gtt GTT memory size not added to the buffer list yet
  */
 static inline bool radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_cmdbuf *cs,
-                                                uint64_t vram, uint64_t gtt)
+                                                uint32_t vram_kb, uint32_t gtt_kb)
 {
-   vram += cs->used_vram;
-   gtt += cs->used_gart;
+   vram_kb += cs->used_vram_kb;
+   gtt_kb += cs->used_gart_kb;
 
    /* Anything that goes above the VRAM size should go to GTT. */
-   if (vram > screen->info.vram_size)
-      gtt += vram - screen->info.vram_size;
+   if (vram_kb > screen->info.vram_size_kb)
+      gtt_kb += vram_kb - screen->info.vram_size_kb;
 
-   /* Now we just need to check if we have enough GTT. */
-   return gtt < screen->info.gart_size * 0.7;
+   /* Now we just need to check if we have enough GTT (the limit is 75% of max). */
+   return gtt_kb < screen->info.gart_size_kb / 4 * 3;
 }
 
 /**
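One detail worth calling out in the hunk above: the old check multiplied a byte count by the floating-point constant 0.7, while the new one raises the threshold to 75% and expresses it in pure 32-bit integer math. A minimal standalone sketch of that pattern, with a made-up GART size (the helper name and values are illustrative, not from the patch):

#include <stdint.h>
#include <assert.h>

/* Illustrative only: the integer 75% threshold used by the patched
 * radeon_cs_memory_below_limit(), applied to an invented GART size. */
static int below_gtt_limit(uint32_t gtt_kb, uint32_t gart_size_kb)
{
   /* Dividing by 4 before multiplying by 3 keeps the intermediate value
    * within uint32_t even for the largest possible gart_size_kb. */
   return gtt_kb < gart_size_kb / 4 * 3;
}

int main(void)
{
   uint32_t gart_size_kb = 16u * 1024 * 1024;                  /* 16 GiB GART in KB */
   assert(below_gtt_limit(11u * 1024 * 1024, gart_size_kb));   /* 11 GiB: below the 12 GiB limit */
   assert(!below_gtt_limit(13u * 1024 * 1024, gart_size_kb));  /* 13 GiB: over the limit */
   return 0;
}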
@@ -1918,8 +1918,8 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc
                                                            bool check_mem)
 {
    if (check_mem &&
-       !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram + bo->vram_usage,
-                                     sctx->gtt + bo->gart_usage))
+       !radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram_kb + bo->vram_usage_kb,
+                                     sctx->gtt_kb + bo->gart_usage_kb))
       si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
 
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage, priority);
@@ -432,8 +432,8 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex
    tex->buffer.b.b.bind = templ.bind;
    pb_reference(&tex->buffer.buf, new_tex->buffer.buf);
    tex->buffer.gpu_address = new_tex->buffer.gpu_address;
-   tex->buffer.vram_usage = new_tex->buffer.vram_usage;
-   tex->buffer.gart_usage = new_tex->buffer.gart_usage;
+   tex->buffer.vram_usage_kb = new_tex->buffer.vram_usage_kb;
+   tex->buffer.gart_usage_kb = new_tex->buffer.gart_usage_kb;
    tex->buffer.bo_size = new_tex->buffer.bo_size;
    tex->buffer.bo_alignment = new_tex->buffer.bo_alignment;
    tex->buffer.domains = new_tex->buffer.domains;
@@ -974,8 +974,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
       resource->bo_alignment = plane0->buffer.bo_alignment;
       resource->flags = plane0->buffer.flags;
       resource->domains = plane0->buffer.domains;
-      resource->vram_usage = plane0->buffer.vram_usage;
-      resource->gart_usage = plane0->buffer.gart_usage;
+      resource->vram_usage_kb = plane0->buffer.vram_usage_kb;
+      resource->gart_usage_kb = plane0->buffer.gart_usage_kb;
 
       pb_reference(&resource->buf, plane0->buffer.buf);
       resource->gpu_address = plane0->buffer.gpu_address;
@@ -992,9 +992,9 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
       resource->bo_alignment = imported_buf->alignment;
       resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
       if (resource->domains & RADEON_DOMAIN_VRAM)
-         resource->vram_usage = resource->bo_size;
+         resource->vram_usage_kb = MAX2(1, resource->bo_size / 1024);
       else if (resource->domains & RADEON_DOMAIN_GTT)
-         resource->gart_usage = resource->bo_size;
+         resource->gart_usage_kb = MAX2(1, resource->bo_size / 1024);
       if (sscreen->ws->buffer_get_flags)
          resource->flags = sscreen->ws->buffer_get_flags(resource->buf);
    }
@@ -505,9 +505,9 @@ amdgpu_lookup_or_add_real_buffer(struct radeon_cmdbuf *rcs, struct amdgpu_cs *ac
    cs->buffer_indices_hashlist[hash] = idx;
 
    if (bo->base.placement & RADEON_DOMAIN_VRAM)
-      rcs->used_vram += bo->base.size;
+      rcs->used_vram_kb += bo->base.size / 1024;
    else if (bo->base.placement & RADEON_DOMAIN_GTT)
-      rcs->used_gart += bo->base.size;
+      rcs->used_gart_kb += bo->base.size / 1024;
 
    return idx;
 }
@@ -610,9 +610,9 @@ static int amdgpu_lookup_or_add_sparse_buffer(struct radeon_cmdbuf *rcs,
 
    list_for_each_entry(struct amdgpu_sparse_backing, backing, &bo->u.sparse.backing, list) {
       if (bo->base.placement & RADEON_DOMAIN_VRAM)
-         rcs->used_vram += backing->bo->base.size;
+         rcs->used_vram_kb += backing->bo->base.size / 1024;
       else if (bo->base.placement & RADEON_DOMAIN_GTT)
-         rcs->used_gart += backing->bo->base.size;
+         rcs->used_gart_kb += backing->bo->base.size / 1024;
    }
 
    simple_mtx_unlock(&bo->lock);
@@ -1878,8 +1878,8 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
                                 RADEON_PRIO_IB1);
    }
 
-   rcs->used_gart = 0;
-   rcs->used_vram = 0;
+   rcs->used_gart_kb = 0;
+   rcs->used_vram_kb = 0;
 
    if (cs->ring_type == RING_GFX)
       ws->num_gfx_IBs++;
@@ -391,9 +391,9 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
    cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
 
    if (added_domains & RADEON_DOMAIN_VRAM)
-      rcs->used_vram += bo->base.size;
+      rcs->used_vram_kb += bo->base.size / 1024;
    else if (added_domains & RADEON_DOMAIN_GTT)
-      rcs->used_gart += bo->base.size;
+      rcs->used_gart_kb += bo->base.size / 1024;
 
    return index;
 }
@@ -410,8 +410,8 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs)
 {
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    bool status =
-      rcs->used_gart < cs->ws->info.gart_size * 0.8 &&
-      rcs->used_vram < cs->ws->info.vram_size * 0.8;
+      rcs->used_gart_kb < cs->ws->info.gart_size_kb * 0.8 &&
+      rcs->used_vram_kb < cs->ws->info.vram_size_kb * 0.8;
 
    if (status) {
       cs->csc->num_validated_relocs = cs->csc->num_relocs;
@@ -433,8 +433,8 @@ static bool radeon_drm_cs_validate(struct radeon_cmdbuf *rcs)
                           RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
    } else {
       radeon_cs_context_cleanup(cs->csc);
-      rcs->used_vram = 0;
-      rcs->used_gart = 0;
+      rcs->used_vram_kb = 0;
+      rcs->used_gart_kb = 0;
 
       assert(rcs->current.cdw == 0);
       if (rcs->current.cdw != 0) {
@@ -712,8 +712,8 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
    /* Prepare a new CS. */
    rcs->current.buf = cs->csc->buf;
    rcs->current.cdw = 0;
-   rcs->used_vram = 0;
-   rcs->used_gart = 0;
+   rcs->used_vram_kb = 0;
+   rcs->used_gart_kb = 0;
 
    if (cs->ring_type == RING_GFX)
       cs->ws->num_gfx_IBs++;
@@ -368,6 +368,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
    if (ws->info.drm_minor < 49)
       ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
 
+   ws->info.gart_size_kb = DIV_ROUND_UP(ws->info.gart_size, 1024);
+   ws->info.vram_size_kb = DIV_ROUND_UP(ws->info.vram_size, 1024);
+
    /* Radeon allocates all buffers contiguously, which makes large allocations
     * unlikely to succeed. */
    if (ws->info.has_dedicated_vram)