From cd0ef9b3f420bc0ee70a16f720786b174e78bb78 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 18 Feb 2022 10:28:58 +0100 Subject: [PATCH] gallium/u_threaded: late alloc cpu_storage Instead of allocating cpu_storage in threaded_resource_init, defer the allocation to first use (in tc_buffer_map). This avoids needless memory allocation if tc_buffer_disable_cpu_storage is called before tc_buffer_map. A new allow_cpu_storage field in threaded_resource serves as the "can cpu_storage be used" flag; the alignment is now read from tc->map_buffer_alignment when the storage is allocated. Part-of: --- src/gallium/auxiliary/driver_noop/noop_pipe.c | 2 +- .../auxiliary/util/u_threaded_context.c | 42 +++++++++++-------- .../auxiliary/util/u_threaded_context.h | 7 ++-- src/gallium/drivers/crocus/crocus_resource.c | 2 +- src/gallium/drivers/d3d12/d3d12_resource.cpp | 6 +-- .../drivers/freedreno/freedreno_resource.c | 2 +- src/gallium/drivers/iris/iris_resource.c | 2 +- src/gallium/drivers/r600/r600_buffer_common.c | 2 +- src/gallium/drivers/radeonsi/si_buffer.c | 2 +- src/gallium/drivers/zink/zink_resource.c | 2 +- 10 files changed, 39 insertions(+), 30 deletions(-) diff --git a/src/gallium/auxiliary/driver_noop/noop_pipe.c b/src/gallium/auxiliary/driver_noop/noop_pipe.c index a1aca264de3..d35d3e83ac0 100644 --- a/src/gallium/auxiliary/driver_noop/noop_pipe.c +++ b/src/gallium/auxiliary/driver_noop/noop_pipe.c @@ -120,7 +120,7 @@ static struct pipe_resource *noop_resource_create(struct pipe_screen *screen, FREE(nresource); return NULL; } - threaded_resource_init(&nresource->b.b, false, 0); + threaded_resource_init(&nresource->b.b, false); return &nresource->b.b; } diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 8522d7e84e2..ee4dba93537 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -671,8 +671,7 @@ tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf, * allow_cpu_storage should be false for user memory 
and imported buffers. */ void -threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage, - unsigned map_buffer_alignment) +threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage) { struct threaded_resource *tres = threaded_resource(res); @@ -692,7 +691,9 @@ threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage, /* We need buffer invalidation and buffer busyness tracking for the CPU * storage, which aren't supported with pipe_vertex_state. */ !(res->bind & PIPE_BIND_VERTEX_STATE)) - tres->cpu_storage = align_malloc(res->width0, map_buffer_alignment); + tres->allow_cpu_storage = true; + else + tres->allow_cpu_storage = false; } void @@ -2173,24 +2174,31 @@ tc_buffer_map(struct pipe_context *_pipe, usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width); /* If the CPU storage is enabled, return it directly. */ - if (tres->cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { + if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) { /* We can't let resource_copy_region disable the CPU storage. 
*/ assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY)); - struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers); - ttrans->b.resource = resource; - ttrans->b.level = 0; - ttrans->b.usage = usage; - ttrans->b.box = *box; - ttrans->b.stride = 0; - ttrans->b.layer_stride = 0; - ttrans->b.offset = 0; - ttrans->staging = NULL; - ttrans->valid_buffer_range = &tres->valid_buffer_range; - ttrans->cpu_storage_mapped = true; - *transfer = &ttrans->b; + if (!tres->cpu_storage) + tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment); - return (uint8_t*)tres->cpu_storage + box->x; + if (tres->cpu_storage) { + struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers); + ttrans->b.resource = resource; + ttrans->b.level = 0; + ttrans->b.usage = usage; + ttrans->b.box = *box; + ttrans->b.stride = 0; + ttrans->b.layer_stride = 0; + ttrans->b.offset = 0; + ttrans->staging = NULL; + ttrans->valid_buffer_range = &tres->valid_buffer_range; + ttrans->cpu_storage_mapped = true; + *transfer = &ttrans->b; + + return (uint8_t*)tres->cpu_storage + box->x; + } else { + tres->allow_cpu_storage = false; + } } /* Do a staging transfer within the threaded context. The driver should diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 20bc533503f..3b7ea7a2677 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -344,8 +344,9 @@ struct threaded_resource { /* Drivers are required to update this for shared resources and user * pointers. */ - bool is_shared; + bool is_shared; bool is_user_ptr; + bool allow_cpu_storage; /* Unique buffer ID. Drivers must set it to non-zero for buffers and it must * be unique. Textures must set 0. Low bits are used as a hash of the ID. 
@@ -521,8 +522,7 @@ struct threaded_context { struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS]; }; -void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage, - unsigned map_buffer_alignment); +void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage); void threaded_resource_deinit(struct pipe_resource *res); struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe); void tc_driver_internal_flush_notify(struct threaded_context *tc); @@ -613,6 +613,7 @@ tc_buffer_disable_cpu_storage(struct pipe_resource *buf) align_free(tres->cpu_storage); tres->cpu_storage = NULL; } + tres->allow_cpu_storage = false; } static inline void diff --git a/src/gallium/drivers/crocus/crocus_resource.c b/src/gallium/drivers/crocus/crocus_resource.c index 3ee44e7713a..38bfcc3660e 100644 --- a/src/gallium/drivers/crocus/crocus_resource.c +++ b/src/gallium/drivers/crocus/crocus_resource.c @@ -365,7 +365,7 @@ crocus_alloc_resource(struct pipe_screen *pscreen, res->base.b.screen = pscreen; res->orig_screen = crocus_pscreen_ref(pscreen); pipe_reference_init(&res->base.b.reference, 1); - threaded_resource_init(&res->base.b, false, 0); + threaded_resource_init(&res->base.b, false); if (templ->target == PIPE_BUFFER) util_range_init(&res->valid_buffer_range); diff --git a/src/gallium/drivers/d3d12/d3d12_resource.cpp b/src/gallium/drivers/d3d12/d3d12_resource.cpp index b0a75760f7d..ad1af0ad170 100644 --- a/src/gallium/drivers/d3d12/d3d12_resource.cpp +++ b/src/gallium/drivers/d3d12/d3d12_resource.cpp @@ -297,7 +297,7 @@ convert_planar_resource(struct d3d12_resource *res) *plane_res = *res; d3d12_bo_reference(plane_res->bo); pipe_reference_init(&plane_res->base.b.reference, 1); - threaded_resource_init(&plane_res->base.b, false, 0); + threaded_resource_init(&plane_res->base.b, false); } plane_res->base.b.next = next; @@ -357,7 +357,7 @@ d3d12_resource_create(struct pipe_screen *pscreen, init_valid_range(res); 
threaded_resource_init(&res->base.b, templ->usage == PIPE_USAGE_DEFAULT && - templ->target == PIPE_BUFFER, 64); + templ->target == PIPE_BUFFER); memset(&res->bind_counts, 0, sizeof(d3d12_resource::bind_counts)); @@ -556,7 +556,7 @@ d3d12_resource_from_handle(struct pipe_screen *pscreen, } init_valid_range(res); - threaded_resource_init(&res->base.b, false, 0); + threaded_resource_init(&res->base.b, false); convert_planar_resource(res); return &res->base.b; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 458e168e8b4..e1ba6b21c89 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -1109,7 +1109,7 @@ alloc_resource_struct(struct pipe_screen *pscreen, pipe_reference_init(&rsc->track->reference, 1); - threaded_resource_init(prsc, false, 0); + threaded_resource_init(prsc, false); if (tmpl->target == PIPE_BUFFER) rsc->b.buffer_id_unique = util_idalloc_mt_alloc(&screen->buffer_ids); diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index 84e28743b68..49c36041e00 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -479,7 +479,7 @@ iris_alloc_resource(struct pipe_screen *pscreen, res->base.b.screen = pscreen; res->orig_screen = iris_pscreen_ref(pscreen); pipe_reference_init(&res->base.b.reference, 1); - threaded_resource_init(&res->base.b, false, 0); + threaded_resource_init(&res->base.b, false); if (templ->target == PIPE_BUFFER) util_range_init(&res->valid_buffer_range); diff --git a/src/gallium/drivers/r600/r600_buffer_common.c b/src/gallium/drivers/r600/r600_buffer_common.c index eda8554b8d5..e00bb0e0edb 100644 --- a/src/gallium/drivers/r600/r600_buffer_common.c +++ b/src/gallium/drivers/r600/r600_buffer_common.c @@ -584,7 +584,7 @@ r600_alloc_buffer_struct(struct pipe_screen *screen, pipe_reference_init(&rbuffer->b.b.reference, 
1); rbuffer->b.b.screen = screen; - threaded_resource_init(&rbuffer->b.b, false, 0); + threaded_resource_init(&rbuffer->b.b, false); rbuffer->buf = NULL; rbuffer->bind_history = 0; diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index fe25ee76bb9..fbbee4e13dd 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -571,7 +571,7 @@ static struct si_resource *si_alloc_buffer_struct(struct pipe_screen *screen, pipe_reference_init(&buf->b.b.reference, 1); buf->b.b.screen = screen; - threaded_resource_init(&buf->b.b, allow_cpu_storage, SI_MAP_BUFFER_ALIGNMENT); + threaded_resource_init(&buf->b.b, allow_cpu_storage); buf->buf = NULL; buf->bind_history = 0; diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index a61d9fda2f0..2c39ef14e15 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -904,7 +904,7 @@ resource_create(struct pipe_screen *pscreen, res->base.b = *templ; - threaded_resource_init(&res->base.b, false, 0); + threaded_resource_init(&res->base.b, false); pipe_reference_init(&res->base.b.reference, 1); res->base.b.screen = pscreen;