gallium/u_threaded: flush batch when hitting mapping limit
tc_transfer_map maps buffers directly, but the unmap operation is executed in the driver thread. When an application does a lot of map/unmap operations without flushing, this increases the RAM used (and can eventually get the app killed by the OOM killer). This commit allows tc to keep track of how many bytes were mapped during the current batch. When this estimate exceeds a threshold, we flush the batch. See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2735 Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4508>
This commit is contained in:

committed by
Marge Bot

parent
35b3963928
commit
15cf7d170b
@@ -108,6 +108,7 @@ tc_batch_flush(struct threaded_context *tc)
|
||||
tc_assert(next->num_total_call_slots != 0);
|
||||
tc_batch_check(next);
|
||||
tc_debug_check(tc);
|
||||
tc->bytes_mapped_estimate = 0;
|
||||
p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
|
||||
|
||||
if (next->token) {
|
||||
@@ -204,6 +205,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char
|
||||
/* .. and execute unflushed calls directly. */
|
||||
if (next->num_total_call_slots) {
|
||||
p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
|
||||
tc->bytes_mapped_estimate = 0;
|
||||
tc_batch_execute(next, 0);
|
||||
synced = true;
|
||||
}
|
||||
@@ -1489,6 +1491,8 @@ tc_transfer_map(struct pipe_context *_pipe,
|
||||
usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" :
|
||||
usage & PIPE_TRANSFER_READ ? " read" : " ??");
|
||||
|
||||
tc->bytes_mapped_estimate += box->width;
|
||||
|
||||
return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource,
|
||||
level, usage, box, transfer);
|
||||
}
|
||||
@@ -1584,6 +1588,10 @@ tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
|
||||
pipe->transfer_unmap(pipe, payload->transfer);
|
||||
}
|
||||
|
||||
static void
|
||||
tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
|
||||
unsigned flags);
|
||||
|
||||
static void
|
||||
tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
|
||||
{
|
||||
@@ -1606,6 +1614,16 @@ tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
|
||||
}
|
||||
|
||||
tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
|
||||
|
||||
/* tc_transfer_map directly maps the buffers, but tc_transfer_unmap
|
||||
* defers the unmap operation to the batch execution.
|
||||
* bytes_mapped_estimate is an estimation of the map/unmap bytes delta
|
||||
* and if it goes over an optional limit the current batch is flushed,
|
||||
* to reclaim some RAM. */
|
||||
if (!ttrans->staging && tc->bytes_mapped_limit &&
|
||||
tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
|
||||
tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
|
||||
}
|
||||
}
|
||||
|
||||
struct tc_buffer_subdata {
|
||||
|
@@ -361,6 +361,12 @@ struct threaded_context {
|
||||
unsigned num_direct_slots;
|
||||
unsigned num_syncs;
|
||||
|
||||
/* Estimation of how much vram/gtt bytes are mmap'd in
|
||||
* the current tc_batch.
|
||||
*/
|
||||
uint64_t bytes_mapped_estimate;
|
||||
uint64_t bytes_mapped_limit;
|
||||
|
||||
struct util_queue queue;
|
||||
struct util_queue_fence *fence;
|
||||
|
||||
|
Reference in New Issue
Block a user