st/mesa: throttle texture uploads if their memory usage goes beyond a limit
This prevents radeonsi from running out of memory. It also increases texture upload performance by being nice to the kernel memory manager.
This commit is contained in:
@@ -221,3 +221,123 @@ util_wait_for_idle(struct pipe_context *ctx)
|
|||||||
ctx->flush(ctx, &fence, 0);
|
ctx->flush(ctx, &fence, 0);
|
||||||
ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
|
ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
|
||||||
|
{
|
||||||
|
t->max_mem_usage = max_mem_usage;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
|
||||||
|
screen->fence_reference(screen, &t->ring[i].fence, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t
|
||||||
|
util_get_throttle_total_memory_usage(struct util_throttle *t)
|
||||||
|
{
|
||||||
|
uint64_t total_usage = 0;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
|
||||||
|
total_usage += t->ring[i].mem_usage;
|
||||||
|
return total_usage;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void util_dump_throttle_ring(struct util_throttle *t)
|
||||||
|
{
|
||||||
|
printf("Throttle:\n");
|
||||||
|
for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
|
||||||
|
printf(" ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
|
||||||
|
i, t->ring[i].fence ? "yes" : " no",
|
||||||
|
t->ring[i].mem_usage,
|
||||||
|
t->flush_index == i ? " [flush]" : "",
|
||||||
|
t->wait_index == i ? " [wait]" : "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notify util_throttle that the next operation allocates memory.
|
||||||
|
* util_throttle tracks memory usage and waits for fences until its tracked
|
||||||
|
* memory usage decreases.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* util_throttle_memory_usage(..., w*h*d*Bpp);
|
||||||
|
* TexSubImage(..., w, h, d, ...);
|
||||||
|
*
|
||||||
|
* This means that TexSubImage can't allocate more memory its maximum limit
|
||||||
|
* set during initialization.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
util_throttle_memory_usage(struct pipe_context *pipe,
|
||||||
|
struct util_throttle *t, uint64_t memory_size)
|
||||||
|
{
|
||||||
|
(void)util_dump_throttle_ring; /* silence warning */
|
||||||
|
|
||||||
|
if (!t->max_mem_usage)
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct pipe_screen *screen = pipe->screen;
|
||||||
|
struct pipe_fence_handle **fence = NULL;
|
||||||
|
unsigned ring_size = ARRAY_SIZE(t->ring);
|
||||||
|
uint64_t total = util_get_throttle_total_memory_usage(t);
|
||||||
|
|
||||||
|
/* If there is not enough memory, walk the list of fences and find
|
||||||
|
* the latest one that we need to wait for.
|
||||||
|
*/
|
||||||
|
while (t->wait_index != t->flush_index &&
|
||||||
|
total && total + memory_size > t->max_mem_usage) {
|
||||||
|
assert(t->ring[t->wait_index].fence);
|
||||||
|
|
||||||
|
/* Release an older fence if we need to wait for a newer one. */
|
||||||
|
if (fence)
|
||||||
|
screen->fence_reference(screen, fence, NULL);
|
||||||
|
|
||||||
|
fence = &t->ring[t->wait_index].fence;
|
||||||
|
t->ring[t->wait_index].mem_usage = 0;
|
||||||
|
t->wait_index = (t->wait_index + 1) % ring_size;
|
||||||
|
|
||||||
|
total = util_get_throttle_total_memory_usage(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for the fence to decrease memory usage. */
|
||||||
|
if (fence) {
|
||||||
|
screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
|
||||||
|
screen->fence_reference(screen, fence, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Flush and get a fence if we've exhausted memory usage for the current
|
||||||
|
* slot.
|
||||||
|
*/
|
||||||
|
if (t->ring[t->flush_index].mem_usage &&
|
||||||
|
t->ring[t->flush_index].mem_usage + memory_size >
|
||||||
|
t->max_mem_usage / (ring_size / 2)) {
|
||||||
|
struct pipe_fence_handle **fence =
|
||||||
|
&t->ring[t->flush_index].fence;
|
||||||
|
|
||||||
|
/* Expect that the current flush slot doesn't have a fence yet. */
|
||||||
|
assert(!*fence);
|
||||||
|
|
||||||
|
pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
|
||||||
|
t->flush_index = (t->flush_index + 1) % ring_size;
|
||||||
|
|
||||||
|
/* Vacate the next slot if it's occupied. This should be rare. */
|
||||||
|
if (t->flush_index == t->wait_index) {
|
||||||
|
struct pipe_fence_handle **fence =
|
||||||
|
&t->ring[t->wait_index].fence;
|
||||||
|
|
||||||
|
t->ring[t->wait_index].mem_usage = 0;
|
||||||
|
t->wait_index = (t->wait_index + 1) % ring_size;
|
||||||
|
|
||||||
|
assert(*fence);
|
||||||
|
screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
|
||||||
|
screen->fence_reference(screen, fence, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(!t->ring[t->flush_index].mem_usage);
|
||||||
|
assert(!t->ring[t->flush_index].fence);
|
||||||
|
}
|
||||||
|
|
||||||
|
t->ring[t->flush_index].mem_usage += memory_size;
|
||||||
|
}
|
||||||
|
@@ -64,6 +64,23 @@ util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
|
|||||||
void
|
void
|
||||||
util_wait_for_idle(struct pipe_context *ctx);
|
util_wait_for_idle(struct pipe_context *ctx);
|
||||||
|
|
||||||
|
/* A utility for throttling execution based on memory usage. */
struct util_throttle {
   /* Ring of slots, each pairing a fence with the memory usage that was
    * accounted to the slot.
    */
   struct {
      struct pipe_fence_handle *fence;
      uint64_t mem_usage;
   } ring[10];

   /* Slot that new memory usage is added to; it receives a fence and the
    * index advances when the slot is flushed.
    */
   unsigned flush_index;

   /* Oldest slot whose fence is the next one to be retired. */
   unsigned wait_index;

   /* Limit for the tracked memory usage; 0 disables throttling. */
   uint64_t max_mem_usage;
};

/* Set the memory usage limit of a throttle. */
void util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage);

/* Release all fences held by the throttle ring. */
void util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t);

/* Account for an upcoming allocation of memory_size, waiting on older
 * fences and flushing as needed to stay within the limit.
 */
void util_throttle_memory_usage(struct pipe_context *pipe,
                                struct util_throttle *t, uint64_t memory_size);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1392,6 +1392,7 @@ try_pbo_upload(struct gl_context *ctx, GLuint dims,
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
||||||
struct gl_texture_image *texImage,
|
struct gl_texture_image *texImage,
|
||||||
@@ -1417,6 +1418,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
|||||||
GLubyte *map;
|
GLubyte *map;
|
||||||
unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
|
unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
|
||||||
unsigned dst_level = 0;
|
unsigned dst_level = 0;
|
||||||
|
bool throttled = false;
|
||||||
|
|
||||||
st_flush_bitmap_cache(st);
|
st_flush_bitmap_cache(st);
|
||||||
st_invalidate_readpix_cache(st);
|
st_invalidate_readpix_cache(st);
|
||||||
@@ -1456,6 +1458,10 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
|||||||
layer_stride = stride;
|
layer_stride = stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
util_throttle_memory_usage(pipe, &st->throttle,
|
||||||
|
width * height * depth *
|
||||||
|
util_format_get_blocksize(dst->format));
|
||||||
|
|
||||||
u_box_3d(xoffset, yoffset, zoffset + dstz, width, height, depth, &box);
|
u_box_3d(xoffset, yoffset, zoffset + dstz, width, height, depth, &box);
|
||||||
pipe->texture_subdata(pipe, dst, dst_level, 0,
|
pipe->texture_subdata(pipe, dst, dst_level, 0,
|
||||||
&box, data, stride, layer_stride);
|
&box, data, stride, layer_stride);
|
||||||
@@ -1561,6 +1567,11 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
|||||||
goto fallback;
|
goto fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
util_throttle_memory_usage(pipe, &st->throttle,
|
||||||
|
width * height * depth *
|
||||||
|
util_format_get_blocksize(src_templ.format));
|
||||||
|
throttled = true;
|
||||||
|
|
||||||
/* Create the source texture. */
|
/* Create the source texture. */
|
||||||
src = screen->resource_create(screen, &src_templ);
|
src = screen->resource_create(screen, &src_templ);
|
||||||
if (!src) {
|
if (!src) {
|
||||||
@@ -1651,6 +1662,11 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
fallback:
|
fallback:
|
||||||
|
if (!throttled) {
|
||||||
|
util_throttle_memory_usage(pipe, &st->throttle,
|
||||||
|
width * height * depth *
|
||||||
|
_mesa_get_format_bytes(texImage->TexFormat));
|
||||||
|
}
|
||||||
_mesa_store_texsubimage(ctx, dims, texImage, xoffset, yoffset, zoffset,
|
_mesa_store_texsubimage(ctx, dims, texImage, xoffset, yoffset, zoffset,
|
||||||
width, height, depth, format, type, pixels,
|
width, height, depth, format, type, pixels,
|
||||||
unpack);
|
unpack);
|
||||||
|
@@ -275,6 +275,7 @@ st_destroy_context_priv(struct st_context *st, bool destroy_pipe)
|
|||||||
|
|
||||||
/* free glReadPixels cache data */
|
/* free glReadPixels cache data */
|
||||||
st_invalidate_readpix_cache(st);
|
st_invalidate_readpix_cache(st);
|
||||||
|
util_throttle_deinit(st->pipe->screen, &st->throttle);
|
||||||
|
|
||||||
cso_destroy_context(st->cso_context);
|
cso_destroy_context(st->cso_context);
|
||||||
|
|
||||||
@@ -467,6 +468,10 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
|
|||||||
PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS)
|
PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS)
|
||||||
? true : false;
|
? true : false;
|
||||||
|
|
||||||
|
util_throttle_init(&st->throttle,
|
||||||
|
screen->get_param(screen,
|
||||||
|
PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET));
|
||||||
|
|
||||||
/* GL limits and extensions */
|
/* GL limits and extensions */
|
||||||
st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API);
|
st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API);
|
||||||
st_init_extensions(pipe->screen, &ctx->Const,
|
st_init_extensions(pipe->screen, &ctx->Const,
|
||||||
|
@@ -32,6 +32,7 @@
|
|||||||
#include "state_tracker/st_api.h"
|
#include "state_tracker/st_api.h"
|
||||||
#include "main/fbobject.h"
|
#include "main/fbobject.h"
|
||||||
#include "state_tracker/st_atom.h"
|
#include "state_tracker/st_atom.h"
|
||||||
|
#include "util/u_helpers.h"
|
||||||
#include "util/u_inlines.h"
|
#include "util/u_inlines.h"
|
||||||
#include "util/list.h"
|
#include "util/list.h"
|
||||||
#include "vbo/vbo.h"
|
#include "vbo/vbo.h"
|
||||||
@@ -302,6 +303,12 @@ struct st_context
|
|||||||
|
|
||||||
/* Winsys buffers */
|
/* Winsys buffers */
|
||||||
struct list_head winsys_buffers;
|
struct list_head winsys_buffers;
|
||||||
|
|
||||||
|
/* Throttling for texture uploads and similar operations to limit memory
|
||||||
|
* usage by limiting the number of in-flight operations based on
|
||||||
|
* the estimated allocated size needed to execute those operations.
|
||||||
|
*/
|
||||||
|
struct util_throttle throttle;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user