vc4: Add a userspace BO cache.

Since our kernel BOs require CMA allocation, and the use of them requires
new mmaps, it's pretty expensive and we should avoid it if possible.
Copying my original design for Intel, make a userspace cache that reuses
BOs that haven't been shared to other processes but frees BOs that have
sat in the cache for over a second.

Improves glxgears framerate on RPi by around 30%.
This commit is contained in:
Eric Anholt
2014-12-13 15:27:39 -08:00
parent 39bc936011
commit 06890c444a
4 changed files with 175 additions and 4 deletions

View File

@@ -29,14 +29,49 @@
#include <xf86drmMode.h>
#include "util/u_memory.h"
#include "util/ralloc.h"
#include "vc4_context.h"
#include "vc4_screen.h"
/**
 * Given a pointer to the member `field` embedded in a `type`, yield a pointer
 * to the enclosing `type` object.
 *
 * `ptr` and the whole expansion are parenthesized so that the macro can be
 * handed an arbitrary expression (e.g. a conditional) and so that the result
 * can be used directly with `->`.
 */
#define container_of(ptr, type, field) \
        ((type *)((char *)(ptr) - offsetof(type, field)))
/**
 * Try to satisfy an allocation of `size` bytes from the screen's BO cache.
 *
 * The cache is bucketed by page count (bucket index is size / 4096 - 1), so
 * only a BO of exactly the same page count can be reused.  vc4_bo_alloc()
 * aligns the size to 4096 before calling this.
 *
 * On a hit, the BO at the tail of the bucket (the most recently cached one)
 * is unlinked from both cache lists, its reference count is reset to 1 and
 * its debug name is replaced with `name`.
 *
 * @return the reused BO, or NULL if the bucket does not exist or is empty.
 */
static struct vc4_bo *
vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
{
        struct vc4_bo_cache *cache = &screen->bo_cache;
        const uint32_t bucket = size / 4096 - 1;
        struct vc4_bo *reused = NULL;

        /* No bucket has been created for this page count yet.  Note that
         * this check happens before the cache lock is taken.
         */
        if (bucket >= cache->size_list_size)
                return NULL;

        pipe_mutex_lock(cache->lock);

        struct simple_node *head = &cache->size_list[bucket];
        if (!is_empty_list(head)) {
                struct simple_node *node = last_elem(head);

                reused = container_of(node, struct vc4_bo, size_list);

                /* The BO is live again: drop it from both cache lists. */
                remove_from_list(&reused->size_list);
                remove_from_list(&reused->time_list);

                pipe_reference_init(&reused->reference, 1);
                reused->name = name;
        }

        pipe_mutex_unlock(cache->lock);

        return reused;
}
struct vc4_bo *
vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
{
struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
struct vc4_bo *bo;
size = align(size, 4096);
bo = vc4_bo_from_cache(screen, size, name);
if (bo)
return bo;
bo = CALLOC_STRUCT(vc4_bo);
if (!bo)
return NULL;
@@ -44,6 +79,7 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
bo->screen = screen;
bo->size = size;
bo->name = name;
bo->private = true;
struct drm_mode_create_dumb create;
memset(&create, 0, sizeof(create));
@@ -65,6 +101,18 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
}
/**
 * Called when the last reference to `bo` has been dropped.
 *
 * Samples CLOCK_MONOTONIC, takes the screen's BO cache lock and hands the BO
 * to vc4_bo_last_unreference_locked_timed() together with the current time
 * in whole seconds.
 */
void
vc4_bo_last_unreference(struct vc4_bo *bo)
{
        /* Fetch the screen up front: the BO may no longer exist once the
         * locked helper returns, and the lock still has to be released.
         */
        struct vc4_screen *screen = bo->screen;
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);

        pipe_mutex_lock(screen->bo_cache.lock);
        vc4_bo_last_unreference_locked_timed(bo, now.tv_sec);
        pipe_mutex_unlock(screen->bo_cache.lock);
}
static void
vc4_bo_free(struct vc4_bo *bo)
{
struct vc4_screen *screen = bo->screen;
@@ -89,6 +137,69 @@ vc4_bo_free(struct vc4_bo *bo)
free(bo);
}
static void
free_stale_bos(struct vc4_screen *screen, time_t time)
{
while (!is_empty_list(&screen->bo_cache.time_list)) {
struct simple_node *node =
first_elem(&screen->bo_cache.time_list);
struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
/* If it's more than a second old, free it. */
if (time - bo->free_time > 2) {
remove_from_list(&bo->time_list);
remove_from_list(&bo->size_list);
vc4_bo_free(bo);
} else {
break;
}
}
}
/**
 * Dispose of a BO whose last reference was dropped: either put it in the
 * screen's BO cache for later reuse or free it outright.
 *
 * The "locked" in the name refers to the BO cache lock, which
 * vc4_bo_last_unreference() takes around its call to this function.
 *
 * @param bo    BO with no remaining references.
 * @param time  Current time in seconds, recorded as the BO's free_time and
 *              used to expire stale cache entries.
 *
 * BOs that are not private (they were opened from a handle or have been
 * flinked) are never cached.  A BO is also freed instead of cached if it is
 * smaller than one page (page_index would wrap) or if growing the bucket
 * array fails.
 */
void
vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
{
        struct vc4_screen *screen = bo->screen;
        struct vc4_bo_cache *cache = &screen->bo_cache;

        /* A size below 4096 would make "size / 4096 - 1" wrap around and
         * index far outside the bucket array, so such a BO is not cached.
         */
        if (!bo->private || bo->size < 4096) {
                vc4_bo_free(bo);
                return;
        }

        uint32_t page_index = bo->size / 4096 - 1;

        if (cache->size_list_size <= page_index) {
                uint32_t new_size = page_index + 1;
                struct simple_node *new_list =
                        ralloc_array(screen, struct simple_node, new_size);

                /* Out of memory: the cache is only an optimization, so just
                 * free the BO rather than dereferencing a NULL array.
                 */
                if (!new_list) {
                        vc4_bo_free(bo);
                        return;
                }

                /* Move old list contents over (since the array has moved,
                 * and therefore the pointers to the list heads have to
                 * change).
                 */
                for (uint32_t i = 0; i < cache->size_list_size; i++) {
                        struct simple_node *old_head = &cache->size_list[i];

                        if (is_empty_list(old_head)) {
                                make_empty_list(&new_list[i]);
                        } else {
                                new_list[i].next = old_head->next;
                                new_list[i].prev = old_head->prev;
                                new_list[i].next->prev = &new_list[i];
                                new_list[i].prev->next = &new_list[i];
                        }
                }
                for (uint32_t i = cache->size_list_size; i < new_size; i++)
                        make_empty_list(&new_list[i]);

                /* Nothing points at the old heads any more; release the
                 * superseded array instead of keeping it until the screen
                 * is destroyed.  (NULL on the first growth is fine.)
                 */
                ralloc_free(cache->size_list);

                cache->size_list = new_list;
                cache->size_list_size = new_size;
        }

        bo->free_time = time;
        insert_at_tail(&cache->size_list[page_index], &bo->size_list);
        insert_at_tail(&cache->time_list, &bo->time_list);

        /* Use this opportunity to expire entries that have aged out. */
        free_stale_bos(screen, time);
}
static struct vc4_bo *
vc4_bo_open_handle(struct vc4_screen *screen,
uint32_t winsys_stride,
@@ -103,6 +214,7 @@ vc4_bo_open_handle(struct vc4_screen *screen,
bo->handle = handle;
bo->size = size;
bo->name = "winsys";
bo->private = false;
#ifdef USE_VC4_SIMULATOR
vc4_bo_map(bo);
@@ -194,6 +306,7 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return false;
}
bo->private = false;
*name = flink.name;
return true;
@@ -289,3 +402,19 @@ vc4_bo_map(struct vc4_bo *bo)
return map;
}
/**
 * Empty the screen's BO cache, freeing every BO still held in it.
 *
 * Each cached BO is linked into time_list, so draining that list from the
 * head visits all of them; each one is unlinked from both cache lists before
 * being freed.  The cache lock is not taken here.
 */
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen)
{
        struct vc4_screen *screen = vc4_screen(pscreen);
        struct vc4_bo_cache *cache = &screen->bo_cache;

        for (;;) {
                if (is_empty_list(&cache->time_list))
                        break;

                struct simple_node *head_node = first_elem(&cache->time_list);
                struct vc4_bo *cached =
                        container_of(head_node, struct vc4_bo, time_list);

                remove_from_list(&cached->size_list);
                remove_from_list(&cached->time_list);
                vc4_bo_free(cached);
        }
}

View File

@@ -26,6 +26,7 @@
#include <stdint.h>
#include "util/u_inlines.h"
#include "vc4_qir.h"
struct vc4_context;
@@ -41,13 +42,26 @@ struct vc4_bo {
void *simulator_winsys_map;
uint32_t simulator_winsys_stride;
#endif
/** Entry in the linked list of buffers freed, by age. */
struct simple_node time_list;
/** Entry in the per-page-count linked list of buffers freed (by age). */
struct simple_node size_list;
/** Approximate second when the bo was freed. */
time_t free_time;
/**
* Whether only our process has a reference to the BO (meaning that
* it's safe to reuse it in the BO cache).
*/
bool private;
};
struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
const char *name);
struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
uint32_t size, const char *name);
void vc4_bo_free(struct vc4_bo *bo);
void vc4_bo_last_unreference(struct vc4_bo *bo);
void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
uint32_t winsys_stride);
struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd,
@@ -59,7 +73,7 @@ static inline void
vc4_bo_set_reference(struct vc4_bo **old_bo, struct vc4_bo *new_bo)
{
if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
vc4_bo_free(*old_bo);
vc4_bo_last_unreference(*old_bo);
*old_bo = new_bo;
}
@@ -77,7 +91,18 @@ vc4_bo_unreference(struct vc4_bo **bo)
return;
if (pipe_reference(&(*bo)->reference, NULL))
vc4_bo_free(*bo);
vc4_bo_last_unreference(*bo);
*bo = NULL;
}
/**
 * Drop the reference held in `*bo` and clear the pointer.
 *
 * Same contract as vc4_bo_unreference(), except that when the last reference
 * goes away the BO is passed to vc4_bo_last_unreference_locked_timed() with
 * the caller-supplied `time` (seconds) instead of going through
 * vc4_bo_last_unreference().  A NULL `*bo` is a no-op.
 */
static inline void
vc4_bo_unreference_locked_timed(struct vc4_bo **bo, time_t time)
{
        struct vc4_bo *dropped = *bo;

        if (dropped == NULL)
                return;

        if (pipe_reference(&dropped->reference, NULL))
                vc4_bo_last_unreference_locked_timed(dropped, time);

        *bo = NULL;
}
@@ -93,5 +118,8 @@ vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
bool
vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen);
#endif /* VC4_BUFMGR_H */

View File

@@ -76,6 +76,7 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen)
/**
 * Tear down the screen: empty the BO cache, then release the screen itself.
 */
static void
vc4_screen_destroy(struct pipe_screen *pscreen)
{
/* Must run before the screen is freed: freeing a cached BO reads
 * bo->screen, and the cache's bucket array is ralloc'd off the screen.
 */
vc4_bufmgr_destroy(pscreen);
ralloc_free(pscreen);
}
@@ -449,6 +450,7 @@ vc4_screen_create(int fd)
pscreen->is_format_supported = vc4_screen_is_format_supported;
screen->fd = fd;
make_empty_list(&screen->bo_cache.time_list);
vc4_fence_init(screen);

View File

@@ -25,7 +25,9 @@
#define VC4_SCREEN_H
#include "pipe/p_screen.h"
#include "os/os_thread.h"
#include "state_tracker/drm_driver.h"
#include "vc4_qir.h"
struct vc4_bo;
@@ -55,6 +57,16 @@ struct vc4_screen {
* if we know the job's already done.
*/
uint64_t finished_seqno;
/**
 * Userspace cache of BOs whose last reference was dropped, kept around so
 * that later allocations of the same size can reuse them.
 */
struct vc4_bo_cache {
/**
 * All cached struct vc4_bo, linked through vc4_bo::time_list.  BOs are
 * appended at the tail, so the head is the one cached longest ago.
 */
struct simple_node time_list;
/**
 * Array of list heads, one per BO size in pages (index is
 * size / 4096 - 1), each linking cached BOs of that size through
 * vc4_bo::size_list in the order they were cached.
 */
struct simple_node *size_list;
/** Number of list heads in the size_list array. */
uint32_t size_list_size;
/**
 * Held while the cache lists are modified when taking a BO out of the
 * cache and when putting one in on last unreference.
 */
pipe_mutex lock;
} bo_cache;
};
static inline struct vc4_screen *