freedreno/drm: Add sub-allocator
Add a heap that we can use for allocations of small mappable buffers. This avoids the churn of mmap/unmap, which is especially expensive in a VM, and allows packing multiple small allocations together in a page, which is useful for PIPE_BUFFERs (which are also mappable). Avoiding the overhead of setting up and tearing down guest mappings when running in a VM removes a source of jank, and suballocation also significantly reduces the number of BOs referenced on a submit.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20263>
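For orientation, here is a minimal usage sketch of the new heap API (illustrative only, not part of the patch; real callers reach the heap indirectly through fd_bo_new(), and error handling is omitted):

#include <string.h>

#include "freedreno_drmif.h"
#include "freedreno_priv.h"

/* Sketch only: assumes `dev` is a device where userspace-allocated iova
 * and userspace fencing are available (a6xx+ or virtio_gpu).
 */
static void
heap_usage_sketch(struct fd_device *dev)
{
   /* A heap for plain mappable buffers (flags == 0): */
   struct fd_bo_heap *heap = fd_bo_heap_new(dev, 0);

   /* Small allocations are carved out of shared 4MB backing blocks, so
    * this creates no new GEM handle and performs no mmap():
    */
   struct fd_bo *bo = fd_bo_heap_alloc(heap, 0x1000);

   /* bo->map is pre-populated as an offset into the block's mapping, so
    * fd_bo_map() should simply return it:
    */
   memcpy(fd_bo_map(bo), "hello", 6);

   fd_bo_del(bo);            /* parked on the heap freelist until idle */
   fd_bo_heap_destroy(heap);
}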
@@ -114,6 +114,13 @@ bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
 {
    struct fd_bo *bo = NULL;
 
+   if (size < FD_BO_HEAP_BLOCK_SIZE) {
+      if ((flags == 0) && dev->default_heap)
+         return fd_bo_heap_alloc(dev->default_heap, size);
+      if ((flags == RING_FLAGS) && dev->ring_heap)
+         return fd_bo_heap_alloc(dev->ring_heap, size);
+   }
+
    /* demote cached-coherent to WC if not supported: */
    if ((flags & FD_BO_CACHED_COHERENT) && !dev->has_cached_coherent)
       flags &= ~FD_BO_CACHED_COHERENT;

@@ -278,13 +285,16 @@ bo_del_or_recycle(struct fd_bo *bo)
 {
    struct fd_device *dev = bo->dev;
 
-   if ((bo->bo_reuse == BO_CACHE) &&
-       (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
-      return 0;
+   /* No point in BO cache for suballocated buffers: */
+   if (!suballoc_bo(bo)) {
+      if ((bo->bo_reuse == BO_CACHE) &&
+          (fd_bo_cache_free(&dev->bo_cache, bo) == 0))
+         return 0;
 
-   if ((bo->bo_reuse == RING_CACHE) &&
-       (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
-      return 0;
+      if ((bo->bo_reuse == RING_CACHE) &&
+          (fd_bo_cache_free(&dev->ring_cache, bo) == 0))
+         return 0;
+   }
 
    return bo_del(bo);
 }

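The cache is skipped here because a suballocated BO has no GEM handle of its own to recycle: its destroy hook (sa_destroy() in the new file below) just parks it on the heap's freelist, and the range is returned to the heap once the BO is idle. A hypothetical sketch of that lifetime, using only names introduced by this patch and assuming a device with dev->default_heap set up (the submit itself is elided):

static void
delete_while_busy_sketch(struct fd_device *dev)
{
   struct fd_bo *bo = fd_bo_heap_alloc(dev->default_heap, 0x1000);

   /* ... suppose a submit referencing bo has been flushed but not yet
    * retired, so fd_bo_state(bo) is not FD_BO_STATE_IDLE ...
    */

   /* No cache recycle, no munmap, no GEM_CLOSE: sa_destroy() only adds
    * the BO to heap->freelist.
    */
   fd_bo_del(bo);

   /* A later fd_bo_heap_alloc() (or fd_bo_heap_destroy()) calls
    * heap_clean(), which walks the freelist and returns the ranges of
    * now-idle BOs to the util_vma_heap.
    */
}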
@@ -355,6 +365,16 @@ fd_bo_del_list_nocache(struct list_head *list)
    close_handles(dev, handles, cnt);
 }
 
+void
+fd_bo_fini_fences(struct fd_bo *bo)
+{
+   for (int i = 0; i < bo->nr_fences; i++)
+      fd_fence_del(bo->fences[i]);
+
+   if (bo->fences != &bo->_inline_fence)
+      free(bo->fences);
+}
+
 /**
  * Helper called by backends bo->funcs->destroy()
  *

@@ -371,11 +391,7 @@ fd_bo_fini_common(struct fd_bo *bo)
 
    VG_BO_FREE(bo);
 
-   for (int i = 0; i < bo->nr_fences; i++)
-      fd_fence_del(bo->fences[i]);
-
-   if (bo->fences != &bo->_inline_fence)
-      free(bo->fences);
+   fd_bo_fini_fences(bo);
 
    if (bo->map)
       os_munmap(bo->map, bo->size);

src/freedreno/drm/freedreno_bo_heap.c (new file, 284 lines)
@@ -0,0 +1,284 @@
/*
 * Copyright © 2022 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "freedreno_drmif.h"
#include "freedreno_priv.h"

struct sa_bo {
   struct fd_bo base;
   struct fd_bo_heap *heap;
   unsigned offset;
};
FD_DEFINE_CAST(fd_bo, sa_bo);

#define HEAP_DEBUG 0

static void heap_clean(struct fd_bo_heap *heap, bool idle);
static void heap_dump(struct fd_bo_heap *heap);

struct fd_bo_heap *
fd_bo_heap_new(struct fd_device *dev, uint32_t flags)
{
   struct fd_bo_heap *heap;

   /* We cannot suballocate shared buffers!  Implicit sync is not supported! */
   assert(!(flags & FD_BO_SHARED));

   /* No internal buffers either, we need userspace fencing: */
   assert(!(flags & _FD_BO_NOSYNC));

   heap = calloc(1, sizeof(*heap));

   heap->dev = dev;
   heap->flags = flags;
   simple_mtx_init(&heap->lock, mtx_plain);
   list_inithead(&heap->freelist);

   /* Note that util_vma_heap_init doesn't like offset==0, so we shift the
    * entire range by one block size (see block_idx()):
    */
   util_vma_heap_init(&heap->heap, FD_BO_HEAP_BLOCK_SIZE,
                      FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
   heap->heap.alloc_high = false;
   heap->heap.nospan_shift = ffs(FD_BO_HEAP_BLOCK_SIZE) - 1;

   heap_dump(heap);

   return heap;
}

void fd_bo_heap_destroy(struct fd_bo_heap *heap)
{
   /* drain the freelist: */
   heap_clean(heap, false);

   util_vma_heap_finish(&heap->heap);
   for (unsigned i = 0; i < ARRAY_SIZE(heap->blocks); i++)
      if (heap->blocks[i])
         fd_bo_del(heap->blocks[i]);
   free(heap);
}

static bool
sa_idle(struct fd_bo *bo)
{
   enum fd_bo_state state = fd_bo_state(bo);
   assert(state != FD_BO_STATE_UNKNOWN);
   return state == FD_BO_STATE_IDLE;
}

/**
 * The backing block is determined by the offset within the heap, since all
 * the blocks are equal size
 */
static unsigned
block_idx(struct sa_bo *s)
{
   /* The vma allocator doesn't like offset=0 so the range is shifted up
    * by one block size:
    */
   return (s->offset / FD_BO_HEAP_BLOCK_SIZE) - 1;
}

static unsigned
block_offset(struct sa_bo *s)
{
   return s->offset % FD_BO_HEAP_BLOCK_SIZE;
}

static void
heap_dump(struct fd_bo_heap *heap)
{
   if (!HEAP_DEBUG)
      return;
   fprintf(stderr, "HEAP[%x]: freelist: %u\n", heap->flags, list_length(&heap->freelist));
   util_vma_heap_print(&heap->heap, stderr, "",
                       FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks));
}

static void
sa_release(struct fd_bo *bo)
{
   struct sa_bo *s = to_sa_bo(bo);

   simple_mtx_assert_locked(&s->heap->lock);

   VG_BO_FREE(bo);

   fd_bo_fini_fences(bo);

   if (HEAP_DEBUG)
      mesa_logi("release: %08x-%x idx=%d", s->offset, bo->size, block_idx(s));

   util_vma_heap_free(&s->heap->heap, s->offset, bo->size);

   /* Drop our reference to the backing block object: */
   fd_bo_del(s->heap->blocks[block_idx(s)]);

   list_del(&bo->node);

   if ((++s->heap->cnt % 256) == 0)
      heap_dump(s->heap);

   free(bo);
}

static int
sa_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
{
   simple_mtx_lock(&fence_lock);
   unsigned nr = bo->nr_fences;
   struct fd_fence *fences[nr];
   for (unsigned i = 0; i < nr; i++)
      fences[i] = fd_fence_ref_locked(bo->fences[i]);
   simple_mtx_unlock(&fence_lock);

   for (unsigned i = 0; i < nr; i++) {
      fd_fence_wait(fences[i]);
      fd_fence_del(fences[i]);
   }

   /* expire completed fences */
   fd_bo_state(bo);

   assert(fd_bo_state(bo) == FD_BO_STATE_IDLE);

   return 0;
}

static int
sa_madvise(struct fd_bo *bo, int willneed)
{
   return willneed;
}

static uint64_t
sa_iova(struct fd_bo *bo)
{
   struct sa_bo *s = to_sa_bo(bo);

   return s->heap->blocks[block_idx(s)]->iova + block_offset(s);
}

static void
sa_set_name(struct fd_bo *bo, const char *fmt, va_list ap)
{
   /* No-op, kernel has a single name for the entire buffer we suballoc from */
}

static void
sa_destroy(struct fd_bo *bo)
{
   struct fd_bo_heap *heap = to_sa_bo(bo)->heap;

   simple_mtx_lock(&heap->lock);
   list_addtail(&bo->node, &heap->freelist);
   simple_mtx_unlock(&heap->lock);
}

static struct fd_bo_funcs heap_bo_funcs = {
   .cpu_prep = sa_cpu_prep,
   .madvise = sa_madvise,
   .iova = sa_iova,
   .set_name = sa_set_name,
   .destroy = sa_destroy,
};

/**
 * Get the backing heap block of a suballocated bo
 */
struct fd_bo *
fd_bo_heap_block(struct fd_bo *bo)
{
   assert(suballoc_bo(bo));

   struct sa_bo *s = to_sa_bo(bo);
   return s->heap->blocks[block_idx(s)];
}

static void
heap_clean(struct fd_bo_heap *heap, bool idle)
{
   simple_mtx_lock(&heap->lock);
   foreach_bo_safe (bo, &heap->freelist) {
      /* It might be nice if we could keep freelist sorted by fence # */
      if (idle && !sa_idle(bo))
         continue;
      sa_release(bo);
   }
   simple_mtx_unlock(&heap->lock);
}

struct fd_bo *
fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size)
{
   heap_clean(heap, true);

   struct sa_bo *s = calloc(1, sizeof(*s));

   s->heap = heap;

   /* util_vma does not like zero byte allocations, which we get, for
    * ex, with the initial query buffer allocation on pre-a5xx:
    */
   size = MAX2(size, SUBALLOC_ALIGNMENT);

   size = ALIGN(size, SUBALLOC_ALIGNMENT);

   simple_mtx_lock(&heap->lock);
   /* Allocate larger buffers from the bottom, and smaller buffers from top
    * to help limit fragmentation:
    *
    * (The 8k threshold is just a random guess, but seems to work ok)
    */
   heap->heap.alloc_high = (size <= 8 * 1024);
   s->offset = util_vma_heap_alloc(&heap->heap, size, SUBALLOC_ALIGNMENT);
   assert((s->offset / FD_BO_HEAP_BLOCK_SIZE) == (s->offset + size - 1) / FD_BO_HEAP_BLOCK_SIZE);
   unsigned idx = block_idx(s);
   if (HEAP_DEBUG)
      mesa_logi("alloc: %08x-%x idx=%d", s->offset, size, idx);
   if (!heap->blocks[idx]) {
      heap->blocks[idx] = fd_bo_new(
         heap->dev, FD_BO_HEAP_BLOCK_SIZE, heap->flags,
         "heap-%x-block-%u", heap->flags, idx);
   }
   /* Take a reference to the backing obj: */
   fd_bo_ref(heap->blocks[idx]);
   simple_mtx_unlock(&heap->lock);

   struct fd_bo *bo = &s->base;

   bo->size = size;
   bo->funcs = &heap_bo_funcs;
   bo->handle = 1; /* dummy handle to make fd_bo_init_common() happy */
   bo->alloc_flags = heap->flags;

   fd_bo_init_common(bo, heap->dev);

   bo->handle = FD_BO_SUBALLOC_HANDLE;

   /* Pre-initialize mmap ptr, to avoid trying to os_mmap() */
   bo->map = ((uint8_t *)fd_bo_map(heap->blocks[idx])) + block_offset(s);

   return bo;
}

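To make the block_idx()/block_offset() arithmetic above concrete, here is a small standalone example with a hypothetical heap offset (not part of the patch):

#include <assert.h>
#include <stdint.h>

#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)

int
main(void)
{
   /* Suppose util_vma_heap_alloc() returned offset 0x601000.  The heap
    * range starts at one block size rather than zero, so block 0 covers
    * offsets [0x400000, 0x800000):
    */
   uint32_t offset = 0x601000;

   unsigned idx = (offset / FD_BO_HEAP_BLOCK_SIZE) - 1;   /* block_idx()    -> 0 */
   unsigned off = offset % FD_BO_HEAP_BLOCK_SIZE;         /* block_offset() -> 0x201000 */

   assert(idx == 0);
   assert(off == 0x201000);

   /* The GPU address is blocks[idx]->iova + off, and the CPU pointer is
    * the block's mmap plus the same off.
    */
   return 0;
}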
@@ -43,6 +43,7 @@ fd_device_new(int fd)
 {
    struct fd_device *dev = NULL;
    drmVersionPtr version;
+   bool use_heap = false;
 
    /* figure out if we are kgsl or msm drm driver: */
    version = drmGetVersion(fd);

@@ -64,6 +65,10 @@ fd_device_new(int fd)
    } else if (!strcmp(version->name, "virtio_gpu")) {
       DEBUG_MSG("virtio_gpu DRM device");
       dev = virtio_device_new(fd, version);
+      /* Only devices that support a hypervisor are a6xx+, so avoid the
+       * extra guest<->host round trips associated with pipe creation:
+       */
+      use_heap = true;
 #endif
 #if HAVE_FREEDRENO_KGSL
    } else if (!strcmp(version->name, "kgsl")) {

@@ -96,6 +101,23 @@ out:
    simple_mtx_init(&dev->submit_lock, mtx_plain);
    simple_mtx_init(&dev->suballoc_lock, mtx_plain);
 
+   if (!use_heap) {
+      struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
+
+      /* Userspace fences don't appear to be reliable enough (missing some
+       * cache flushes?) on older gens, so limit sub-alloc heaps to a6xx+
+       * for now:
+       */
+      use_heap = fd_dev_gen(&pipe->dev_id) >= 6;
+
+      fd_pipe_del(pipe);
+   }
+
+   if (use_heap) {
+      dev->ring_heap = fd_bo_heap_new(dev, RING_FLAGS);
+      dev->default_heap = fd_bo_heap_new(dev, 0);
+   }
+
    return dev;
 }

@@ -158,6 +180,12 @@ fd_device_del(struct fd_device *dev)
    if (dev->suballoc_bo)
       fd_bo_del(dev->suballoc_bo);
 
+   if (dev->ring_heap)
+      fd_bo_heap_destroy(dev->ring_heap);
+
+   if (dev->default_heap)
+      fd_bo_heap_destroy(dev->default_heap);
+
    fd_bo_cache_cleanup(&dev->bo_cache, 0);
    fd_bo_cache_cleanup(&dev->ring_cache, 0);

@@ -130,6 +130,7 @@ struct fd_fence *fd_fence_ref_locked(struct fd_fence *f);
 void fd_fence_del(struct fd_fence *f);
 void fd_fence_del_locked(struct fd_fence *f);
 void fd_fence_flush(struct fd_fence *f);
+int fd_fence_wait(struct fd_fence *f);
 
 /*
  * bo flags:

@@ -286,3 +286,9 @@ fd_fence_flush(struct fd_fence *f)
    fd_pipe_flush(f->pipe, f->ufence);
    util_queue_fence_wait(&f->ready);
 }
+
+int
+fd_fence_wait(struct fd_fence *f)
+{
+   return fd_pipe_wait(f->pipe, f);
+}

@@ -46,6 +46,7 @@
 #include "util/u_atomic.h"
 #include "util/u_debug.h"
 #include "util/u_math.h"
+#include "util/vma.h"
 
 #include "freedreno_dev_info.h"
 #include "freedreno_drmif.h"

@@ -126,6 +127,77 @@ struct fd_bo_cache {
    time_t time;
 };
 
+/* Probably good for the block size to be a multiple of an available
+ * large-page size.  For overlap of what both the MMU (with 4kb granule)
+ * and SMMU support, 2MB is that overlap.  (Well, 4kb is as well, but
+ * too small to be practical ;-))
+ */
+#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)
+
+/* Zero is an invalid handle, use it to indicate buffers that have been sub-
+ * allocated from a larger backing heap block buffer.
+ */
+#define FD_BO_SUBALLOC_HANDLE 0
+
+static inline bool
+suballoc_bo(struct fd_bo *bo)
+{
+   return bo->handle == FD_BO_SUBALLOC_HANDLE;
+}
+
+/**
+ * A heap is a virtual range of memory that is backed by N physical buffers,
+ * from which buffers can be suballocated.  This requires kernel support for
+ * userspace allocated iova.
+ */
+struct fd_bo_heap {
+   struct fd_device *dev;
+
+   int cnt;
+
+   /**
+    * Buffer allocation flags for buffers allocated from this heap.
+    */
+   uint32_t flags;
+
+   simple_mtx_t lock;
+
+   /**
+    * Ranges of the backing buffer are allocated at a granularity of
+    * SUBALLOC_ALIGNMENT
+    */
+   struct util_vma_heap heap;
+
+   /**
+    * List of recently freed suballocated BOs from this allocator until they
+    * become idle.  Backend should periodically call fd_bo_suballoc_clean()
+    * to check for newly idle entries on the freelist, so that the memory can
+    * be returned to the free heap.
+    */
+   struct list_head freelist;
+
+   /**
+    * The backing buffers.  Maximum total heap size is:
+    * FD_BO_HEAP_BLOCK_SIZE * ARRAY_SIZE(heap->blocks)
+    */
+   struct fd_bo *blocks[256];
+};
+
+struct fd_bo_heap *fd_bo_heap_new(struct fd_device *dev, uint32_t flags);
+void fd_bo_heap_destroy(struct fd_bo_heap *heap);
+
+struct fd_bo *fd_bo_heap_block(struct fd_bo *bo);
+struct fd_bo *fd_bo_heap_alloc(struct fd_bo_heap *heap, uint32_t size);
+
+static inline uint32_t
+submit_offset(struct fd_bo *bo, uint32_t offset)
+{
+   if (suballoc_bo(bo)) {
+      offset += bo->iova - fd_bo_heap_block(bo)->iova;
+   }
+   return offset;
+}
+
 struct fd_device {
    int fd;
    enum fd_version version;

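A quick standalone check (hypothetical values, not part of the patch) of the heap size limit and of the submit_offset() rebasing above:

#include <assert.h>
#include <stdint.h>

#define FD_BO_HEAP_BLOCK_SIZE (4 * 1024 * 1024)

int
main(void)
{
   /* 256 blocks of 4MB cap each heap at 1GB of backing storage: */
   assert((uint64_t)FD_BO_HEAP_BLOCK_SIZE * 256 == (1ull << 30));

   /* submit_offset(): the kernel only knows about the backing block, so a
    * cmdstream offset inside a suballocated BO has to be rebased onto the
    * block.  Hypothetical addresses:
    */
   uint64_t block_iova = 0x100400000ull;
   uint64_t bo_iova = 0x100601000ull;  /* BO starts 0x201000 into the block */
   uint32_t offset = 0x40;             /* offset within the small BO */

   assert((uint32_t)(bo_iova - block_iova) + offset == 0x201040);
   return 0;
}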
@@ -147,6 +219,16 @@ struct fd_device {
    struct fd_bo_cache bo_cache;
    struct fd_bo_cache ring_cache;
 
+   /**
+    * Heap for mappable + cached-coherent + gpu-readonly (ie. cmdstream)
+    */
+   struct fd_bo_heap *ring_heap;
+
+   /**
+    * Heap for mappable (ie. majority of small buffer allocations, etc)
+    */
+   struct fd_bo_heap *default_heap;
+
    bool has_cached_coherent;
 
    bool closefd; /* call close(fd) upon destruction */

@@ -352,6 +434,7 @@ enum fd_bo_state {
 enum fd_bo_state fd_bo_state(struct fd_bo *bo);
 
 void fd_bo_init_common(struct fd_bo *bo, struct fd_device *dev);
+void fd_bo_fini_fences(struct fd_bo *bo);
 void fd_bo_fini_common(struct fd_bo *bo);
 
 struct fd_bo *fd_bo_new_ring(struct fd_device *dev, uint32_t size);

@@ -52,17 +52,46 @@ static struct fd_ringbuffer *
 fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags);
 
+static void
+append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
+{
+   uint32_t idx = READ_ONCE(bo->idx);
+
+   if (unlikely((idx >= submit->nr_suballoc_bos) ||
+                (submit->suballoc_bos[idx] != bo))) {
+      uint32_t hash = _mesa_hash_pointer(bo);
+      struct hash_entry *entry;
+
+      entry = _mesa_hash_table_search_pre_hashed(
+         submit->suballoc_bo_table, hash, bo);
+      if (entry) {
+         /* found */
+         idx = (uint32_t)(uintptr_t)entry->data;
+      } else {
+         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));
+
+         _mesa_hash_table_insert_pre_hashed(
+            submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
+      }
+      bo->idx = idx;
+   }
+}
+
 /* add (if needed) bo to submit and return index: */
 uint32_t
 fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
 {
-   uint32_t idx;
+   if (suballoc_bo(bo)) {
+      append_suballoc_bo(submit, bo);
+      bo = fd_bo_heap_block(bo);
+   }
+
    /* NOTE: it is legal to use the same bo on different threads for
     * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
-   idx = READ_ONCE(bo->idx);
+   uint32_t idx = READ_ONCE(bo->idx);
 
    if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);

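A hypothetical sketch (not part of the patch, assumes the freedreno_ringbuffer_sp.h context above) of what the append path guarantees for a suballocated BO:

/* Sketch only: what fd_submit_append_bo() does for a suballocated BO. */
static void
append_sketch(struct fd_submit_sp *fd_submit, struct fd_bo *bo)
{
   assert(suballoc_bo(bo));

   uint32_t idx = fd_submit_append_bo(fd_submit, bo);

   /* The kernel-visible table gets the backing block; the small BO only
    * lands in suballoc_bos so a userspace fence can be attached at flush:
    */
   assert(fd_submit->bos[idx] == fd_bo_heap_block(bo));
}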
@@ -187,6 +216,9 @@ fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
       fd_bo_add_fence(fd_submit->bos[i], out_fence);
       has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
    }
+   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
+      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
+   }
    simple_mtx_unlock(&fence_lock);
 
    fd_submit->out_fence = fd_fence_ref(out_fence);

@@ -385,6 +417,7 @@ fd_submit_sp_destroy(struct fd_submit *submit)
       fd_ringbuffer_del(fd_submit->suballoc_ring);
 
    _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
+   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);
 
    // TODO it would be nice to have a way to assert() if all
    // rb's haven't been free'd back to the slab, because that is

@@ -392,11 +425,14 @@ fd_submit_sp_destroy(struct fd_submit *submit)
    slab_destroy_child(&fd_submit->ring_pool);
 
    fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
+   free(fd_submit->bos);
+
+   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
+   free(fd_submit->suballoc_bos);
 
    if (fd_submit->out_fence)
       fd_fence_del(fd_submit->out_fence);
 
-   free(fd_submit->bos);
    free(fd_submit);
 }

@@ -412,8 +448,8 @@ fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
    struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
    struct fd_submit *submit;
 
-   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                                 _mesa_key_pointer_equal);
+   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
+   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);
 
    slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

@@ -50,9 +50,19 @@ struct fd_submit_sp {
 
    DECLARE_ARRAY(struct fd_bo *, bos);
 
+   /* Keep a separate table of sub-alloc BOs.. the backing objects are
+    * tracked in the main bos table (because this is what the kernel
+    * sees), but we need to attach userspace fences to the sub-alloc'd
+    * BOs so the driver knows when they are idle
+    */
+   DECLARE_ARRAY(struct fd_bo *, suballoc_bos);
+
    /* maps fd_bo to idx in bos table: */
    struct hash_table *bo_table;
 
+   /* maps fd_bo to idx in suballoc_bos table: */
+   struct hash_table *suballoc_bo_table;
+
    struct slab_child_pool ring_pool;
 
    /* Allow for sub-allocation of stateobj ring buffers (ie. sharing

@@ -20,6 +20,7 @@
 
 libfreedreno_drm_files = files(
   'freedreno_bo.c',
+  'freedreno_bo_heap.c',
   'freedreno_bo_cache.c',
   'freedreno_device.c',
   'freedreno_drmif.h',

@@ -314,7 +314,7 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
 
       cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
       cmds[i].submit_idx = append_bo(msm_submit, msm_ring->ring_bo);
-      cmds[i].submit_offset = msm_ring->offset;
+      cmds[i].submit_offset = submit_offset(msm_ring->ring_bo, msm_ring->offset);
       cmds[i].size = offset_bytes(ring->cur, ring->start);
       cmds[i].pad = 0;
       cmds[i].nr_relocs = msm_ring->cmd->nr_relocs;

@@ -328,9 +328,9 @@ msm_submit_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
       } else {
          cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
       }
-      cmds[i].submit_idx =
-         append_bo(msm_submit, msm_ring->u.cmds[j]->ring_bo);
-      cmds[i].submit_offset = msm_ring->offset;
+      struct fd_bo *ring_bo = msm_ring->u.cmds[j]->ring_bo;
+      cmds[i].submit_idx = append_bo(msm_submit, ring_bo);
+      cmds[i].submit_offset = submit_offset(ring_bo, msm_ring->offset);
       cmds[i].size = msm_ring->u.cmds[j]->size;
       cmds[i].pad = 0;
       cmds[i].nr_relocs = msm_ring->u.cmds[j]->nr_relocs;

@@ -67,10 +67,10 @@ flush_submit_list(struct list_head *submit_list)
          to_fd_ringbuffer_sp(submit->primary);
 
       for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+         struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
          cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
-         cmds[cmd_idx].submit_idx =
-            fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
-         cmds[cmd_idx].submit_offset = deferred_primary->offset;
+         cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+         cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
          cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

@@ -85,10 +85,10 @@ flush_submit_list(struct list_head *submit_list)
          to_fd_ringbuffer_sp(submit->primary);
 
       for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
+         struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
          cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
-         cmds[cmd_idx].submit_idx =
-            fd_submit_append_bo(fd_submit, deferred_primary->u.cmds[i].ring_bo);
-         cmds[cmd_idx].submit_offset = deferred_primary->offset;
+         cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
+         cmds[cmd_idx].submit_offset = submit_offset(ring_bo, deferred_primary->offset);
          cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;
