panfrost: Share tiler_heap across batches/contexts
There's only one tiler, so this is safe. (The blob does the same
optimization.) This avoids allocating multiple heaps for multiple
batches, which wastes memory and CPU time.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6373>
committed by Tomeu Vizoso
parent 01d12c353e
commit d8deb1eb6a
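At a glance, the change below moves the tiler heap from a lazily created, per-batch buffer to a single buffer owned by the device and referenced by every batch. A minimal, self-contained sketch of that ownership pattern follows; the struct and helper names in it (device, batch, bo_create, and so on) are simplified stand-ins for illustration, not the real Panfrost types.

#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins: not the real panfrost_bo/panfrost_device types. */
struct bo { size_t size; };

struct device {
        struct bo *tiler_heap;    /* allocated once, shared by every batch */
};

struct batch {
        struct device *dev;       /* no per-batch tiler_heap field anymore */
};

static struct bo *bo_create(size_t size)
{
        struct bo *bo = malloc(sizeof(*bo));
        bo->size = size;
        return bo;
}

static void device_open(struct device *dev)
{
        /* One heap for the whole device: the GPU has a single tiler. */
        dev->tiler_heap = bo_create((size_t)4096 * 4096);
}

static void emit_tiler(struct batch *batch)
{
        /* Each batch points at the shared heap instead of creating its own. */
        printf("tiler heap: %zu bytes\n", batch->dev->tiler_heap->size);
}

int main(void)
{
        struct device dev;
        device_open(&dev);

        struct batch a = { .dev = &dev }, b = { .dev = &dev };
        emit_tiler(&a);
        emit_tiler(&b);

        free(dev.tiler_heap);
        return 0;
}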
@@ -76,20 +76,14 @@ panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
         t.polygon_list_size = panfrost_tiler_full_size(
                 width, height, t.hierarchy_mask, hierarchy);
 
-        /* Sanity check */
-
         if (vertex_count) {
-                struct panfrost_bo *tiler_heap;
-
-                tiler_heap = panfrost_batch_get_tiler_heap(batch);
-
                 t.polygon_list = panfrost_batch_get_polygon_list(batch,
                                 header_size +
                                 t.polygon_list_size);
 
 
                 /* Allow the entire tiler heap */
-                t.heap_start = tiler_heap->gpu;
-                t.heap_end = tiler_heap->gpu + tiler_heap->size;
+                t.heap_start = device->tiler_heap->gpu;
+                t.heap_end = device->tiler_heap->gpu + device->tiler_heap->size;
         } else {
                 struct panfrost_bo *tiler_dummy;
@@ -679,23 +679,6 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
         return batch->shared_memory;
 }
 
-struct panfrost_bo *
-panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
-{
-        if (batch->tiler_heap)
-                return batch->tiler_heap;
-
-        batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096,
-                                                     PAN_BO_INVISIBLE |
-                                                     PAN_BO_GROWABLE,
-                                                     PAN_BO_ACCESS_PRIVATE |
-                                                     PAN_BO_ACCESS_RW |
-                                                     PAN_BO_ACCESS_VERTEX_TILER |
-                                                     PAN_BO_ACCESS_FRAGMENT);
-        assert(batch->tiler_heap);
-        return batch->tiler_heap;
-}
-
 mali_ptr
 panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_count)
 {
@@ -705,14 +688,13 @@ panfrost_batch_get_tiler_meta(struct panfrost_batch *batch, unsigned vertex_coun
         if (batch->tiler_meta)
                 return batch->tiler_meta;
 
-        struct panfrost_bo *tiler_heap;
-        tiler_heap = panfrost_batch_get_tiler_heap(batch);
+        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
 
         struct bifrost_tiler_heap_meta tiler_heap_meta = {
-                .heap_size = tiler_heap->size,
-                .tiler_heap_start = tiler_heap->gpu,
-                .tiler_heap_free = tiler_heap->gpu,
-                .tiler_heap_end = tiler_heap->gpu + tiler_heap->size,
+                .heap_size = dev->tiler_heap->size,
+                .tiler_heap_start = dev->tiler_heap->gpu,
+                .tiler_heap_free = dev->tiler_heap->gpu,
+                .tiler_heap_end = dev->tiler_heap->gpu + dev->tiler_heap->size,
                 .unk1 = 0x1,
                 .unk7e007e = 0x7e007e,
         };
@@ -997,7 +979,7 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         submit.jc = first_job_desc;
         submit.requirements = reqs;
 
-        bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries, sizeof(*bo_handles));
+        bo_handles = calloc(batch->pool.bos->entries + batch->invisible_pool.bos->entries + batch->bos->entries + 1, sizeof(*bo_handles));
         assert(bo_handles);
 
         hash_table_foreach(batch->bos, entry)
@@ -1009,6 +991,10 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
         hash_table_foreach(batch->invisible_pool.bos, entry)
                 panfrost_batch_record_bo(entry, bo_handles, submit.bo_handle_count++);
 
+        /* Used by all tiler jobs (XXX: skip for compute-only) */
+        if (!(reqs & PANFROST_JD_REQ_FS))
+                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;
+
         submit.bo_handles = (u64) (uintptr_t) bo_handles;
         ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
         free(bo_handles);
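One consequence of moving the heap off the batch: it is no longer tracked in the batch's BO tables, so the submit path above reserves one extra slot and appends the heap's GEM handle by hand whenever the job chain does tiler work. A rough, self-contained sketch of that bookkeeping, with made-up handle values and a made-up flag name standing in for PANFROST_JD_REQ_FS:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define JOB_IS_FRAGMENT 0x1       /* illustrative stand-in, not the real flag value */

int main(void)
{
        uint32_t batch_handles[] = { 3, 7, 9 };   /* handles tracked by the batch */
        uint32_t shared_heap_handle = 42;         /* device-owned, not in the batch tables */
        unsigned n_batch = 3, reqs = 0;

        /* Mirror of the "+ 1" in the calloc: leave room for the shared handle. */
        uint32_t *bo_handles = calloc(n_batch + 1, sizeof(*bo_handles));
        unsigned count = 0;

        for (unsigned i = 0; i < n_batch; ++i)
                bo_handles[count++] = batch_handles[i];

        /* Vertex/tiler job chains need the heap resident; fragment-only ones do not. */
        if (!(reqs & JOB_IS_FRAGMENT))
                bo_handles[count++] = shared_heap_handle;

        printf("submitting %u BO handles\n", count);
        free(bo_handles);
        return 0;
}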
@@ -182,9 +182,6 @@ panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, un
 mali_ptr
 panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size);
 
-struct panfrost_bo *
-panfrost_batch_get_tiler_heap(struct panfrost_batch *batch);
-
 struct panfrost_bo *
 panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch);
 
@@ -127,6 +127,14 @@ struct panfrost_device {
         } bo_cache;
 
         struct pan_blit_shaders blit_shaders;
+
+        /* Tiler heap shared across all tiler jobs, allocated against the
+         * device since there's only a single tiler. Since this is invisible to
+         * the CPU, it's okay for multiple contexts to reference it
+         * simultaneously; by keeping on the device struct, we eliminate a
+         * costly per-context allocation. */
+
+        struct panfrost_bo *tiler_heap;
 };
 
 void
@@ -172,12 +172,20 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
 
         for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
                 list_inithead(&dev->bo_cache.buckets[i]);
+
+        /* Tiler heap is internally required by the tiler, which can only be
+         * active for a single job chain at once, so a single heap can be
+         * shared across batches/contextes */
+
+        dev->tiler_heap = panfrost_bo_create(dev, 4096 * 4096,
+                        PAN_BO_INVISIBLE | PAN_BO_GROWABLE);
 }
 
 void
 panfrost_close_device(struct panfrost_device *dev)
 {
         panfrost_bo_unreference(dev->blit_shaders.bo);
+        panfrost_bo_unreference(dev->tiler_heap);
         panfrost_bo_cache_evict_all(dev);
         pthread_mutex_destroy(&dev->bo_cache.lock);
         drmFreeVersion(dev->kernel_version);
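The open/close hunk pairs the new allocation with an unreference at teardown, so the heap lives exactly as long as the device. A tiny sketch of that refcounted ownership model, assuming an illustrative ref_bo type rather than the real panfrost_bo API:

#include <stdlib.h>

/* Illustrative refcounted buffer; field and helper names are made up. */
struct ref_bo {
        int refcount;
};

static struct ref_bo *ref_bo_create(void)
{
        struct ref_bo *bo = calloc(1, sizeof(*bo));
        bo->refcount = 1;                  /* creator holds the first reference */
        return bo;
}

static void ref_bo_unreference(struct ref_bo *bo)
{
        if (bo && --bo->refcount == 0)     /* last reference frees the buffer */
                free(bo);
}

struct dev { struct ref_bo *tiler_heap; };

static void dev_open(struct dev *d)
{
        d->tiler_heap = ref_bo_create();   /* allocate once, at device open */
}

static void dev_close(struct dev *d)
{
        ref_bo_unreference(d->tiler_heap); /* drop it with the device */
}

int main(void)
{
        struct dev d;
        dev_open(&d);
        dev_close(&d);
        return 0;
}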