diff --git a/src/imagination/.clang-format b/src/imagination/.clang-format
index e6c6e45542b..af3a4e06169 100644
--- a/src/imagination/.clang-format
+++ b/src/imagination/.clang-format
@@ -226,6 +226,8 @@ ForEachMacros: [
   'nir_foreach_use',
   'nir_foreach_use_safe',
   'nir_foreach_variable_with_modes',
+  'rb_tree_foreach',
+  'rb_tree_foreach_safe',
   'u_foreach_bit',
   'u_vector_foreach',
   'util_dynarray_foreach',
diff --git a/src/imagination/common/pvr_debug.c b/src/imagination/common/pvr_debug.c
index 2d56a12282a..66d7b3afb23 100644
--- a/src/imagination/common/pvr_debug.c
+++ b/src/imagination/common/pvr_debug.c
@@ -32,6 +32,8 @@ uint32_t PVR_DEBUG = 0;
 static const struct debug_named_value debug_control[] = {
    { "cs", PVR_DEBUG_DUMP_CONTROL_STREAM,
      "Dump the contents of the control stream buffer on every job submit." },
+   { "bo_track", PVR_DEBUG_TRACK_BOS,
+     "Track all buffer objects with at least one reference." },
    DEBUG_NAMED_VALUE_END
 };
 /* clang-format on */
diff --git a/src/imagination/common/pvr_debug.h b/src/imagination/common/pvr_debug.h
index 53dfb679eaa..9050be46ac8 100644
--- a/src/imagination/common/pvr_debug.h
+++ b/src/imagination/common/pvr_debug.h
@@ -35,6 +35,7 @@ extern uint32_t PVR_DEBUG;
 /* clang-format on */
 
 #define PVR_DEBUG_DUMP_CONTROL_STREAM BITFIELD_BIT(0)
+#define PVR_DEBUG_TRACK_BOS BITFIELD_BIT(1)
 
 void pvr_process_debug_variable(void);
 
diff --git a/src/imagination/vulkan/pvr_bo.c b/src/imagination/vulkan/pvr_bo.c
index 710d17474a9..e23ecf7e3cc 100644
--- a/src/imagination/vulkan/pvr_bo.c
+++ b/src/imagination/vulkan/pvr_bo.c
@@ -22,18 +22,197 @@
  */
 
 #include
+#include
 #include
 #include
+#include
 
 #include "pvr_bo.h"
+#include "pvr_debug.h"
 #include "pvr_dump.h"
 #include "pvr_private.h"
 #include "pvr_types.h"
+#include "pvr_util.h"
 #include "pvr_winsys.h"
+#include "util/macros.h"
+#include "util/rb_tree.h"
+#include "util/simple_mtx.h"
+#include "util/u_debug.h"
 #include "vk_alloc.h"
 #include "vk_log.h"
 
+/* Device-wide set of tracked BOs, ordered by device-virtual address. Only
+ * created when the TRACK_BOS debug flag is set; the tree and size are only
+ * modified with the mutex held.
+ */
+struct pvr_bo_store {
+   struct rb_tree tree;
+   simple_mtx_t mutex;
+   uint32_t size;
+};
+
+/* A tracked BO: the pvr_bo itself plus the RB-tree node linking it into the
+ * store. Allocated in place of a bare pvr_bo while tracking is enabled.
+ */
+struct pvr_bo_store_entry {
+   struct rb_node node;
+   struct pvr_bo bo;
+};
+
+#define entry_from_node(node_) \
+   container_of(node_, struct pvr_bo_store_entry, node)
+#define entry_from_bo(bo_) container_of(bo_, struct pvr_bo_store_entry, bo)
+
+/* Three-way comparison of two device addresses. Note the sign convention:
+ * returns 1 when a < b, -1 when a > b and 0 when they are equal.
+ */
+static inline int pvr_dev_addr_cmp(const pvr_dev_addr_t a,
+                                   const pvr_dev_addr_t b)
+{
+   const uint64_t addr_a = a.addr;
+   const uint64_t addr_b = b.addr;
+
+   if (addr_a < addr_b)
+      return 1;
+   else if (addr_a > addr_b)
+      return -1;
+   else
+      return 0;
+}
+
+/* Borrowed from pandecode. Using this comparator allows us to lookup intervals
+ * in the RB-tree without storing extra information.
+ *
+ * Returns 0 when the key address falls inside the node's VMA range
+ * [dev_addr, dev_addr + size); otherwise compares the VMA base with the key.
+ */
+static inline int pvr_bo_store_entry_cmp_key(const struct rb_node *node,
+                                             const void *const key)
+{
+   const struct pvr_winsys_vma *const vma = entry_from_node(node)->bo.vma;
+   const pvr_dev_addr_t addr = *(const pvr_dev_addr_t *)key;
+
+   if (addr.addr >= vma->dev_addr.addr &&
+       addr.addr < (vma->dev_addr.addr + vma->size)) {
+      return 0;
+   }
+
+   return pvr_dev_addr_cmp(vma->dev_addr, addr);
+}
+
+/* Insertion comparator: orders entries by the base address of their VMA. */
+static inline int pvr_bo_store_entry_cmp(const struct rb_node *const a,
+                                         const struct rb_node *const b)
+{
+   return pvr_dev_addr_cmp(entry_from_node(a)->bo.vma->dev_addr,
+                           entry_from_node(b)->bo.vma->dev_addr);
+}
+
+/* Creates the device's BO store if the TRACK_BOS debug flag is set; otherwise
+ * leaves device->bo_store NULL and succeeds. Returns
+ * VK_ERROR_OUT_OF_HOST_MEMORY if the store cannot be allocated.
+ */
+VkResult pvr_bo_store_create(struct pvr_device *device)
+{
+   struct pvr_bo_store *store;
+
+   if (!PVR_IS_DEBUG_SET(TRACK_BOS)) {
+      device->bo_store = NULL;
+      return VK_SUCCESS;
+   }
+
+   store = vk_alloc(&device->vk.alloc,
+                    sizeof(*store),
+                    8,
+                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!store)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   rb_tree_init(&store->tree);
+   store->size = 0;
+   simple_mtx_init(&store->mutex, mtx_plain);
+
+   device->bo_store = store;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys the device's BO store, if any. A store that still contains entries
+ * is dumped first, after a warning.
+ */
+void pvr_bo_store_destroy(struct pvr_device *device)
+{
+   struct pvr_bo_store *store = device->bo_store;
+
+   if (likely(!store))
+      return;
+
+   if (unlikely(!rb_tree_is_empty(&store->tree))) {
+      debug_warning("Non-empty BO store destroyed; dump follows");
+      pvr_bo_store_dump(device);
+   }
+
+   simple_mtx_destroy(&store->mutex);
+
+   vk_free(&device->vk.alloc, store);
+
+   device->bo_store = NULL;
+}
+
+/* Adds bo to the store under the store mutex; no-op when store is NULL. bo must
+ * come from pvr_bo_alloc_bo() so that it is embedded in a store entry.
+ */
+static void pvr_bo_store_insert(struct pvr_bo_store *const store,
+                                struct pvr_bo *const bo)
+{
+   if (likely(!store))
+      return;
+
+   simple_mtx_lock(&store->mutex);
+   rb_tree_insert(&store->tree,
+                  &entry_from_bo(bo)->node,
+                  pvr_bo_store_entry_cmp);
+   store->size++;
+   simple_mtx_unlock(&store->mutex);
+}
+
+/* Removes bo from the store under the store mutex; no-op when store is NULL. */
+static void pvr_bo_store_remove(struct pvr_bo_store *const store,
+                                struct pvr_bo *const bo)
+{
+   if (likely(!store))
+      return;
+
+   simple_mtx_lock(&store->mutex);
+   rb_tree_remove(&store->tree, &entry_from_bo(bo)->node);
+   store->size--;
+   simple_mtx_unlock(&store->mutex);
+}
+
+/* Returns the tracked BO whose VMA contains addr, or NULL if there is none or
+ * the device has no BO store. The store mutex is dropped before returning, so
+ * the result is not protected against a concurrent pvr_bo_free().
+ */
+struct pvr_bo *pvr_bo_store_lookup(struct pvr_device *const device,
+                                   const pvr_dev_addr_t addr)
+{
+   struct pvr_bo_store *const store = device->bo_store;
+   struct rb_node *node;
+
+   if (unlikely(!store))
+      return NULL;
+
+   simple_mtx_lock(&store->mutex);
+   node = rb_tree_search(&store->tree, &addr, pvr_bo_store_entry_cmp_key);
+   simple_mtx_unlock(&store->mutex);
+
+   if (!node)
+      return NULL;
+
+   return &entry_from_node(node)->bo;
+}
+
 static void pvr_bo_dump_line(struct pvr_dump_ctx *const ctx,
                              const struct pvr_bo *bo,
                              const uint32_t index,
@@ -70,6 +249,40 @@ static void pvr_bo_dump_line(struct pvr_dump_ctx *const ctx,
                     size);
 }
 
+/* Dumps every entry of the device's BO store through a pvr_dump context opened
+ * on stderr. Returns false (after a warning) when the device has no BO store;
+ * otherwise returns the result of pvr_dump_end().
+ */
+bool pvr_bo_store_dump(struct pvr_device *const device)
+{
+   struct pvr_bo_store *const store = device->bo_store;
+   struct pvr_dump_ctx ctx;
+   uint32_t nr_bos_log10;
+   uint32_t bo_idx = 0;
+   uint32_t nr_bos;
+
+   /* Check for a missing store before reading any of its fields. */
+   if (unlikely(!store)) {
+      debug_warning("Requested BO store dump, but no BO store is present.");
+      return false;
+   }
+
+   nr_bos = store->size;
+   nr_bos_log10 = u32_dec_digits(nr_bos);
+
+   pvr_dump_begin(&ctx, stderr, "BO STORE", 1);
+
+   pvr_dump_println(&ctx, "Dumping %" PRIu32 " BO store entries...", nr_bos);
+
+   pvr_dump_indent(&ctx);
+   rb_tree_foreach_safe (struct pvr_bo_store_entry, entry, &store->tree, node) {
+      pvr_bo_dump_line(&ctx, &entry->bo, bo_idx++, nr_bos_log10);
+   }
+   pvr_dump_dedent(&ctx);
+
+   return pvr_dump_end(&ctx);
+}
+
 void pvr_bo_list_dump(struct pvr_dump_ctx *const ctx,
                       const struct list_head *const bo_list,
                       const uint32_t nr_bos)
@@ -102,6 +315,48 @@ static uint32_t pvr_bo_alloc_to_winsys_flags(uint64_t flags)
    return ws_flags;
 }
 
+/* Allocates host storage for a pvr_bo. When the device has a BO store the
+ * pvr_bo is embedded in a pvr_bo_store_entry so that it can later be linked
+ * into the store. Returns NULL on allocation failure.
+ */
+static inline struct pvr_bo *
+pvr_bo_alloc_bo(const struct pvr_device *const device)
+{
+   size_t size;
+   void *ptr;
+
+   if (unlikely(device->bo_store))
+      size = sizeof(struct pvr_bo_store_entry);
+   else
+      size = sizeof(struct pvr_bo);
+
+   ptr =
+      vk_alloc(&device->vk.alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (unlikely(!ptr))
+      return NULL;
+
+   if (unlikely(device->bo_store))
+      return &((struct pvr_bo_store_entry *)ptr)->bo;
+   else
+      return (struct pvr_bo *)ptr;
+}
+
+/* Frees storage obtained from pvr_bo_alloc_bo(), recovering the enclosing
+ * store entry when the device has a BO store.
+ */
+static inline void pvr_bo_free_bo(const struct pvr_device *const device,
+                                  struct pvr_bo *const bo)
+{
+   void *ptr;
+
+   if (unlikely(device->bo_store))
+      ptr = entry_from_bo(bo);
+   else
+      ptr = bo;
+
+   vk_free(&device->vk.alloc, ptr);
+}
+
 /**
  * \brief Helper interface to allocate a GPU buffer and map it to both host and
  * device virtual memory. Host mapping is conditional and is controlled by
@@ -131,10 +386,7 @@ VkResult pvr_bo_alloc(struct pvr_device *device,
    pvr_dev_addr_t addr;
    VkResult result;
 
-   pvr_bo = vk_alloc(&device->vk.alloc,
-                     sizeof(*pvr_bo),
-                     8,
-                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   pvr_bo = pvr_bo_alloc_bo(device);
    if (!pvr_bo)
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
@@ -145,7 +397,7 @@ VkResult pvr_bo_alloc(struct pvr_device *device,
                                            ws_flags,
                                            &pvr_bo->bo);
    if (result != VK_SUCCESS)
-      goto err_vk_free;
+      goto err_free_bo;
 
    if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED) {
       void *map = device->ws->ops->buffer_map(pvr_bo->bo);
@@ -170,6 +422,7 @@ VkResult pvr_bo_alloc(struct pvr_device *device,
       goto err_heap_free;
    }
 
+   pvr_bo_store_insert(device->bo_store, pvr_bo);
    *pvr_bo_out = pvr_bo;
 
    return VK_SUCCESS;
@@ -184,8 +437,8 @@ err_buffer_unmap:
 err_buffer_destroy:
    device->ws->ops->buffer_destroy(pvr_bo->bo);
 
-err_vk_free:
-   vk_free(&device->vk.alloc, pvr_bo);
+err_free_bo:
+   pvr_bo_free_bo(device, pvr_bo);
 
    return result;
 }
@@ -260,6 +513,8 @@ void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
    vk_free(&device->vk.alloc, pvr_bo->bo->vbits);
 #endif /* defined(HAVE_VALGRIND) */
 
+   pvr_bo_store_remove(device->bo_store, pvr_bo);
+
    device->ws->ops->vma_unmap(pvr_bo->vma);
    device->ws->ops->heap_free(pvr_bo->vma);
 
@@ -268,7 +523,7 @@ void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
 
    device->ws->ops->buffer_destroy(pvr_bo->bo);
 
-   vk_free(&device->vk.alloc, pvr_bo);
+   pvr_bo_free_bo(device, pvr_bo);
 }
 
 #if defined(HAVE_VALGRIND)
diff --git a/src/imagination/vulkan/pvr_bo.h b/src/imagination/vulkan/pvr_bo.h
index 7f870746e1b..b9c2b0c1276 100644
--- a/src/imagination/vulkan/pvr_bo.h
+++ b/src/imagination/vulkan/pvr_bo.h
@@ -28,6 +28,8 @@
 #include
 #include
 
+#include "pvr_types.h"
+#include "pvr_winsys.h"
 #include "util/list.h"
 #include "util/macros.h"
 
@@ -99,6 +101,14 @@ static ALWAYS_INLINE void *pvr_bo_cpu_map_unchanged(struct pvr_device *device,
 }
 #endif /* defined(HAVE_VALGRIND) */
 
+struct pvr_bo_store;
+
+VkResult pvr_bo_store_create(struct pvr_device *device);
+void pvr_bo_store_destroy(struct pvr_device *device);
+struct pvr_bo *pvr_bo_store_lookup(struct pvr_device *device,
+                                   pvr_dev_addr_t addr);
+bool pvr_bo_store_dump(struct pvr_device *device);
+
 void pvr_bo_list_dump(struct pvr_dump_ctx *ctx,
                       const struct list_head *bo_list,
                       uint32_t bo_size);
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index 9717ff74130..7fd96ac5e16 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -2070,6 +2070,10 @@ VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
 
    device->ws->ops->get_heaps_info(device->ws, &device->heaps);
 
+   result = pvr_bo_store_create(device);
+   if (result != VK_SUCCESS)
+      goto err_pvr_winsys_destroy;
+
    result = pvr_free_list_create(device,
                                  PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
                                  PVR_GLOBAL_FREE_LIST_MAX_SIZE,
@@ -2078,7 +2082,7 @@ VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                                  NULL /* parent_free_list */,
                                  &device->global_free_list);
    if (result != VK_SUCCESS)
-      goto err_pvr_winsys_destroy;
+      goto err_pvr_bo_store_destroy;
 
    result = pvr_device_init_nop_program(device);
    if (result != VK_SUCCESS)
@@ -2138,6 +2142,9 @@ err_pvr_free_nop_program:
 err_pvr_free_list_destroy:
    pvr_free_list_destroy(device->global_free_list);
 
+err_pvr_bo_store_destroy:
+   pvr_bo_store_destroy(device);
+
 err_pvr_winsys_destroy:
    pvr_winsys_destroy(device->ws);
 
@@ -2168,6 +2175,7 @@ void pvr_DestroyDevice(VkDevice _device,
    pvr_bo_free(device, device->nop_program.pds.pvr_bo);
    pvr_bo_free(device, device->nop_program.usc);
    pvr_free_list_destroy(device->global_free_list);
+   pvr_bo_store_destroy(device);
    pvr_winsys_destroy(device->ws);
 
    if (device->master_fd >= 0)
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index c6248f4e3b5..7cd98a9a458 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -218,6 +218,7 @@ enum pvr_deferred_cs_command_type {
 };
 
 struct pvr_bo;
+struct pvr_bo_store;
 struct pvr_compute_ctx;
 struct pvr_compute_pipeline;
 struct pvr_free_list;
@@ -398,6 +399,8 @@ struct pvr_device {
    } static_clear_state;
 
    VkPhysicalDeviceFeatures features;
+
+   struct pvr_bo_store *bo_store;
 };
 
 struct pvr_device_memory {
diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h
index aa62275fd6f..e7851025f4b 100644
--- a/src/imagination/vulkan/winsys/pvr_winsys.h
+++ b/src/imagination/vulkan/winsys/pvr_winsys.h
@@ -29,6 +29,7 @@
 #define PVR_WINSYS_H
 
 #include
+#include
 #include
 #include
 #include