venus: support caching image memory requirements

Similar idea to buffer memory requirements cache but CreateImage has
many more params that may affect the memory requirements.

Instead of a sparse array, generate a SHA1 hash of all the relevant
VkImageCreateInfo params including relevant pNext structures and use
part of the hash as a key to a hash table that stores the cache entries.

Signed-off-by: Juston Li <justonli@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26118>
This commit is contained in:
Juston Li
2023-11-07 16:44:52 -08:00
committed by Marge Bot
parent a32d76b545
commit b51ff22fbe
6 changed files with 250 additions and 3 deletions

View File

@@ -52,6 +52,7 @@ static const struct debug_control vn_perf_options[] = {
{ "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC },
{ "no_tiled_wsi_image", VN_PERF_NO_TILED_WSI_IMAGE },
{ "no_multi_ring", VN_PERF_NO_MULTI_RING },
{ "no_async_image_create", VN_PERF_NO_ASYNC_IMAGE_CREATE },
{ NULL, 0 },
/* clang-format on */
};

View File

@@ -125,6 +125,7 @@ enum vn_perf {
VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9,
VN_PERF_NO_TILED_WSI_IMAGE = 1ull << 10,
VN_PERF_NO_MULTI_RING = 1ull << 11,
VN_PERF_NO_ASYNC_IMAGE_CREATE = 1ull << 12,
};
typedef uint64_t vn_object_id;

View File

@@ -536,6 +536,7 @@ vn_device_init(struct vn_device *dev,
goto out_cmd_pools_fini;
vn_buffer_reqs_cache_init(dev);
vn_image_reqs_cache_init(dev);
/* This is a WA to allow fossilize replay to detect if the host side shader
* cache is no longer up to date.
@@ -626,6 +627,7 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator)
if (!dev)
return;
vn_image_reqs_cache_fini(dev);
vn_buffer_reqs_cache_fini(dev);
for (uint32_t i = 0; i < dev->queue_count; i++)

View File

@@ -16,6 +16,7 @@
#include "vn_buffer.h"
#include "vn_device_memory.h"
#include "vn_feedback.h"
#include "vn_image.h"
struct vn_device_memory_report {
PFN_vkDeviceMemoryReportCallbackEXT callback;
@@ -55,6 +56,7 @@ struct vn_device {
uint32_t queue_count;
struct vn_buffer_reqs_cache buffer_reqs_cache;
struct vn_image_reqs_cache image_reqs_cache;
};
VK_DEFINE_HANDLE_CASTS(vn_device,
base.base.base,

View File

@@ -35,6 +35,213 @@ vn_image_get_plane_count(const VkImageCreateInfo *create_info)
return vk_format_get_plane_count(create_info->format);
}
/* Log hit/miss/skip statistics for the image memory requirements cache. */
static void
vn_image_cache_debug_dump(struct vn_image_reqs_cache *cache)
{
   const uint32_t hits = cache->debug.cache_hit_count;
   const uint32_t misses = cache->debug.cache_miss_count;
   const uint32_t skips = cache->debug.cache_skip_count;

   vn_log(NULL, "dumping image reqs cache statistics");
   vn_log(NULL, " hit %u\n", hits);
   vn_log(NULL, " miss %u\n", misses);
   vn_log(NULL, " skip %u\n", skips);
}
/* Hash-table hash callback: fold the SHA1 digest key into a 32-bit hash. */
static uint32_t
vn_image_cache_key_hash_function(const void *key)
{
   const uint8_t *digest = key;
   return _mesa_hash_data(digest, SHA1_DIGEST_LENGTH);
}
/* Hash-table comparison callback.
 *
 * The keys stored in and searched for in the table are raw SHA1 digests
 * (the cache entry's key[] array), not cache entry pointers, so compare
 * them as plain byte arrays. The previous cast to
 * struct vn_image_reqs_cache_entry * was misleading: it only worked
 * because the memcmp never looked past the digest bytes the pointers
 * actually refer to.
 */
static bool
vn_image_cache_key_equal_function(const void *void_a, const void *void_b)
{
   return memcmp(void_a, void_b, SHA1_DIGEST_LENGTH) == 0;
}
/* Compute a SHA1 digest of the memory-requirements-relevant fields of
 * create_info and store it in key (SHA1_DIGEST_LENGTH bytes).
 *
 * Returns true when key is valid for cache lookup/insertion. Returns
 * false when the cache is disabled, or when the pNext chain contains an
 * unrecognized struct whose fields might affect the requirements (in
 * which case caching is skipped entirely rather than risking a false
 * cache hit).
 */
static bool
vn_image_get_image_reqs_key(struct vn_device *dev,
                            const VkImageCreateInfo *create_info,
                            uint8_t *key)
{
   struct mesa_sha1 sha1_ctx;

   if (!dev->image_reqs_cache.ht)
      return false;

   _mesa_sha1_init(&sha1_ctx);

   /* Hash relevant fields in the pNext chain */
   vk_foreach_struct_const(src, create_info->pNext) {
      switch (src->sType) {
      case VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO: {
         const VkExternalMemoryImageCreateInfo *ext_mem =
            (const VkExternalMemoryImageCreateInfo *)src;
         _mesa_sha1_update(&sha1_ctx, &ext_mem->handleTypes,
                           sizeof(VkExternalMemoryHandleTypeFlags));
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO: {
         const VkImageFormatListCreateInfo *format_list =
            (const VkImageFormatListCreateInfo *)src;
         _mesa_sha1_update(&sha1_ctx, format_list->pViewFormats,
                           sizeof(VkFormat) * format_list->viewFormatCount);
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT: {
         const VkImageDrmFormatModifierListCreateInfoEXT *format_mod_list =
            (const VkImageDrmFormatModifierListCreateInfoEXT *)src;
         _mesa_sha1_update(
            &sha1_ctx, format_mod_list->pDrmFormatModifiers,
            sizeof(uint64_t) * format_mod_list->drmFormatModifierCount);
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT: {
         const VkImageDrmFormatModifierExplicitCreateInfoEXT
            *format_mod_explicit =
               (const VkImageDrmFormatModifierExplicitCreateInfoEXT *)src;
         _mesa_sha1_update(&sha1_ctx, &format_mod_explicit->drmFormatModifier,
                           sizeof(uint64_t));
         _mesa_sha1_update(
            &sha1_ctx, format_mod_explicit->pPlaneLayouts,
            sizeof(VkSubresourceLayout) *
               format_mod_explicit->drmFormatModifierPlaneCount);
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO: {
         const VkImageStencilUsageCreateInfo *stencil_usage =
            (const VkImageStencilUsageCreateInfo *)src;
         _mesa_sha1_update(&sha1_ctx, &stencil_usage->stencilUsage,
                           sizeof(VkImageUsageFlags));
         break;
      }
      default:
         /* Skip cache for unsupported pNext.
          * Use p_atomic_inc to match the hit/miss counters: this path runs
          * outside the cache mutex.
          */
         p_atomic_inc(&dev->image_reqs_cache.debug.cache_skip_count);
         return false;
      }
   }

   /* Hash contiguous block of VkImageCreateInfo starting with
    * VkImageCreateInfo->flags and ending with VkImageCreateInfo->sharingMode
    *
    * There's no padding involved in this hash block so no concern for C
    * enum sizes or alignment.
    */
   static const size_t create_image_hash_block_size =
      offsetof(VkImageCreateInfo, queueFamilyIndexCount) -
      offsetof(VkImageCreateInfo, flags);
   _mesa_sha1_update(&sha1_ctx, &create_info->flags,
                     create_image_hash_block_size);

   /* Follow pointer and hash pQueueFamilyIndices separately.
    * pQueueFamilyIndices is ignored if sharingMode is not
    * VK_SHARING_MODE_CONCURRENT
    */
   if (create_info->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      _mesa_sha1_update(
         &sha1_ctx, create_info->pQueueFamilyIndices,
         sizeof(uint32_t) * create_info->queueFamilyIndexCount);
   }

   _mesa_sha1_update(&sha1_ctx, &create_info->initialLayout,
                     sizeof(create_info->initialLayout));

   _mesa_sha1_final(&sha1_ctx, key);

   return true;
}
/* Set up the per-device image memory requirements cache.
 *
 * Leaves cache->ht NULL (cache disabled) when async image creation is
 * disabled via VN_PERF or when hash table creation fails.
 */
void
vn_image_reqs_cache_init(struct vn_device *dev)
{
   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;

   if (VN_PERF(NO_ASYNC_IMAGE_CREATE))
      return;

   cache->ht = _mesa_hash_table_create(NULL, vn_image_cache_key_hash_function,
                                       vn_image_cache_key_equal_function);
   /* The mutex only exists when the table does; fini checks ht first. */
   if (cache->ht)
      simple_mtx_init(&cache->mutex, mtx_plain);
}
void
vn_image_reqs_cache_fini(struct vn_device *dev)
{
const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
if (!cache->ht)
return;
hash_table_foreach(cache->ht, hash_entry) {
struct vn_image_reqs_cache_entry *cache_entry = hash_entry->data;
vk_free(alloc, cache_entry);
}
_mesa_hash_table_destroy(cache->ht, NULL);
simple_mtx_destroy(&cache->mutex);
if (VN_DEBUG(CACHE))
vn_image_cache_debug_dump(cache);
}
static bool
vn_image_init_reqs_from_cache(struct vn_device *dev,
struct vn_image *img,
uint8_t *key)
{
struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
assert(cache->ht);
simple_mtx_lock(&cache->mutex);
struct hash_entry *hash_entry = _mesa_hash_table_search(cache->ht, key);
if (hash_entry) {
struct vn_image_reqs_cache_entry *cache_entry = hash_entry->data;
for (uint32_t i = 0; i < cache_entry->plane_count; i++)
img->requirements[i] = cache_entry->requirements[i];
p_atomic_inc(&cache->debug.cache_hit_count);
} else {
p_atomic_inc(&cache->debug.cache_miss_count);
}
simple_mtx_unlock(&cache->mutex);
return !!hash_entry;
}
/* Insert the per-plane memory requirements for key into the cache.
 *
 * Best effort: silently does nothing on allocation failure. Must only
 * be called when the cache is enabled (cache->ht non-NULL).
 */
static void
vn_image_store_reqs_in_cache(struct vn_device *dev,
                             uint8_t *key,
                             uint32_t plane_count,
                             struct vn_image_memory_requirements *requirements)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_image_reqs_cache *cache = &dev->image_reqs_cache;
   struct vn_image_reqs_cache_entry *cache_entry;

   assert(cache->ht);

   cache_entry = vk_zalloc(alloc, sizeof(*cache_entry), VN_DEFAULT_ALIGN,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cache_entry)
      return;

   for (uint32_t i = 0; i < plane_count; i++)
      cache_entry->requirements[i] = requirements[i];

   memcpy(cache_entry->key, key, SHA1_DIGEST_LENGTH);
   cache_entry->plane_count = plane_count;

   simple_mtx_lock(&cache->mutex);
   if (!_mesa_hash_table_search(cache->ht, cache_entry->key)) {
      /* The table takes ownership of cache_entry (freed in fini). */
      _mesa_hash_table_insert(cache->ht, cache_entry->key, cache_entry);
      cache_entry = NULL;
   }
   simple_mtx_unlock(&cache->mutex);

   /* Another thread won the race to insert this key between our unlocked
    * create and this store; free our now-unneeded copy instead of leaking
    * it (the previous code dropped it on the floor).
    */
   if (cache_entry)
      vk_free(alloc, cache_entry);
}
static void
vn_image_init_memory_requirements(struct vn_image *img,
struct vn_device *dev,
@@ -42,7 +249,6 @@ vn_image_init_memory_requirements(struct vn_image *img,
{
assert(plane_count <= ARRAY_SIZE(img->requirements));
/* TODO add a per-device cache for the requirements */
for (uint32_t i = 0; i < plane_count; i++) {
img->requirements[i].memory.sType =
VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
@@ -186,7 +392,16 @@ vn_image_init(struct vn_device *dev,
img->sharing_mode = create_info->sharingMode;
/* TODO async */
/* Check if mem reqs in cache. If found, make async call */
uint8_t key[SHA1_DIGEST_LENGTH] = { 0 };
const bool cacheable = vn_image_get_image_reqs_key(dev, create_info, key);
if (cacheable && vn_image_init_reqs_from_cache(dev, img, key)) {
vn_async_vkCreateImage(dev->primary_ring, device, create_info, NULL,
&image);
return VK_SUCCESS;
}
result = vn_call_vkCreateImage(dev->primary_ring, device, create_info,
NULL, &image);
if (result != VK_SUCCESS)
@@ -195,6 +410,9 @@ vn_image_init(struct vn_device *dev,
const uint32_t plane_count = vn_image_get_plane_count(create_info);
vn_image_init_memory_requirements(img, dev, plane_count);
if (cacheable)
vn_image_store_reqs_in_cache(dev, key, plane_count, img->requirements);
return VK_SUCCESS;
}
@@ -828,7 +1046,7 @@ vn_GetDeviceImageMemoryRequirements(
{
struct vn_device *dev = vn_device_from_handle(device);
/* TODO per-device cache */
/* TODO integrate image memory requirements cache */
vn_call_vkGetDeviceImageMemoryRequirements(dev->primary_ring, device,
pInfo, pMemoryRequirements);
}

View File

@@ -23,6 +23,23 @@ struct vn_image_memory_requirements {
VkMemoryDedicatedRequirements dedicated;
};
/* One cached result: the per-plane memory requirements for a particular
 * VkImageCreateInfo, keyed by a SHA1 digest of its relevant fields.
 */
struct vn_image_reqs_cache_entry {
   /* Only the first plane_count elements are valid.
    * NOTE(review): 4 presumably matches the max plane count of
    * vn_image::requirements — confirm against vn_image.
    */
   struct vn_image_memory_requirements requirements[4];
   uint8_t plane_count;
   /* SHA1 digest of the image create params; doubles as the hash table key. */
   uint8_t key[SHA1_DIGEST_LENGTH];
};
/* Per-device cache of image memory requirements keyed by SHA1 of the
 * VkImageCreateInfo params.
 */
struct vn_image_reqs_cache {
   /* Maps SHA1 key -> vn_image_reqs_cache_entry; NULL when caching is
    * disabled.
    */
   struct hash_table *ht;
   /* Guards lookups and insertions into ht. */
   simple_mtx_t mutex;

   /* Statistics; dumped at cache fini when VN_DEBUG(CACHE) is set. */
   struct {
      uint32_t cache_hit_count;
      uint32_t cache_miss_count;
      uint32_t cache_skip_count;
   } debug;
};
struct vn_image_create_deferred_info {
VkImageCreateInfo create;
VkImageFormatListCreateInfo list;
@@ -108,4 +125,10 @@ vn_image_init_deferred(struct vn_device *dev,
const VkImageCreateInfo *create_info,
struct vn_image *img);
void
vn_image_reqs_cache_init(struct vn_device *dev);
void
vn_image_reqs_cache_fini(struct vn_device *dev);
#endif /* VN_IMAGE_H */