anv: Switch to the new common pipeline cache

This patch is intended to be somewhat minimal. There's a lot of cleanup
work that can be done, but we'll leave that to later patches.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13184>
Author:    Jason Ekstrand
Date:      2021-10-04 13:38:19 -05:00
Committed: Marge Bot
Parent:    c551f6c4df
Commit:    7f1e82306c
6 changed files with 188 additions and 636 deletions
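
For orientation, the shape of the switch, condensed from the hunks below (a sketch,
not a complete listing): anv_shader_bin now embeds a vk_pipeline_cache_object and
hooks into the common cache through a small ops table, and the per-device caches
become plain vk_pipeline_cache objects created and destroyed with the common helpers.

   /* anv_shader_bin plugs into the common cache via an ops table */
   static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
      .serialize   = anv_shader_bin_serialize,
      .deserialize = anv_shader_bin_deserialize,
      .destroy     = anv_shader_bin_destroy,
   };

   /* Device-level caches are now common vk_pipeline_cache objects;
    * BLORP's cache is force-enabled so it always has a real cache.
    */
   struct vk_pipeline_cache_create_info pcc_info = { .force_enable = true };
   device->blorp_cache = vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
   /* ... */
   vk_pipeline_cache_destroy(device->blorp_cache, NULL);

   /* Lookups and uploads go through the common object API */
   struct vk_pipeline_cache_object *object =
      vk_pipeline_cache_lookup_object(cache, key_data, key_size,
                                      &anv_shader_bin_ops, NULL);
   struct vk_pipeline_cache_object *cached =
      vk_pipeline_cache_add_object(cache, &shader->base);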

View File

@@ -31,11 +31,8 @@ lookup_blorp_shader(struct blorp_batch *batch,
struct blorp_context *blorp = batch->blorp;
struct anv_device *device = blorp->driver_ctx;
/* The default cache must be a real cache */
assert(device->default_pipeline_cache.cache);
struct anv_shader_bin *bin =
anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size);
anv_pipeline_cache_search(device->blorp_cache, key, key_size);
if (!bin)
return false;
@@ -61,16 +58,13 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
struct blorp_context *blorp = batch->blorp;
struct anv_device *device = blorp->driver_ctx;
/* The blorp cache must be a real cache */
assert(device->default_pipeline_cache.cache);
struct anv_pipeline_bind_map bind_map = {
.surface_count = 0,
.sampler_count = 0,
};
struct anv_shader_bin *bin =
anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, stage,
anv_pipeline_cache_upload_kernel(device->blorp_cache, stage,
key, key_size, kernel, kernel_size,
prog_data, prog_data_size,
NULL, 0, NULL, &bind_map);
@@ -89,9 +83,23 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
return true;
}
void
bool
anv_device_init_blorp(struct anv_device *device)
{
/* BLORP needs its own pipeline cache because, unlike the rest of ANV, it
* won't work at all without the cache. It depends on it for shaders to
* remain resident while it runs. Therefore, we need a special cache just
* for BLORP that's forced to always be enabled.
*/
struct vk_pipeline_cache_create_info pcc_info = {
.force_enable = true,
};
device->blorp_cache =
vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
if (device->blorp_cache == NULL)
return false;
const struct blorp_config config = {
.use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader,
};
@@ -125,11 +133,13 @@ anv_device_init_blorp(struct anv_device *device)
default:
unreachable("Unknown hardware generation");
}
return true;
}
void
anv_device_finish_blorp(struct anv_device *device)
{
vk_pipeline_cache_destroy(device->blorp_cache, NULL);
blorp_finish(&device->blorp);
}

View File

@@ -609,9 +609,7 @@ anv_physical_device_init_disk_cache(struct anv_physical_device *device)
const uint64_t driver_flags =
brw_get_compiler_config_value(device->compiler);
device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#else
device->disk_cache = NULL;
device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}
@@ -619,8 +617,10 @@ static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
if (device->disk_cache)
disk_cache_destroy(device->disk_cache);
if (device->vk.disk_cache) {
disk_cache_destroy(device->vk.disk_cache);
device->vk.disk_cache = NULL;
}
#else
assert(device->disk_cache == NULL);
#endif
@@ -925,6 +925,8 @@ anv_physical_device_try_create(struct anv_instance *instance,
assert(st_idx <= ARRAY_SIZE(device->sync_types));
device->vk.supported_sync_types = device->sync_types;
device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
device->always_use_bindless =
env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
@@ -1134,9 +1136,6 @@ VkResult anv_CreateInstance(
instance->physical_devices_enumerated = false;
list_inithead(&instance->physical_devices);
instance->pipeline_cache_enabled =
env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
anv_init_dri_options(instance);
@@ -3436,14 +3435,22 @@ VkResult anv_CreateDevice(
if (result != VK_SUCCESS)
goto fail_trivial_batch_bo_and_scratch_pool;
anv_pipeline_cache_init(&device->default_pipeline_cache, device,
true /* cache_enabled */, false /* external_sync */);
struct vk_pipeline_cache_create_info pcc_info = { };
device->default_pipeline_cache =
vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
if (!device->default_pipeline_cache) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_trivial_batch_bo_and_scratch_pool;
}
result = anv_device_init_rt_shaders(device);
if (result != VK_SUCCESS)
goto fail_default_pipeline_cache;
anv_device_init_blorp(device);
if (!anv_device_init_blorp(device)) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_rt_shaders;
}
anv_device_init_border_colors(device);
@@ -3455,8 +3462,10 @@ VkResult anv_CreateDevice(
return VK_SUCCESS;
fail_rt_shaders:
anv_device_finish_rt_shaders(device);
fail_default_pipeline_cache:
anv_pipeline_cache_finish(&device->default_pipeline_cache);
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
fail_trivial_batch:
@@ -3528,7 +3537,7 @@ void anv_DestroyDevice(
anv_device_finish_rt_shaders(device);
anv_pipeline_cache_finish(&device->default_pipeline_cache);
vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
#ifdef HAVE_VALGRIND
/* We only need to free these to prevent valgrind errors. The backing

View File

@@ -697,7 +697,7 @@ anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *p
static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
void *mem_ctx,
struct anv_pipeline_stage *stage)
{
@@ -1418,7 +1418,7 @@ anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
static VkResult
anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *info,
const VkPipelineRenderingCreateInfo *rendering_info)
{
@@ -1586,7 +1586,8 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
*/
assert(found < __builtin_popcount(pipeline->active_stages));
vk_perf(VK_LOG_OBJS(&cache->base),
vk_perf(VK_LOG_OBJS(cache ? &cache->base :
&pipeline->base.device->vk.base),
"Found a partial pipeline in the cache. This is "
"most likely caused by an incomplete pipeline cache "
"import or export");
@@ -1903,7 +1904,7 @@ fail:
VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const struct vk_shader_module *module,
const char *entrypoint,
@@ -2395,7 +2396,7 @@ vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *
VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkPipelineRenderingCreateInfo *rendering_info,
const VkAllocationCallbacks *alloc)
@@ -2568,7 +2569,7 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
nir_shader *nir,
struct anv_pipeline_stage *stage,
struct anv_shader_bin **shader_out,
@@ -2777,7 +2778,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *info,
struct anv_pipeline_stage *stages,
uint32_t *stack_max)
@@ -2820,7 +2821,7 @@ anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *info)
{
const struct intel_device_info *devinfo = &pipeline->base.device->info;
@@ -3040,7 +3041,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
},
};
device->rt_trampoline =
anv_device_search_for_kernel(device, &device->default_pipeline_cache,
anv_device_search_for_kernel(device, device->default_pipeline_cache,
&trampoline_key, sizeof(trampoline_key),
&cache_hit);
if (device->rt_trampoline == NULL) {
@@ -3070,7 +3071,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
device->rt_trampoline =
anv_device_upload_kernel(device, &device->default_pipeline_cache,
anv_device_upload_kernel(device, device->default_pipeline_cache,
MESA_SHADER_COMPUTE,
&trampoline_key, sizeof(trampoline_key),
tramp_data,
@@ -3092,7 +3093,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
.name = "rt-trivial-ret",
};
device->rt_trivial_return =
anv_device_search_for_kernel(device, &device->default_pipeline_cache,
anv_device_search_for_kernel(device, device->default_pipeline_cache,
&return_key, sizeof(return_key),
&cache_hit);
if (device->rt_trivial_return == NULL) {
@@ -3118,7 +3119,7 @@ anv_device_init_rt_shaders(struct anv_device *device)
brw_compile_bs(device->physical->compiler, tmp_ctx, &params);
device->rt_trivial_return =
anv_device_upload_kernel(device, &device->default_pipeline_cache,
anv_device_upload_kernel(device, device->default_pipeline_cache,
MESA_SHADER_CALLABLE,
&return_key, sizeof(return_key),
return_data, return_prog_data.base.program_size,
@@ -3148,7 +3149,7 @@ anv_device_finish_rt_shaders(struct anv_device *device)
VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *alloc)
{

View File

@@ -31,6 +31,39 @@
#include "nir/nir_xfb_info.h"
#include "vulkan/util/vk_util.h"
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob);
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob);
static void
anv_shader_bin_destroy(struct vk_pipeline_cache_object *object)
{
struct anv_device *device =
container_of(object->device, struct anv_device, vk);
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_pipeline_cache_object_finish(&shader->base);
vk_free(&device->vk.alloc, shader);
}
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
.serialize = anv_shader_bin_serialize,
.deserialize = anv_shader_bin_deserialize,
.destroy = anv_shader_bin_destroy,
};
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
&anv_shader_bin_ops,
NULL
};
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
gl_shader_stage stage,
@@ -44,8 +77,7 @@ anv_shader_bin_create(struct anv_device *device,
{
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
VK_MULTIALLOC_DECL_SIZE(&ma, struct anv_shader_bin_key, key,
sizeof(*key) + key_size);
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
prog_data_size);
VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
@@ -65,14 +97,12 @@ anv_shader_bin_create(struct anv_device *device,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
return NULL;
shader->ref_cnt = 1;
memcpy(obj_key_data, key_data, key_size);
vk_pipeline_cache_object_init(&device->vk, &shader->base,
&anv_shader_bin_ops, obj_key_data, key_size);
shader->stage = stage;
key->size = key_size;
memcpy(key->data, key_data, key_size);
shader->key = key;
shader->kernel =
anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
memcpy(shader->kernel.map, kernel_data, kernel_size);
@@ -149,23 +179,14 @@ anv_shader_bin_create(struct anv_device *device,
return shader;
}
void
anv_shader_bin_destroy(struct anv_device *device,
struct anv_shader_bin *shader)
{
assert(shader->ref_cnt == 0);
anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
vk_free(&device->vk.alloc, shader);
}
static bool
anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
blob_write_uint32(blob, shader->stage);
struct anv_shader_bin *shader =
container_of(object, struct anv_shader_bin, base);
blob_write_uint32(blob, shader->key->size);
blob_write_bytes(blob, shader->key->data, shader->key->size);
blob_write_uint32(blob, shader->stage);
blob_write_uint32(blob, shader->kernel_size);
blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
@@ -209,14 +230,15 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
return !blob->out_of_memory;
}
static struct anv_shader_bin *
anv_shader_bin_create_from_blob(struct anv_device *device,
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_device *vk_device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
gl_shader_stage stage = blob_read_uint32(blob);
struct anv_device *device =
container_of(vk_device, struct anv_device, vk);
uint32_t key_size = blob_read_uint32(blob);
const void *key_data = blob_read_bytes(blob, key_size);
gl_shader_stage stage = blob_read_uint32(blob);
uint32_t kernel_size = blob_read_uint32(blob);
const void *kernel_data = blob_read_bytes(blob, kernel_size);
@@ -259,205 +281,33 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
if (blob->overrun)
return NULL;
return anv_shader_bin_create(device, stage,
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
&prog_data.base, prog_data_size,
stats, num_stats, xfb_info, &bind_map);
}
/* Remaining work:
*
* - Compact binding table layout so it's tight and not dependent on
* descriptor set layout.
*
* - Review prog_data struct for size and cacheability: struct
* brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
* bit quantities etc; use bit fields for all bools, eg dual_src_blend.
*/
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
const struct anv_shader_bin_key *key = void_key;
return _mesa_hash_data(key->data, key->size);
}
static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
const struct anv_shader_bin_key *a = void_a, *b = void_b;
if (a->size != b->size)
return false;
return memcmp(a->data, b->data, a->size) == 0;
}
static uint32_t
sha1_hash_func(const void *sha1)
{
return _mesa_hash_data(sha1, 20);
}
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
return memcmp(sha1_a, sha1_b, 20) == 0;
}
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device,
bool cache_enabled,
bool external_sync)
{
vk_object_base_init(&device->vk, &cache->base,
VK_OBJECT_TYPE_PIPELINE_CACHE);
cache->device = device;
cache->external_sync = external_sync;
pthread_mutex_init(&cache->mutex, NULL);
if (cache_enabled) {
cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
shader_bin_key_compare_func);
cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
sha1_compare_func);
} else {
cache->cache = NULL;
cache->nir_cache = NULL;
}
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
pthread_mutex_destroy(&cache->mutex);
if (cache->cache) {
/* This is a bit unfortunate. In order to keep things from randomly
* going away, the shader cache has to hold a reference to all shader
* binaries it contains. We unref them when we destroy the cache.
*/
hash_table_foreach(cache->cache, entry)
anv_shader_bin_unref(cache->device, entry->data);
_mesa_hash_table_destroy(cache->cache, NULL);
}
if (cache->nir_cache) {
hash_table_foreach(cache->nir_cache, entry)
ralloc_free(entry->data);
_mesa_hash_table_destroy(cache->nir_cache, NULL);
}
vk_object_base_finish(&cache->base);
}
static struct anv_shader_bin *
anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
const void *key_data, uint32_t key_size)
{
uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
struct anv_shader_bin_key *key = (void *)vla;
key->size = key_size;
memcpy(key->data, key_data, key_size);
struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
if (entry)
return entry->data;
else
if (shader == NULL)
return NULL;
}
static inline void
anv_cache_lock(struct anv_pipeline_cache *cache)
{
if (!cache->external_sync)
pthread_mutex_lock(&cache->mutex);
}
static inline void
anv_cache_unlock(struct anv_pipeline_cache *cache)
{
if (!cache->external_sync)
pthread_mutex_unlock(&cache->mutex);
return &shader->base;
}
struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
anv_pipeline_cache_search(struct vk_pipeline_cache *cache,
const void *key_data, uint32_t key_size)
{
if (!cache->cache)
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&anv_shader_bin_ops, NULL);
if (object == NULL)
return NULL;
anv_cache_lock(cache);
struct anv_shader_bin *shader =
anv_pipeline_cache_search_locked(cache, key_data, key_size);
anv_cache_unlock(cache);
/* We increment refcount before handing it to the caller */
if (shader)
anv_shader_bin_ref(shader);
return shader;
}
static void
anv_pipeline_cache_add_shader_bin(struct anv_pipeline_cache *cache,
struct anv_shader_bin *bin)
{
if (!cache->cache)
return;
anv_cache_lock(cache);
struct hash_entry *entry = _mesa_hash_table_search(cache->cache, bin->key);
if (entry == NULL) {
/* Take a reference for the cache */
anv_shader_bin_ref(bin);
_mesa_hash_table_insert(cache->cache, bin->key, bin);
}
anv_cache_unlock(cache);
}
static struct anv_shader_bin *
anv_pipeline_cache_add_shader_locked(struct anv_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data,
uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
const struct brw_compile_stats *stats,
uint32_t num_stats,
const nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map)
{
struct anv_shader_bin *shader =
anv_pipeline_cache_search_locked(cache, key_data, key_size);
if (shader)
return shader;
struct anv_shader_bin *bin =
anv_shader_bin_create(cache->device, stage,
key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
stats, num_stats, xfb_info, bind_map);
if (!bin)
return NULL;
_mesa_hash_table_insert(cache->cache, bin->key, bin);
return bin;
return container_of(object, struct anv_shader_bin, base);
}
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
anv_pipeline_cache_upload_kernel(struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
@@ -468,257 +318,48 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map)
{
if (cache->cache) {
anv_cache_lock(cache);
struct anv_device *device =
container_of(cache->base.device, struct anv_device, vk);
struct anv_shader_bin *bin =
anv_pipeline_cache_add_shader_locked(cache, stage, key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
stats, num_stats,
xfb_info, bind_map);
anv_cache_unlock(cache);
/* We increment refcount before handing it to the caller */
if (bin)
anv_shader_bin_ref(bin);
return bin;
} else {
/* In this case, we're not caching it so the caller owns it entirely */
return anv_shader_bin_create(cache->device, stage,
struct anv_shader_bin *shader =
anv_shader_bin_create(device, stage,
key_data, key_size,
kernel_data, kernel_size,
prog_data, prog_data_size,
stats, num_stats,
xfb_info, bind_map);
}
}
if (shader == NULL)
return NULL;
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
const void *data, size_t size)
{
struct anv_device *device = cache->device;
struct anv_physical_device *pdevice = device->physical;
struct vk_pipeline_cache_object *cached =
vk_pipeline_cache_add_object(cache, &shader->base);
if (cache->cache == NULL)
return;
struct blob_reader blob;
blob_reader_init(&blob, data, size);
struct vk_pipeline_cache_header header;
blob_copy_bytes(&blob, &header, sizeof(header));
uint32_t count = blob_read_uint32(&blob);
if (blob.overrun)
return;
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x8086)
return;
if (header.device_id != device->info.pci_device_id)
return;
if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
return;
for (uint32_t i = 0; i < count; i++) {
struct anv_shader_bin *bin =
anv_shader_bin_create_from_blob(device, &blob);
if (!bin)
break;
_mesa_hash_table_insert(cache->cache, bin->key, bin);
}
}
VkResult anv_CreatePipelineCache(
VkDevice _device,
const VkPipelineCacheCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipelineCache* pPipelineCache)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
cache = vk_alloc2(&device->vk.alloc, pAllocator,
sizeof(*cache), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cache == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
anv_pipeline_cache_init(cache, device,
device->physical->instance->pipeline_cache_enabled,
pCreateInfo->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT);
if (pCreateInfo->initialDataSize > 0)
anv_pipeline_cache_load(cache,
pCreateInfo->pInitialData,
pCreateInfo->initialDataSize);
*pPipelineCache = anv_pipeline_cache_to_handle(cache);
return VK_SUCCESS;
}
void anv_DestroyPipelineCache(
VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
if (!cache)
return;
anv_pipeline_cache_finish(cache);
vk_free2(&device->vk.alloc, pAllocator, cache);
}
VkResult anv_GetPipelineCacheData(
VkDevice _device,
VkPipelineCache _cache,
size_t* pDataSize,
void* pData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
struct blob blob;
if (pData) {
blob_init_fixed(&blob, pData, *pDataSize);
} else {
blob_init_fixed(&blob, NULL, SIZE_MAX);
}
struct vk_pipeline_cache_header header = {
.header_size = sizeof(struct vk_pipeline_cache_header),
.header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
.vendor_id = 0x8086,
.device_id = device->info.pci_device_id,
};
memcpy(header.uuid, device->physical->pipeline_cache_uuid, VK_UUID_SIZE);
blob_write_bytes(&blob, &header, sizeof(header));
uint32_t count = 0;
intptr_t count_offset = blob_reserve_uint32(&blob);
if (count_offset < 0) {
*pDataSize = 0;
blob_finish(&blob);
return VK_INCOMPLETE;
}
VkResult result = VK_SUCCESS;
if (cache->cache) {
hash_table_foreach(cache->cache, entry) {
struct anv_shader_bin *shader = entry->data;
size_t save_size = blob.size;
if (!anv_shader_bin_write_to_blob(shader, &blob)) {
/* If it fails reset to the previous size and bail */
blob.size = save_size;
result = VK_INCOMPLETE;
break;
}
count++;
}
}
blob_overwrite_uint32(&blob, count_offset, count);
*pDataSize = blob.size;
blob_finish(&blob);
return result;
}
VkResult anv_MergePipelineCaches(
VkDevice _device,
VkPipelineCache destCache,
uint32_t srcCacheCount,
const VkPipelineCache* pSrcCaches)
{
ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
if (!dst->cache)
return VK_SUCCESS;
for (uint32_t i = 0; i < srcCacheCount; i++) {
ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
if (!src->cache)
continue;
hash_table_foreach(src->cache, entry) {
struct anv_shader_bin *bin = entry->data;
assert(bin);
if (_mesa_hash_table_search(dst->cache, bin->key))
continue;
anv_shader_bin_ref(bin);
_mesa_hash_table_insert(dst->cache, bin->key, bin);
}
}
return VK_SUCCESS;
return container_of(cached, struct anv_shader_bin, base);
}
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
bool *user_cache_hit)
{
struct anv_shader_bin *bin;
*user_cache_hit = false;
if (cache) {
bin = anv_pipeline_cache_search(cache, key_data, key_size);
if (bin) {
*user_cache_hit = cache != &device->default_pipeline_cache;
return bin;
}
}
#ifdef ENABLE_SHADER_CACHE
struct disk_cache *disk_cache = device->physical->disk_cache;
if (disk_cache && device->physical->instance->pipeline_cache_enabled) {
cache_key cache_key;
disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
size_t buffer_size;
uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
if (buffer) {
struct blob_reader blob;
blob_reader_init(&blob, buffer, buffer_size);
bin = anv_shader_bin_create_from_blob(device, &blob);
free(buffer);
if (bin) {
if (cache)
anv_pipeline_cache_add_shader_bin(cache, bin);
return bin;
}
}
}
#endif
if (cache == NULL)
return NULL;
struct vk_pipeline_cache_object *object =
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
&anv_shader_bin_ops, user_cache_hit);
if (object == NULL)
return NULL;
return container_of(object, struct anv_shader_bin, base);
}
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
@@ -747,109 +388,33 @@ anv_device_upload_kernel(struct anv_device *device,
if (bin == NULL)
return NULL;
#ifdef ENABLE_SHADER_CACHE
struct disk_cache *disk_cache = device->physical->disk_cache;
if (disk_cache) {
struct blob binary;
blob_init(&binary);
if (anv_shader_bin_write_to_blob(bin, &binary)) {
cache_key cache_key;
disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
}
blob_finish(&binary);
}
#endif
return bin;
}
struct serialized_nir {
unsigned char sha1_key[20];
size_t size;
char data[0];
};
#define SHA1_KEY_SIZE 20
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const nir_shader_compiler_options *nir_options,
unsigned char sha1_key[20],
unsigned char sha1_key[SHA1_KEY_SIZE],
void *mem_ctx)
{
if (cache && cache->nir_cache) {
const struct serialized_nir *snir = NULL;
if (cache == NULL)
return false;
anv_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->nir_cache, sha1_key);
if (entry)
snir = entry->data;
anv_cache_unlock(cache);
if (snir) {
struct blob_reader blob;
blob_reader_init(&blob, snir->data, snir->size);
nir_shader *nir = nir_deserialize(mem_ctx, nir_options, &blob);
if (blob.overrun) {
ralloc_free(nir);
} else {
return nir;
}
}
}
return NULL;
return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
nir_options, NULL, mem_ctx);
}
void
anv_device_upload_nir(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const struct nir_shader *nir,
unsigned char sha1_key[20])
unsigned char sha1_key[SHA1_KEY_SIZE])
{
if (cache && cache->nir_cache) {
anv_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->nir_cache, sha1_key);
anv_cache_unlock(cache);
if (entry)
if (cache == NULL)
return;
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, false);
if (blob.out_of_memory) {
blob_finish(&blob);
return;
}
anv_cache_lock(cache);
/* Because ralloc isn't thread-safe, we have to do all this inside the
* lock. We could unlock for the big memcpy but it's probably not worth
* the hassle.
*/
entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
if (entry) {
blob_finish(&blob);
anv_cache_unlock(cache);
return;
}
struct serialized_nir *snir =
ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
memcpy(snir->sha1_key, sha1_key, 20);
snir->size = blob.size;
memcpy(snir->data, blob.data, blob.size);
blob_finish(&blob);
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
anv_cache_unlock(cache);
}
vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
}

View File

@@ -76,6 +76,7 @@
#include "vk_framebuffer.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_pipeline_cache.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
@@ -1047,8 +1048,6 @@ struct anv_physical_device {
struct vk_sync_timeline_type sync_timeline_type;
const struct vk_sync_type * sync_types[4];
struct disk_cache * disk_cache;
struct wsi_device wsi_device;
int local_fd;
bool has_local;
@@ -1078,8 +1077,6 @@ struct anv_instance {
bool physical_devices_enumerated;
struct list_head physical_devices;
bool pipeline_cache_enabled;
struct driOptionCache dri_options;
struct driOptionCache available_dri_options;
};
@@ -1104,32 +1101,16 @@ struct anv_queue {
struct intel_ds_queue * ds;
};
struct anv_pipeline_cache {
struct vk_object_base base;
struct anv_device * device;
pthread_mutex_t mutex;
struct hash_table * nir_cache;
struct hash_table * cache;
bool external_sync;
};
struct nir_xfb_info;
struct anv_pipeline_bind_map;
void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device,
bool cache_enabled,
bool external_sync);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
extern const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2];
struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
anv_pipeline_cache_search(struct vk_pipeline_cache *cache,
const void *key, uint32_t key_size);
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
anv_pipeline_cache_upload_kernel(struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
@@ -1142,13 +1123,13 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const void *key_data, uint32_t key_size,
bool *user_cache_bit);
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
gl_shader_stage stage,
const void *key_data, uint32_t key_size,
const void *kernel_data, uint32_t kernel_size,
@@ -1164,14 +1145,14 @@ struct nir_shader_compiler_options;
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const struct nir_shader_compiler_options *nir_options,
unsigned char sha1_key[20],
void *mem_ctx);
void
anv_device_upload_nir(struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const struct nir_shader *nir,
unsigned char sha1_key[20]);
@@ -1221,7 +1202,8 @@ struct anv_device {
struct anv_bo * trivial_batch_bo;
struct anv_state null_surface_state;
struct anv_pipeline_cache default_pipeline_cache;
struct vk_pipeline_cache * default_pipeline_cache;
struct vk_pipeline_cache * blorp_cache;
struct blorp_context blorp;
struct anv_state border_colors;
@@ -1342,7 +1324,7 @@ anv_mocs(const struct anv_device *device,
return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
}
void anv_device_init_blorp(struct anv_device *device);
bool anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
enum anv_bo_alloc_flags {
@@ -3251,18 +3233,11 @@ struct anv_pipeline_bind_map {
struct anv_push_range push_ranges[4];
};
struct anv_shader_bin_key {
uint32_t size;
uint8_t data[0];
};
struct anv_shader_bin {
uint32_t ref_cnt;
struct vk_pipeline_cache_object base;
gl_shader_stage stage;
const struct anv_shader_bin_key *key;
struct anv_state kernel;
uint32_t kernel_size;
@@ -3288,22 +3263,16 @@ anv_shader_bin_create(struct anv_device *device,
const struct nir_xfb_info *xfb_info,
const struct anv_pipeline_bind_map *bind_map);
void
anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
static inline void
anv_shader_bin_ref(struct anv_shader_bin *shader)
{
assert(shader && shader->ref_cnt >= 1);
p_atomic_inc(&shader->ref_cnt);
vk_pipeline_cache_object_ref(&shader->base);
}
static inline void
anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
{
assert(shader && shader->ref_cnt >= 1);
if (p_atomic_dec_zero(&shader->ref_cnt))
anv_shader_bin_destroy(device, shader);
vk_pipeline_cache_object_unref(&shader->base);
}
#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \
@@ -3611,14 +3580,14 @@ anv_pipeline_finish(struct anv_pipeline *pipeline,
VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkPipelineRenderingCreateInfo *rendering_info,
const VkAllocationCallbacks *alloc);
VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkComputePipelineCreateInfo *info,
const struct vk_shader_module *module,
const char *entrypoint,
@@ -3627,7 +3596,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
struct anv_device *device,
struct anv_pipeline_cache *cache,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *alloc);
@@ -4595,8 +4564,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, vk.base, VkImageView,
VK_OBJECT_TYPE_IMAGE_VIEW);
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,

View File

@@ -2780,7 +2780,7 @@ emit_mesh_state(struct anv_graphics_pipeline *pipeline)
static VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
struct anv_pipeline_cache * cache,
struct vk_pipeline_cache * cache,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
@@ -2792,8 +2792,8 @@ genX(graphics_pipeline_create)(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
/* Use the default pipeline cache if none is specified */
if (cache == NULL && device->physical->instance->pipeline_cache_enabled)
cache = &device->default_pipeline_cache;
if (cache == NULL)
cache = device->default_pipeline_cache;
pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -3088,7 +3088,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
static VkResult
compute_pipeline_create(
VkDevice _device,
struct anv_pipeline_cache * cache,
struct vk_pipeline_cache * cache,
const VkComputePipelineCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
@@ -3100,8 +3100,8 @@ compute_pipeline_create(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
/* Use the default pipeline cache if none is specified */
if (cache == NULL && device->physical->instance->pipeline_cache_enabled)
cache = &device->default_pipeline_cache;
if (cache == NULL)
cache = device->default_pipeline_cache;
pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -3147,7 +3147,7 @@ VkResult genX(CreateGraphicsPipelines)(
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;
@@ -3186,7 +3186,7 @@ VkResult genX(CreateComputePipelines)(
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;
@@ -3234,7 +3234,7 @@ assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo
static VkResult
ray_tracing_pipeline_create(
VkDevice _device,
struct anv_pipeline_cache * cache,
struct vk_pipeline_cache * cache,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
@@ -3245,8 +3245,8 @@ ray_tracing_pipeline_create(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
/* Use the default pipeline cache if none is specified */
if (cache == NULL && device->physical->instance->pipeline_cache_enabled)
cache = &device->default_pipeline_cache;
if (cache == NULL)
cache = device->default_pipeline_cache;
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
@@ -3370,7 +3370,7 @@ genX(CreateRayTracingPipelinesKHR)(
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
VK_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;