diff --git a/src/microsoft/vulkan/dzn_cmd_buffer.c b/src/microsoft/vulkan/dzn_cmd_buffer.c index 865fcd6e607..39cf2a6d888 100644 --- a/src/microsoft/vulkan/dzn_cmd_buffer.c +++ b/src/microsoft/vulkan/dzn_cmd_buffer.c @@ -3302,17 +3302,10 @@ dzn_cmd_buffer_update_heaps(struct dzn_cmd_buffer *cmdbuf, uint32_t bindpoint) uint32_t dynamic_buffer_count = pipeline->sets[s].dynamic_buffer_count; for (uint32_t o = 0; o < dynamic_buffer_count; o++) { const struct dzn_buffer_desc *bdesc = &set->dynamic_buffers[o]; - struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary]; - if (*bdesc->bindless_descriptor_slot >= 0) { - uint32_t embedded_offset = (bdesc->offset / D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT) * D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; - uint32_t additional_offset = bdesc->offset - embedded_offset; - map_entry->buffer_idx = *bdesc->bindless_descriptor_slot; - map_entry->buffer_offset = additional_offset + desc_state->sets[s].dynamic_offsets[o]; - } else { - map_entry->buffer_idx = bdesc->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ? - bdesc->buffer->uav_bindless_slot : bdesc->buffer->cbv_bindless_slot; - map_entry->buffer_offset = bdesc->offset + desc_state->sets[s].dynamic_offsets[o]; - } + volatile struct dxil_spirv_bindless_entry *map_entry = &map[pipeline->sets[s].dynamic_buffer_heap_offsets[o].primary]; + struct dzn_buffer_desc bdesc_updated = *bdesc; + bdesc_updated.offset += cmdbuf->state.bindpoint[bindpoint].desc_state.sets[s].dynamic_offsets[o]; + dzn_buffer_get_bindless_buffer_descriptor(device, &bdesc_updated, map_entry); } } diff --git a/src/microsoft/vulkan/dzn_descriptor_set.c b/src/microsoft/vulkan/dzn_descriptor_set.c index 0e3a3cda273..6385e73f910 100644 --- a/src/microsoft/vulkan/dzn_descriptor_set.c +++ b/src/microsoft/vulkan/dzn_descriptor_set.c @@ -1084,7 +1084,7 @@ dzn_descriptor_heap_write_buffer_desc(struct dzn_device *device, } static void -dzn_bindless_descriptor_set_write_sampler_desc(struct dxil_spirv_bindless_entry *map, +dzn_bindless_descriptor_set_write_sampler_desc(volatile struct dxil_spirv_bindless_entry *map, uint32_t desc_offset, const struct dzn_sampler *sampler) { @@ -1092,7 +1092,7 @@ dzn_bindless_descriptor_set_write_sampler_desc(struct dxil_spirv_bindless_entry } static void -dzn_bindless_descriptor_set_write_image_view_desc(struct dxil_spirv_bindless_entry *map, +dzn_bindless_descriptor_set_write_image_view_desc(volatile struct dxil_spirv_bindless_entry *map, VkDescriptorType type, uint32_t desc_offset, const struct dzn_image_view *iview) @@ -1112,7 +1112,7 @@ dzn_bindless_descriptor_set_write_image_view_desc(struct dxil_spirv_bindless_ent } static void -dzn_bindless_descriptor_set_write_buffer_view_desc(struct dxil_spirv_bindless_entry *map, +dzn_bindless_descriptor_set_write_buffer_view_desc(volatile struct dxil_spirv_bindless_entry *map, VkDescriptorType type, uint32_t desc_offset, const struct dzn_buffer_view *bview) @@ -1129,69 +1129,160 @@ dzn_bindless_descriptor_set_write_buffer_view_desc(struct dxil_spirv_bindless_en } } -static bool -need_custom_buffer_descriptor(struct dzn_device *device, const struct dzn_buffer_desc *info) -{ - if (info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || - info->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { - uint64_t upper_bound = info->range == VK_WHOLE_SIZE ? info->buffer->size : - info->offset + info->range; - /* The buffer's default CBV only addresses the first 64KiB. If this view needs a higher - * upper bound, then we need a custom descriptor. */ - if (upper_bound > D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4 * sizeof(float)) { - /* It's invalid to use WHOLE_SIZE if it would address more than 64KiB, so if they're - * using it on a buffer that large, it better have an offset. */ - assert(info->range != VK_WHOLE_SIZE || info->offset > 0); - return true; - } - } - /* Addressing the whole buffer, no custom descriptor needed. */ - if (info->range == VK_WHOLE_SIZE || - info->offset + info->range == info->buffer->size) - return false; - /* We need proper out-of-bounds behavior, we need a descriptor with the right size. */ - if (device->vk.enabled_features.robustBufferAccess || - device->vk.enabled_features.robustBufferAccess2) - return true; - /* We can just apply an offset in the shader */ - return false; -} - static void dzn_bindless_descriptor_set_write_buffer_desc(struct dzn_device *device, - struct dxil_spirv_bindless_entry *map, + volatile struct dxil_spirv_bindless_entry *map, uint32_t desc_offset, const struct dzn_buffer_desc *info) { - if (!need_custom_buffer_descriptor(device, info)) { - switch (info->type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - map[desc_offset].buffer_idx = info->buffer->cbv_bindless_slot; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - map[desc_offset].buffer_idx = info->buffer->uav_bindless_slot; - break; - default: - unreachable("Unexpected descriptor type"); - } - map[desc_offset].buffer_offset = info->offset; - if (*info->bindless_descriptor_slot >= 0) - dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, *info->bindless_descriptor_slot); - } else { - if (*info->bindless_descriptor_slot < 0) - *info->bindless_descriptor_slot = - dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - uint32_t offset = (info->offset / D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT) * D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; + dzn_buffer_get_bindless_buffer_descriptor(device, info, &map[desc_offset]); +} - struct dzn_buffer_desc local_info = *info; - local_info.offset = offset; - info = &local_info; +static bool +need_custom_buffer_descriptor(struct dzn_device *device, const struct dzn_buffer_desc *info, + struct dzn_buffer_desc *out_desc) +{ + uint64_t upper_bound = info->range == VK_WHOLE_SIZE ? + info->buffer->size : + info->offset + info->range; + /* Addressing the whole buffer, no custom descriptor needed. */ + if (upper_bound == info->buffer->size) + return false; - dzn_descriptor_heap_write_buffer_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap, - *info->bindless_descriptor_slot, info->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info); - map[desc_offset].buffer_idx = *info->bindless_descriptor_slot; - map[desc_offset].buffer_offset = info->offset - offset; + *out_desc = *info; + uint32_t upper_bound_default_descriptor; + uint32_t size_align, offset_align; + /* Canonicalize descriptor types for hash/compare, and get size/align info */ + switch (info->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + out_desc->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + FALLTHROUGH; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + upper_bound_default_descriptor = D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float) * 4; + size_align = offset_align = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + out_desc->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + FALLTHROUGH; + default: + upper_bound_default_descriptor = UINT32_MAX; + offset_align = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; + size_align = 4; + break; } + + out_desc->range = ALIGN_POT(upper_bound, size_align); + if (out_desc->range <= upper_bound_default_descriptor) { + /* Use a larger descriptor with the hope that we'll be more likely + * to be able to re-use it. The shader is already doing the offset + * add, so there's not really a cost to putting a nonzero value there. */ + out_desc->offset = 0; + } else { + /* At least align-down the base offset to ensure that's a valid view to create */ + out_desc->offset = (out_desc->offset / offset_align) * offset_align; + out_desc->range -= out_desc->offset; + } + return true; +} + +static uint32_t +hash_buffer_desc(const void *data) +{ + const struct dzn_buffer_desc *bdesc = data; + /* Avoid any potential padding in the struct */ + uint32_t type_hash = _mesa_hash_data(&bdesc->type, sizeof(bdesc->type)); + return _mesa_hash_data_with_seed(&bdesc->range, sizeof(bdesc->range) * 2, type_hash); +} + +static bool +compare_buffer_desc(const void *_a, const void *_b) +{ + const struct dzn_buffer_desc *a = _a, *b = _b; + assert(a->buffer == b->buffer); + /* Avoid any potential padding in the struct */ + return a->type == b->type && + a->range == b->range && + a->offset == b->offset; +} + +static int +handle_custom_descriptor_cache(struct dzn_device *device, + const struct dzn_buffer_desc *stack_desc) +{ + /* Buffer lock is held */ + + /* Initialize hash map */ + if (!stack_desc->buffer->custom_views) + stack_desc->buffer->custom_views = _mesa_hash_table_create(NULL, hash_buffer_desc, compare_buffer_desc); + + if (!stack_desc->buffer->custom_views) + return -1; + + uint32_t hash = hash_buffer_desc(stack_desc); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(stack_desc->buffer->custom_views, hash, stack_desc); + if (entry) + return (int)(intptr_t)entry->data; + + int slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + if (slot < 0) + return slot; + + struct dzn_buffer_desc *key = malloc(sizeof(*stack_desc)); + if (!key) { + dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, slot); + return -1; + } + + *key = *stack_desc; + entry = _mesa_hash_table_insert_pre_hashed(stack_desc->buffer->custom_views, hash, key, (void *)(intptr_t)slot); + if (!entry) { + free(key); + dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, slot); + return -1; + } + + dzn_descriptor_heap_write_buffer_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap, + slot, key->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, key); + return slot; +} + +void +dzn_buffer_get_bindless_buffer_descriptor(struct dzn_device *device, + const struct dzn_buffer_desc *bdesc, + volatile struct dxil_spirv_bindless_entry *out) +{ + int slot; + uint32_t offset = bdesc->offset; + switch (bdesc->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + slot = bdesc->buffer->cbv_bindless_slot; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + slot = bdesc->buffer->uav_bindless_slot; + break; + default: + unreachable("Unexpected descriptor type"); + } + + struct dzn_buffer_desc local_desc; + if (need_custom_buffer_descriptor(device, bdesc, &local_desc)) { + mtx_lock(&bdesc->buffer->bindless_view_lock); + + int new_slot = handle_custom_descriptor_cache(device, &local_desc); + if (new_slot >= 0) { + slot = new_slot; + offset = bdesc->offset - local_desc.offset; + } + /* In the case of cache failure, just use the base view and try + * shader-based offsetting, it'll probably still work in most cases. */ + + mtx_unlock(&bdesc->buffer->bindless_view_lock); + } + + out->buffer_idx = slot; + out->buffer_offset = offset; } void @@ -1369,23 +1460,6 @@ dzn_descriptor_set_write_dynamic_buffer_desc(struct dzn_device *device, if (dynamic_buffer_idx == ~0) return; - if (device->bindless) { - if (!need_custom_buffer_descriptor(device, info)) { - if (*info->bindless_descriptor_slot >= 0) - dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, *info->bindless_descriptor_slot); - } else { - if (*info->bindless_descriptor_slot < 0) - *info->bindless_descriptor_slot = - dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - uint32_t offset = (info->offset / D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT) * D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT; - - struct dzn_buffer_desc local_info = *info; - local_info.offset = offset; - dzn_descriptor_heap_write_buffer_desc(device, &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap, - *info->bindless_descriptor_slot, info->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &local_info); - } - } - assert(dynamic_buffer_idx < set->layout->dynamic_buffers.count); set->dynamic_buffers[dynamic_buffer_idx] = *info; } @@ -1585,15 +1659,6 @@ dzn_descriptor_set_init(struct dzn_descriptor_set *set, { vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET); - if (device->bindless && layout->buffer_count) { - set->buffer_heap_slots = malloc(sizeof(int) * layout->buffer_count); - if (!set->buffer_heap_slots) { - vk_object_base_finish(&set->base); - return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - } - memset(set->buffer_heap_slots, 0xff, sizeof(int) * layout->buffer_count); - } - set->pool = pool; set->layout = layout; @@ -1636,14 +1701,6 @@ static void dzn_descriptor_set_finish(struct dzn_descriptor_set *set) { vk_object_base_finish(&set->base); - if (set->buffer_heap_slots) { - struct dzn_device *device = container_of(set->base.device, struct dzn_device, vk); - for (uint32_t i = 0; i < set->layout->buffer_count; ++i) - dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - set->buffer_heap_slots[i]); - } - free(set->buffer_heap_slots); - set->buffer_heap_slots = NULL; set->pool = NULL; set->layout = NULL; } @@ -2121,8 +2178,7 @@ dzn_descriptor_set_write(struct dzn_device *device, struct dzn_buffer_desc desc = { pDescriptorWrite->descriptorType, dzn_buffer_from_handle(binfo->buffer), - binfo->range, binfo->offset, - &set->buffer_heap_slots[dzn_descriptor_set_ptr_get_buffer_idx(set->layout, &ptr)] + binfo->range, binfo->offset }; if (desc.buffer) @@ -2141,8 +2197,7 @@ dzn_descriptor_set_write(struct dzn_device *device, struct dzn_buffer_desc desc = { pDescriptorWrite->descriptorType, dzn_buffer_from_handle(binfo->buffer), - binfo->range, binfo->offset, - &set->buffer_heap_slots[dzn_descriptor_set_ptr_get_buffer_idx(set->layout, &ptr)] + binfo->range, binfo->offset }; if (desc.buffer) @@ -2231,8 +2286,8 @@ dzn_descriptor_set_copy(struct dzn_device *device, dst_heap_offset += dst_set->heap_offsets[type]; if (device->bindless) { - memcpy(&dst_set->pool->bindless.map[dst_heap_offset], - &src_set->pool->bindless.map[src_heap_offset], + memcpy((void *)&dst_set->pool->bindless.map[dst_heap_offset], + (const void *)&src_set->pool->bindless.map[src_heap_offset], sizeof(src_set->pool->bindless.map[0]) * count); /* There's never a reason to loop and memcpy again for bindless */ break; @@ -2507,8 +2562,7 @@ dzn_UpdateDescriptorSetWithTemplate(VkDevice _device, struct dzn_buffer_desc desc = { entry->type, dzn_buffer_from_handle(info->buffer), - info->range, info->offset, - &set->buffer_heap_slots[entry->buffer_idx], + info->range, info->offset }; if (desc.buffer) @@ -2527,8 +2581,7 @@ dzn_UpdateDescriptorSetWithTemplate(VkDevice _device, struct dzn_buffer_desc desc = { entry->type, dzn_buffer_from_handle(info->buffer), - info->range, info->offset, - &set->buffer_heap_slots[entry->buffer_idx] + info->range, info->offset }; if (desc.buffer) diff --git a/src/microsoft/vulkan/dzn_device.c b/src/microsoft/vulkan/dzn_device.c index d988d50d220..fedf1147fa1 100644 --- a/src/microsoft/vulkan/dzn_device.c +++ b/src/microsoft/vulkan/dzn_device.c @@ -2776,6 +2776,13 @@ dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocat dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->cbv_bindless_slot); dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->uav_bindless_slot); + if (buf->custom_views) { + hash_table_foreach(buf->custom_views, entry) { + free((void *)entry->key); + dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, (int)(intptr_t)entry->data); + } + _mesa_hash_table_destroy(buf->custom_views, NULL); + } vk_object_base_finish(&buf->base); vk_free2(&device->vk.alloc, pAllocator, buf); @@ -2852,6 +2859,9 @@ dzn_buffer_create(struct dzn_device *device, } } + if (device->bindless) + mtx_init(&buf->bindless_view_lock, mtx_plain); + *out = dzn_buffer_to_handle(buf); return VK_SUCCESS; } diff --git a/src/microsoft/vulkan/dzn_private.h b/src/microsoft/vulkan/dzn_private.h index 011ce1f07eb..50193c5f80b 100644 --- a/src/microsoft/vulkan/dzn_private.h +++ b/src/microsoft/vulkan/dzn_private.h @@ -420,12 +420,9 @@ struct dzn_buffer_view; struct dzn_buffer_desc { VkDescriptorType type; - const struct dzn_buffer *buffer; + struct dzn_buffer *buffer; VkDeviceSize range; VkDeviceSize offset; - /* Points to an array owned by the descriptor set. - * Value is -1 if the buffer's pre-allocated descriptor is used. */ - int *bindless_descriptor_slot; }; #define MAX_DESCS_PER_SAMPLER_HEAP D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE @@ -719,7 +716,7 @@ struct dzn_descriptor_pool { struct dzn_descriptor_heap heaps[NUM_POOL_TYPES]; struct { ID3D12Resource *buf; - struct dxil_spirv_bindless_entry *map; + volatile struct dxil_spirv_bindless_entry *map; uint64_t gpuva; } bindless; }; @@ -788,8 +785,6 @@ struct dzn_descriptor_set { uint32_t heap_sizes[NUM_POOL_TYPES]; /* Layout (and pool) is null for a freed descriptor set */ const struct dzn_descriptor_set_layout *layout; - /* When bindless, stores dynamically-allocated heap slots for buffers */ - int *buffer_heap_slots; }; struct dzn_pipeline_layout_set { @@ -1146,10 +1141,17 @@ struct dzn_buffer { D3D12_BARRIER_ACCESS valid_access; D3D12_GPU_VIRTUAL_ADDRESS gpuva; + mtx_t bindless_view_lock; int cbv_bindless_slot; int uav_bindless_slot; + struct hash_table *custom_views; }; +void +dzn_buffer_get_bindless_buffer_descriptor(struct dzn_device *device, + const struct dzn_buffer_desc *bdesc, + volatile struct dxil_spirv_bindless_entry *out); + DXGI_FORMAT dzn_buffer_get_dxgi_format(VkFormat format);