diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index bcdf55fb21a..1ec1383eb2f 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -333,7 +333,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) marker.device_id_low = device_id; marker.device_id_high = device_id >> 32; marker.queue = cmd_buffer->qf; - marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT; + marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT; if (cmd_buffer->qf == RADV_QUEUE_GENERAL) marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index ae41d17475e..14f26b8f2e9 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -173,6 +173,9 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice) idx++; } } + + pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE; + pdevice->num_queues = idx; } @@ -2064,7 +2067,7 @@ static void radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount, VkQueueFamilyProperties **pQueueFamilyProperties) { - int num_queue_families = 1; + int num_queue_families = 2; int idx; if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) @@ -2086,8 +2089,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd idx = 0; if (*pCount >= 1) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ - .queueFlags = - VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, + .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, .timestampValidBits = 64, .minImageTransferGranularity = (VkExtent3D){1, 1, 1}, @@ -2099,7 +2101,7 @@ 
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { if (*pCount > idx) { *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ - .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, + .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues, .timestampValidBits = 64, .minImageTransferGranularity = (VkExtent3D){1, 1, 1}, @@ -2108,6 +2110,16 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd } } + if (*pCount > idx) { + *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){ + .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = (VkExtent3D){1, 1, 1}, + }; + idx++; + } + if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) { if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) { if (*pCount > idx) { @@ -2145,9 +2157,10 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties, &pQueueFamilyProperties[2].queueFamilyProperties, + &pQueueFamilyProperties[3].queueFamilyProperties, }; radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); - assert(*pCount <= 3); + assert(*pCount <= 4); for (uint32_t i = 0; i < *pCount; i++) { vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) { diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index abdfae500fe..85aa03c2125 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -1682,18 +1682,52 @@ fail: return result; } +static VkResult +radv_queue_sparse_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) +{ + struct radv_queue *queue = (struct radv_queue *)vqueue; + struct radv_device *device = 
queue->device; + VkResult result; + + result = radv_queue_submit_bind_sparse_memory(device, submission); + if (result != VK_SUCCESS) + goto fail; + + /* We do a CPU wait here, in part to avoid more winsys mechanisms. In the likely kernel explicit + * sync mechanism, we'd need to do a CPU wait anyway. Haven't seen this be a perf issue yet, but + * we have to make sure the queue always has its submission thread enabled. */ + result = vk_sync_wait_many(&device->vk, submission->wait_count, submission->waits, 0, UINT64_MAX); + if (result != VK_SUCCESS) + goto fail; + + /* Ignore all the command buffers. They're necessarily empty anyway. */ + + for (unsigned i = 0; i < submission->signal_count; ++i) { + result = vk_sync_signal(&device->vk, submission->signals[i].sync, submission->signals[i].signal_value); + if (result != VK_SUCCESS) + goto fail; + } + +fail: + if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) { + /* When something bad happened during the submission, such as + * an out of memory issue, it might be hard to recover from + * this inconsistent state. To avoid this sort of problem, we + * assume that we are in a really bad situation and return + * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt + * to submit the same job again to this device. 
+ */ + result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed"); + } + return result; +} + static VkResult radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) { struct radv_queue *queue = (struct radv_queue *)vqueue; VkResult result; - radv_rmv_log_submit(queue->device, radv_queue_ring(queue)); - - result = radv_queue_submit_bind_sparse_memory(queue->device, submission); - if (result != VK_SUCCESS) - goto fail; - if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count) return VK_SUCCESS; @@ -1703,7 +1737,6 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) result = radv_queue_submit_normal(queue, submission); } -fail: if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) { /* When something bad happened during the submission, such as * an out of memory issue, it might be hard to recover from @@ -1760,7 +1793,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx, goto fail; } - queue->vk.driver_submit = radv_queue_submit; + if (queue->state.qf == RADV_QUEUE_SPARSE) { + queue->vk.driver_submit = radv_queue_sparse_submit; + vk_queue_enable_submit_thread(&queue->vk); + } else { + queue->vk.driver_submit = radv_queue_submit; + } return VK_SUCCESS; fail: vk_queue_finish(&queue->vk);