radv: Move sparse binding into a dedicated queue.

1) This better reflects the reality that we only have one timeline
   of sparse binding changes.

2) Allows making it a threaded queue from the start, in preparation
   for the explicit sync work.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16935>
This commit is contained in:
Bas Nieuwenhuizen
2022-06-08 02:18:37 +02:00
committed by Marge Bot
parent 00faefa08e
commit 748b7f80ef
3 changed files with 65 additions and 14 deletions

View File

@@ -333,7 +333,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
marker.device_id_low = device_id;
marker.device_id_high = device_id >> 32;
marker.queue = cmd_buffer->qf;
marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;
marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

View File

@@ -173,6 +173,9 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
idx++;
}
}
pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE;
pdevice->num_queues = idx;
}
@@ -2064,7 +2067,7 @@ static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount,
VkQueueFamilyProperties **pQueueFamilyProperties)
{
int num_queue_families = 1;
int num_queue_families = 2;
int idx;
if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
@@ -2086,8 +2089,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
idx = 0;
if (*pCount >= 1) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
.queueFlags =
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
@@ -2099,7 +2101,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
if (*pCount > idx) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
.queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
@@ -2108,6 +2110,16 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
}
}
if (*pCount > idx) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
};
idx++;
}
if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) {
if (*pCount > idx) {
@@ -2145,9 +2157,10 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui
&pQueueFamilyProperties[0].queueFamilyProperties,
&pQueueFamilyProperties[1].queueFamilyProperties,
&pQueueFamilyProperties[2].queueFamilyProperties,
&pQueueFamilyProperties[3].queueFamilyProperties,
};
radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
assert(*pCount <= 3);
assert(*pCount <= 4);
for (uint32_t i = 0; i < *pCount; i++) {
vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {

View File

@@ -1682,18 +1682,52 @@ fail:
return result;
}
/**
 * Driver submit hook installed on the dedicated sparse-binding queue.
 *
 * Applies the sparse memory binds carried by the submission, blocks on the
 * CPU until every wait of the submission is satisfied, and then signals each
 * of the submission's signal operations in order.
 *
 * Returns VK_SUCCESS when every step succeeded. VK_ERROR_DEVICE_LOST is
 * passed through unchanged; any other failure marks the device as lost and
 * the result of vk_device_set_lost() is returned instead.
 */
static VkResult
radv_queue_sparse_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
{
   struct radv_queue *sparse_queue = (struct radv_queue *)vqueue;
   struct radv_device *dev = sparse_queue->device;

   VkResult status = radv_queue_submit_bind_sparse_memory(dev, submission);

   if (status == VK_SUCCESS) {
      /* The wait is performed on the CPU, partly so that no additional winsys
       * mechanism is needed. A kernel explicit sync mechanism would likely
       * require a CPU wait as well. This has not shown up as a performance
       * problem so far, but it does mean the submission thread must always be
       * enabled for this queue.
       */
      status = vk_sync_wait_many(&dev->vk, submission->wait_count, submission->waits, 0, UINT64_MAX);
   }

   /* Command buffers are deliberately not looked at: they are necessarily
    * empty for this queue. Signalling stops at the first failure.
    */
   for (unsigned s = 0; status == VK_SUCCESS && s < submission->signal_count; s++) {
      status = vk_sync_signal(&dev->vk, submission->signals[s].sync, submission->signals[s].signal_value);
   }

   if (status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST)
      return status;

   /* A failure part-way through a submission (for example running out of
    * memory) can leave things in an inconsistent state that is hard to
    * recover from. Treat it as fatal and report VK_ERROR_DEVICE_LOST so that
    * clients do not try to resubmit the same job to this device.
    */
   return vk_device_set_lost(&dev->vk, "vkQueueSubmit() failed");
}
static VkResult
radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
{
struct radv_queue *queue = (struct radv_queue *)vqueue;
VkResult result;
radv_rmv_log_submit(queue->device, radv_queue_ring(queue));
result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
if (result != VK_SUCCESS)
goto fail;
if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count)
return VK_SUCCESS;
@@ -1703,7 +1737,6 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
result = radv_queue_submit_normal(queue, submission);
}
fail:
if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
/* When something bad happened during the submission, such as
* an out of memory issue, it might be hard to recover from
@@ -1760,7 +1793,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
goto fail;
}
queue->vk.driver_submit = radv_queue_submit;
if (queue->state.qf == RADV_QUEUE_SPARSE) {
queue->vk.driver_submit = radv_queue_sparse_submit;
vk_queue_enable_submit_thread(&queue->vk);
} else {
queue->vk.driver_submit = radv_queue_submit;
}
return VK_SUCCESS;
fail:
vk_queue_finish(&queue->vk);