radv: Move sparse binding into a dedicated queue.
This better reflects the reality that there is only one timeline of sparse binding changes, and it allows making the queue a threaded queue from the start, in preparation for explicit sync support. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16935>
This commit is contained in:

committed by
Marge Bot

parent
00faefa08e
commit
748b7f80ef
@@ -333,7 +333,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
||||
marker.device_id_low = device_id;
|
||||
marker.device_id_high = device_id >> 32;
|
||||
marker.queue = cmd_buffer->qf;
|
||||
marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;
|
||||
marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
|
||||
|
||||
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
|
||||
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
|
||||
|
@@ -173,6 +173,9 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE;
|
||||
|
||||
pdevice->num_queues = idx;
|
||||
}
|
||||
|
||||
@@ -2064,7 +2067,7 @@ static void
|
||||
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount,
|
||||
VkQueueFamilyProperties **pQueueFamilyProperties)
|
||||
{
|
||||
int num_queue_families = 1;
|
||||
int num_queue_families = 2;
|
||||
int idx;
|
||||
if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
|
||||
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
|
||||
@@ -2086,8 +2089,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
|
||||
idx = 0;
|
||||
if (*pCount >= 1) {
|
||||
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
|
||||
.queueFlags =
|
||||
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
|
||||
.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 64,
|
||||
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
|
||||
@@ -2099,7 +2101,7 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
|
||||
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
|
||||
if (*pCount > idx) {
|
||||
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
|
||||
.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
|
||||
.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
|
||||
.queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
|
||||
.timestampValidBits = 64,
|
||||
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
|
||||
@@ -2108,6 +2110,16 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
|
||||
}
|
||||
}
|
||||
|
||||
if (*pCount > idx) {
|
||||
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
|
||||
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
|
||||
.queueCount = 1,
|
||||
.timestampValidBits = 64,
|
||||
.minImageTransferGranularity = (VkExtent3D){1, 1, 1},
|
||||
};
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
|
||||
if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) {
|
||||
if (*pCount > idx) {
|
||||
@@ -2145,9 +2157,10 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui
|
||||
&pQueueFamilyProperties[0].queueFamilyProperties,
|
||||
&pQueueFamilyProperties[1].queueFamilyProperties,
|
||||
&pQueueFamilyProperties[2].queueFamilyProperties,
|
||||
&pQueueFamilyProperties[3].queueFamilyProperties,
|
||||
};
|
||||
radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
|
||||
assert(*pCount <= 3);
|
||||
assert(*pCount <= 4);
|
||||
|
||||
for (uint32_t i = 0; i < *pCount; i++) {
|
||||
vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
|
||||
|
@@ -1682,18 +1682,52 @@ fail:
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
radv_queue_sparse_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
|
||||
{
|
||||
struct radv_queue *queue = (struct radv_queue *)vqueue;
|
||||
struct radv_device *device = queue->device;
|
||||
VkResult result;
|
||||
|
||||
result = radv_queue_submit_bind_sparse_memory(device, submission);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
/* We do a CPU wait here, in part to avoid more winsys mechanisms. In the likely kernel explicit
|
||||
* sync mechanism, we'd need to do a CPU wait anyway. Haven't seen this be a perf issue yet, but
|
||||
* we have to make sure the queue always has its submission thread enabled. */
|
||||
result = vk_sync_wait_many(&device->vk, submission->wait_count, submission->waits, 0, UINT64_MAX);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
/* Ignore all the commandbuffers. They're necessarily empty anyway. */
|
||||
|
||||
for (unsigned i = 0; i < submission->signal_count; ++i) {
|
||||
result = vk_sync_signal(&device->vk, submission->signals[i].sync, submission->signals[i].signal_value);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fail:
|
||||
if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
|
||||
/* When something bad happened during the submission, such as
|
||||
* an out of memory issue, it might be hard to recover from
|
||||
* this inconsistent state. To avoid this sort of problem, we
|
||||
* assume that we are in a really bad situation and return
|
||||
* VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
|
||||
* to submit the same job again to this device.
|
||||
*/
|
||||
result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
|
||||
{
|
||||
struct radv_queue *queue = (struct radv_queue *)vqueue;
|
||||
VkResult result;
|
||||
|
||||
radv_rmv_log_submit(queue->device, radv_queue_ring(queue));
|
||||
|
||||
result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count)
|
||||
return VK_SUCCESS;
|
||||
|
||||
@@ -1703,7 +1737,6 @@ radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
|
||||
result = radv_queue_submit_normal(queue, submission);
|
||||
}
|
||||
|
||||
fail:
|
||||
if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
|
||||
/* When something bad happened during the submission, such as
|
||||
* an out of memory issue, it might be hard to recover from
|
||||
@@ -1760,7 +1793,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
queue->vk.driver_submit = radv_queue_submit;
|
||||
if (queue->state.qf == RADV_QUEUE_SPARSE) {
|
||||
queue->vk.driver_submit = radv_queue_sparse_submit;
|
||||
vk_queue_enable_submit_thread(&queue->vk);
|
||||
} else {
|
||||
queue->vk.driver_submit = radv_queue_submit;
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
fail:
|
||||
vk_queue_finish(&queue->vk);
|
||||
|
Reference in New Issue
Block a user