radv: Expose transfer queues, hidden behind a perftest flag.

This is highly experimental and only recommended
for users who know what they are doing.

To fully support the spec, we will need gang submissions,
which will be implemented later.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913>
This commit is contained in:
Timur Kristóf
2023-10-13 23:21:52 +02:00
parent bd3f2567cc
commit 55e5c4e089
4 changed files with 41 additions and 6 deletions

View File

@@ -995,7 +995,7 @@ Clover environment variables
allows specifying additional linker options. Specified options are
appended after the options set by the OpenCL program in
``clLinkProgram``.
.. _rusticl-env-var:
.. envvar:: IRIS_ENABLE_CLOVER
@@ -1339,6 +1339,8 @@ RADV driver environment variables
enable optimizations to move more driver internal objects to VRAM.
``rtwave64``
enable wave64 for ray tracing shaders (GFX10+)
``transfer_queue``
enable experimental transfer queue support (GFX9+, not yet spec compliant)
``video_decode``
enable experimental video decoding support
``gsfastlaunch2``

View File

@@ -89,6 +89,7 @@ enum {
RADV_PERFTEST_VIDEO_DECODE = 1u << 11,
RADV_PERFTEST_DMA_SHADERS = 1u << 12,
RADV_PERFTEST_GS_FAST_LAUNCH_2 = 1u << 13,
RADV_PERFTEST_TRANSFER_QUEUE = 1u << 14,
};
bool radv_init_trace(struct radv_device *device);

View File

@@ -100,6 +100,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
{"video_decode", RADV_PERFTEST_VIDEO_DECODE},
{"dmashaders", RADV_PERFTEST_DMA_SHADERS},
{"gsfastlaunch2", RADV_PERFTEST_GS_FAST_LAUNCH_2},
{"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE},
{NULL, 0}};
const char *

View File

@@ -71,6 +71,17 @@ radv_taskmesh_enabled(const struct radv_physical_device *pdevice)
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit;
}
/**
 * Decide whether the transfer queue family is enabled for this
 * physical device.
 *
 * Returns true only when all of the following hold, checked in order:
 *  - the SDMA IP version is known (not SDMA_UNKNOWN),
 *  - the SDMA IP block reports a non-zero number of queues,
 *  - the instance has the RADV_PERFTEST_TRANSFER_QUEUE flag set,
 *  - the GPU is GFX9 or newer.
 */
static bool
radv_transfer_queue_enabled(const struct radv_physical_device *pdevice)
{
   /* No usable SDMA IP version: nothing to check further. */
   if (pdevice->rad_info.sdma_ip_version == SDMA_UNKNOWN)
      return false;

   /* SDMA is present but reports zero queues. */
   if (pdevice->rad_info.ip[AMD_IP_SDMA].num_queues == 0)
      return false;

   /* Transfer queues are opt-in through the perftest flag. */
   if ((pdevice->instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE) == 0)
      return false;

   /* Finally, only GFX9 and newer qualify. */
   return pdevice->rad_info.gfx_level >= GFX9;
}
static bool
radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice)
{
@@ -199,6 +210,11 @@ radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
}
}
if (radv_transfer_queue_enabled(pdevice)) {
pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
idx++;
}
pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE;
pdevice->num_queues = idx;
@@ -2119,6 +2135,10 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
num_queue_families++;
}
if (radv_transfer_queue_enabled(pdevice)) {
num_queue_families++;
}
if (pQueueFamilyProperties == NULL) {
*pCount = num_queue_families;
return;
@@ -2171,6 +2191,18 @@ radv_get_physical_device_queue_family_properties(struct radv_physical_device *pd
}
}
if (radv_transfer_queue_enabled(pdevice)) {
if (*pCount > idx) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
.queueFlags = VK_QUEUE_TRANSFER_BIT,
.queueCount = pdevice->rad_info.ip[AMD_IP_SDMA].num_queues,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D){16, 16, 8},
};
idx++;
}
}
if (*pCount > idx) {
*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
@@ -2201,13 +2233,12 @@ radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui
return;
}
VkQueueFamilyProperties *properties[] = {
&pQueueFamilyProperties[0].queueFamilyProperties,
&pQueueFamilyProperties[1].queueFamilyProperties,
&pQueueFamilyProperties[2].queueFamilyProperties,
&pQueueFamilyProperties[3].queueFamilyProperties,
&pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
&pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
&pQueueFamilyProperties[4].queueFamilyProperties,
};
radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
assert(*pCount <= 4);
assert(*pCount <= 5);
for (uint32_t i = 0; i < *pCount; i++) {
vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {