radv: Return VK_ERROR_INCOMPATIBLE_DRIVER for unsupported devices

VK_ERROR_INITIALIZATION_FAILED will fail physical device enumeration.
Returning VK_ERROR_INCOMPATIBLE_DRIVER means that the driver can still
be used on supported GPUs when multiple GPUs are installed.

cc: mesa-stable

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34783>
(cherry picked from commit 84b9c281fe82dd66f2552687cecb61a8e22809d0)
This commit is contained in:
Konstantin Seurer
2025-05-01 09:23:49 +02:00
committed by Eric Engestrom
parent 77e7fd0dee
commit 8b8ca028a0
7 changed files with 81 additions and 64 deletions

View File

@@ -4,7 +4,7 @@
"description": "radv: Return VK_ERROR_INCOMPATIBLE_DRIVER for unsupported devices",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View File

@@ -510,8 +510,9 @@ static void handle_env_var_force_family(struct radeon_info *info)
exit(1);
}
bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info)
enum ac_query_gpu_info_result
ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info)
{
struct amdgpu_gpu_info amdinfo;
struct drm_amdgpu_info_device device_info = {0};
@@ -535,7 +536,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
if (!ac_query_pci_bus_info(fd, info)) {
if (require_pci_bus_info)
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
assert(info->drm_major == 3);
@@ -545,27 +546,27 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
fprintf(stderr, "amdgpu: DRM version is %u.%u.%u, but this driver is "
"only compatible with 3.42.0 (kernel 5.15+) or later.\n",
info->drm_major, info->drm_minor, info->drm_patchlevel);
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
uint64_t cap;
r = drmGetCap(fd, DRM_CAP_SYNCOBJ, &cap);
if (r != 0 || cap == 0) {
fprintf(stderr, "amdgpu: syncobj support is missing but is required.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
/* Query hardware and driver information. */
r = ac_drm_query_gpu_info(dev, &amdinfo);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_gpu_info failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info), &device_info);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_info(dev_info) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
for (unsigned ip_type = 0; ip_type < AMD_NUM_IP_TYPES; ip_type++) {
@@ -633,35 +634,35 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
/* Only require gfx or compute. */
if (!info->ip[AMD_IP_GFX].num_queues && !info->ip[AMD_IP_COMPUTE].num_queues) {
fprintf(stderr, "amdgpu: failed to find gfx or compute.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version,
&info->me_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(me) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_MEC, 0, 0, &info->mec_fw_version,
&info->mec_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(mec) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0, &info->pfp_fw_version,
&info->pfp_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(pfp) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
if (info->ip[AMD_IP_VCN_DEC].num_queues || info->ip[AMD_IP_VCN_UNIFIED].num_queues) {
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_VCN, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(vcn) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else {
info->vcn_dec_version = (vidip_fw_version & 0x0F000000) >> 24;
info->vcn_enc_major_version = (vidip_fw_version & 0x00F00000) >> 20;
@@ -672,7 +673,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(vce) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else
info->vce_fw_version = vidip_fw_version;
}
@@ -681,7 +682,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0, &vidip_fw_version, &vidip_fw_feature);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_firmware_version(uvd) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
} else
info->uvd_fw_version = vidip_fw_version;
}
@@ -690,7 +691,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
struct drm_amdgpu_memory_info meminfo = {0};
@@ -698,7 +699,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
if (r) {
fprintf(stderr, "amdgpu: ac_drm_query_info(memory) failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
/* Note: usable_heap_size values can be random and can't be relied on. */
@@ -837,7 +838,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
else {
fprintf(stderr, "amdgpu: Unknown gfx version: %u.%u\n",
info->ip[AMD_IP_GFX].ver_major, info->ip[AMD_IP_GFX].ver_minor);
return false;
return AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW;
}
info->family_id = device_info.family;
@@ -852,7 +853,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
if (!info->name) {
fprintf(stderr, "amdgpu: unknown (family_id, chip_external_rev): (%u, %u)\n",
device_info.family, device_info.external_rev);
return false;
return AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW;
}
memset(info->lowercase_name, 0, sizeof(info->lowercase_name));
@@ -1682,7 +1683,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
r = ac_drm_query_uq_fw_area_info(dev, AMDGPU_HW_IP_GFX, 0, &fw_info);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_uq_fw_area_info() failed.\n");
return false;
return AC_QUERY_GPU_INFO_FAIL;
}
info->has_fw_based_shadowing = true;
@@ -1747,7 +1748,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
exit(0);
}
}
return true;
return AC_QUERY_GPU_INFO_SUCCESS;
}
void ac_compute_driver_uuid(char *uuid, size_t size)

View File

@@ -328,8 +328,14 @@ struct radeon_info {
bool has_image_bvh_intersect_ray;
};
bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info);
enum ac_query_gpu_info_result {
AC_QUERY_GPU_INFO_SUCCESS,
AC_QUERY_GPU_INFO_FAIL,
AC_QUERY_GPU_INFO_UNIMPLEMENTED_HW,
};
enum ac_query_gpu_info_result ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
bool require_pci_bus_info);
void ac_compute_driver_uuid(char *uuid, size_t size);

View File

@@ -2062,19 +2062,23 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
#ifdef _WIN32
pdev->ws = radv_null_winsys_create();
if (!pdev->ws)
result = VK_ERROR_OUT_OF_HOST_MEMORY;
#else
if (drm_device) {
bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
pdev->ws =
radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio);
result = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio,
&pdev->ws);
} else {
pdev->ws = radv_null_winsys_create();
if (!pdev->ws)
result = VK_ERROR_OUT_OF_HOST_MEMORY;
}
#endif
if (!pdev->ws) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
if (result != VK_SUCCESS) {
result = vk_errorf(instance, result, "failed to initialize winsys");
goto fail_base;
}

View File

@@ -22,31 +22,6 @@
#include "vk_drm_syncobj.h"
#include "xf86drm.h"
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, true))
return false;
/*
* Override the max submits on video queues.
* If you submit multiple session contexts in the same IB sequence the
* hardware gets upset as it expects a kernel fence to be emitted to reset
* the session context in the hardware.
* Avoid this problem by never submitted more than one IB at a time.
* This possibly should be fixed in the kernel, and if it is this can be
* resolved.
*/
for (enum amd_ip_type ip_type = AMD_IP_UVD; ip_type <= AMD_IP_VCN_ENC; ip_type++)
ws->info.max_submitted_ibs[ip_type] = 1;
ws->info.ip[AMD_IP_SDMA].num_queues = MIN2(ws->info.ip[AMD_IP_SDMA].num_queues, MAX_RINGS_PER_TYPE);
ws->info.ip[AMD_IP_COMPUTE].num_queues = MIN2(ws->info.ip[AMD_IP_COMPUTE].num_queues, MAX_RINGS_PER_TYPE);
ws->use_ib_bos = true;
return true;
}
static void
radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_info)
{
@@ -195,9 +170,12 @@ radv_amdgpu_winsys_get_sync_types(struct radeon_winsys *rws)
return ws->sync_types;
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid, bool is_virtio)
VkResult
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid, bool is_virtio,
struct radeon_winsys **winsys)
{
VkResult result = VK_SUCCESS;
uint32_t drm_major, drm_minor, r;
ac_drm_device *dev;
struct radv_amdgpu_winsys *ws = NULL;
@@ -205,7 +183,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
r = ac_drm_device_initialize(fd, is_virtio, &drm_major, &drm_minor, &dev);
if (r) {
fprintf(stderr, "radv/amdgpu: failed to initialize device.\n");
return NULL;
return VK_ERROR_INITIALIZATION_FAILED;
}
/* We have to keep this lock till insertion. */
@@ -214,6 +192,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
winsyses = _mesa_pointer_hash_table_create(NULL);
if (!winsyses) {
fprintf(stderr, "radv/amdgpu: failed to alloc winsys hash table.\n");
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
@@ -232,19 +211,22 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
((debug_flags & RADV_DEBUG_HANG) && !ws->debug_log_bos) ||
((debug_flags & RADV_DEBUG_NO_IBS) && ws->use_ib_bos) || (perftest_flags != ws->perftest)) {
fprintf(stderr, "radv/amdgpu: Found options that differ from the existing winsys.\n");
return NULL;
return VK_ERROR_INITIALIZATION_FAILED;
}
/* RADV_DEBUG_ZERO_VRAM is the only option that is allowed to be set again. */
if (debug_flags & RADV_DEBUG_ZERO_VRAM)
ws->zero_all_vram_allocs = true;
return &ws->base;
*winsys = &ws->base;
return VK_SUCCESS;
}
ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
if (!ws)
if (!ws) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
ws->refcount = 1;
ws->dev = dev;
@@ -252,8 +234,29 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ws->info.drm_major = drm_major;
ws->info.drm_minor = drm_minor;
ws->info.is_virtio = is_virtio;
if (!do_winsys_init(ws, fd))
enum ac_query_gpu_info_result info_result = ac_query_gpu_info(fd, ws->dev, &ws->info, true);
if (info_result != AC_QUERY_GPU_INFO_SUCCESS) {
result = info_result == AC_QUERY_GPU_INFO_FAIL ? VK_ERROR_INITIALIZATION_FAILED : VK_ERROR_INCOMPATIBLE_DRIVER;
goto winsys_fail;
}
/*
* Override the max submits on video queues.
* If you submit multiple session contexts in the same IB sequence the
* hardware gets upset as it expects a kernel fence to be emitted to reset
* the session context in the hardware.
* Avoid this problem by never submitted more than one IB at a time.
* This possibly should be fixed in the kernel, and if it is this can be
* resolved.
*/
for (enum amd_ip_type ip_type = AMD_IP_UVD; ip_type <= AMD_IP_VCN_ENC; ip_type++)
ws->info.max_submitted_ibs[ip_type] = 1;
ws->info.ip[AMD_IP_SDMA].num_queues = MIN2(ws->info.ip[AMD_IP_SDMA].num_queues, MAX_RINGS_PER_TYPE);
ws->info.ip[AMD_IP_COMPUTE].num_queues = MIN2(ws->info.ip[AMD_IP_COMPUTE].num_queues, MAX_RINGS_PER_TYPE);
ws->use_ib_bos = true;
ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
@@ -265,6 +268,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
r = ac_drm_vm_reserve_vmid(ws->dev, 0);
if (r) {
fprintf(stderr, "radv/amdgpu: failed to reserve vmid.\n");
result = VK_ERROR_INITIALIZATION_FAILED;
goto winsys_fail;
}
}
@@ -311,7 +315,9 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
_mesa_hash_table_insert(winsyses, (void *)ac_drm_device_get_cookie(dev), ws);
simple_mtx_unlock(&winsys_creation_mutex);
return &ws->base;
*winsys = &ws->base;
return result;
winsys_fail:
free(ws);
@@ -322,5 +328,5 @@ fail:
}
simple_mtx_unlock(&winsys_creation_mutex);
ac_drm_device_deinitialize(dev);
return NULL;
return result;
}

View File

@@ -12,8 +12,8 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
bool reserve_vmid, bool is_virtio);
VkResult radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, bool reserve_vmid,
bool is_virtio, struct radeon_winsys **winsys);
struct radeon_winsys *radv_dummy_winsys_create(void);

View File

@@ -36,7 +36,7 @@ static bool do_winsys_init(struct amdgpu_winsys *aws,
const struct pipe_screen_config *config,
int fd)
{
if (!ac_query_gpu_info(fd, aws->dev, &aws->info, false))
if (ac_query_gpu_info(fd, aws->dev, &aws->info, false) != AC_QUERY_GPU_INFO_SUCCESS)
goto fail;
aws->addrlib = ac_addrlib_create(&aws->info, &aws->info.max_alignment);