amd: amdgpu-virtio implementation

Native context support is implemented by diverting the libdrm_amdgpu entry
points to new functions that use virtio-gpu.
VA allocations are done directly in the guest, using newly exposed libdrm_amdgpu
helpers (retrieved through dlopen/dlsym).
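
As an illustration only (not part of this patch), resolving such a helper at
runtime is a plain dlopen/dlsym lookup; the library soname, the symbol chosen
and the error handling below are assumptions made for the sketch:

  #include <dlfcn.h>

  /* Sketch: resolve a libdrm_amdgpu VA helper at runtime. */
  static void *(*p_amdgpu_va_manager_alloc)(void);

  static int load_va_helper(void)
  {
     void *lib = dlopen("libdrm_amdgpu.so.1", RTLD_NOW | RTLD_LOCAL);
     if (!lib)
        return -1;
     p_amdgpu_va_manager_alloc = dlsym(lib, "amdgpu_va_manager_alloc");
     return p_amdgpu_va_manager_alloc ? 0 : -1;
  }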

Guest <-> host round trips can be expensive, so we avoid them as much as
possible. When the host reply is not needed to get a correct result, we also
don't wait for it.
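
This is the pattern wrapped by vdrm_send_req_wrapper() in
amdgpu_virtio_private.h further down; a simplified sketch of the idea:

  /* Simplified sketch: only block on the host when the caller needs the
   * host return code. */
  static int send_req(amdvgpu_device_handle dev, struct vdrm_ccmd_req *req,
                      struct amdgpu_ccmd_rsp *rsp, bool sync)
  {
     int r = vdrm_send_req(dev->vdev, req, sync);  /* queue the command */
     if (r || !sync)
        return r;      /* async: no guest-side wait on the host reply */
     return rsp->ret;  /* sync: the host status is now valid */
  }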

Implicit sync works because virtio-gpu commands are submitted to the host in
order (there is a single queue per device, shared by all guest processes).

virtio-gpu also supports only one context per file description (but multiple
file descriptions per process), while amdgpu allows only one fd per process
but multiple contexts per fd. This causes synchronization problems, because
virtio-gpu drops all sync primitives if they belong to the same fd/context/ring:
i.e. the amdgpu_ctx concept can't be expressed in virtio-gpu terms.

For now the solution is to only allocate a single amdgpu_ctx per application.
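
Concretely (see amdvgpu_cs_ctx_create2() below), context creation hands back
the one existing host context and refcounts it; a simplified sketch, where
has_context() and existing_context() stand in for the hash-table lookup done
in the real code:

  /* Simplified sketch: reuse the single existing context if there is one. */
  if (!dev->allow_multiple_amdgpu_ctx && has_context(dev)) {
     struct amdvgpu_context *ctx = existing_context(dev); /* hypothetical */
     p_atomic_inc(&ctx->refcount);
     *ctx_id = ctx->host_context_id;
     return 0;
  }
  /* ...otherwise forward AMDGPU_CCMD_CREATE_CTX to the host. */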

Contrary to radeonsi/radv, amdgpu_virtio can use some libdrm_amdgpu functions
directly: the ones that don't rely on ioctl() (for instance the
amdgpu_va_manager helpers) are safe to use here.

Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21658>
Author:       Pierre-Eric Pelloux-Prayer
Date:         2024-04-22 12:12:29 +02:00
Committed by: Marge Bot
Parent:       a565f2994f
Commit:       22263616ed

16 changed files with 1900 additions and 32 deletions


@@ -178,6 +178,8 @@ if with_swrast
warning('`gallium-drivers=swrast` is a deprecated alias for `gallium-drivers=softpipe,llvmpipe` and will be removed in version 25.0')
endif
with_amdgpu_virtio = get_option('amdgpu-virtio')
with_gallium_radeonsi = gallium_drivers.contains('radeonsi')
with_gallium_r300 = gallium_drivers.contains('r300')
with_gallium_r600 = gallium_drivers.contains('r600')


@@ -224,6 +224,12 @@ option(
'gallium and vulkan driver',
)
option(
'amdgpu-virtio',
type : 'boolean',
value : false,
description : 'use experimental virtio backend for radeonsi/radv',
)
option(
'imagination-srv',
type : 'boolean',


@@ -219,6 +219,7 @@ struct radeon_info {
uint32_t drm_patchlevel;
uint32_t max_submitted_ibs[AMD_NUM_IP_TYPES];
bool is_amdgpu;
bool is_virtio;
bool has_userptr;
bool has_syncobj;
bool has_timeline_syncobj;


@@ -11,14 +11,23 @@
#include <time.h>
#include <unistd.h>
#ifdef HAVE_AMDGPU_VIRTIO
#include "virtio/amdgpu_virtio.h"
#endif
struct ac_drm_device {
union {
amdgpu_device_handle adev;
#ifdef HAVE_AMDGPU_VIRTIO
amdvgpu_device_handle vdev;
#endif
};
int fd;
bool is_virtio;
};
int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_version,
int ac_drm_device_initialize(int fd, bool is_virtio,
uint32_t *major_version, uint32_t *minor_version,
ac_drm_device **dev)
{
int r;
@@ -27,22 +36,43 @@ int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_ve
if (!(*dev))
return -1;
amdgpu_device_handle adev;
r = amdgpu_device_initialize(fd, major_version, minor_version,
&adev);
if (r == 0) {
(*dev)->adev = adev;
(*dev)->fd = amdgpu_device_get_fd(adev);
} else {
free(*dev);
#ifdef HAVE_AMDGPU_VIRTIO
if (is_virtio) {
amdvgpu_device_handle vdev;
r = amdvgpu_device_initialize(fd, major_version, minor_version,
&vdev);
if (r == 0) {
(*dev)->vdev = vdev;
(*dev)->fd = amdvgpu_device_get_fd(vdev);
}
} else
#endif
{
amdgpu_device_handle adev;
r = amdgpu_device_initialize(fd, major_version, minor_version,
&adev);
if (r == 0) {
(*dev)->adev = adev;
(*dev)->fd = amdgpu_device_get_fd(adev);
}
}
if (r == 0)
(*dev)->is_virtio = is_virtio;
else
free(*dev);
return r;
}
void ac_drm_device_deinitialize(ac_drm_device *dev)
{
amdgpu_device_deinitialize(dev->adev);
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
amdvgpu_device_deinitialize(dev->vdev);
else
#endif
amdgpu_device_deinitialize(dev->adev);
free(dev);
}
@@ -53,6 +83,10 @@ int ac_drm_device_get_fd(ac_drm_device *device_handle)
int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu_bo_metadata *info)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_set_metadata(dev->vdev, bo_handle, info);
#endif
struct drm_amdgpu_gem_metadata args = {};
args.handle = bo_handle;
@@ -74,6 +108,10 @@ int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu
int ac_drm_bo_query_info(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu_bo_info *info)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_query_info(dev->vdev, bo_handle, info);
#endif
struct drm_amdgpu_gem_metadata metadata = {};
struct drm_amdgpu_gem_create_in bo_info = {};
struct drm_amdgpu_gem_op gem_op = {};
@@ -148,9 +186,16 @@ int ac_drm_bo_wait_for_idle(ac_drm_device *dev, ac_drm_bo bo, uint64_t timeout_n
memset(&args, 0, sizeof(args));
args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
ac_drm_bo_export(dev, bo, amdgpu_bo_handle_type_kms,
&args.in.handle);
r = drm_ioctl_write_read(dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &args, sizeof(args));
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
r = amdvgpu_bo_wait_for_idle(dev->vdev, bo.vbo, args.in.timeout);
} else
#endif
{
ac_drm_bo_export(dev, bo, amdgpu_bo_handle_type_kms,
&args.in.handle);
r = drm_ioctl_write_read(dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &args, sizeof(args));
}
if (r == 0) {
*busy = args.out.status;
@@ -181,6 +226,11 @@ int ac_drm_bo_va_op_raw(ac_drm_device *dev, uint32_t bo_handle, uint64_t offset,
ops != AMDGPU_VA_OP_CLEAR)
return -EINVAL;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_va_op_raw(dev->vdev, bo_handle, offset, size, addr, flags, ops);
#endif
memset(&va, 0, sizeof(va));
va.handle = bo_handle;
va.operation = ops;
@@ -239,6 +289,10 @@ int ac_drm_cs_ctx_create2(ac_drm_device *dev, uint32_t priority, uint32_t *ctx_i
}
}
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_cs_ctx_create2(dev->vdev, priority, ctx_id);
#endif
/* Create the context */
memset(&args, 0, sizeof(args));
args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
@@ -256,6 +310,10 @@ int ac_drm_cs_ctx_create2(ac_drm_device *dev, uint32_t priority, uint32_t *ctx_i
int ac_drm_cs_ctx_free(ac_drm_device *dev, uint32_t ctx_id)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_cs_ctx_free(dev->vdev, ctx_id);
#endif
union drm_amdgpu_ctx args;
/* now deal with kernel side */
@@ -268,6 +326,10 @@ int ac_drm_cs_ctx_free(ac_drm_device *dev, uint32_t ctx_id)
int ac_drm_cs_ctx_stable_pstate(ac_drm_device *dev, uint32_t ctx_id, uint32_t op, uint32_t flags,
uint32_t *out_flags)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_cs_ctx_stable_pstate(dev->vdev, ctx_id, op, flags, out_flags);
#endif
union drm_amdgpu_ctx args;
int r;
@@ -286,6 +348,11 @@ int ac_drm_cs_ctx_stable_pstate(ac_drm_device *dev, uint32_t ctx_id, uint32_t op
int ac_drm_cs_query_reset_state2(ac_drm_device *dev, uint32_t ctx_id, uint64_t *flags)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_cs_query_reset_state2(dev->vdev, ctx_id, flags);
#endif
union drm_amdgpu_ctx args;
int r;
@@ -342,8 +409,14 @@ int ac_drm_cs_query_fence_status(ac_drm_device *dev, uint32_t ctx_id, uint32_t i
*expired = false;
r = amdgpu_ioctl_wait_cs(dev->fd, ctx_id, ip_type, ip_instance, ring, fence_seq_no,
timeout_ns, flags, &busy);
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
r = amdvgpu_cs_query_fence_status(dev->vdev, ctx_id, ip_type, ip_instance, ring, fence_seq_no,
timeout_ns, flags, expired);
else
#endif
r = amdgpu_ioctl_wait_cs(dev->fd, ctx_id, ip_type, ip_instance, ring, fence_seq_no,
timeout_ns, flags, &busy);
if (!r && !busy)
*expired = true;
@@ -432,6 +505,11 @@ int ac_drm_cs_syncobj_timeline_wait(int device_fd, uint32_t *handles, uint64_t *
int ac_drm_cs_submit_raw2(ac_drm_device *dev, uint32_t ctx_id, uint32_t bo_list_handle,
int num_chunks, struct drm_amdgpu_cs_chunk *chunks, uint64_t *seq_no)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_cs_submit_raw2(dev->vdev, ctx_id, bo_list_handle, num_chunks, chunks, seq_no);
#endif
union drm_amdgpu_cs cs;
uint64_t *chunk_array;
int i, r;
@@ -466,6 +544,10 @@ int ac_drm_query_info(ac_drm_device *dev, unsigned info_id, unsigned size, void
request.return_size = size;
request.query = info_id;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -483,6 +565,10 @@ int ac_drm_read_mm_registers(ac_drm_device *dev, unsigned dword_offset, unsigned
request.read_mmr_reg.instance = instance;
request.read_mmr_reg.flags = flags;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -496,6 +582,10 @@ int ac_drm_query_hw_ip_count(ac_drm_device *dev, unsigned type, uint32_t *count)
request.query = AMDGPU_INFO_HW_IP_COUNT;
request.query_hw_ip.type = type;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -511,6 +601,10 @@ int ac_drm_query_hw_ip_info(ac_drm_device *dev, unsigned type, unsigned ip_insta
request.query_hw_ip.type = type;
request.query_hw_ip.ip_instance = ip_instance;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -529,7 +623,12 @@ int ac_drm_query_firmware_version(ac_drm_device *dev, unsigned fw_type, unsigned
request.query_fw.ip_instance = ip_instance;
request.query_fw.index = index;
r = drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
r = amdvgpu_query_info(dev->vdev, &request);
else
#endif
r = drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
if (r)
return r;
@@ -690,6 +789,10 @@ int ac_drm_query_sensor_info(ac_drm_device *dev, unsigned sensor_type, unsigned
request.query = AMDGPU_INFO_SENSOR;
request.sensor_info.type = sensor_type;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -703,6 +806,10 @@ int ac_drm_query_video_caps_info(ac_drm_device *dev, unsigned cap_type, unsigned
request.query = AMDGPU_INFO_VIDEO_CAPS;
request.sensor_info.type = cap_type;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
@@ -715,11 +822,21 @@ int ac_drm_query_gpuvm_fault_info(ac_drm_device *dev, unsigned size, void *value
request.return_size = size;
request.query = AMDGPU_INFO_GPUVM_FAULT;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_query_info(dev->vdev, &request);
#endif
return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info));
}
int ac_drm_vm_reserve_vmid(ac_drm_device *dev, uint32_t flags)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
assert(flags == 0);
return amdvgpu_vm_reserve_vmid(dev->vdev, 1);
}
#endif
union drm_amdgpu_vm vm;
vm.in.op = AMDGPU_VM_OP_RESERVE_VMID;
@@ -730,6 +847,12 @@ int ac_drm_vm_reserve_vmid(ac_drm_device *dev, uint32_t flags)
int ac_drm_vm_unreserve_vmid(ac_drm_device *dev, uint32_t flags)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
assert(flags == 0);
return amdvgpu_vm_reserve_vmid(dev->vdev, 0);
}
#endif
union drm_amdgpu_vm vm;
vm.in.op = AMDGPU_VM_OP_UNRESERVE_VMID;
@@ -740,24 +863,41 @@ int ac_drm_vm_unreserve_vmid(ac_drm_device *dev, uint32_t flags)
const char *ac_drm_get_marketing_name(ac_drm_device *dev)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_get_marketing_name(dev->vdev);
#endif
return amdgpu_get_marketing_name(dev->adev);
}
int ac_drm_query_sw_info(ac_drm_device *dev,
enum amdgpu_sw_info info, void *value)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
assert(info == amdgpu_sw_info_address32_hi);
return amdvgpu_query_sw_info(dev->vdev, info, value);
}
#endif
return amdgpu_query_sw_info(dev->adev, info, value);
}
int ac_drm_bo_alloc(ac_drm_device *dev, struct amdgpu_bo_alloc_request *alloc_buffer,
ac_drm_bo *bo)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_alloc(dev->vdev, alloc_buffer, &bo->vbo);
#endif
return amdgpu_bo_alloc(dev->adev, alloc_buffer, &bo->abo);
}
int ac_drm_bo_export(ac_drm_device *dev, ac_drm_bo bo,
enum amdgpu_bo_handle_type type, uint32_t *shared_handle)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_export(dev->vdev, bo.vbo, type, shared_handle);
#endif
return amdgpu_bo_export(bo.abo, type, shared_handle);
}
@@ -766,35 +906,65 @@ int ac_drm_bo_import(ac_drm_device *dev, enum amdgpu_bo_handle_type type,
{
int r;
struct amdgpu_bo_import_result result;
r = amdgpu_bo_import(dev->adev, type, shared_handle, &result);
if (r == 0) {
output->bo.abo = result.buf_handle;
output->alloc_size = result.alloc_size;
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
struct amdvgpu_bo_import_result result;
r = amdvgpu_bo_import(dev->vdev, type, shared_handle, &result);
if (r == 0) {
output->bo.vbo = result.buf_handle;
output->alloc_size = result.alloc_size;
}
}
else
#endif
{
struct amdgpu_bo_import_result result;
r = amdgpu_bo_import(dev->adev, type, shared_handle, &result);
if (r == 0) {
output->bo.abo = result.buf_handle;
output->alloc_size = result.alloc_size;
}
}
return r;
}
int ac_drm_create_bo_from_user_mem(ac_drm_device *dev, void *cpu,
uint64_t size, ac_drm_bo *bo)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio) {
assert(false);
return -1;
}
#endif
return amdgpu_create_bo_from_user_mem(dev->adev, cpu, size, &bo->abo);
}
int ac_drm_bo_free(ac_drm_device *dev, ac_drm_bo bo)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_free(dev->vdev, bo.vbo);
#endif
return amdgpu_bo_free(bo.abo);
}
int ac_drm_bo_cpu_map(ac_drm_device *dev, ac_drm_bo bo,
void **cpu)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_cpu_map(dev->vdev, bo.vbo, cpu);
#endif
return amdgpu_bo_cpu_map(bo.abo, cpu);
}
int ac_drm_bo_cpu_unmap(ac_drm_device *dev, ac_drm_bo bo)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_bo_cpu_unmap(dev->vdev, bo.vbo);
#endif
return amdgpu_bo_cpu_unmap(bo.abo);
}
@@ -803,6 +973,12 @@ int ac_drm_va_range_alloc(ac_drm_device *dev, enum amdgpu_gpu_va_range va_range_
uint64_t *va_base_allocated, amdgpu_va_handle *va_range_handle,
uint64_t flags)
{
#ifdef HAVE_AMDGPU_VIRTIO
if (dev->is_virtio)
return amdvgpu_va_range_alloc(dev->vdev, va_range_type, size, va_base_alignment,
va_base_required, va_base_allocated,
va_range_handle, flags);
#endif
return amdgpu_va_range_alloc(dev->adev, va_range_type, size, va_base_alignment,
va_base_required, va_base_allocated,
va_range_handle, flags);
@@ -821,6 +997,12 @@ int ac_drm_create_userqueue(ac_drm_device *dev, uint32_t ip_type, uint32_t doorb
union drm_amdgpu_userq userq;
uint64_t mqd_size;
#ifdef HAVE_AMDGPU_VIRTIO
/* Not supported yet. */
if (dev->is_virtio)
return -1;
#endif
switch (ip_type) {
case AMDGPU_HW_IP_GFX:
mqd_size = sizeof(struct drm_amdgpu_userq_mqd_gfx11);
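
For reference, a minimal usage sketch of the updated entry point (not part of
the patch); it mirrors the winsys call sites changed further down, with the
new is_virtio argument selecting the virtio-gpu backend:

  uint32_t drm_major, drm_minor;
  ac_drm_device *dev;

  /* Sketch: 'fd' is an already-opened render node; passing true routes the
   * device through the virtio-gpu native-context path (HAVE_AMDGPU_VIRTIO). */
  int r = ac_drm_device_initialize(fd, true, &drm_major, &drm_minor, &dev);
  if (r == 0)
     ac_drm_device_deinitialize(dev);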


@@ -49,6 +49,9 @@ typedef union ac_drm_bo {
#else
amdgpu_bo_handle abo;
#endif
#ifdef HAVE_AMDGPU_VIRTIO
struct amdvgpu_bo *vbo;
#endif
} ac_drm_bo;
struct ac_drm_bo_import_result {
@@ -56,8 +59,9 @@ struct ac_drm_bo_import_result {
uint64_t alloc_size;
};
PROC int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_version,
ac_drm_device **dev) TAIL;
PROC int ac_drm_device_initialize(int fd, bool is_virtio,
uint32_t *major_version, uint32_t *minor_version,
ac_drm_device **device_handle) TAIL;
PROC void ac_drm_device_deinitialize(ac_drm_device *dev) TAILV;
PROC int ac_drm_device_get_fd(ac_drm_device *dev) TAIL;
PROC int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle,


@@ -128,6 +128,19 @@ if not with_platform_windows
)
endif
link_with = []
c_args = ['-DADDR_FASTCALL=']
if with_amdgpu_virtio
c_args += ['-DHAVE_AMDGPU_VIRTIO', '-DENABLE_DRM_AMDGPU']
amd_common_files += files(
'virtio/amdgpu_virtio.c',
'virtio/amdgpu_virtio_bo.c',
'virtio/amdgpu_virtio_device.c',
'virtio/amdgpu_virtio_private.h',
'virtio/amdgpu_virtio_proto.h')
link_with += libvdrm
endif
if dep_elf.found()
amd_common_files += files(
'ac_rtld.c',
@@ -140,14 +153,15 @@ libamd_common = static_library(
'amd_common',
[amd_common_files, sid_tables_h, amdgfxregs_h, gfx10_format_table_c],
include_directories : [
inc_include, inc_src, inc_amd,
inc_include, inc_src, inc_amd, inc_virtio_gpu,
],
dependencies : [dep_llvm.partial_dependency(compile_args: true, includes: true)] + [
dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
idep_mesautil, idep_nir_headers, idep_nir
],
link_with: [ link_with ],
gnu_symbol_visibility : 'hidden',
c_args : ['-DADDR_FASTCALL=']
c_args : c_args
)
idep_amdgfxregs_h = declare_dependency(sources : [amdgfxregs_h])


@@ -0,0 +1,577 @@
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <xf86drm.h>
#include <libsync.h>
#include <dlfcn.h>
#include <libdrm/amdgpu.h>
#include "amdgpu_virtio_private.h"
#include "util/log.h"
int
amdvgpu_query_info(amdvgpu_device_handle dev, struct drm_amdgpu_info *info)
{
unsigned req_len = sizeof(struct amdgpu_ccmd_query_info_req);
unsigned rsp_len = sizeof(struct amdgpu_ccmd_query_info_rsp) + info->return_size;
uint8_t buf[req_len];
struct amdgpu_ccmd_query_info_req *req = (void *)buf;
struct amdgpu_ccmd_query_info_rsp *rsp;
assert(0 == (offsetof(struct amdgpu_ccmd_query_info_rsp, payload) % 8));
req->hdr = AMDGPU_CCMD(QUERY_INFO, req_len);
memcpy(&req->info, info, sizeof(struct drm_amdgpu_info));
rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true);
if (r)
return r;
memcpy((void*)(uintptr_t)info->return_pointer, rsp->payload, info->return_size);
return 0;
}
static int
amdvgpu_query_info_simple(amdvgpu_device_handle dev, unsigned info_id, unsigned size, void *out)
{
if (info_id == AMDGPU_INFO_DEV_INFO) {
assert(size == sizeof(dev->dev_info));
memcpy(out, &dev->dev_info, size);
return 0;
}
struct drm_amdgpu_info info;
info.return_pointer = (uintptr_t)out;
info.query = info_id;
info.return_size = size;
return amdvgpu_query_info(dev, &info);
}
static int
amdvgpu_query_heap_info(amdvgpu_device_handle dev, unsigned heap, unsigned flags, struct amdgpu_heap_info *info)
{
struct amdvgpu_shmem *shmem = to_amdvgpu_shmem(dev->vdev->shmem);
/* Get heap information from shared memory */
switch (heap) {
case AMDGPU_GEM_DOMAIN_VRAM:
if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
memcpy(info, &shmem->vis_vram, sizeof(*info));
else
memcpy(info, &shmem->vram, sizeof(*info));
break;
case AMDGPU_GEM_DOMAIN_GTT:
memcpy(info, &shmem->gtt, sizeof(*info));
break;
default:
return -EINVAL;
}
return 0;
}
static int
amdvgpu_query_hw_ip_count(amdvgpu_device_handle dev, unsigned type, uint32_t *count)
{
struct drm_amdgpu_info request;
request.return_pointer = (uintptr_t) count;
request.return_size = sizeof(*count);
request.query = AMDGPU_INFO_HW_IP_COUNT;
request.query_hw_ip.type = type;
return amdvgpu_query_info(dev, &request);
}
static int
amdvgpu_query_video_caps_info(amdvgpu_device_handle dev, unsigned cap_type,
unsigned size, void *value)
{
struct drm_amdgpu_info request;
request.return_pointer = (uintptr_t)value;
request.return_size = size;
request.query = AMDGPU_INFO_VIDEO_CAPS;
request.sensor_info.type = cap_type;
return amdvgpu_query_info(dev, &request);
}
int
amdvgpu_query_sw_info(amdvgpu_device_handle dev, enum amdgpu_sw_info info, void *value)
{
if (info != amdgpu_sw_info_address32_hi)
return -EINVAL;
memcpy(value, &dev->vdev->caps.u.amdgpu.address32_hi, 4);
return 0;
}
static int
amdvgpu_query_firmware_version(amdvgpu_device_handle dev, unsigned fw_type, unsigned ip_instance, unsigned index,
uint32_t *version, uint32_t *feature)
{
struct drm_amdgpu_info request;
struct drm_amdgpu_info_firmware firmware = {};
int r;
memset(&request, 0, sizeof(request));
request.return_pointer = (uintptr_t)&firmware;
request.return_size = sizeof(firmware);
request.query = AMDGPU_INFO_FW_VERSION;
request.query_fw.fw_type = fw_type;
request.query_fw.ip_instance = ip_instance;
request.query_fw.index = index;
r = amdvgpu_query_info(dev, &request);
*version = firmware.ver;
*feature = firmware.feature;
return r;
}
static int
amdvgpu_query_buffer_size_alignment(amdvgpu_device_handle dev,
struct amdgpu_buffer_size_alignments *info)
{
memcpy(info, &dev->vdev->caps.u.amdgpu.alignments, sizeof(*info));
return 0;
}
static int
amdvgpu_query_gpu_info(amdvgpu_device_handle dev, struct amdgpu_gpu_info *info)
{
memcpy(info, &dev->vdev->caps.u.amdgpu.gpu_info, sizeof(*info));
return 0;
}
int
amdvgpu_bo_set_metadata(amdvgpu_device_handle dev, uint32_t res_id,
struct amdgpu_bo_metadata *info)
{
unsigned req_len = sizeof(struct amdgpu_ccmd_set_metadata_req) + info->size_metadata;
unsigned rsp_len = sizeof(struct amdgpu_ccmd_rsp);
uint8_t buf[req_len];
struct amdgpu_ccmd_set_metadata_req *req = (void *)buf;
struct amdgpu_ccmd_rsp *rsp;
req->hdr = AMDGPU_CCMD(SET_METADATA, req_len);
req->res_id = res_id;
req->flags = info->flags;
req->tiling_info = info->tiling_info;
req->size_metadata = info->size_metadata;
memcpy(req->umd_metadata, info->umd_metadata, info->size_metadata);
rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
return vdrm_send_req_wrapper(dev, &req->hdr, rsp, true);
}
int amdvgpu_bo_query_info(amdvgpu_device_handle dev, uint32_t res_id, struct amdgpu_bo_info *info) {
unsigned req_len = sizeof(struct amdgpu_ccmd_bo_query_info_req);
unsigned rsp_len = sizeof(struct amdgpu_ccmd_bo_query_info_rsp);
uint8_t buf[req_len];
struct amdgpu_ccmd_bo_query_info_req *req = (void *)buf;
struct amdgpu_ccmd_bo_query_info_rsp *rsp;
req->hdr = AMDGPU_CCMD(BO_QUERY_INFO, req_len);
req->res_id = res_id;
req->pad = 0;
rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true);
if (r)
return r;
info->alloc_size = rsp->info.alloc_size;
info->phys_alignment = rsp->info.phys_alignment;
info->preferred_heap = rsp->info.preferred_heap;
info->alloc_flags = rsp->info.alloc_flags;
info->metadata.flags = rsp->info.metadata.flags;
info->metadata.tiling_info = rsp->info.metadata.tiling_info;
info->metadata.size_metadata = rsp->info.metadata.size_metadata;
memcpy(info->metadata.umd_metadata, rsp->info.metadata.umd_metadata,
MIN2(sizeof(info->metadata.umd_metadata), rsp->info.metadata.size_metadata));
return 0;
}
int amdvgpu_cs_ctx_create2(amdvgpu_device_handle dev, int32_t priority,
uint32_t *ctx_virtio) {
simple_mtx_lock(&dev->contexts_mutex);
if (!dev->allow_multiple_amdgpu_ctx && _mesa_hash_table_num_entries(&dev->contexts)) {
assert(_mesa_hash_table_num_entries(&dev->contexts) == 1);
struct hash_entry *he = _mesa_hash_table_random_entry(&dev->contexts, NULL);
struct amdvgpu_context *ctx = he->data;
p_atomic_inc(&ctx->refcount);
*ctx_virtio = (uint32_t)(uintptr_t)he->key;
simple_mtx_unlock(&dev->contexts_mutex);
return 0;
}
struct amdgpu_ccmd_create_ctx_req req = {
.priority = priority,
.flags = 0,
};
struct amdgpu_ccmd_create_ctx_rsp *rsp;
req.hdr = AMDGPU_CCMD(CREATE_CTX, sizeof(req));
rsp = vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(struct amdgpu_ccmd_create_ctx_rsp));
int r = vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, true);
if (r)
goto unlock;
if (rsp->ctx_id == 0) {
r = -ENOTSUP;
goto unlock;
}
struct amdvgpu_context *ctx = calloc(1, sizeof(struct amdvgpu_context) + dev->num_virtio_rings * sizeof(uint64_t));
if (ctx == NULL) {
r = -ENOMEM;
goto unlock;
}
p_atomic_inc(&ctx->refcount);
ctx->host_context_id = rsp->ctx_id;
for (int i = 0; i < dev->num_virtio_rings; i++)
ctx->ring_next_seqno[i] = 1;
*ctx_virtio = ctx->host_context_id;
_mesa_hash_table_insert(&dev->contexts, (void*)(uintptr_t)ctx->host_context_id, ctx);
unlock:
simple_mtx_unlock(&dev->contexts_mutex);
return r;
}
int amdvgpu_cs_ctx_free(amdvgpu_device_handle dev, uint32_t ctx_id)
{
struct hash_entry *he = _mesa_hash_table_search(&dev->contexts,
(void*)(uintptr_t)ctx_id);
if (!he)
return -1;
if (!dev->allow_multiple_amdgpu_ctx) {
struct amdvgpu_context *ctx = he->data;
if (p_atomic_dec_return(&ctx->refcount))
return 0;
}
struct amdgpu_ccmd_create_ctx_req req = {
.id = ctx_id,
.flags = AMDGPU_CCMD_CREATE_CTX_DESTROY,
};
req.hdr = AMDGPU_CCMD(CREATE_CTX, sizeof(req));
_mesa_hash_table_remove(&dev->contexts, he);
free(he->data);
struct amdgpu_ccmd_create_ctx_rsp *rsp;
rsp = vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(struct amdgpu_ccmd_create_ctx_rsp));
return vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, false);
}
int
amdvgpu_device_get_fd(amdvgpu_device_handle dev) {
return dev->fd;
}
const char *
amdvgpu_get_marketing_name(amdvgpu_device_handle dev) {
return dev->vdev->caps.u.amdgpu.marketing_name;
}
static uint32_t cs_chunk_ib_to_virtio_ring_idx(amdvgpu_device_handle dev,
struct drm_amdgpu_cs_chunk_ib *ib) {
assert(dev->virtio_ring_mapping[ib->ip_type] != 0);
return dev->virtio_ring_mapping[ib->ip_type] + ib->ring;
}
int
amdvgpu_cs_submit_raw2(amdvgpu_device_handle dev, uint32_t ctx_id,
uint32_t bo_list_handle,
int num_chunks, struct drm_amdgpu_cs_chunk *chunks,
uint64_t *seqno)
{
unsigned rsp_len = sizeof(struct amdgpu_ccmd_rsp);
struct extra_data_info {
const void *ptr;
uint32_t size;
} extra[1 + num_chunks];
int chunk_count = 0;
unsigned offset = 0;
struct desc {
uint16_t chunk_id;
uint16_t length_dw;
uint32_t offset;
};
struct desc descriptors[num_chunks];
unsigned virtio_ring_idx = 0xffffffff;
uint32_t syncobj_in_count = 0, syncobj_out_count = 0;
struct drm_virtgpu_execbuffer_syncobj *syncobj_in = NULL;
struct drm_virtgpu_execbuffer_syncobj *syncobj_out = NULL;
uint8_t *buf = NULL;
int ret;
const bool sync_submit = dev->sync_cmd & (1u << AMDGPU_CCMD_CS_SUBMIT);
struct hash_entry *he = _mesa_hash_table_search(&dev->contexts, (void*)(uintptr_t)ctx_id);
if (!he)
return -1;
struct amdvgpu_context *vctx = he->data;
/* Extract pointers from each chunk and copy them to the payload. */
for (int i = 0; i < num_chunks; i++) {
int extra_idx = 1 + chunk_count;
if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_BO_HANDLES) {
struct drm_amdgpu_bo_list_in *list_in = (void*) (uintptr_t)chunks[i].chunk_data;
extra[extra_idx].ptr = (void*) (uintptr_t)list_in->bo_info_ptr;
extra[extra_idx].size = list_in->bo_info_size * list_in->bo_number;
} else if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE ||
chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) {
extra[extra_idx].ptr = (void*)(uintptr_t)chunks[i].chunk_data;
extra[extra_idx].size = chunks[i].length_dw * 4;
if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) {
struct drm_amdgpu_cs_chunk_ib *ib = (void*)(uintptr_t)chunks[i].chunk_data;
virtio_ring_idx = cs_chunk_ib_to_virtio_ring_idx(dev, ib);
}
} else if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT ||
chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
/* Translate from amdgpu CHUNK_ID_SYNCOBJ_* to drm_virtgpu_execbuffer_syncobj */
struct drm_amdgpu_cs_chunk_sem *amd_syncobj = (void*) (uintptr_t)chunks[i].chunk_data;
unsigned syncobj_count = (chunks[i].length_dw * 4) / sizeof(struct drm_amdgpu_cs_chunk_sem);
struct drm_virtgpu_execbuffer_syncobj *syncobjs =
calloc(syncobj_count, sizeof(struct drm_virtgpu_execbuffer_syncobj));
if (syncobjs == NULL) {
ret = -ENOMEM;
goto error;
}
for (int j = 0; j < syncobj_count; j++)
syncobjs[j].handle = amd_syncobj[j].handle;
if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
syncobj_in_count = syncobj_count;
syncobj_in = syncobjs;
} else {
syncobj_out_count = syncobj_count;
syncobj_out = syncobjs;
}
/* This chunk was converted to virtgpu UAPI so we don't need to forward it
* to the host.
*/
continue;
} else {
mesa_loge("Unhandled chunk_id: %d\n", chunks[i].chunk_id);
continue;
}
descriptors[chunk_count].chunk_id = chunks[i].chunk_id;
descriptors[chunk_count].offset = offset;
descriptors[chunk_count].length_dw = extra[extra_idx].size / 4;
offset += extra[extra_idx].size;
chunk_count++;
}
assert(virtio_ring_idx != 0xffffffff);
/* Copy the descriptors at the beginning. */
extra[0].ptr = descriptors;
extra[0].size = chunk_count * sizeof(struct desc);
/* Determine how much extra space we need. */
uint32_t req_len = sizeof(struct amdgpu_ccmd_cs_submit_req);
uint32_t e_offset = req_len;
for (unsigned i = 0; i < 1 + chunk_count; i++)
req_len += extra[i].size;
/* Allocate the command buffer. */
buf = malloc(req_len);
if (buf == NULL) {
ret = -ENOMEM;
goto error;
}
struct amdgpu_ccmd_cs_submit_req *req = (void*)buf;
req->hdr = AMDGPU_CCMD(CS_SUBMIT, req_len);
req->ctx_id = ctx_id;
req->num_chunks = chunk_count;
req->ring_idx = virtio_ring_idx;
req->pad = 0;
UNUSED struct amdgpu_ccmd_rsp *rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
/* Copy varying data after the fixed part of cs_submit_req. */
for (unsigned i = 0; i < 1 + chunk_count; i++) {
if (extra[i].size) {
memcpy(&buf[e_offset], extra[i].ptr, extra[i].size);
e_offset += extra[i].size;
}
}
/* Optional fence out (if we want synchronous submits). */
int *fence_fd_ptr = NULL;
struct vdrm_execbuf_params vdrm_execbuf_p = {
.ring_idx = virtio_ring_idx,
.req = &req->hdr,
.handles = NULL,
.num_handles = 0,
.in_syncobjs = syncobj_in,
.out_syncobjs = syncobj_out,
.has_in_fence_fd = 0,
.needs_out_fence_fd = sync_submit,
.fence_fd = 0,
.num_in_syncobjs = syncobj_in_count,
.num_out_syncobjs = syncobj_out_count,
};
if (sync_submit)
fence_fd_ptr = &vdrm_execbuf_p.fence_fd;
/* Push job to the host. */
ret = vdrm_execbuf(dev->vdev, &vdrm_execbuf_p);
/* Determine the host seqno for this job. */
*seqno = vctx->ring_next_seqno[virtio_ring_idx - 1]++;
if (ret == 0 && fence_fd_ptr) {
/* Sync execution */
sync_wait(*fence_fd_ptr, -1);
close(*fence_fd_ptr);
vdrm_host_sync(dev->vdev, &req->hdr);
}
error:
free(buf);
free(syncobj_in);
free(syncobj_out);
return ret;
}
int amdvgpu_cs_query_reset_state2(amdvgpu_device_handle dev, uint32_t ctx_id,
uint64_t *flags)
{
*flags = 0;
if (to_amdvgpu_shmem(dev->vdev->shmem)->async_error > 0)
*flags = AMDGPU_CTX_QUERY2_FLAGS_RESET | AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
return 0;
}
int amdvgpu_cs_query_fence_status(amdvgpu_device_handle dev,
uint32_t ctx_id,
uint32_t ip_type,
uint32_t ip_instance, uint32_t ring,
uint64_t fence_seq_no,
uint64_t timeout_ns, uint64_t flags,
uint32_t *expired)
{
unsigned req_len = sizeof(struct amdgpu_ccmd_cs_query_fence_status_req);
unsigned rsp_len = sizeof(struct amdgpu_ccmd_cs_query_fence_status_rsp);
uint8_t buf[req_len];
struct amdgpu_ccmd_cs_query_fence_status_req *req = (void *)buf;
struct amdgpu_ccmd_cs_query_fence_status_rsp *rsp;
req->hdr = AMDGPU_CCMD(CS_QUERY_FENCE_STATUS, req_len);
req->ctx_id = ctx_id;
req->ip_type = ip_type;
req->ip_instance = ip_instance;
req->ring = ring;
req->fence = fence_seq_no;
req->timeout_ns = timeout_ns;
req->flags = flags;
rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true);
if (r == 0)
*expired = rsp->expired;
return r;
}
int amdvgpu_vm_reserve_vmid(amdvgpu_device_handle dev, int reserve) {
unsigned req_len = sizeof(struct amdgpu_ccmd_reserve_vmid_req);
uint8_t buf[req_len];
struct amdgpu_ccmd_reserve_vmid_req *req = (void *)buf;
struct amdgpu_ccmd_rsp *rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, sizeof(struct amdgpu_ccmd_rsp));
req->hdr = AMDGPU_CCMD(RESERVE_VMID, req_len);
req->flags = reserve ? 0 : AMDGPU_CCMD_RESERVE_VMID_UNRESERVE;
return vdrm_send_req_wrapper(dev, &req->hdr, rsp, true);
}
int amdvgpu_cs_ctx_stable_pstate(amdvgpu_device_handle dev,
uint32_t ctx_id,
uint32_t op,
uint32_t flags,
uint32_t *out_flags) {
unsigned req_len = sizeof(struct amdgpu_ccmd_set_pstate_req);
unsigned rsp_len = sizeof(struct amdgpu_ccmd_set_pstate_rsp);
uint8_t buf[req_len];
struct amdgpu_ccmd_set_pstate_req *req = (void *)buf;
struct amdgpu_ccmd_set_pstate_rsp *rsp;
req->hdr = AMDGPU_CCMD(SET_PSTATE, req_len);
req->ctx_id = ctx_id;
req->op = op;
req->flags = flags;
req->pad = 0;
rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len);
int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, out_flags);
if (r == 0 && out_flags)
*out_flags = rsp->out_flags;
return r;
}
int
amdvgpu_va_range_alloc(amdvgpu_device_handle dev,
enum amdgpu_gpu_va_range va_range_type,
uint64_t size,
uint64_t va_base_alignment,
uint64_t va_base_required,
uint64_t *va_base_allocated,
amdgpu_va_handle *va_range_handle,
uint64_t flags)
{
return amdgpu_va_range_alloc2(dev->va_mgr, va_range_type, size,
va_base_alignment, va_base_required,
va_base_allocated, va_range_handle,
flags);
}


@@ -0,0 +1,90 @@
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#ifndef AMDGPU_VIRTIO_H
#define AMDGPU_VIRTIO_H
struct amdvgpu_bo;
struct amdvgpu_device;
struct amdvgpu_context;
typedef struct amdvgpu_device* amdvgpu_device_handle;
typedef struct amdvgpu_bo* amdvgpu_bo_handle;
struct amdvgpu_bo_import_result {
amdvgpu_bo_handle buf_handle;
uint64_t alloc_size;
};
int amdvgpu_device_initialize(int fd, uint32_t *drm_major, uint32_t *drm_minor,
amdvgpu_device_handle* dev);
int amdvgpu_device_deinitialize(amdvgpu_device_handle dev);
int amdvgpu_bo_va_op_raw(amdvgpu_device_handle dev,
uint32_t res_id,
uint64_t offset,
uint64_t size,
uint64_t addr,
uint64_t flags,
uint32_t ops);
int amdvgpu_bo_import(amdvgpu_device_handle dev,
enum amdgpu_bo_handle_type type,
uint32_t handle,
struct amdvgpu_bo_import_result *result);
int amdvgpu_bo_export(amdvgpu_device_handle dev, amdvgpu_bo_handle bo,
enum amdgpu_bo_handle_type type,
uint32_t *shared_handle);
int amdvgpu_bo_cpu_map(amdvgpu_device_handle dev, amdvgpu_bo_handle bo_handle, void **cpu);
int amdvgpu_bo_cpu_unmap(amdvgpu_device_handle dev, amdvgpu_bo_handle bo);
int amdvgpu_bo_alloc(amdvgpu_device_handle dev,
struct amdgpu_bo_alloc_request *request,
amdvgpu_bo_handle *bo);
int amdvgpu_bo_free(amdvgpu_device_handle dev, struct amdvgpu_bo *bo);
int amdvgpu_bo_wait_for_idle(amdvgpu_device_handle dev,
amdvgpu_bo_handle bo,
uint64_t abs_timeout_ns);
int
amdvgpu_bo_set_metadata(amdvgpu_device_handle dev, uint32_t res_id,
struct amdgpu_bo_metadata *info);
int amdvgpu_query_info(amdvgpu_device_handle dev, struct drm_amdgpu_info *info);
int amdvgpu_bo_query_info(amdvgpu_device_handle dev, uint32_t res_id, struct amdgpu_bo_info *info);
int amdvgpu_cs_ctx_create2(amdvgpu_device_handle dev, int32_t priority, uint32_t *ctx_virtio);
int amdvgpu_cs_ctx_free(amdvgpu_device_handle dev, uint32_t ctx);
int amdvgpu_cs_ctx_stable_pstate(amdvgpu_device_handle dev,
uint32_t ctx,
uint32_t op,
uint32_t flags,
uint32_t *out_flags);
int amdvgpu_cs_query_reset_state2(amdvgpu_device_handle dev,
uint32_t ctx,
uint64_t *flags);
int
amdvgpu_va_range_alloc(amdvgpu_device_handle dev,
enum amdgpu_gpu_va_range va_range_type,
uint64_t size,
uint64_t va_base_alignment,
uint64_t va_base_required,
uint64_t *va_base_allocated,
amdgpu_va_handle *va_range_handle,
uint64_t flags);
int amdvgpu_cs_query_fence_status(amdvgpu_device_handle dev,
uint32_t ctx,
uint32_t ip_type,
uint32_t ip_instance, uint32_t ring,
uint64_t fence_seq_no,
uint64_t timeout_ns, uint64_t flags,
uint32_t *expired);
int
amdvgpu_device_get_fd(amdvgpu_device_handle dev);
const char *
amdvgpu_get_marketing_name(amdvgpu_device_handle dev);
int
amdvgpu_cs_submit_raw2(amdvgpu_device_handle dev, uint32_t ctx_id,
uint32_t bo_list_handle,
int num_chunks, struct drm_amdgpu_cs_chunk *chunks,
uint64_t *seqno);
int amdvgpu_vm_reserve_vmid(amdvgpu_device_handle dev, int reserve);
int
amdvgpu_query_sw_info(amdvgpu_device_handle dev, enum amdgpu_sw_info info, void *value);
#endif
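
A minimal sketch (not part of the patch) of how the API declared above chains
together, assuming fd is a virtio-gpu render node exposing the amdgpu native
context:

  uint32_t drm_major, drm_minor;
  amdvgpu_device_handle vdev;

  if (amdvgpu_device_initialize(fd, &drm_major, &drm_minor, &vdev) == 0) {
     struct amdgpu_bo_alloc_request req = {
        .alloc_size = 4096,
        .phys_alignment = 4096,
        .preferred_heap = AMDGPU_GEM_DOMAIN_GTT,
     };
     amdvgpu_bo_handle bo;
     if (amdvgpu_bo_alloc(vdev, &req, &bo) == 0) {
        void *cpu = NULL; /* no placement hint */
        if (amdvgpu_bo_cpu_map(vdev, bo, &cpu) == 0)
           amdvgpu_bo_cpu_unmap(vdev, bo);
        amdvgpu_bo_free(vdev, bo);
     }
     amdvgpu_device_deinitialize(vdev);
  }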


@@ -0,0 +1,367 @@
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include "drm-uapi/amdgpu_drm.h"
#include "amdgpu_virtio_private.h"
#include "ac_linux_drm.h"
#include "util/list.h"
#include "util/log.h"
#include "util/os_mman.h"
#include "util/os_time.h"
#include "util/u_math.h"
#include <xf86drm.h>
#include <string.h>
#include <fcntl.h>
struct amdvgpu_host_blob {
/* virtgpu properties */
uint32_t handle;
uint32_t res_id;
uint64_t alloc_size;
/* CPU mapping handling. */
uint64_t offset;
int map_count;
void *cpu_addr;
simple_mtx_t cpu_access_mutex;
/* Allocation parameters. */
uint32_t vm_flags;
uint32_t preferred_heap;
uint64_t phys_alignment;
uint64_t flags;
};
static
void destroy_host_blob(amdvgpu_device_handle dev, struct amdvgpu_host_blob *hb);
static
struct amdvgpu_host_blob *create_host_blob(uint32_t kms_handle,
uint32_t res_id,
uint64_t size,
struct amdgpu_ccmd_gem_new_req *req)
{
struct amdvgpu_host_blob *hb = calloc(1, sizeof(*hb));
hb->handle = kms_handle;
hb->res_id = res_id;
hb->alloc_size = size;
if (req) {
hb->phys_alignment = req->r.phys_alignment;
hb->preferred_heap = req->r.preferred_heap;
hb->flags = req->r.flags;
}
simple_mtx_init(&hb->cpu_access_mutex, mtx_plain);
return hb;
}
static
void destroy_host_blob(amdvgpu_device_handle dev, struct amdvgpu_host_blob *hb) {
simple_mtx_destroy(&hb->cpu_access_mutex);
struct drm_gem_close req = {
.handle = hb->handle,
};
int r = drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
if (r != 0) {
mesa_loge("DRM_IOCTL_GEM_CLOSE failed for res_id: %d\n", hb->res_id);
}
free(hb);
}
static int
alloc_host_blob(amdvgpu_bo_handle bo,
struct amdgpu_ccmd_gem_new_req *req,
uint32_t blob_flags)
{
uint32_t kms_handle, res_id;
/* Creating the host blob requires 2 steps. First create the host blob... */
kms_handle = vdrm_bo_create(bo->dev->vdev, req->r.alloc_size, blob_flags,
req->blob_id, &req->hdr);
/* 0 is an invalid handle and is used by vdrm_bo_create to signal an error. */
if (kms_handle == 0)
return -1;
/* ... and then retrieve its resource id (global id). */
res_id = vdrm_handle_to_res_id(bo->dev->vdev, kms_handle);
bo->host_blob = create_host_blob(kms_handle, res_id, req->r.alloc_size, req);
simple_mtx_lock(&bo->dev->handle_to_vbo_mutex);
_mesa_hash_table_insert(bo->dev->handle_to_vbo, (void*)(intptr_t)bo->host_blob->handle, bo);
simple_mtx_unlock(&bo->dev->handle_to_vbo_mutex);
return 0;
}
int amdvgpu_bo_export(amdvgpu_device_handle dev, amdvgpu_bo_handle bo,
enum amdgpu_bo_handle_type type,
uint32_t *shared_handle)
{
switch (type) {
case amdgpu_bo_handle_type_kms:
/* Return the resource id as this handle is only going to be used
* internally (AMDGPU_CHUNK_ID_BO_HANDLES mostly).
*/
*shared_handle = amdvgpu_get_resource_id(bo);
return 0;
case amdgpu_bo_handle_type_dma_buf_fd:
return drmPrimeHandleToFD(dev->fd, bo->host_blob->handle, DRM_CLOEXEC | DRM_RDWR,
(int*)shared_handle);
case amdgpu_bo_handle_type_kms_noimport:
/* Treat this deprecated type as _type_kms and return the GEM handle. */
*shared_handle = bo->host_blob->handle;
return 0;
case amdgpu_bo_handle_type_gem_flink_name:
break;
}
return -EINVAL;
}
int amdvgpu_bo_free(amdvgpu_device_handle dev, struct amdvgpu_bo *bo) {
int refcnt = p_atomic_dec_return(&bo->refcount);
if (refcnt == 0) {
/* Flush pending ops. */
vdrm_flush(dev->vdev);
/* Remove it from the bo table. */
if (bo->host_blob->handle > 0) {
simple_mtx_lock(&dev->handle_to_vbo_mutex);
struct hash_entry *entry = _mesa_hash_table_search(dev->handle_to_vbo,
(void*)(intptr_t)bo->host_blob->handle);
if (entry) {
/* entry can be NULL for the shmem buffer. */
_mesa_hash_table_remove(dev->handle_to_vbo, entry);
}
simple_mtx_unlock(&dev->handle_to_vbo_mutex);
}
if (bo->host_blob)
destroy_host_blob(dev, bo->host_blob);
free(bo);
}
return 0;
}
int amdvgpu_bo_alloc(amdvgpu_device_handle dev,
struct amdgpu_bo_alloc_request *request,
amdvgpu_bo_handle *bo)
{
int r;
uint32_t blob_flags = 0;
struct amdgpu_ccmd_gem_new_req req = {
.hdr = AMDGPU_CCMD(GEM_NEW, sizeof(req)),
.blob_id = p_atomic_inc_return(&dev->next_blob_id),
};
req.r.alloc_size = request->alloc_size;
req.r.phys_alignment = request->phys_alignment;
req.r.preferred_heap = request->preferred_heap;
req.r.__pad = 0;
req.r.flags = request->flags;
if (!(request->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
/* blob_id 0 is reserved for the shared memory buffer. */
assert(req.blob_id > 0);
amdvgpu_bo_handle out = calloc(1, sizeof(struct amdvgpu_bo));
out->dev = dev;
out->size = request->alloc_size;
r = alloc_host_blob(out, &req, blob_flags);
if (r < 0) {
free(out);
return r;
}
p_atomic_set(&out->refcount, 1);
*bo = out;
return 0;
}
int amdvgpu_bo_va_op_raw(amdvgpu_device_handle dev,
uint32_t res_id,
uint64_t offset,
uint64_t size,
uint64_t addr,
uint64_t flags,
uint32_t ops)
{
int r;
/* Fill base structure fields. */
struct amdgpu_ccmd_bo_va_op_req req = {
.hdr = AMDGPU_CCMD(BO_VA_OP, sizeof(req)),
.va = addr,
.res_id = res_id,
.offset = offset,
.vm_map_size = size,
.flags = flags,
.op = ops,
.flags2 = res_id == 0 ? AMDGPU_CCMD_BO_VA_OP_SPARSE_BO : 0,
};
struct amdgpu_ccmd_rsp *rsp =
vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(*rsp));
r = vdrm_send_req_wrapper(dev, &req.hdr, rsp, false);
return r;
}
int amdvgpu_bo_import(amdvgpu_device_handle dev, enum amdgpu_bo_handle_type type,
uint32_t handle, struct amdvgpu_bo_import_result *result)
{
if (type != amdgpu_bo_handle_type_dma_buf_fd)
return -1;
uint32_t kms_handle;
int r = drmPrimeFDToHandle(dev->fd, handle, &kms_handle);
if (r) {
mesa_loge("drmPrimeFDToHandle failed for dmabuf fd: %u\n", handle);
return r;
}
/* Look up existing bo. */
simple_mtx_lock(&dev->handle_to_vbo_mutex);
struct hash_entry *entry = _mesa_hash_table_search(dev->handle_to_vbo, (void*)(intptr_t)kms_handle);
if (entry) {
struct amdvgpu_bo *bo = entry->data;
p_atomic_inc(&bo->refcount);
simple_mtx_unlock(&dev->handle_to_vbo_mutex);
result->buf_handle = (void*)bo;
result->alloc_size = bo->size;
assert(bo->host_blob);
return 0;
}
simple_mtx_unlock(&dev->handle_to_vbo_mutex);
struct drm_virtgpu_resource_info args = {
.bo_handle = kms_handle,
};
r = virtio_ioctl(dev->fd, VIRTGPU_RESOURCE_INFO, &args);
if (r) {
mesa_loge("VIRTGPU_RESOURCE_INFO failed (%s)\n", strerror(errno));
return r;
}
off_t size = lseek(handle, 0, SEEK_END);
if (size == (off_t) -1) {
mesa_loge("lseek failed (%s)\n", strerror(errno));
return -1;
}
lseek(handle, 0, SEEK_CUR);
struct amdvgpu_bo *bo = calloc(1, sizeof(struct amdvgpu_bo));
bo->dev = dev;
bo->size = size;
bo->host_blob = create_host_blob(kms_handle, args.res_handle, size, NULL);
p_atomic_set(&bo->refcount, 1);
result->buf_handle = bo;
result->alloc_size = bo->size;
simple_mtx_lock(&dev->handle_to_vbo_mutex);
_mesa_hash_table_insert(dev->handle_to_vbo, (void*)(intptr_t)bo->host_blob->handle, bo);
simple_mtx_unlock(&dev->handle_to_vbo_mutex);
return 0;
}
static int amdvgpu_get_offset(amdvgpu_bo_handle bo_handle)
{
if (bo_handle->host_blob->offset)
return 0;
struct drm_virtgpu_map req = {
.handle = bo_handle->host_blob->handle,
};
int ret = virtio_ioctl(bo_handle->dev->fd, VIRTGPU_MAP, &req);
if (ret) {
mesa_loge("amdvgpu_bo_map failed (%s) handle: %d\n",
strerror(errno), bo_handle->host_blob->handle);
return ret;
}
bo_handle->host_blob->offset = req.offset;
return 0;
}
int amdvgpu_bo_cpu_map(amdvgpu_device_handle dev, amdvgpu_bo_handle bo_handle,
void **cpu) {
int r;
simple_mtx_lock(&bo_handle->host_blob->cpu_access_mutex);
if (bo_handle->host_blob->cpu_addr == NULL) {
assert(bo_handle->host_blob->cpu_addr == NULL);
r = amdvgpu_get_offset(bo_handle);
if (r) {
mesa_loge("get_offset failed\n");
simple_mtx_unlock(&bo_handle->host_blob->cpu_access_mutex);
return r;
}
/* Use *cpu as a fixed address hint from the caller. */
bo_handle->host_blob->cpu_addr = os_mmap(*cpu, bo_handle->host_blob->alloc_size,
PROT_READ | PROT_WRITE, MAP_SHARED,
dev->fd,
bo_handle->host_blob->offset);
}
assert(bo_handle->host_blob->cpu_addr != MAP_FAILED);
*cpu = bo_handle->host_blob->cpu_addr;
p_atomic_inc(&bo_handle->host_blob->map_count);
simple_mtx_unlock(&bo_handle->host_blob->cpu_access_mutex);
return *cpu == MAP_FAILED;
}
int amdvgpu_bo_cpu_unmap(amdvgpu_device_handle dev, amdvgpu_bo_handle bo) {
int r = 0;
simple_mtx_lock(&bo->host_blob->cpu_access_mutex);
if (bo->host_blob->map_count == 0) {
simple_mtx_unlock(&bo->host_blob->cpu_access_mutex);
return 0;
}
assert(bo->host_blob->cpu_addr);
if (p_atomic_dec_zero(&bo->host_blob->map_count)) {
r = os_munmap(bo->host_blob->cpu_addr, bo->host_blob->alloc_size);
bo->host_blob->cpu_addr = NULL;
}
simple_mtx_unlock(&bo->host_blob->cpu_access_mutex);
return r;
}
uint32_t amdvgpu_get_resource_id(amdvgpu_bo_handle bo) {
return bo->host_blob->res_id;
}
int amdvgpu_bo_wait_for_idle(amdvgpu_device_handle dev,
amdvgpu_bo_handle bo,
uint64_t abs_timeout_ns) {
/* TODO: add a wait for idle command? */
return vdrm_bo_wait(dev->vdev, bo->host_blob->handle);
}


@@ -0,0 +1,188 @@
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include "amdgpu_virtio_private.h"
#include "util/bitscan.h"
#include "util/log.h"
#include "util/os_file.h"
#include "util/u_debug.h"
#include <xf86drm.h>
/* amdvgpu_device manages the virtual GPU.
*
* It owns a vdrm_device instance and the rings, and manages seqnos.
* Since it's a drop-in replacement for libdrm_amdgpu's amdgpu_device,
* it follows its behavior: if the same device is opened multiple times,
* the same amdvgpu_device will be used.
*/
static simple_mtx_t dev_mutex = SIMPLE_MTX_INITIALIZER;
static amdvgpu_device_handle dev_list;
static int fd_compare(int fd1, int fd2)
{
char *name1 = drmGetPrimaryDeviceNameFromFd(fd1);
char *name2 = drmGetPrimaryDeviceNameFromFd(fd2);
int result;
if (name1 == NULL || name2 == NULL) {
free(name1);
free(name2);
return 0;
}
result = strcmp(name1, name2);
free(name1);
free(name2);
return result;
}
static void amdvgpu_device_reference(struct amdvgpu_device **dst,
struct amdvgpu_device *src)
{
if (update_references(*dst ? &(*dst)->refcount : NULL,
src ? &src->refcount : NULL)) {
struct amdvgpu_device *dev, *prev = NULL;
for (dev = dev_list; dev; dev = dev->next) {
if (dev == (*dst)) {
if (prev == NULL)
dev_list = dev->next;
else
prev->next = dev->next;
break;
}
prev = dev;
}
dev = *dst;
/* Destroy BOs before closing vdrm */
hash_table_foreach(dev->handle_to_vbo, entry) {
struct amdvgpu_bo *bo = entry->data;
amdvgpu_bo_free(dev, bo);
}
_mesa_hash_table_destroy(dev->handle_to_vbo, NULL);
/* Destroy contexts. */
hash_table_foreach(&dev->contexts, entry)
amdvgpu_cs_ctx_free(dev, (uint32_t)(uintptr_t)entry->key);
_mesa_hash_table_clear(&dev->contexts, NULL);
simple_mtx_destroy(&dev->handle_to_vbo_mutex);
simple_mtx_destroy(&dev->contexts_mutex);
amdgpu_va_manager_deinit(dev->va_mgr);
vdrm_device_close(dev->vdev);
close(dev->fd);
free(dev);
}
*dst = src;
}
int amdvgpu_device_deinitialize(amdvgpu_device_handle dev) {
simple_mtx_lock(&dev_mutex);
amdvgpu_device_reference(&dev, NULL);
simple_mtx_unlock(&dev_mutex);
return 0;
}
int amdvgpu_device_initialize(int fd, uint32_t *drm_major, uint32_t *drm_minor,
amdvgpu_device_handle* dev_out) {
simple_mtx_lock(&dev_mutex);
amdvgpu_device_handle dev;
for (dev = dev_list; dev; dev = dev->next)
if (fd_compare(dev->fd, fd) == 0)
break;
if (dev) {
*dev_out = NULL;
amdvgpu_device_reference(dev_out, dev);
*drm_major = dev->vdev->caps.version_major;
*drm_minor = dev->vdev->caps.version_minor;
simple_mtx_unlock(&dev_mutex);
return 0;
}
/* fd is owned by the amdgpu_screen_winsys that called this function.
* amdgpu_screen_winsys' lifetime may be shorter than the device's one,
* so dup fd to tie its lifetime to the device's one.
*/
fd = os_dupfd_cloexec(fd);
struct vdrm_device *vdev = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_AMDGPU);
if (vdev == NULL) {
mesa_loge("vdrm_device_connect failed\n");
simple_mtx_unlock(&dev_mutex);
return -1;
}
dev = calloc(1, sizeof(struct amdvgpu_device));
dev->refcount = 1;
dev->next = dev_list;
dev_list = dev;
dev->fd = fd;
dev->vdev = vdev;
simple_mtx_init(&dev->handle_to_vbo_mutex, mtx_plain);
simple_mtx_init(&dev->contexts_mutex, mtx_plain);
dev->handle_to_vbo = _mesa_hash_table_create_u32_keys(NULL);
p_atomic_set(&dev->next_blob_id, 1);
*dev_out = dev;
simple_mtx_unlock(&dev_mutex);
struct drm_amdgpu_info info;
info.return_pointer = (uintptr_t)&dev->dev_info;
info.query = AMDGPU_INFO_DEV_INFO;
info.return_size = sizeof(dev->dev_info);
int r = amdvgpu_query_info(dev, &info);
assert(r == 0);
/* Ring idx 0 is reserved for commands running on CPU. */
unsigned next_ring_idx = 1;
for (unsigned i = 0; i < AMD_NUM_IP_TYPES; ++i) {
struct drm_amdgpu_info_hw_ip ip_info = {0};
struct drm_amdgpu_info request = {0};
request.return_pointer = (uintptr_t)&ip_info;
request.return_size = sizeof(ip_info);
request.query = AMDGPU_INFO_HW_IP_INFO;
request.query_hw_ip.type = i;
request.query_hw_ip.ip_instance = 0;
r = amdvgpu_query_info(dev, &request);
if (r == 0 && ip_info.available_rings) {
int count = util_bitcount(ip_info.available_rings);
dev->virtio_ring_mapping[i] = next_ring_idx;
next_ring_idx += count;
}
}
/* VIRTGPU_CONTEXT_PARAM_NUM_RINGS is hardcoded for now. */
assert(next_ring_idx <= 64);
dev->num_virtio_rings = next_ring_idx - 1;
dev->va_mgr = amdgpu_va_manager_alloc();
amdgpu_va_manager_init(dev->va_mgr,
dev->dev_info.virtual_address_offset, dev->dev_info.virtual_address_max,
dev->dev_info.high_va_offset, dev->dev_info.high_va_max,
dev->dev_info.virtual_address_alignment);
_mesa_hash_table_init(&dev->contexts, NULL,
_mesa_hash_pointer, _mesa_key_pointer_equal);
dev->allow_multiple_amdgpu_ctx = debug_get_bool_option("MULTIPLE_AMDGPU_CTX", false);
dev->sync_cmd = debug_get_num_option("VIRTIO_SYNC_CMD", 0);
*drm_major = dev->vdev->caps.version_major;
*drm_minor = dev->vdev->caps.version_minor;
return 0;
}


@@ -0,0 +1,161 @@
/*
* Copyright 2024 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#ifndef AMDGPU_VIRTIO_PRIVATE_H
#define AMDGPU_VIRTIO_PRIVATE_H
#include "drm-uapi/amdgpu_drm.h"
#include "drm-uapi/virtgpu_drm.h"
#include "util/hash_table.h"
#include "util/simple_mtx.h"
#include "amd_family.h"
#include "virtio/vdrm/vdrm.h"
#include "virtio/virtio-gpu/drm_hw.h"
#include "amdgpu_virtio_proto.h"
#include "amdgpu_virtio.h"
struct amdvgpu_host_blob;
struct amdvgpu_host_blob_allocator;
/* Host context seqno handling.
* seqnos are monotonically increasing integers, so we don't need
* to actually submit to know the value. This allows us to avoid
* waiting for the submission to reach the host (= no need to wait
* in the guest) while still knowing the seqno (= so we can take
* advantage of user fences).
*/
struct amdvgpu_context {
uint32_t refcount;
uint32_t host_context_id;
uint64_t ring_next_seqno[];
};
struct amdvgpu_device {
struct vdrm_device * vdev;
/* List of existing devices */
int refcount;
struct amdvgpu_device *next;
int fd;
/* Table mapping kms handles to amdvgpu_bo instances.
* Used to maintain a 1-to-1 mapping between the 2.
*/
simple_mtx_t handle_to_vbo_mutex;
struct hash_table *handle_to_vbo;
/* Submissions through virtio-gpu are ring based.
* Ring 0 is used for CPU jobs, then N rings are allocated: 1 per
* hw queue of each IP type (so if the GPU has 1 gfx queue and 2
* queues of another IP -> ring0 + 3 hw rings = 4 rings total).
*/
uint32_t num_virtio_rings;
uint32_t virtio_ring_mapping[AMD_NUM_IP_TYPES];
struct drm_amdgpu_info_device dev_info;
/* Blob ids are per-drm_file identifiers of host blobs.
* Use a monotonically increasing integer to assign the blob id.
*/
uint32_t next_blob_id;
/* GPU VA management (allocation / release). */
amdgpu_va_manager_handle va_mgr;
/* Debug option to make some protocol commands synchronous.
* If bit N is set, then the specific command will be sync.
*/
int64_t sync_cmd;
/* virtio-gpu uses a single context per drm_file and expects that
* any 2 jobs submitted to the same {context, ring} will execute in
* order.
* amdgpu on the other hand allows for multiple contexts per drm_file,
* so we either have to open multiple virtio-gpu drm_file to be able to
* have 1 virtio-gpu context per amdgpu-context or use a single amdgpu
* context.
* Using multiple drm_file might cause BO sharing issues so for now limit
* ourselves to a single amdgpu context. Each amdgpu_ctx object can schedule
* parallel work on 1 gfx, 2 sdma, 4 compute, 1 of each vcn queue.
*/
simple_mtx_t contexts_mutex;
struct hash_table contexts;
bool allow_multiple_amdgpu_ctx;
};
/* Refcounting helpers. Returns true when dst reaches 0. */
static inline bool update_references(int *dst, int *src)
{
if (dst != src) {
/* bump src first */
if (src) {
assert(p_atomic_read(src) > 0);
p_atomic_inc(src);
}
if (dst) {
return p_atomic_dec_zero(dst);
}
}
return false;
}
#define virtio_ioctl(fd, name, args) ({ \
int ret = drmIoctl((fd), DRM_IOCTL_ ## name, (args)); \
ret; \
})
struct amdvgpu_host_blob_creation_params {
struct drm_virtgpu_resource_create_blob args;
struct amdgpu_ccmd_gem_new_req req;
};
struct amdvgpu_bo {
struct amdvgpu_device *dev;
/* Importing the same kms handle must return the same
* amdvgpu_bo pointer, so we need a refcount.
*/
int refcount;
/* The size of the BO (might be smaller than the host
* bo's size).
*/
unsigned size;
/* The host blob backing this bo. */
struct amdvgpu_host_blob *host_blob;
};
uint32_t amdvgpu_get_resource_id(amdvgpu_bo_handle bo);
/* There are 2 return codes:
* - the virtio one, returned by vdrm_send_req
* - the host one, which only makes sense for sync
* requests.
*/
static inline
int vdrm_send_req_wrapper(amdvgpu_device_handle dev,
struct vdrm_ccmd_req *req,
struct amdgpu_ccmd_rsp *rsp,
bool sync) {
if (dev->sync_cmd & (1u << req->cmd))
sync = true;
int r = vdrm_send_req(dev->vdev, req, sync);
if (r)
return r;
if (sync)
return rsp->ret;
return 0;
}
#endif /* AMDGPU_VIRTIO_PRIVATE_H */


@@ -0,0 +1,275 @@
#ifndef AMDGPU_VIRTIO_PROTO_H
#define AMDGPU_VIRTIO_PROTO_H
#include <stdint.h>
#include "amdgpu.h"
#include "amdgpu_drm.h"
#ifdef __GNUC__
# pragma GCC diagnostic push
# pragma GCC diagnostic error "-Wpadded"
#endif
enum amdgpu_ccmd {
AMDGPU_CCMD_QUERY_INFO = 1,
AMDGPU_CCMD_GEM_NEW,
AMDGPU_CCMD_BO_VA_OP,
AMDGPU_CCMD_CS_SUBMIT,
AMDGPU_CCMD_SET_METADATA,
AMDGPU_CCMD_BO_QUERY_INFO,
AMDGPU_CCMD_CREATE_CTX,
AMDGPU_CCMD_RESERVE_VMID,
AMDGPU_CCMD_SET_PSTATE,
AMDGPU_CCMD_CS_QUERY_FENCE_STATUS,
};
struct amdgpu_ccmd_rsp {
struct vdrm_ccmd_rsp base;
int32_t ret;
};
static_assert(sizeof(struct amdgpu_ccmd_rsp) == 8, "bug");
#define AMDGPU_STATIC_ASSERT_SIZE(t) \
static_assert(sizeof(struct t) % 8 == 0, "sizeof(struct " #t ") not multiple of 8"); \
static_assert(alignof(struct t) <= 8, "alignof(struct " #t ") too large");
/**
* Defines the layout of shmem buffer used for host->guest communication.
*/
struct amdvgpu_shmem {
struct vdrm_shmem base;
/**
* Counter that is incremented on asynchronous errors, like SUBMIT
* or GEM_NEW failures. The guest should treat errors as context-
* lost.
*/
uint32_t async_error;
uint32_t __pad;
struct amdgpu_heap_info gtt;
struct amdgpu_heap_info vram;
struct amdgpu_heap_info vis_vram;
};
AMDGPU_STATIC_ASSERT_SIZE(amdvgpu_shmem)
DEFINE_CAST(vdrm_shmem, amdvgpu_shmem)
#define AMDGPU_CCMD(_cmd, _len) (struct vdrm_ccmd_req){ \
.cmd = AMDGPU_CCMD_##_cmd, \
.len = (_len), \
}
/*
* AMDGPU_CCMD_QUERY_INFO
*
* This is amdgpu_query_info.
*/
struct amdgpu_ccmd_query_info_req {
struct vdrm_ccmd_req hdr;
struct drm_amdgpu_info info;
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_query_info_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_query_info_req)
struct amdgpu_ccmd_query_info_rsp {
struct amdgpu_ccmd_rsp hdr;
uint8_t payload[];
};
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_query_info_rsp)
struct amdgpu_ccmd_gem_new_req {
struct vdrm_ccmd_req hdr;
uint64_t blob_id;
/* This is amdgpu_bo_alloc_request but padded correctly. */
struct {
uint64_t alloc_size;
uint64_t phys_alignment;
uint32_t preferred_heap;
uint32_t __pad;
uint64_t flags;
} r;
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_gem_new_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_gem_new_req)
/*
* AMDGPU_CCMD_BO_VA_OP
*
*/
struct amdgpu_ccmd_bo_va_op_req {
struct vdrm_ccmd_req hdr;
uint64_t va;
uint64_t vm_map_size;
uint64_t flags; /* Passed directly to kernel */
uint64_t flags2; /* AMDGPU_CCMD_BO_VA_OP_* */
uint64_t offset;
uint32_t res_id;
uint32_t op;
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_bo_va_op_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_va_op_req)
/* Specifies that this is a sparse BO. */
#define AMDGPU_CCMD_BO_VA_OP_SPARSE_BO (1 << 0)
/*
* AMDGPU_CCMD_CS_SUBMIT
*/
struct amdgpu_ccmd_cs_submit_req {
struct vdrm_ccmd_req hdr;
uint32_t ctx_id;
uint32_t num_chunks; /* limited to AMDGPU_CCMD_CS_SUBMIT_MAX_NUM_CHUNKS */
uint32_t pad;
uint32_t ring_idx;
/* Starts with a descriptor array:
* (chunk_id, offset_in_payload), ...
*/
uint8_t payload[];
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_cs_submit_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_submit_req)
#define AMDGPU_CCMD_CS_SUBMIT_MAX_NUM_CHUNKS 128
/*
* AMDGPU_CCMD_SET_METADATA
*/
struct amdgpu_ccmd_set_metadata_req {
struct vdrm_ccmd_req hdr;
uint64_t flags;
uint64_t tiling_info;
uint32_t res_id;
uint32_t size_metadata;
uint32_t umd_metadata[];
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_set_metadata_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_metadata_req)
/*
* AMDGPU_CCMD_BO_QUERY_INFO
*/
struct amdgpu_ccmd_bo_query_info_req {
struct vdrm_ccmd_req hdr;
uint32_t res_id;
uint32_t pad; /* must be zero */
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_bo_query_info_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_query_info_req)
struct amdgpu_ccmd_bo_query_info_rsp {
struct amdgpu_ccmd_rsp hdr;
/* This is almost struct amdgpu_bo_info, but padded to get
* the same struct on 32 bit and 64 bit builds.
*/
struct {
uint64_t alloc_size; /* 0 8 */
uint64_t phys_alignment; /* 8 8 */
uint32_t preferred_heap; /* 16 4 */
uint32_t __pad; /* 20 4 */
uint64_t alloc_flags; /* 24 8 */
/* This is almost struct amdgpu_bo_metadata, but padded to get
* the same struct on 32 bit and 64 bit builds.
*/
struct {
uint64_t flags; /* 32 8 */
uint64_t tiling_info; /* 40 8 */
uint32_t size_metadata; /* 48 4 */
uint32_t umd_metadata[64]; /* 52 256 */
uint32_t __pad; /* 308 4 */
} metadata;
} info;
};
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_query_info_rsp)
/*
* AMDGPU_CCMD_CREATE_CTX
*/
struct amdgpu_ccmd_create_ctx_req {
struct vdrm_ccmd_req hdr;
union {
int32_t priority; /* create */
uint32_t id; /* destroy */
};
uint32_t flags; /* AMDGPU_CCMD_CREATE_CTX_* */
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_create_ctx_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_create_ctx_req)
/* Destroy a context instead of creating one */
#define AMDGPU_CCMD_CREATE_CTX_DESTROY (1 << 0)
struct amdgpu_ccmd_create_ctx_rsp {
struct amdgpu_ccmd_rsp hdr;
uint32_t ctx_id;
uint32_t pad;
};
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_create_ctx_rsp)
/*
* AMDGPU_CCMD_RESERVE_VMID
*/
struct amdgpu_ccmd_reserve_vmid_req {
struct vdrm_ccmd_req hdr;
uint64_t flags; /* AMDGPU_CCMD_RESERVE_VMID_* */
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_reserve_vmid_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_reserve_vmid_req)
/* Unreserve a VMID instead of reserving one */
#define AMDGPU_CCMD_RESERVE_VMID_UNRESERVE (1 << 0)
/*
* AMDGPU_CCMD_SET_PSTATE
*/
struct amdgpu_ccmd_set_pstate_req {
struct vdrm_ccmd_req hdr;
uint32_t ctx_id;
uint32_t op;
uint32_t flags;
uint32_t pad;
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_set_pstate_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_pstate_req)
struct amdgpu_ccmd_set_pstate_rsp {
struct amdgpu_ccmd_rsp hdr;
uint32_t out_flags;
uint32_t pad;
};
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_pstate_rsp)
/*
* AMDGPU_CCMD_CS_QUERY_FENCE_STATUS
*/
struct amdgpu_ccmd_cs_query_fence_status_req {
struct vdrm_ccmd_req hdr;
uint32_t ctx_id;
uint32_t ip_type;
uint32_t ip_instance;
uint32_t ring;
uint64_t fence;
uint64_t timeout_ns;
uint64_t flags;
};
DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_cs_query_fence_status_req)
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_query_fence_status_req)
struct amdgpu_ccmd_cs_query_fence_status_rsp {
struct amdgpu_ccmd_rsp hdr;
uint32_t expired;
uint32_t pad;
};
AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_query_fence_status_rsp)
#ifdef __GNUC__
# pragma GCC diagnostic pop
#endif
#endif


@@ -2000,6 +2000,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
#ifdef _WIN32
assert(drm_device == NULL);
#else
bool is_virtio = false;
if (drm_device) {
const char *path = drm_device->nodes[DRM_NODE_RENDER];
drmVersionPtr version;


@@ -202,7 +202,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags,
ac_drm_device *dev;
struct radv_amdgpu_winsys *ws = NULL;
r = ac_drm_device_initialize(fd, &drm_major, &drm_minor, &dev);
r = ac_drm_device_initialize(fd, false, &drm_major, &drm_minor, &dev);
if (r) {
fprintf(stderr, "radv/amdgpu: failed to initialize device.\n");
return NULL;


@@ -395,7 +395,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
/* Initialize the amdgpu device. This should always return the same pointer
* for the same fd. */
r = ac_drm_device_initialize(fd, &drm_major, &drm_minor, &dev);
r = ac_drm_device_initialize(fd, false, &drm_major, &drm_minor, &dev);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
goto fail;


@@ -69,6 +69,9 @@ endif
if with_any_intel
subdir('intel')
endif
if system_has_kms_drm or with_gallium_virgl
subdir('virtio')
endif
if with_gallium_radeonsi or with_amd_vk or with_gallium_r300 or with_gallium_r600
subdir('amd')
endif
@@ -78,9 +81,6 @@ endif
if with_gallium_etnaviv
subdir('etnaviv')
endif
if system_has_kms_drm or with_gallium_virgl
subdir('virtio')
endif
if with_gallium_freedreno or with_freedreno_vk or with_tools.contains('freedreno')
subdir('freedreno')
endif