diff --git a/meson.build b/meson.build index d5c1d28b759..7cbf7a7770e 100644 --- a/meson.build +++ b/meson.build @@ -178,6 +178,8 @@ if with_swrast warning('`gallium-drivers=swrast` is a deprecated alias for `gallium-drivers=softpipe,llvmpipe` and will be removed in version 25.0') endif +with_amdgpu_virtio = get_option('amdgpu-virtio') + with_gallium_radeonsi = gallium_drivers.contains('radeonsi') with_gallium_r300 = gallium_drivers.contains('r300') with_gallium_r600 = gallium_drivers.contains('r600') diff --git a/meson_options.txt b/meson_options.txt index ea3f54126b3..f80b1edf74c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -224,6 +224,12 @@ option( 'gallium and vulkan driver', ) +option( + 'amdgpu-virtio', + type : 'boolean', + value : false, + description : 'use experimental virtio backend for radeonsi/radv', +) option( 'imagination-srv', type : 'boolean', diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 69061d97b35..489d6043add 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -219,6 +219,7 @@ struct radeon_info { uint32_t drm_patchlevel; uint32_t max_submitted_ibs[AMD_NUM_IP_TYPES]; bool is_amdgpu; + bool is_virtio; bool has_userptr; bool has_syncobj; bool has_timeline_syncobj; diff --git a/src/amd/common/ac_linux_drm.c b/src/amd/common/ac_linux_drm.c index 5f902dcd700..8ee18d903ce 100644 --- a/src/amd/common/ac_linux_drm.c +++ b/src/amd/common/ac_linux_drm.c @@ -11,14 +11,23 @@ #include #include +#ifdef HAVE_AMDGPU_VIRTIO +#include "virtio/amdgpu_virtio.h" +#endif + struct ac_drm_device { union { amdgpu_device_handle adev; +#ifdef HAVE_AMDGPU_VIRTIO + amdvgpu_device_handle vdev; +#endif }; int fd; + bool is_virtio; }; -int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_version, +int ac_drm_device_initialize(int fd, bool is_virtio, + uint32_t *major_version, uint32_t *minor_version, ac_drm_device **dev) { int r; @@ -27,22 +36,43 @@ int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_ve if (!(*dev)) return -1; - amdgpu_device_handle adev; - r = amdgpu_device_initialize(fd, major_version, minor_version, - &adev); - if (r == 0) { - (*dev)->adev = adev; - (*dev)->fd = amdgpu_device_get_fd(adev); - } else { - free(*dev); +#ifdef HAVE_AMDGPU_VIRTIO + if (is_virtio) { + amdvgpu_device_handle vdev; + r = amdvgpu_device_initialize(fd, major_version, minor_version, + &vdev); + if (r == 0) { + (*dev)->vdev = vdev; + (*dev)->fd = amdvgpu_device_get_fd(vdev); + } + } else +#endif + { + amdgpu_device_handle adev; + r = amdgpu_device_initialize(fd, major_version, minor_version, + &adev); + if (r == 0) { + (*dev)->adev = adev; + (*dev)->fd = amdgpu_device_get_fd(adev); + } } + if (r == 0) + (*dev)->is_virtio = is_virtio; + else + free(*dev); + return r; } void ac_drm_device_deinitialize(ac_drm_device *dev) { - amdgpu_device_deinitialize(dev->adev); +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + amdvgpu_device_deinitialize(dev->vdev); + else +#endif + amdgpu_device_deinitialize(dev->adev); free(dev); } @@ -53,6 +83,10 @@ int ac_drm_device_get_fd(ac_drm_device *device_handle) int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu_bo_metadata *info) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_set_metadata(dev->vdev, bo_handle, info); +#endif struct drm_amdgpu_gem_metadata args = {}; args.handle = bo_handle; @@ -74,6 +108,10 @@ int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu int 
ac_drm_bo_query_info(ac_drm_device *dev, uint32_t bo_handle, struct amdgpu_bo_info *info) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_query_info(dev->vdev, bo_handle, info); +#endif struct drm_amdgpu_gem_metadata metadata = {}; struct drm_amdgpu_gem_create_in bo_info = {}; struct drm_amdgpu_gem_op gem_op = {}; @@ -148,9 +186,16 @@ int ac_drm_bo_wait_for_idle(ac_drm_device *dev, ac_drm_bo bo, uint64_t timeout_n memset(&args, 0, sizeof(args)); args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns); - ac_drm_bo_export(dev, bo, amdgpu_bo_handle_type_kms, - &args.in.handle); - r = drm_ioctl_write_read(dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &args, sizeof(args)); +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + r = amdvgpu_bo_wait_for_idle(dev->vdev, bo.vbo, args.in.timeout); + } else +#endif + { + ac_drm_bo_export(dev, bo, amdgpu_bo_handle_type_kms, + &args.in.handle); + r = drm_ioctl_write_read(dev->fd, DRM_AMDGPU_GEM_WAIT_IDLE, &args, sizeof(args)); + } if (r == 0) { *busy = args.out.status; @@ -181,6 +226,11 @@ int ac_drm_bo_va_op_raw(ac_drm_device *dev, uint32_t bo_handle, uint64_t offset, ops != AMDGPU_VA_OP_CLEAR) return -EINVAL; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_va_op_raw(dev->vdev, bo_handle, offset, size, addr, flags, ops); +#endif + memset(&va, 0, sizeof(va)); va.handle = bo_handle; va.operation = ops; @@ -239,6 +289,10 @@ int ac_drm_cs_ctx_create2(ac_drm_device *dev, uint32_t priority, uint32_t *ctx_i } } +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_cs_ctx_create2(dev->vdev, priority, ctx_id); +#endif /* Create the context */ memset(&args, 0, sizeof(args)); args.in.op = AMDGPU_CTX_OP_ALLOC_CTX; @@ -256,6 +310,10 @@ int ac_drm_cs_ctx_create2(ac_drm_device *dev, uint32_t priority, uint32_t *ctx_i int ac_drm_cs_ctx_free(ac_drm_device *dev, uint32_t ctx_id) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_cs_ctx_free(dev->vdev, ctx_id); +#endif union drm_amdgpu_ctx args; /* now deal with kernel side */ @@ -268,6 +326,10 @@ int ac_drm_cs_ctx_free(ac_drm_device *dev, uint32_t ctx_id) int ac_drm_cs_ctx_stable_pstate(ac_drm_device *dev, uint32_t ctx_id, uint32_t op, uint32_t flags, uint32_t *out_flags) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_cs_ctx_stable_pstate(dev->vdev, ctx_id, op, flags, out_flags); +#endif union drm_amdgpu_ctx args; int r; @@ -286,6 +348,11 @@ int ac_drm_cs_ctx_stable_pstate(ac_drm_device *dev, uint32_t ctx_id, uint32_t op int ac_drm_cs_query_reset_state2(ac_drm_device *dev, uint32_t ctx_id, uint64_t *flags) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_cs_query_reset_state2(dev->vdev, ctx_id, flags); +#endif + union drm_amdgpu_ctx args; int r; @@ -342,8 +409,14 @@ int ac_drm_cs_query_fence_status(ac_drm_device *dev, uint32_t ctx_id, uint32_t i *expired = false; - r = amdgpu_ioctl_wait_cs(dev->fd, ctx_id, ip_type, ip_instance, ring, fence_seq_no, - timeout_ns, flags, &busy); +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + r = amdvgpu_cs_query_fence_status(dev->vdev, ctx_id, ip_type, ip_instance, ring, fence_seq_no, + timeout_ns, flags, expired); + else +#endif + r = amdgpu_ioctl_wait_cs(dev->fd, ctx_id, ip_type, ip_instance, ring, fence_seq_no, + timeout_ns, flags, &busy); if (!r && !busy) *expired = true; @@ -432,6 +505,11 @@ int ac_drm_cs_syncobj_timeline_wait(int device_fd, uint32_t *handles, uint64_t * int ac_drm_cs_submit_raw2(ac_drm_device *dev, uint32_t ctx_id, uint32_t bo_list_handle, int num_chunks, struct 
drm_amdgpu_cs_chunk *chunks, uint64_t *seq_no) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_cs_submit_raw2(dev->vdev, ctx_id, bo_list_handle, num_chunks, chunks, seq_no); +#endif + union drm_amdgpu_cs cs; uint64_t *chunk_array; int i, r; @@ -466,6 +544,10 @@ int ac_drm_query_info(ac_drm_device *dev, unsigned info_id, unsigned size, void request.return_size = size; request.query = info_id; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -483,6 +565,10 @@ int ac_drm_read_mm_registers(ac_drm_device *dev, unsigned dword_offset, unsigned request.read_mmr_reg.instance = instance; request.read_mmr_reg.flags = flags; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -496,6 +582,10 @@ int ac_drm_query_hw_ip_count(ac_drm_device *dev, unsigned type, uint32_t *count) request.query = AMDGPU_INFO_HW_IP_COUNT; request.query_hw_ip.type = type; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -511,6 +601,10 @@ int ac_drm_query_hw_ip_info(ac_drm_device *dev, unsigned type, unsigned ip_insta request.query_hw_ip.type = type; request.query_hw_ip.ip_instance = ip_instance; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -529,7 +623,12 @@ int ac_drm_query_firmware_version(ac_drm_device *dev, unsigned fw_type, unsigned request.query_fw.ip_instance = ip_instance; request.query_fw.index = index; - r = drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + r = amdvgpu_query_info(dev->vdev, &request); + else +#endif + r = drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); if (r) return r; @@ -690,6 +789,10 @@ int ac_drm_query_sensor_info(ac_drm_device *dev, unsigned sensor_type, unsigned request.query = AMDGPU_INFO_SENSOR; request.sensor_info.type = sensor_type; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -703,6 +806,10 @@ int ac_drm_query_video_caps_info(ac_drm_device *dev, unsigned cap_type, unsigned request.query = AMDGPU_INFO_VIDEO_CAPS; request.sensor_info.type = cap_type; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } @@ -715,11 +822,21 @@ int ac_drm_query_gpuvm_fault_info(ac_drm_device *dev, unsigned size, void *value request.return_size = size; request.query = AMDGPU_INFO_GPUVM_FAULT; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_query_info(dev->vdev, &request); +#endif return drm_ioctl_write(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } int ac_drm_vm_reserve_vmid(ac_drm_device *dev, uint32_t flags) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + assert(flags == 0); + return 
amdvgpu_vm_reserve_vmid(dev->vdev, 1); + } +#endif union drm_amdgpu_vm vm; vm.in.op = AMDGPU_VM_OP_RESERVE_VMID; @@ -730,6 +847,12 @@ int ac_drm_vm_reserve_vmid(ac_drm_device *dev, uint32_t flags) int ac_drm_vm_unreserve_vmid(ac_drm_device *dev, uint32_t flags) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + assert(flags == 0); + return amdvgpu_vm_reserve_vmid(dev->vdev, 0); + } +#endif union drm_amdgpu_vm vm; vm.in.op = AMDGPU_VM_OP_UNRESERVE_VMID; @@ -740,24 +863,41 @@ int ac_drm_vm_unreserve_vmid(ac_drm_device *dev, uint32_t flags) const char *ac_drm_get_marketing_name(ac_drm_device *dev) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_get_marketing_name(dev->vdev); +#endif return amdgpu_get_marketing_name(dev->adev); } int ac_drm_query_sw_info(ac_drm_device *dev, enum amdgpu_sw_info info, void *value) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + assert(info == amdgpu_sw_info_address32_hi); + return amdvgpu_query_sw_info(dev->vdev, info, value); + } +#endif return amdgpu_query_sw_info(dev->adev, info, value); } int ac_drm_bo_alloc(ac_drm_device *dev, struct amdgpu_bo_alloc_request *alloc_buffer, ac_drm_bo *bo) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_alloc(dev->vdev, alloc_buffer, &bo->vbo); +#endif return amdgpu_bo_alloc(dev->adev, alloc_buffer, &bo->abo); } - int ac_drm_bo_export(ac_drm_device *dev, ac_drm_bo bo, enum amdgpu_bo_handle_type type, uint32_t *shared_handle) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_export(dev->vdev, bo.vbo, type, shared_handle); +#endif return amdgpu_bo_export(bo.abo, type, shared_handle); } @@ -766,35 +906,65 @@ int ac_drm_bo_import(ac_drm_device *dev, enum amdgpu_bo_handle_type type, { int r; - struct amdgpu_bo_import_result result; - r = amdgpu_bo_import(dev->adev, type, shared_handle, &result); - if (r == 0) { - output->bo.abo = result.buf_handle; - output->alloc_size = result.alloc_size; +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + struct amdvgpu_bo_import_result result; + r = amdvgpu_bo_import(dev->vdev, type, shared_handle, &result); + if (r == 0) { + output->bo.vbo = result.buf_handle; + output->alloc_size = result.alloc_size; + } + } + else +#endif + { + struct amdgpu_bo_import_result result; + r = amdgpu_bo_import(dev->adev, type, shared_handle, &result); + if (r == 0) { + output->bo.abo = result.buf_handle; + output->alloc_size = result.alloc_size; + } } return r; } - int ac_drm_create_bo_from_user_mem(ac_drm_device *dev, void *cpu, uint64_t size, ac_drm_bo *bo) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) { + assert(false); + return -1; + } +#endif return amdgpu_create_bo_from_user_mem(dev->adev, cpu, size, &bo->abo); } int ac_drm_bo_free(ac_drm_device *dev, ac_drm_bo bo) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_free(dev->vdev, bo.vbo); +#endif return amdgpu_bo_free(bo.abo); } int ac_drm_bo_cpu_map(ac_drm_device *dev, ac_drm_bo bo, void **cpu) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_cpu_map(dev->vdev, bo.vbo, cpu); +#endif return amdgpu_bo_cpu_map(bo.abo, cpu); } int ac_drm_bo_cpu_unmap(ac_drm_device *dev, ac_drm_bo bo) { +#ifdef HAVE_AMDGPU_VIRTIO + if (dev->is_virtio) + return amdvgpu_bo_cpu_unmap(dev->vdev, bo.vbo); +#endif return amdgpu_bo_cpu_unmap(bo.abo); } @@ -803,6 +973,12 @@ int ac_drm_va_range_alloc(ac_drm_device *dev, enum amdgpu_gpu_va_range va_range_ uint64_t *va_base_allocated, amdgpu_va_handle *va_range_handle, uint64_t flags) { +#ifdef HAVE_AMDGPU_VIRTIO + if 
(dev->is_virtio) + return amdvgpu_va_range_alloc(dev->vdev, va_range_type, size, va_base_alignment, + va_base_required, va_base_allocated, + va_range_handle, flags); +#endif return amdgpu_va_range_alloc(dev->adev, va_range_type, size, va_base_alignment, va_base_required, va_base_allocated, va_range_handle, flags); @@ -821,6 +997,12 @@ int ac_drm_create_userqueue(ac_drm_device *dev, uint32_t ip_type, uint32_t doorb union drm_amdgpu_userq userq; uint64_t mqd_size; +#ifdef HAVE_AMDGPU_VIRTIO + /* Not supported yet. */ + if (dev->is_virtio) + return -1; +#endif + switch (ip_type) { case AMDGPU_HW_IP_GFX: mqd_size = sizeof(struct drm_amdgpu_userq_mqd_gfx11); diff --git a/src/amd/common/ac_linux_drm.h b/src/amd/common/ac_linux_drm.h index 74f5bfdc72e..42d994d2dba 100644 --- a/src/amd/common/ac_linux_drm.h +++ b/src/amd/common/ac_linux_drm.h @@ -49,6 +49,9 @@ typedef union ac_drm_bo { #else amdgpu_bo_handle abo; #endif +#ifdef HAVE_AMDGPU_VIRTIO + struct amdvgpu_bo *vbo; +#endif } ac_drm_bo; struct ac_drm_bo_import_result { @@ -56,8 +59,9 @@ struct ac_drm_bo_import_result { uint64_t alloc_size; }; -PROC int ac_drm_device_initialize(int fd, uint32_t *major_version, uint32_t *minor_version, - ac_drm_device **dev) TAIL; +PROC int ac_drm_device_initialize(int fd, bool is_virtio, + uint32_t *major_version, uint32_t *minor_version, + ac_drm_device **device_handle) TAIL; PROC void ac_drm_device_deinitialize(ac_drm_device *dev) TAILV; PROC int ac_drm_device_get_fd(ac_drm_device *dev) TAIL; PROC int ac_drm_bo_set_metadata(ac_drm_device *dev, uint32_t bo_handle, diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index cf255966022..6137f190025 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -128,6 +128,19 @@ if not with_platform_windows ) endif +link_with = [] +c_args = ['-DADDR_FASTCALL='] +if with_amdgpu_virtio + c_args += ['-DHAVE_AMDGPU_VIRTIO', '-DENABLE_DRM_AMDGPU'] + amd_common_files += files( + 'virtio/amdgpu_virtio.c', + 'virtio/amdgpu_virtio_bo.c', + 'virtio/amdgpu_virtio_device.c', + 'virtio/amdgpu_virtio_private.h', + 'virtio/amdgpu_virtio_proto.h') + link_with += libvdrm +endif + if dep_elf.found() amd_common_files += files( 'ac_rtld.c', @@ -140,14 +153,15 @@ libamd_common = static_library( 'amd_common', [amd_common_files, sid_tables_h, amdgfxregs_h, gfx10_format_table_c], include_directories : [ - inc_include, inc_src, inc_amd, + inc_include, inc_src, inc_amd, inc_virtio_gpu, ], dependencies : [dep_llvm.partial_dependency(compile_args: true, includes: true)] + [ dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind, idep_mesautil, idep_nir_headers, idep_nir ], + link_with: [ link_with ], gnu_symbol_visibility : 'hidden', - c_args : ['-DADDR_FASTCALL='] + c_args : c_args ) idep_amdgfxregs_h = declare_dependency(sources : [amdgfxregs_h]) diff --git a/src/amd/common/virtio/amdgpu_virtio.c b/src/amd/common/virtio/amdgpu_virtio.c new file mode 100644 index 00000000000..835d51fe844 --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio.c @@ -0,0 +1,577 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. 
+ * + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "amdgpu_virtio_private.h" + +#include "util/log.h" + +int +amdvgpu_query_info(amdvgpu_device_handle dev, struct drm_amdgpu_info *info) +{ + unsigned req_len = sizeof(struct amdgpu_ccmd_query_info_req); + unsigned rsp_len = sizeof(struct amdgpu_ccmd_query_info_rsp) + info->return_size; + + uint8_t buf[req_len]; + struct amdgpu_ccmd_query_info_req *req = (void *)buf; + struct amdgpu_ccmd_query_info_rsp *rsp; + assert(0 == (offsetof(struct amdgpu_ccmd_query_info_rsp, payload) % 8)); + + req->hdr = AMDGPU_CCMD(QUERY_INFO, req_len); + memcpy(&req->info, info, sizeof(struct drm_amdgpu_info)); + + rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + + int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true); + if (r) + return r; + + memcpy((void*)(uintptr_t)info->return_pointer, rsp->payload, info->return_size); + + return 0; +} + +static int +amdvgpu_query_info_simple(amdvgpu_device_handle dev, unsigned info_id, unsigned size, void *out) +{ + if (info_id == AMDGPU_INFO_DEV_INFO) { + assert(size == sizeof(dev->dev_info)); + memcpy(out, &dev->dev_info, size); + return 0; + } + struct drm_amdgpu_info info; + info.return_pointer = (uintptr_t)out; + info.query = info_id; + info.return_size = size; + return amdvgpu_query_info(dev, &info); +} + +static int +amdvgpu_query_heap_info(amdvgpu_device_handle dev, unsigned heap, unsigned flags, struct amdgpu_heap_info *info) +{ + struct amdvgpu_shmem *shmem = to_amdvgpu_shmem(dev->vdev->shmem); + /* Get heap information from shared memory */ + switch (heap) { + case AMDGPU_GEM_DOMAIN_VRAM: + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + memcpy(info, &shmem->vis_vram, sizeof(*info)); + else + memcpy(info, &shmem->vram, sizeof(*info)); + break; + case AMDGPU_GEM_DOMAIN_GTT: + memcpy(info, &shmem->gtt, sizeof(*info)); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +amdvgpu_query_hw_ip_count(amdvgpu_device_handle dev, unsigned type, uint32_t *count) +{ + struct drm_amdgpu_info request; + request.return_pointer = (uintptr_t) count; + request.return_size = sizeof(*count); + request.query = AMDGPU_INFO_HW_IP_COUNT; + request.query_hw_ip.type = type; + return amdvgpu_query_info(dev, &request); +} + +static int +amdvgpu_query_video_caps_info(amdvgpu_device_handle dev, unsigned cap_type, + unsigned size, void *value) +{ + struct drm_amdgpu_info request; + request.return_pointer = (uintptr_t)value; + request.return_size = size; + request.query = AMDGPU_INFO_VIDEO_CAPS; + request.sensor_info.type = cap_type; + + return amdvgpu_query_info(dev, &request); +} + +int +amdvgpu_query_sw_info(amdvgpu_device_handle dev, enum amdgpu_sw_info info, void *value) +{ + if (info != amdgpu_sw_info_address32_hi) + return -EINVAL; + memcpy(value, &dev->vdev->caps.u.amdgpu.address32_hi, 4); + return 0; +} + +static int +amdvgpu_query_firmware_version(amdvgpu_device_handle dev, unsigned fw_type, unsigned ip_instance, unsigned index, + uint32_t *version, uint32_t *feature) +{ + struct drm_amdgpu_info request; + struct drm_amdgpu_info_firmware firmware = {}; + int r; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)&firmware; + request.return_size = sizeof(firmware); + request.query = AMDGPU_INFO_FW_VERSION; + request.query_fw.fw_type = fw_type; + request.query_fw.ip_instance = ip_instance; + request.query_fw.index = index; + + r = amdvgpu_query_info(dev, &request); + + 
*version = firmware.ver; + *feature = firmware.feature; + return r; +} + +static int +amdvgpu_query_buffer_size_alignment(amdvgpu_device_handle dev, + struct amdgpu_buffer_size_alignments *info) +{ + memcpy(info, &dev->vdev->caps.u.amdgpu.alignments, sizeof(*info)); + return 0; +} + +static int +amdvgpu_query_gpu_info(amdvgpu_device_handle dev, struct amdgpu_gpu_info *info) +{ + memcpy(info, &dev->vdev->caps.u.amdgpu.gpu_info, sizeof(*info)); + return 0; +} + +int +amdvgpu_bo_set_metadata(amdvgpu_device_handle dev, uint32_t res_id, + struct amdgpu_bo_metadata *info) +{ + unsigned req_len = sizeof(struct amdgpu_ccmd_set_metadata_req) + info->size_metadata; + unsigned rsp_len = sizeof(struct amdgpu_ccmd_rsp); + + uint8_t buf[req_len]; + struct amdgpu_ccmd_set_metadata_req *req = (void *)buf; + struct amdgpu_ccmd_rsp *rsp; + + req->hdr = AMDGPU_CCMD(SET_METADATA, req_len); + req->res_id = res_id; + req->flags = info->flags; + req->tiling_info = info->tiling_info; + req->size_metadata = info->size_metadata; + memcpy(req->umd_metadata, info->umd_metadata, info->size_metadata); + + rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + return vdrm_send_req_wrapper(dev, &req->hdr, rsp, true); +} + +int amdvgpu_bo_query_info(amdvgpu_device_handle dev, uint32_t res_id, struct amdgpu_bo_info *info) { + unsigned req_len = sizeof(struct amdgpu_ccmd_bo_query_info_req); + unsigned rsp_len = sizeof(struct amdgpu_ccmd_bo_query_info_rsp); + + uint8_t buf[req_len]; + struct amdgpu_ccmd_bo_query_info_req *req = (void *)buf; + struct amdgpu_ccmd_bo_query_info_rsp *rsp; + + req->hdr = AMDGPU_CCMD(BO_QUERY_INFO, req_len); + req->res_id = res_id; + req->pad = 0; + + rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + + int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true); + if (r) + return r; + + info->alloc_size = rsp->info.alloc_size; + info->phys_alignment = rsp->info.phys_alignment; + info->preferred_heap = rsp->info.preferred_heap; + info->alloc_flags = rsp->info.alloc_flags; + + info->metadata.flags = rsp->info.metadata.flags; + info->metadata.tiling_info = rsp->info.metadata.tiling_info; + info->metadata.size_metadata = rsp->info.metadata.size_metadata; + memcpy(info->metadata.umd_metadata, rsp->info.metadata.umd_metadata, + MIN2(sizeof(info->metadata.umd_metadata), rsp->info.metadata.size_metadata)); + + return 0; +} + +int amdvgpu_cs_ctx_create2(amdvgpu_device_handle dev, int32_t priority, + uint32_t *ctx_virtio) { + simple_mtx_lock(&dev->contexts_mutex); + if (!dev->allow_multiple_amdgpu_ctx && _mesa_hash_table_num_entries(&dev->contexts)) { + assert(_mesa_hash_table_num_entries(&dev->contexts) == 1); + struct hash_entry *he = _mesa_hash_table_random_entry(&dev->contexts, NULL); + struct amdvgpu_context *ctx = he->data; + p_atomic_inc(&ctx->refcount); + *ctx_virtio = (uint32_t)(uintptr_t)he->key; + simple_mtx_unlock(&dev->contexts_mutex); + return 0; + } + + struct amdgpu_ccmd_create_ctx_req req = { + .priority = priority, + .flags = 0, + }; + struct amdgpu_ccmd_create_ctx_rsp *rsp; + + req.hdr = AMDGPU_CCMD(CREATE_CTX, sizeof(req)); + + rsp = vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(struct amdgpu_ccmd_create_ctx_rsp)); + int r = vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, true); + + if (r) + goto unlock; + + if (rsp->ctx_id == 0) { + r = -ENOTSUP; + goto unlock; + } + + struct amdvgpu_context *ctx = calloc(1, sizeof(struct amdvgpu_context) + dev->num_virtio_rings * sizeof(uint64_t)); + if (ctx == NULL) { + r = -ENOMEM; + goto unlock; + } + + p_atomic_inc(&ctx->refcount); + 
ctx->host_context_id = rsp->ctx_id; + for (int i = 0; i < dev->num_virtio_rings; i++) + ctx->ring_next_seqno[i] = 1; + *ctx_virtio = ctx->host_context_id; + + _mesa_hash_table_insert(&dev->contexts, (void*)(uintptr_t)ctx->host_context_id, ctx); + +unlock: + simple_mtx_unlock(&dev->contexts_mutex); + + return r; +} + +int amdvgpu_cs_ctx_free(amdvgpu_device_handle dev, uint32_t ctx_id) +{ + struct hash_entry *he = _mesa_hash_table_search(&dev->contexts, + (void*)(uintptr_t)ctx_id); + + if (!he) + return -1; + + if (!dev->allow_multiple_amdgpu_ctx) { + struct amdvgpu_context *ctx = he->data; + if (p_atomic_dec_return(&ctx->refcount)) + return 0; + } + + struct amdgpu_ccmd_create_ctx_req req = { + .id = ctx_id, + .flags = AMDGPU_CCMD_CREATE_CTX_DESTROY, + }; + req.hdr = AMDGPU_CCMD(CREATE_CTX, sizeof(req)); + + _mesa_hash_table_remove(&dev->contexts, he); + + free(he->data); + + struct amdgpu_ccmd_create_ctx_rsp *rsp; + rsp = vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(struct amdgpu_ccmd_create_ctx_rsp)); + + return vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, false); +} + +int +amdvgpu_device_get_fd(amdvgpu_device_handle dev) { + return dev->fd; +} + +const char * +amdvgpu_get_marketing_name(amdvgpu_device_handle dev) { + return dev->vdev->caps.u.amdgpu.marketing_name; +} + +static uint32_t cs_chunk_ib_to_virtio_ring_idx(amdvgpu_device_handle dev, + struct drm_amdgpu_cs_chunk_ib *ib) { + assert(dev->virtio_ring_mapping[ib->ip_type] != 0); + return dev->virtio_ring_mapping[ib->ip_type] + ib->ring; +} + +int +amdvgpu_cs_submit_raw2(amdvgpu_device_handle dev, uint32_t ctx_id, + uint32_t bo_list_handle, + int num_chunks, struct drm_amdgpu_cs_chunk *chunks, + uint64_t *seqno) +{ + unsigned rsp_len = sizeof(struct amdgpu_ccmd_rsp); + + struct extra_data_info { + const void *ptr; + uint32_t size; + } extra[1 + num_chunks]; + + int chunk_count = 0; + unsigned offset = 0; + + struct desc { + uint16_t chunk_id; + uint16_t length_dw; + uint32_t offset; + }; + struct desc descriptors[num_chunks]; + + unsigned virtio_ring_idx = 0xffffffff; + + uint32_t syncobj_in_count = 0, syncobj_out_count = 0; + struct drm_virtgpu_execbuffer_syncobj *syncobj_in = NULL; + struct drm_virtgpu_execbuffer_syncobj *syncobj_out = NULL; + uint8_t *buf = NULL; + int ret; + + const bool sync_submit = dev->sync_cmd & (1u << AMDGPU_CCMD_CS_SUBMIT); + + struct hash_entry *he = _mesa_hash_table_search(&dev->contexts, (void*)(uintptr_t)ctx_id); + if (!he) + return -1; + + struct amdvgpu_context *vctx = he->data; + + /* Extract pointers from each chunk and copy them to the payload. 
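 * The payload that the loop below builds is, roughly, a small descriptor
 * table followed by the raw data of every forwarded chunk:
 *
 *   payload + 0                          : struct desc descriptors[num_chunks]
 *   payload + num_chunks * sizeof(desc)  : data of chunk 0
 *   ...                                  : data of chunk 1, 2, ...
 *
 * with each descriptor holding (chunk_id, length_dw, offset), offset being
 * relative to the end of the descriptor table. A hedged sketch of how a
 * receiver could walk it (handle_chunk() is a placeholder):
 *
 *   const struct desc *d = (const struct desc *)req->payload;
 *   const uint8_t *data = req->payload + req->num_chunks * sizeof(*d);
 *   for (uint32_t c = 0; c < req->num_chunks; c++)
 *      handle_chunk(d[c].chunk_id, data + d[c].offset, d[c].length_dw * 4);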
*/ + for (int i = 0; i < num_chunks; i++) { + int extra_idx = 1 + chunk_count; + if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_BO_HANDLES) { + struct drm_amdgpu_bo_list_in *list_in = (void*) (uintptr_t)chunks[i].chunk_data; + extra[extra_idx].ptr = (void*) (uintptr_t)list_in->bo_info_ptr; + extra[extra_idx].size = list_in->bo_info_size * list_in->bo_number; + } else if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES || + chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE || + chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) { + extra[extra_idx].ptr = (void*)(uintptr_t)chunks[i].chunk_data; + extra[extra_idx].size = chunks[i].length_dw * 4; + + if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB) { + struct drm_amdgpu_cs_chunk_ib *ib = (void*)(uintptr_t)chunks[i].chunk_data; + virtio_ring_idx = cs_chunk_ib_to_virtio_ring_idx(dev, ib); + } + } else if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT || + chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + /* Translate from amdgpu CHUNK_ID_SYNCOBJ_* to drm_virtgpu_execbuffer_syncobj */ + struct drm_amdgpu_cs_chunk_sem *amd_syncobj = (void*) (uintptr_t)chunks[i].chunk_data; + unsigned syncobj_count = (chunks[i].length_dw * 4) / sizeof(struct drm_amdgpu_cs_chunk_sem); + struct drm_virtgpu_execbuffer_syncobj *syncobjs = + calloc(syncobj_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); + + if (syncobjs == NULL) { + ret = -ENOMEM; + goto error; + } + + for (int j = 0; j < syncobj_count; j++) + syncobjs[j].handle = amd_syncobj[j].handle; + + if (chunks[i].chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + syncobj_in_count = syncobj_count; + syncobj_in = syncobjs; + } else { + syncobj_out_count = syncobj_count; + syncobj_out = syncobjs; + } + + /* This chunk was converted to virtgpu UAPI so we don't need to forward it + * to the host. + */ + continue; + } else { + mesa_loge("Unhandled chunk_id: %d\n", chunks[i].chunk_id); + continue; + } + descriptors[chunk_count].chunk_id = chunks[i].chunk_id; + descriptors[chunk_count].offset = offset; + descriptors[chunk_count].length_dw = extra[extra_idx].size / 4; + offset += extra[extra_idx].size; + chunk_count++; + } + assert(virtio_ring_idx != 0xffffffff); + + /* Copy the descriptors at the beginning. */ + extra[0].ptr = descriptors; + extra[0].size = chunk_count * sizeof(struct desc); + + /* Determine how much extra space we need. */ + uint32_t req_len = sizeof(struct amdgpu_ccmd_cs_submit_req); + uint32_t e_offset = req_len; + for (unsigned i = 0; i < 1 + chunk_count; i++) + req_len += extra[i].size; + + /* Allocate the command buffer. */ + buf = malloc(req_len); + if (buf == NULL) { + ret = -ENOMEM; + goto error; + } + struct amdgpu_ccmd_cs_submit_req *req = (void*)buf; + req->hdr = AMDGPU_CCMD(CS_SUBMIT, req_len); + req->ctx_id = ctx_id; + req->num_chunks = chunk_count; + req->ring_idx = virtio_ring_idx; + req->pad = 0; + + UNUSED struct amdgpu_ccmd_rsp *rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + + /* Copy varying data after the fixed part of cs_submit_req. */ + for (unsigned i = 0; i < 1 + chunk_count; i++) { + if (extra[i].size) { + memcpy(&buf[e_offset], extra[i].ptr, extra[i].size); + e_offset += extra[i].size; + } + } + + /* Optional fence out (if we want synchronous submits). 
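 * A hedged aside on when that happens: the submit only blocks when bit
 * AMDGPU_CCMD_CS_SUBMIT of dev->sync_cmd is set, and sync_cmd comes from
 * the VIRTIO_SYNC_CMD debug option read in amdvgpu_device_initialize(),
 * e.g. a value of (1u << AMDGPU_CCMD_CS_SUBMIT). In that case the tail of
 * this function waits on the returned fence fd and then on the host:
 *
 *   sync_wait(*fence_fd_ptr, -1);
 *   close(*fence_fd_ptr);
 *   vdrm_host_sync(dev->vdev, &req->hdr);
 *
 * Otherwise the submit stays fully asynchronous and only the guest-side
 * seqno below is updated.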
*/ + int *fence_fd_ptr = NULL; + + struct vdrm_execbuf_params vdrm_execbuf_p = { + .ring_idx = virtio_ring_idx, + .req = &req->hdr, + .handles = NULL, + .num_handles = 0, + .in_syncobjs = syncobj_in, + .out_syncobjs = syncobj_out, + .has_in_fence_fd = 0, + .needs_out_fence_fd = sync_submit, + .fence_fd = 0, + .num_in_syncobjs = syncobj_in_count, + .num_out_syncobjs = syncobj_out_count, + }; + + if (sync_submit) + fence_fd_ptr = &vdrm_execbuf_p.fence_fd; + + /* Push job to the host. */ + ret = vdrm_execbuf(dev->vdev, &vdrm_execbuf_p); + + /* Determine the host seqno for this job. */ + *seqno = vctx->ring_next_seqno[virtio_ring_idx - 1]++; + + if (ret == 0 && fence_fd_ptr) { + /* Sync execution */ + sync_wait(*fence_fd_ptr, -1); + close(*fence_fd_ptr); + vdrm_host_sync(dev->vdev, &req->hdr); + } + +error: + free(buf); + free(syncobj_in); + free(syncobj_out); + + return ret; +} + +int amdvgpu_cs_query_reset_state2(amdvgpu_device_handle dev, uint32_t ctx_id, + uint64_t *flags) +{ + *flags = 0; + + if (to_amdvgpu_shmem(dev->vdev->shmem)->async_error > 0) + *flags = AMDGPU_CTX_QUERY2_FLAGS_RESET | AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; + + return 0; +} + +int amdvgpu_cs_query_fence_status(amdvgpu_device_handle dev, + uint32_t ctx_id, + uint32_t ip_type, + uint32_t ip_instance, uint32_t ring, + uint64_t fence_seq_no, + uint64_t timeout_ns, uint64_t flags, + uint32_t *expired) +{ + unsigned req_len = sizeof(struct amdgpu_ccmd_cs_query_fence_status_req); + unsigned rsp_len = sizeof(struct amdgpu_ccmd_cs_query_fence_status_rsp); + + uint8_t buf[req_len]; + struct amdgpu_ccmd_cs_query_fence_status_req *req = (void *)buf; + struct amdgpu_ccmd_cs_query_fence_status_rsp *rsp; + + req->hdr = AMDGPU_CCMD(CS_QUERY_FENCE_STATUS, req_len); + req->ctx_id = ctx_id; + req->ip_type = ip_type; + req->ip_instance = ip_instance; + req->ring = ring; + req->fence = fence_seq_no; + req->timeout_ns = timeout_ns; + req->flags = flags; + + rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + + int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, true); + + if (r == 0) + *expired = rsp->expired; + + return r; +} + +int amdvgpu_vm_reserve_vmid(amdvgpu_device_handle dev, int reserve) { + unsigned req_len = sizeof(struct amdgpu_ccmd_reserve_vmid_req); + + uint8_t buf[req_len]; + struct amdgpu_ccmd_reserve_vmid_req *req = (void *)buf; + struct amdgpu_ccmd_rsp *rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, sizeof(struct amdgpu_ccmd_rsp)); + + req->hdr = AMDGPU_CCMD(RESERVE_VMID, req_len); + req->flags = reserve ? 
0 : AMDGPU_CCMD_RESERVE_VMID_UNRESERVE; + + return vdrm_send_req_wrapper(dev, &req->hdr, rsp, true); +} + +int amdvgpu_cs_ctx_stable_pstate(amdvgpu_device_handle dev, + uint32_t ctx_id, + uint32_t op, + uint32_t flags, + uint32_t *out_flags) { + unsigned req_len = sizeof(struct amdgpu_ccmd_set_pstate_req); + unsigned rsp_len = sizeof(struct amdgpu_ccmd_set_pstate_rsp); + + uint8_t buf[req_len]; + struct amdgpu_ccmd_set_pstate_req *req = (void *)buf; + struct amdgpu_ccmd_set_pstate_rsp *rsp; + + req->hdr = AMDGPU_CCMD(SET_PSTATE, req_len); + req->ctx_id = ctx_id; + req->op = op; + req->flags = flags; + req->pad = 0; + + rsp = vdrm_alloc_rsp(dev->vdev, &req->hdr, rsp_len); + + int r = vdrm_send_req_wrapper(dev, &req->hdr, &rsp->hdr, out_flags); + + if (r == 0 && out_flags) + *out_flags = rsp->out_flags; + + return r; +} + +int +amdvgpu_va_range_alloc(amdvgpu_device_handle dev, + enum amdgpu_gpu_va_range va_range_type, + uint64_t size, + uint64_t va_base_alignment, + uint64_t va_base_required, + uint64_t *va_base_allocated, + amdgpu_va_handle *va_range_handle, + uint64_t flags) +{ + return amdgpu_va_range_alloc2(dev->va_mgr, va_range_type, size, + va_base_alignment, va_base_required, + va_base_allocated, va_range_handle, + flags); +} diff --git a/src/amd/common/virtio/amdgpu_virtio.h b/src/amd/common/virtio/amdgpu_virtio.h new file mode 100644 index 00000000000..74fbab45221 --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio.h @@ -0,0 +1,90 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ +#ifndef AMDGPU_VIRTIO_H +#define AMDGPU_VIRTIO_H + +struct amdvgpu_bo; +struct amdvgpu_device; +struct amdvgpu_context; +typedef struct amdvgpu_device* amdvgpu_device_handle; +typedef struct amdvgpu_bo* amdvgpu_bo_handle; + +struct amdvgpu_bo_import_result { + amdvgpu_bo_handle buf_handle; + uint64_t alloc_size; +}; + +int amdvgpu_device_initialize(int fd, uint32_t *drm_major, uint32_t *drm_minor, + amdvgpu_device_handle* dev); +int amdvgpu_device_deinitialize(amdvgpu_device_handle dev); +int amdvgpu_bo_va_op_raw(amdvgpu_device_handle dev, + uint32_t res_id, + uint64_t offset, + uint64_t size, + uint64_t addr, + uint64_t flags, + uint32_t ops); +int amdvgpu_bo_import(amdvgpu_device_handle dev, + enum amdgpu_bo_handle_type type, + uint32_t handle, + struct amdvgpu_bo_import_result *result); +int amdvgpu_bo_export(amdvgpu_device_handle dev, amdvgpu_bo_handle bo, + enum amdgpu_bo_handle_type type, + uint32_t *shared_handle); +int amdvgpu_bo_cpu_map(amdvgpu_device_handle dev, amdvgpu_bo_handle bo_handle, void **cpu); +int amdvgpu_bo_cpu_unmap(amdvgpu_device_handle dev, amdvgpu_bo_handle bo); +int amdvgpu_bo_alloc(amdvgpu_device_handle dev, + struct amdgpu_bo_alloc_request *request, + amdvgpu_bo_handle *bo); +int amdvgpu_bo_free(amdvgpu_device_handle dev, struct amdvgpu_bo *bo); +int amdvgpu_bo_wait_for_idle(amdvgpu_device_handle dev, + amdvgpu_bo_handle bo, + uint64_t abs_timeout_ns); +int +amdvgpu_bo_set_metadata(amdvgpu_device_handle dev, uint32_t res_id, + struct amdgpu_bo_metadata *info); +int amdvgpu_query_info(amdvgpu_device_handle dev, struct drm_amdgpu_info *info); +int amdvgpu_bo_query_info(amdvgpu_device_handle dev, uint32_t res_id, struct amdgpu_bo_info *info); +int amdvgpu_cs_ctx_create2(amdvgpu_device_handle dev, int32_t priority, uint32_t *ctx_virtio); +int amdvgpu_cs_ctx_free(amdvgpu_device_handle dev, uint32_t ctx); +int amdvgpu_cs_ctx_stable_pstate(amdvgpu_device_handle dev, + uint32_t ctx, + uint32_t op, + uint32_t flags, + uint32_t 
*out_flags); +int amdvgpu_cs_query_reset_state2(amdvgpu_device_handle dev, + uint32_t ctx, + uint64_t *flags); +int +amdvgpu_va_range_alloc(amdvgpu_device_handle dev, + enum amdgpu_gpu_va_range va_range_type, + uint64_t size, + uint64_t va_base_alignment, + uint64_t va_base_required, + uint64_t *va_base_allocated, + amdgpu_va_handle *va_range_handle, + uint64_t flags); +int amdvgpu_cs_query_fence_status(amdvgpu_device_handle dev, + uint32_t ctx, + uint32_t ip_type, + uint32_t ip_instance, uint32_t ring, + uint64_t fence_seq_no, + uint64_t timeout_ns, uint64_t flags, + uint32_t *expired); +int +amdvgpu_device_get_fd(amdvgpu_device_handle dev); +const char * +amdvgpu_get_marketing_name(amdvgpu_device_handle dev); +int +amdvgpu_cs_submit_raw2(amdvgpu_device_handle dev, uint32_t ctx_id, + uint32_t bo_list_handle, + int num_chunks, struct drm_amdgpu_cs_chunk *chunks, + uint64_t *seqno); +int amdvgpu_vm_reserve_vmid(amdvgpu_device_handle dev, int reserve); +int +amdvgpu_query_sw_info(amdvgpu_device_handle dev, enum amdgpu_sw_info info, void *value); + +#endif diff --git a/src/amd/common/virtio/amdgpu_virtio_bo.c b/src/amd/common/virtio/amdgpu_virtio_bo.c new file mode 100644 index 00000000000..dc6505ffe1e --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio_bo.c @@ -0,0 +1,367 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "drm-uapi/amdgpu_drm.h" + +#include "amdgpu_virtio_private.h" +#include "ac_linux_drm.h" +#include "util/list.h" +#include "util/log.h" +#include "util/os_mman.h" +#include "util/os_time.h" +#include "util/u_math.h" + +#include +#include +#include + +struct amdvgpu_host_blob { + /* virtgpu properties */ + uint32_t handle; + uint32_t res_id; + uint64_t alloc_size; + + /* CPU mapping handling. */ + uint64_t offset; + int map_count; + void *cpu_addr; + simple_mtx_t cpu_access_mutex; + + /* Allocation parameters. */ + uint32_t vm_flags; + uint32_t preferred_heap; + uint64_t phys_alignment; + uint64_t flags; +}; + +static +void destroy_host_blob(amdvgpu_device_handle dev, struct amdvgpu_host_blob *hb); + +static +struct amdvgpu_host_blob *create_host_blob(uint32_t kms_handle, + uint32_t res_id, + uint64_t size, + struct amdgpu_ccmd_gem_new_req *req) +{ + struct amdvgpu_host_blob *hb = calloc(1, sizeof(*hb)); + hb->handle = kms_handle; + hb->res_id = res_id; + hb->alloc_size = size; + + if (req) { + hb->phys_alignment = req->r.phys_alignment; + hb->preferred_heap = req->r.preferred_heap; + hb->flags = req->r.flags; + } + + simple_mtx_init(&hb->cpu_access_mutex, mtx_plain); + return hb; +} + +static +void destroy_host_blob(amdvgpu_device_handle dev, struct amdvgpu_host_blob *hb) { + simple_mtx_destroy(&hb->cpu_access_mutex); + + struct drm_gem_close req = { + .handle = hb->handle, + }; + int r = drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req); + if (r != 0) { + mesa_loge("DRM_IOCTL_GEM_CLOSE failed for res_id: %d\n", hb->res_id); + } + free(hb); +} + +static int +alloc_host_blob(amdvgpu_bo_handle bo, + struct amdgpu_ccmd_gem_new_req *req, + uint32_t blob_flags) +{ + uint32_t kms_handle, res_id; + + /* Create the host blob requires 2 steps. First create the host blob... */ + kms_handle = vdrm_bo_create(bo->dev->vdev, req->r.alloc_size, blob_flags, + req->blob_id, &req->hdr); + + /* 0 is an invalid handle and is used by vdrm_bo_create to signal an error. */ + if (kms_handle == 0) + return -1; + + /* ... and then retrieve its resource id (global id). 
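 * The two identifiers are not interchangeable: the GEM handle returned by
 * vdrm_bo_create() is what the guest kernel understands (DRM_IOCTL_GEM_CLOSE,
 * drmPrimeHandleToFD(), VIRTGPU_MAP), whereas the res_id is the global id
 * the host resolves, so it is what the amdgpu_ccmd_* requests and
 * AMDGPU_CHUNK_ID_BO_HANDLES carry. A small hedged usage sketch:
 *
 *   uint32_t id;
 *   amdvgpu_bo_export(dev, bo, amdgpu_bo_handle_type_kms, &id);
 *   // id is the resource id, i.e. the same value as amdvgpu_get_resource_id(bo)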
*/ + res_id = vdrm_handle_to_res_id(bo->dev->vdev, kms_handle); + + bo->host_blob = create_host_blob(kms_handle, res_id, req->r.alloc_size, req); + + simple_mtx_lock(&bo->dev->handle_to_vbo_mutex); + _mesa_hash_table_insert(bo->dev->handle_to_vbo, (void*)(intptr_t)bo->host_blob->handle, bo); + simple_mtx_unlock(&bo->dev->handle_to_vbo_mutex); + + return 0; +} + +int amdvgpu_bo_export(amdvgpu_device_handle dev, amdvgpu_bo_handle bo, + enum amdgpu_bo_handle_type type, + uint32_t *shared_handle) +{ + switch (type) { + case amdgpu_bo_handle_type_kms: + /* Return the resource id as this handle is only going to be used + * internally (AMDGPU_CHUNK_ID_BO_HANDLES mostly). + */ + *shared_handle = amdvgpu_get_resource_id(bo); + return 0; + + case amdgpu_bo_handle_type_dma_buf_fd: + return drmPrimeHandleToFD(dev->fd, bo->host_blob->handle, DRM_CLOEXEC | DRM_RDWR, + (int*)shared_handle); + + case amdgpu_bo_handle_type_kms_noimport: + /* Treat this deprecated type as _type_kms and return the GEM handle. */ + *shared_handle = bo->host_blob->handle; + return 0; + + case amdgpu_bo_handle_type_gem_flink_name: + break; + } + return -EINVAL; +} + +int amdvgpu_bo_free(amdvgpu_device_handle dev, struct amdvgpu_bo *bo) { + int refcnt = p_atomic_dec_return(&bo->refcount); + + if (refcnt == 0) { + /* Flush pending ops. */ + vdrm_flush(dev->vdev); + + /* Remove it from the bo table. */ + if (bo->host_blob->handle > 0) { + simple_mtx_lock(&dev->handle_to_vbo_mutex); + struct hash_entry *entry = _mesa_hash_table_search(dev->handle_to_vbo, + (void*)(intptr_t)bo->host_blob->handle); + if (entry) { + /* entry can be NULL for the shmem buffer. */ + _mesa_hash_table_remove(dev->handle_to_vbo, entry); + } + simple_mtx_unlock(&dev->handle_to_vbo_mutex); + } + + if (bo->host_blob) + destroy_host_blob(dev, bo->host_blob); + + free(bo); + } + + return 0; +} + +int amdvgpu_bo_alloc(amdvgpu_device_handle dev, + struct amdgpu_bo_alloc_request *request, + amdvgpu_bo_handle *bo) +{ + int r; + uint32_t blob_flags = 0; + + struct amdgpu_ccmd_gem_new_req req = { + .hdr = AMDGPU_CCMD(GEM_NEW, sizeof(req)), + .blob_id = p_atomic_inc_return(&dev->next_blob_id), + }; + req.r.alloc_size = request->alloc_size; + req.r.phys_alignment = request->phys_alignment; + req.r.preferred_heap = request->preferred_heap; + req.r.__pad = 0; + req.r.flags = request->flags; + + if (!(request->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) + blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE; + + /* blob_id 0 is reserved for the shared memory buffer. */ + assert(req.blob_id > 0); + + amdvgpu_bo_handle out = calloc(1, sizeof(struct amdvgpu_bo)); + out->dev = dev; + out->size = request->alloc_size; + + r = alloc_host_blob(out, &req, blob_flags); + + if (r < 0) { + free(out); + return r; + } + + p_atomic_set(&out->refcount, 1); + *bo = out; + + return 0; +} + +int amdvgpu_bo_va_op_raw(amdvgpu_device_handle dev, + uint32_t res_id, + uint64_t offset, + uint64_t size, + uint64_t addr, + uint64_t flags, + uint32_t ops) +{ + int r; + + /* Fill base structure fields. */ + struct amdgpu_ccmd_bo_va_op_req req = { + .hdr = AMDGPU_CCMD(BO_VA_OP, sizeof(req)), + .va = addr, + .res_id = res_id, + .offset = offset, + .vm_map_size = size, + .flags = flags, + .op = ops, + .flags2 = res_id == 0 ? 
AMDGPU_CCMD_BO_VA_OP_SPARSE_BO : 0, + }; + struct amdgpu_ccmd_rsp *rsp = + vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(*rsp)); + + r = vdrm_send_req_wrapper(dev, &req.hdr, rsp, false); + + return r; +} + +int amdvgpu_bo_import(amdvgpu_device_handle dev, enum amdgpu_bo_handle_type type, + uint32_t handle, struct amdvgpu_bo_import_result *result) +{ + if (type != amdgpu_bo_handle_type_dma_buf_fd) + return -1; + + uint32_t kms_handle; + int r = drmPrimeFDToHandle(dev->fd, handle, &kms_handle); + if (r) { + mesa_loge("drmPrimeFDToHandle failed for dmabuf fd: %u\n", handle); + return r; + } + + /* Look up existing bo. */ + simple_mtx_lock(&dev->handle_to_vbo_mutex); + struct hash_entry *entry = _mesa_hash_table_search(dev->handle_to_vbo, (void*)(intptr_t)kms_handle); + + if (entry) { + struct amdvgpu_bo *bo = entry->data; + p_atomic_inc(&bo->refcount); + simple_mtx_unlock(&dev->handle_to_vbo_mutex); + result->buf_handle = (void*)bo; + result->alloc_size = bo->size; + assert(bo->host_blob); + return 0; + } + simple_mtx_unlock(&dev->handle_to_vbo_mutex); + + struct drm_virtgpu_resource_info args = { + .bo_handle = kms_handle, + }; + r = virtio_ioctl(dev->fd, VIRTGPU_RESOURCE_INFO, &args); + + if (r) { + mesa_loge("VIRTGPU_RESOURCE_INFO failed (%s)\n", strerror(errno)); + return r; + } + + off_t size = lseek(handle, 0, SEEK_END); + if (size == (off_t) -1) { + mesa_loge("lseek failed (%s)\n", strerror(errno)); + return -1; + } + lseek(handle, 0, SEEK_CUR); + + struct amdvgpu_bo *bo = calloc(1, sizeof(struct amdvgpu_bo)); + bo->dev = dev; + bo->size = size; + bo->host_blob = create_host_blob(kms_handle, args.res_handle, size, NULL); + p_atomic_set(&bo->refcount, 1); + + result->buf_handle = bo; + result->alloc_size = bo->size; + + simple_mtx_lock(&dev->handle_to_vbo_mutex); + _mesa_hash_table_insert(dev->handle_to_vbo, (void*)(intptr_t)bo->host_blob->handle, bo); + simple_mtx_unlock(&dev->handle_to_vbo_mutex); + + return 0; +} + +static int amdvgpu_get_offset(amdvgpu_bo_handle bo_handle) +{ + if (bo_handle->host_blob->offset) + return 0; + + struct drm_virtgpu_map req = { + .handle = bo_handle->host_blob->handle, + }; + int ret = virtio_ioctl(bo_handle->dev->fd, VIRTGPU_MAP, &req); + if (ret) { + mesa_loge("amdvgpu_bo_map failed (%s) handle: %d\n", + strerror(errno), bo_handle->host_blob->handle); + return ret; + } + bo_handle->host_blob->offset = req.offset; + + return 0; +} + +int amdvgpu_bo_cpu_map(amdvgpu_device_handle dev, amdvgpu_bo_handle bo_handle, + void **cpu) { + int r; + + simple_mtx_lock(&bo_handle->host_blob->cpu_access_mutex); + + if (bo_handle->host_blob->cpu_addr == NULL) { + assert(bo_handle->host_blob->cpu_addr == NULL); + r = amdvgpu_get_offset(bo_handle); + if (r) { + mesa_loge("get_offset failed\n"); + simple_mtx_unlock(&bo_handle->host_blob->cpu_access_mutex); + return r; + } + + /* Use *cpu as a fixed address hint from the caller. 
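 * In other words, whatever the caller left in *cpu is forwarded to os_mmap()
 * as an address hint (NULL lets the kernel choose). A minimal usage sketch,
 * illustrative only:
 *
 *   void *ptr = NULL;   // or a preferred address, used as a hint only
 *   if (amdvgpu_bo_cpu_map(dev, bo, &ptr) == 0) {
 *      // ptr now points at the shared host blob mapping
 *      amdvgpu_bo_cpu_unmap(dev, bo);
 *   }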
*/ + bo_handle->host_blob->cpu_addr = os_mmap(*cpu, bo_handle->host_blob->alloc_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + dev->fd, + bo_handle->host_blob->offset); + } + + assert(bo_handle->host_blob->cpu_addr != MAP_FAILED); + *cpu = bo_handle->host_blob->cpu_addr; + p_atomic_inc(&bo_handle->host_blob->map_count); + + simple_mtx_unlock(&bo_handle->host_blob->cpu_access_mutex); + + return *cpu == MAP_FAILED; +} + +int amdvgpu_bo_cpu_unmap(amdvgpu_device_handle dev, amdvgpu_bo_handle bo) { + int r = 0; + + simple_mtx_lock(&bo->host_blob->cpu_access_mutex); + if (bo->host_blob->map_count == 0) { + simple_mtx_unlock(&bo->host_blob->cpu_access_mutex); + return 0; + } + assert(bo->host_blob->cpu_addr); + if (p_atomic_dec_zero(&bo->host_blob->map_count)) { + r = os_munmap(bo->host_blob->cpu_addr, bo->host_blob->alloc_size); + bo->host_blob->cpu_addr = NULL; + } + simple_mtx_unlock(&bo->host_blob->cpu_access_mutex); + + return r; +} + +uint32_t amdvgpu_get_resource_id(amdvgpu_bo_handle bo) { + return bo->host_blob->res_id; +} + +int amdvgpu_bo_wait_for_idle(amdvgpu_device_handle dev, + amdvgpu_bo_handle bo, + uint64_t abs_timeout_ns) { + /* TODO: add a wait for idle command? */ + return vdrm_bo_wait(dev->vdev, bo->host_blob->handle); +} diff --git a/src/amd/common/virtio/amdgpu_virtio_device.c b/src/amd/common/virtio/amdgpu_virtio_device.c new file mode 100644 index 00000000000..55d280f6879 --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio_device.c @@ -0,0 +1,188 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "amdgpu_virtio_private.h" + +#include "util/bitscan.h" +#include "util/log.h" +#include "util/os_file.h" +#include "util/u_debug.h" + +#include + +/* amdvgpu_device manage the virtual GPU. + * + * It owns a vdrm_device instance, the rings and manage seqno. + * Since it's a drop-in replacement for libdrm_amdgpu's amdgpu_device, + * it follows its behavior: if the same device is opened multiple times, + * the same amdvgpu_device will be used. + */ +static simple_mtx_t dev_mutex = SIMPLE_MTX_INITIALIZER; +static amdvgpu_device_handle dev_list; + +static int fd_compare(int fd1, int fd2) +{ + char *name1 = drmGetPrimaryDeviceNameFromFd(fd1); + char *name2 = drmGetPrimaryDeviceNameFromFd(fd2); + int result; + + if (name1 == NULL || name2 == NULL) { + free(name1); + free(name2); + return 0; + } + + result = strcmp(name1, name2); + free(name1); + free(name2); + + return result; +} + +static void amdvgpu_device_reference(struct amdvgpu_device **dst, + struct amdvgpu_device *src) +{ + if (update_references(*dst ? &(*dst)->refcount : NULL, + src ? &src->refcount : NULL)) { + struct amdvgpu_device *dev, *prev = NULL; + for (dev = dev_list; dev; dev = dev->next) { + if (dev == (*dst)) { + if (prev == NULL) + dev_list = dev->next; + else + prev->next = dev->next; + break; + } + prev = dev; + } + + dev = *dst; + + /* Destroy BOs before closing vdrm */ + hash_table_foreach(dev->handle_to_vbo, entry) { + struct amdvgpu_bo *bo = entry->data; + amdvgpu_bo_free(dev, bo); + } + _mesa_hash_table_destroy(dev->handle_to_vbo, NULL); + /* Destroy contextx. 
*/ + hash_table_foreach(&dev->contexts, entry) + amdvgpu_cs_ctx_free(dev, (uint32_t)(uintptr_t)entry->key); + _mesa_hash_table_clear(&dev->contexts, NULL); + + simple_mtx_destroy(&dev->handle_to_vbo_mutex); + simple_mtx_destroy(&dev->contexts_mutex); + + amdgpu_va_manager_deinit(dev->va_mgr); + + vdrm_device_close(dev->vdev); + + close(dev->fd); + free(dev); + } + + *dst = src; +} + +int amdvgpu_device_deinitialize(amdvgpu_device_handle dev) { + simple_mtx_lock(&dev_mutex); + amdvgpu_device_reference(&dev, NULL); + simple_mtx_unlock(&dev_mutex); + return 0; +} + +int amdvgpu_device_initialize(int fd, uint32_t *drm_major, uint32_t *drm_minor, + amdvgpu_device_handle* dev_out) { + simple_mtx_lock(&dev_mutex); + amdvgpu_device_handle dev; + + for (dev = dev_list; dev; dev = dev->next) + if (fd_compare(dev->fd, fd) == 0) + break; + + if (dev) { + *dev_out = NULL; + amdvgpu_device_reference(dev_out, dev); + *drm_major = dev->vdev->caps.version_major; + *drm_minor = dev->vdev->caps.version_minor; + simple_mtx_unlock(&dev_mutex); + return 0; + } + + /* fd is owned by the amdgpu_screen_winsys that called this function. + * amdgpu_screen_winsys' lifetime may be shorter than the device's one, + * so dup fd to tie its lifetime to the device's one. + */ + fd = os_dupfd_cloexec(fd); + + struct vdrm_device *vdev = vdrm_device_connect(fd, VIRTGPU_DRM_CONTEXT_AMDGPU); + if (vdev == NULL) { + mesa_loge("vdrm_device_connect failed\n"); + simple_mtx_unlock(&dev_mutex); + return -1; + } + + dev = calloc(1, sizeof(struct amdvgpu_device)); + dev->refcount = 1; + dev->next = dev_list; + dev_list = dev; + dev->fd = fd; + dev->vdev = vdev; + + simple_mtx_init(&dev->handle_to_vbo_mutex, mtx_plain); + simple_mtx_init(&dev->contexts_mutex, mtx_plain); + + dev->handle_to_vbo = _mesa_hash_table_create_u32_keys(NULL); + + p_atomic_set(&dev->next_blob_id, 1); + + *dev_out = dev; + + simple_mtx_unlock(&dev_mutex); + + struct drm_amdgpu_info info; + info.return_pointer = (uintptr_t)&dev->dev_info; + info.query = AMDGPU_INFO_DEV_INFO; + info.return_size = sizeof(dev->dev_info); + int r = amdvgpu_query_info(dev, &info); + assert(r == 0); + + /* Ring idx 0 is reserved for commands running on CPU. */ + unsigned next_ring_idx = 1; + for (unsigned i = 0; i < AMD_NUM_IP_TYPES; ++i) { + struct drm_amdgpu_info_hw_ip ip_info = {0}; + struct drm_amdgpu_info request = {0}; + request.return_pointer = (uintptr_t)&ip_info; + request.return_size = sizeof(ip_info); + request.query = AMDGPU_INFO_HW_IP_INFO; + request.query_hw_ip.type = i; + request.query_hw_ip.ip_instance = 0; + r = amdvgpu_query_info(dev, &request); + if (r == 0 && ip_info.available_rings) { + int count = util_bitcount(ip_info.available_rings); + dev->virtio_ring_mapping[i] = next_ring_idx; + next_ring_idx += count; + } + } + /* VIRTGPU_CONTEXT_PARAM_NUM_RINGS is hardcoded for now. 
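 * To make the mapping built above concrete, a hedged example: on a GPU
 * reporting 1 gfx, 4 compute and 2 sdma rings, the loop would produce
 *
 *   virtio_ring_mapping[AMDGPU_HW_IP_GFX]     = 1   -> virtio ring 1
 *   virtio_ring_mapping[AMDGPU_HW_IP_COMPUTE] = 2   -> virtio rings 2..5
 *   virtio_ring_mapping[AMDGPU_HW_IP_DMA]     = 6   -> virtio rings 6..7
 *   num_virtio_rings = 7
 *
 * and cs_chunk_ib_to_virtio_ring_idx() later resolves an IB aimed at
 * (ip_type, ring) to virtio_ring_mapping[ip_type] + ring, with ring 0 kept
 * for CPU jobs; the assert below only guards that hardcoded limit.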
*/ + assert(next_ring_idx <= 64); + dev->num_virtio_rings = next_ring_idx - 1; + + dev->va_mgr = amdgpu_va_manager_alloc(); + amdgpu_va_manager_init(dev->va_mgr, + dev->dev_info.virtual_address_offset, dev->dev_info.virtual_address_max, + dev->dev_info.high_va_offset, dev->dev_info.high_va_max, + dev->dev_info.virtual_address_alignment); + + _mesa_hash_table_init(&dev->contexts, NULL, + _mesa_hash_pointer, _mesa_key_pointer_equal); + dev->allow_multiple_amdgpu_ctx = debug_get_bool_option("MULTIPLE_AMDGPU_CTX", false); + dev->sync_cmd = debug_get_num_option("VIRTIO_SYNC_CMD", 0); + + *drm_major = dev->vdev->caps.version_major; + *drm_minor = dev->vdev->caps.version_minor; + + return 0; +} diff --git a/src/amd/common/virtio/amdgpu_virtio_private.h b/src/amd/common/virtio/amdgpu_virtio_private.h new file mode 100644 index 00000000000..434d2f71e3a --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio_private.h @@ -0,0 +1,161 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ +#ifndef AMDGPU_VIRTIO_PRIVATE_H +#define AMDGPU_VIRTIO_PRIVATE_H + +#include "drm-uapi/amdgpu_drm.h" +#include "drm-uapi/virtgpu_drm.h" + +#include "util/hash_table.h" +#include "util/simple_mtx.h" + +#include "amd_family.h" + +#include "virtio/vdrm/vdrm.h" +#include "virtio/virtio-gpu/drm_hw.h" +#include "amdgpu_virtio_proto.h" +#include "amdgpu_virtio.h" + +struct amdvgpu_host_blob; +struct amdvgpu_host_blob_allocator; + +/* Host context seqno handling. + * seqno are monotonically increasing integer, so we don't need + * to actually submit to know the value. This allows to not + * wait for the submission to go to the host (= no need to wait + * in the guest) and to know the seqno (= so we can take advantage + * of user fence). + */ +struct amdvgpu_context { + uint32_t refcount; + uint32_t host_context_id; + uint64_t ring_next_seqno[]; +}; + +struct amdvgpu_device { + struct vdrm_device * vdev; + + /* List of existing devices */ + int refcount; + struct amdvgpu_device *next; + + int fd; + + /* Table mapping kms handles to amdvgpu_bo instances. + * Used to maintain a 1-to-1 mapping between the 2. + */ + simple_mtx_t handle_to_vbo_mutex; + struct hash_table *handle_to_vbo; + + /* Submission through virtio-gpu are ring based. + * Ring 0 is used for CPU jobs, then N rings are allocated: 1 + * per IP type per instance (so if the GPU has 1 gfx queue and 2 + * queues -> ring0 + 3 hw rings = 4 rings total). + */ + uint32_t num_virtio_rings; + uint32_t virtio_ring_mapping[AMD_NUM_IP_TYPES]; + + struct drm_amdgpu_info_device dev_info; + + /* Blob id are per drm_file identifiers of host blobs. + * Use a monotically increased integer to assign the blob id. + */ + uint32_t next_blob_id; + + /* GPU VA management (allocation / release). */ + amdgpu_va_manager_handle va_mgr; + + /* Debug option to make some protocol commands synchronous. + * If bit N is set, then the specific command will be sync. + */ + int64_t sync_cmd; + + /* virtio-gpu uses a single context per drm_file and expects that + * any 2 jobs submitted to the same {context, ring} will execute in + * order. + * amdgpu on the other hand allows for multiple context per drm_file, + * so we either have to open multiple virtio-gpu drm_file to be able to + * have 1 virtio-gpu context per amdgpu-context or use a single amdgpu + * context. + * Using multiple drm_file might cause BO sharing issues so for now limit + * ourselves to a single amdgpu context. 
Each amdgpu_ctx object can schedule + * parallel work on 1 gfx, 2 sdma, 4 compute, 1 of each vcn queue. + */ + simple_mtx_t contexts_mutex; + struct hash_table contexts; + bool allow_multiple_amdgpu_ctx; +}; + +/* Refcounting helpers. Returns true when dst reaches 0. */ +static inline bool update_references(int *dst, int *src) +{ + if (dst != src) { + /* bump src first */ + if (src) { + assert(p_atomic_read(src) > 0); + p_atomic_inc(src); + } + if (dst) { + return p_atomic_dec_zero(dst); + } + } + return false; +} + +#define virtio_ioctl(fd, name, args) ({ \ + int ret = drmIoctl((fd), DRM_IOCTL_ ## name, (args)); \ + ret; \ + }) + +struct amdvgpu_host_blob_creation_params { + struct drm_virtgpu_resource_create_blob args; + struct amdgpu_ccmd_gem_new_req req; +}; + +struct amdvgpu_bo { + struct amdvgpu_device *dev; + + /* Importing the same kms handle must return the same + * amdvgpu_pointer, so we need a refcount. + */ + int refcount; + + /* The size of the BO (might be smaller that the host + * bo' size). + */ + unsigned size; + + /* The host blob backing this bo. */ + struct amdvgpu_host_blob *host_blob; +}; + + +uint32_t amdvgpu_get_resource_id(amdvgpu_bo_handle bo); + +/* There are 2 return-code: + * - the virtio one, returned by vdrm_send_req + * - the host one, which only makes sense for sync + * requests. + */ +static inline +int vdrm_send_req_wrapper(amdvgpu_device_handle dev, + struct vdrm_ccmd_req *req, + struct amdgpu_ccmd_rsp *rsp, + bool sync) { + if (dev->sync_cmd & (1u << req->cmd)) + sync = true; + + int r = vdrm_send_req(dev->vdev, req, sync); + + if (r) + return r; + + if (sync) + return rsp->ret; + + return 0; +} +#endif /* AMDGPU_VIRTIO_PRIVATE_H */ diff --git a/src/amd/common/virtio/amdgpu_virtio_proto.h b/src/amd/common/virtio/amdgpu_virtio_proto.h new file mode 100644 index 00000000000..ccb8831a10c --- /dev/null +++ b/src/amd/common/virtio/amdgpu_virtio_proto.h @@ -0,0 +1,275 @@ +#ifndef AMDGPU_VIRTIO_PROTO_H +#define AMDGPU_VIRTIO_PROTO_H + +#include +#include "amdgpu.h" +#include "amdgpu_drm.h" +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic error "-Wpadded" +#endif + +enum amdgpu_ccmd { + AMDGPU_CCMD_QUERY_INFO = 1, + AMDGPU_CCMD_GEM_NEW, + AMDGPU_CCMD_BO_VA_OP, + AMDGPU_CCMD_CS_SUBMIT, + AMDGPU_CCMD_SET_METADATA, + AMDGPU_CCMD_BO_QUERY_INFO, + AMDGPU_CCMD_CREATE_CTX, + AMDGPU_CCMD_RESERVE_VMID, + AMDGPU_CCMD_SET_PSTATE, + AMDGPU_CCMD_CS_QUERY_FENCE_STATUS, +}; + +struct amdgpu_ccmd_rsp { + struct vdrm_ccmd_rsp base; + int32_t ret; +}; +static_assert(sizeof(struct amdgpu_ccmd_rsp) == 8, "bug"); + +#define AMDGPU_STATIC_ASSERT_SIZE(t) \ + static_assert(sizeof(struct t) % 8 == 0, "sizeof(struct " #t ") not multiple of 8"); \ + static_assert(alignof(struct t) <= 8, "alignof(struct " #t ") too large"); + +/** + * Defines the layout of shmem buffer used for host->guest communication. + */ +struct amdvgpu_shmem { + struct vdrm_shmem base; + + /** + * Counter that is incremented on asynchronous errors, like SUBMIT + * or GEM_NEW failures. The guest should treat errors as context- + * lost. + */ + uint32_t async_error; + + uint32_t __pad; + + struct amdgpu_heap_info gtt; + struct amdgpu_heap_info vram; + struct amdgpu_heap_info vis_vram; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdvgpu_shmem) +DEFINE_CAST(vdrm_shmem, amdvgpu_shmem) + + +#define AMDGPU_CCMD(_cmd, _len) (struct vdrm_ccmd_req){ \ + .cmd = AMDGPU_CCMD_##_cmd, \ + .len = (_len), \ + } + +/* + * AMDGPU_CCMD_QUERY_INFO + * + * This is amdgpu_query_info. 
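 * Every request in this protocol follows the same guest-side calling
 * pattern; a minimal hedged sketch mirroring amdvgpu_query_info(), where
 * out/out_size are placeholders:
 *
 *   struct amdgpu_ccmd_query_info_req req = {
 *      .hdr  = AMDGPU_CCMD(QUERY_INFO, sizeof(req)),
 *      .info = { .return_pointer = (uintptr_t)out,
 *                .return_size = out_size,
 *                .query = AMDGPU_INFO_DEV_INFO },
 *   };
 *   struct amdgpu_ccmd_query_info_rsp *rsp =
 *      vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(*rsp) + out_size);
 *   int r = vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, true);
 *   // on success the host has written out_size bytes into rsp->payload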
+ */ +struct amdgpu_ccmd_query_info_req { + struct vdrm_ccmd_req hdr; + struct drm_amdgpu_info info; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_query_info_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_query_info_req) + +struct amdgpu_ccmd_query_info_rsp { + struct amdgpu_ccmd_rsp hdr; + uint8_t payload[]; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_query_info_rsp) + +struct amdgpu_ccmd_gem_new_req { + struct vdrm_ccmd_req hdr; + + uint64_t blob_id; + + /* This is amdgpu_bo_alloc_request but padded correctly. */ + struct { + uint64_t alloc_size; + uint64_t phys_alignment; + uint32_t preferred_heap; + uint32_t __pad; + uint64_t flags; + } r; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_gem_new_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_gem_new_req) + + +/* + * AMDGPU_CCMD_BO_VA_OP + * + */ +struct amdgpu_ccmd_bo_va_op_req { + struct vdrm_ccmd_req hdr; + uint64_t va; + uint64_t vm_map_size; + uint64_t flags; /* Passed directly to kernel */ + uint64_t flags2; /* AMDGPU_CCMD_BO_VA_OP_* */ + uint64_t offset; + uint32_t res_id; + uint32_t op; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_bo_va_op_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_va_op_req) +/* Specifies that this is a sparse BO. */ +#define AMDGPU_CCMD_BO_VA_OP_SPARSE_BO (1 << 0) + +/* + * AMDGPU_CCMD_CS_SUBMIT + */ +struct amdgpu_ccmd_cs_submit_req { + struct vdrm_ccmd_req hdr; + + uint32_t ctx_id; + uint32_t num_chunks; /* limited to AMDGPU_CCMD_CS_SUBMIT_MAX_NUM_CHUNKS */ + uint32_t pad; + uint32_t ring_idx; + + /* Starts with a descriptor array: + * (chunk_id, offset_in_payload), ... + */ + uint8_t payload[]; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_cs_submit_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_submit_req) +#define AMDGPU_CCMD_CS_SUBMIT_MAX_NUM_CHUNKS 128 + +/* + * AMDGPU_CCMD_SET_METADATA + */ +struct amdgpu_ccmd_set_metadata_req { + struct vdrm_ccmd_req hdr; + uint64_t flags; + uint64_t tiling_info; + uint32_t res_id; + uint32_t size_metadata; + uint32_t umd_metadata[]; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_set_metadata_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_metadata_req) + + +/* + * AMDGPU_CCMD_BO_QUERY_INFO + */ +struct amdgpu_ccmd_bo_query_info_req { + struct vdrm_ccmd_req hdr; + uint32_t res_id; + uint32_t pad; /* must be zero */ +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_bo_query_info_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_query_info_req) + +struct amdgpu_ccmd_bo_query_info_rsp { + struct amdgpu_ccmd_rsp hdr; + /* This is almost struct amdgpu_bo_info, but padded to get + * the same struct on 32 bit and 64 bit builds. + */ + struct { + uint64_t alloc_size; /* 0 8 */ + uint64_t phys_alignment; /* 8 8 */ + uint32_t preferred_heap; /* 16 4 */ + uint32_t __pad; /* 20 4 */ + uint64_t alloc_flags; /* 24 8 */ + /* This is almost struct amdgpu_bo_metadata, but padded to get + * the same struct on 32 bit and 64 bit builds. 
+ */ + struct { + uint64_t flags; /* 32 8 */ + uint64_t tiling_info; /* 40 8 */ + uint32_t size_metadata; /* 48 4 */ + uint32_t umd_metadata[64]; /* 52 256 */ + uint32_t __pad; /* 308 4 */ + } metadata; + } info; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_bo_query_info_rsp) + +/* + * AMDGPU_CCMD_CREATE_CTX + */ +struct amdgpu_ccmd_create_ctx_req { + struct vdrm_ccmd_req hdr; + union { + int32_t priority; /* create */ + uint32_t id; /* destroy */ + }; + uint32_t flags; /* AMDGPU_CCMD_CREATE_CTX_* */ +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_create_ctx_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_create_ctx_req) +/* Destroy a context instead of creating one */ +#define AMDGPU_CCMD_CREATE_CTX_DESTROY (1 << 0) + +struct amdgpu_ccmd_create_ctx_rsp { + struct amdgpu_ccmd_rsp hdr; + uint32_t ctx_id; + uint32_t pad; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_create_ctx_rsp) + +/* + * AMDGPU_CCMD_RESERVE_VMID + */ +struct amdgpu_ccmd_reserve_vmid_req { + struct vdrm_ccmd_req hdr; + uint64_t flags; /* AMDGPU_CCMD_RESERVE_VMID_* */ +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_reserve_vmid_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_reserve_vmid_req) +/* Unreserve a VMID instead of reserving one */ +#define AMDGPU_CCMD_RESERVE_VMID_UNRESERVE (1 << 0) + +/* + * AMDGPU_CCMD_SET_PSTATE + */ +struct amdgpu_ccmd_set_pstate_req { + struct vdrm_ccmd_req hdr; + uint32_t ctx_id; + uint32_t op; + uint32_t flags; + uint32_t pad; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_set_pstate_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_pstate_req) + +struct amdgpu_ccmd_set_pstate_rsp { + struct amdgpu_ccmd_rsp hdr; + uint32_t out_flags; + uint32_t pad; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_set_pstate_rsp) + +/* + * AMDGPU_CCMD_CS_QUERY_FENCE_STATUS + */ +struct amdgpu_ccmd_cs_query_fence_status_req { + struct vdrm_ccmd_req hdr; + + uint32_t ctx_id; + + uint32_t ip_type; + uint32_t ip_instance; + uint32_t ring; + + uint64_t fence; + + uint64_t timeout_ns; + uint64_t flags; +}; +DEFINE_CAST(vdrm_ccmd_req, amdgpu_ccmd_cs_query_fence_status_req) +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_query_fence_status_req) + +struct amdgpu_ccmd_cs_query_fence_status_rsp { + struct amdgpu_ccmd_rsp hdr; + uint32_t expired; + uint32_t pad; +}; +AMDGPU_STATIC_ASSERT_SIZE(amdgpu_ccmd_cs_query_fence_status_rsp) + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 7609c840ec2..2c19e42aee2 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -2000,6 +2000,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm #ifdef _WIN32 assert(drm_device == NULL); #else + bool is_virtio = false; if (drm_device) { const char *path = drm_device->nodes[DRM_NODE_RENDER]; drmVersionPtr version; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index bac06384c9d..c284e756b65 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -202,7 +202,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, ac_drm_device *dev; struct radv_amdgpu_winsys *ws = NULL; - r = ac_drm_device_initialize(fd, &drm_major, &drm_minor, &dev); + r = ac_drm_device_initialize(fd, false, &drm_major, &drm_minor, &dev); if (r) { fprintf(stderr, "radv/amdgpu: failed to initialize device.\n"); return NULL; diff --git 
a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 6cd5bf3de06..f454ed29dfe 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -395,7 +395,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, /* Initialize the amdgpu device. This should always return the same pointer * for the same fd. */ - r = ac_drm_device_initialize(fd, &drm_major, &drm_minor, &dev); + r = ac_drm_device_initialize(fd, false, &drm_major, &drm_minor, &dev); if (r) { fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n"); goto fail; diff --git a/src/meson.build b/src/meson.build index ad32e56bb1b..442583eba09 100644 --- a/src/meson.build +++ b/src/meson.build @@ -69,6 +69,9 @@ endif if with_any_intel subdir('intel') endif +if system_has_kms_drm or with_gallium_virgl + subdir('virtio') +endif if with_gallium_radeonsi or with_amd_vk or with_gallium_r300 or with_gallium_r600 subdir('amd') endif @@ -78,9 +81,6 @@ endif if with_gallium_etnaviv subdir('etnaviv') endif -if system_has_kms_drm or with_gallium_virgl - subdir('virtio') -endif if with_gallium_freedreno or with_freedreno_vk or with_tools.contains('freedreno') subdir('freedreno') endif
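
Illustrative usage sketches (reviewer notes, not part of the patch):

1) Guest-side seqno allocation. The amdvgpu_context comment in amdgpu_virtio_private.h notes that a submission's seqno can be known without a host round-trip. A minimal sketch of that idea, assuming the caller holds whatever lock protects the context and that counters start at 0 (the helper name is illustrative, not the patch's code):

/* Minimal sketch; not code from the patch. */
static uint64_t
example_next_seqno(struct amdvgpu_context *ctx, uint32_t virtio_ring_idx)
{
   /* The fence value of the submission being built is known locally,
    * so the guest never has to wait for the host to learn it.
    */
   return ++ctx->ring_next_seqno[virtio_ring_idx];
}

2) Protocol round-trip. The amdgpu_ccmd_* structs in amdgpu_virtio_proto.h are sent through vdrm_send_req_wrapper(); commands whose host return value or response payload is needed are presumably sent synchronously, while errors from asynchronous requests surface via amdvgpu_shmem::async_error. A hedged sketch of context creation, assuming vdrm_alloc_rsp() carves the response out of the shared ring as in other vdrm users (this is not the patch's amdvgpu_cs_ctx_create2()):

/* Sketch only; assumes amdgpu_virtio_private.h is included. */
static int
example_ctx_create(amdvgpu_device_handle dev, int32_t priority, uint32_t *ctx_id)
{
   struct amdgpu_ccmd_create_ctx_req req = {
      .hdr = AMDGPU_CCMD(CREATE_CTX, sizeof(req)),
      .priority = priority,
      .flags = 0, /* no AMDGPU_CCMD_CREATE_CTX_DESTROY: create, don't destroy */
   };
   struct amdgpu_ccmd_create_ctx_rsp *rsp =
      vdrm_alloc_rsp(dev->vdev, &req.hdr, sizeof(*rsp));

   /* Synchronous: the guest needs the host-assigned ctx_id, and the
    * host return code comes back through rsp->hdr.ret.
    */
   int r = vdrm_send_req_wrapper(dev, &req.hdr, &rsp->hdr, true);
   if (r == 0)
      *ctx_id = rsp->ctx_id;
   return r;
}

3) is_virtio plumbing. Both winsys call sites currently hard-code is_virtio=false in ac_drm_device_initialize(); how the flag will eventually be driven is not part of this series. One plausible approach (an assumption, not something in the patch) is to check the DRM driver name of the render node:

#include <stdbool.h>
#include <string.h>
#include <xf86drm.h>

/* Hedged sketch of a possible detection helper; not from the patch. */
static bool
example_fd_is_virtio_gpu(int fd)
{
   drmVersionPtr version = drmGetVersion(fd);
   bool is_virtio = version && strcmp(version->name, "virtio_gpu") == 0;
   drmFreeVersion(version);
   return is_virtio;
}

A winsys could then pass example_fd_is_virtio_gpu(fd) as the new second argument of ac_drm_device_initialize() to select the amdvgpu_* path guarded by HAVE_AMDGPU_VIRTIO.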