diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index d8acc58b2d9..4d503e57073 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -326,6 +326,29 @@ tu_physical_device_init(struct tu_physical_device *device,
       goto fail_free_name;
    }

+   device->memory.type_count = 1;
+   device->memory.types[0] =
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+   if (device->has_cached_coherent_memory) {
+      device->memory.types[device->memory.type_count] =
+         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+      device->memory.type_count++;
+   }
+
+   if (device->has_cached_non_coherent_memory) {
+      device->memory.types[device->memory.type_count] =
+         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+      device->memory.type_count++;
+   }
+
    if (device->has_set_iova) {
       mtx_init(&device->vma_mutex, mtx_plain);
       util_vma_heap_init(&device->vma, device->va_start,
@@ -1645,12 +1668,13 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
    props->memoryHeaps[0].size = physical_device->heap.size;
    props->memoryHeaps[0].flags = physical_device->heap.flags;

-   props->memoryTypeCount = 1;
-   props->memoryTypes[0].propertyFlags =
-      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-   props->memoryTypes[0].heapIndex = 0;
+   props->memoryTypeCount = physical_device->memory.type_count;
+   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
+      props->memoryTypes[i] = (VkMemoryType) {
+         .propertyFlags = physical_device->memory.types[i],
+         .heapIndex = 0,
+      };
+   }

    vk_foreach_struct(ext, props2->pNext) {
@@ -2673,9 +2697,11 @@ tu_AllocateMemory(VkDevice _device,
       if (device->bo_sizes)
          snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
                   (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
+      VkMemoryPropertyFlags mem_property =
+         device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
       result = tu_bo_init_new_explicit_iova(
          device, &mem->bo, pAllocateInfo->allocationSize, client_address,
-         alloc_flags, name);
+         mem_property, alloc_flags, name);
    }

    if (result == VK_SUCCESS) {
@@ -2761,30 +2787,14 @@ tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
    /* TODO: unmap here instead of waiting for FreeMemory */
 }

-VKAPI_ATTR VkResult VKAPI_CALL
-tu_FlushMappedMemoryRanges(VkDevice _device,
-                           uint32_t memoryRangeCount,
-                           const VkMappedMemoryRange *pMemoryRanges)
-{
-   return VK_SUCCESS;
-}
-
-VKAPI_ATTR VkResult VKAPI_CALL
-tu_InvalidateMappedMemoryRanges(VkDevice _device,
-                                uint32_t memoryRangeCount,
-                                const VkMappedMemoryRange *pMemoryRanges)
-{
-   return VK_SUCCESS;
-}
-
 static void
-tu_get_buffer_memory_requirements(uint64_t size,
+tu_get_buffer_memory_requirements(struct tu_device *dev, uint64_t size,
                                   VkMemoryRequirements2 *pMemoryRequirements)
 {
    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = MAX2(align64(size, 64), size),
       .alignment = 64,
-      .memoryTypeBits = 1,
+      .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
    };

    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -2804,22 +2814,24 @@ tu_get_buffer_memory_requirements(uint64_t size,

 VKAPI_ATTR void VKAPI_CALL
 tu_GetBufferMemoryRequirements2(
-   VkDevice device,
+   VkDevice _device,
    const VkBufferMemoryRequirementsInfo2 *pInfo,
    VkMemoryRequirements2 *pMemoryRequirements)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

-   tu_get_buffer_memory_requirements(buffer->vk.size, pMemoryRequirements);
+   tu_get_buffer_memory_requirements(device, buffer->vk.size, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
 tu_GetDeviceBufferMemoryRequirements(
-   VkDevice device,
+   VkDevice _device,
    const VkDeviceBufferMemoryRequirements *pInfo,
    VkMemoryRequirements2 *pMemoryRequirements)
 {
-   tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements);
+   TU_FROM_HANDLE(tu_device, device, _device);
+   tu_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
@@ -3296,8 +3308,10 @@ tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
-   pMemoryFdProperties->memoryTypeBits = 1;
+   pMemoryFdProperties->memoryTypeBits =
+      (1 << device->physical_device->memory.type_count) - 1;
    return VK_SUCCESS;
 }
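As a usage sketch, not part of the patch: with the table above, an application picks the new cached type by filtering the output of vkGetPhysicalDeviceMemoryProperties() against a resource's memoryTypeBits. The helper name below is invented for illustration.

static uint32_t
demo_find_memory_type(VkPhysicalDevice pdev, uint32_t type_bits,
                      VkMemoryPropertyFlags wanted)
{
   VkPhysicalDeviceMemoryProperties props;
   vkGetPhysicalDeviceMemoryProperties(pdev, &props);

   /* Return the first type allowed by type_bits that carries all of the
    * requested property flags; turnip reports every type in heap 0. */
   for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props.memoryTypes[i].propertyFlags & wanted) == wanted)
         return i;
   }
   return UINT32_MAX; /* caller falls back to the plain coherent type */
}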
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 1cdf1209109..9dbc65a67a3 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -91,6 +91,14 @@ struct tu_physical_device
    uint64_t va_start;
    uint64_t va_size;

+   bool has_cached_coherent_memory;
+   bool has_cached_non_coherent_memory;
+
+   struct {
+      uint32_t type_count;
+      VkMemoryPropertyFlags types[VK_MAX_MEMORY_TYPES];
+   } memory;
+
    struct fd_dev_id dev_id;

    const struct fd_dev_info *info;
diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc
index 093f320d6e9..ca3cc60f7c5 100644
--- a/src/freedreno/vulkan/tu_image.cc
+++ b/src/freedreno/vulkan/tu_image.cc
@@ -752,13 +752,13 @@ tu_DestroyImage(VkDevice _device,
 }

 static void
-tu_get_image_memory_requirements(struct tu_image *image,
+tu_get_image_memory_requirements(struct tu_device *dev, struct tu_image *image,
                                  VkMemoryRequirements2 *pMemoryRequirements)
 {
    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = image->total_size,
       .alignment = image->layout[0].base_align,
-      .memoryTypeBits = 1,
+      .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
    };

    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -778,13 +778,14 @@ tu_get_image_memory_requirements(struct tu_image *image,
 }

 VKAPI_ATTR void VKAPI_CALL
-tu_GetImageMemoryRequirements2(VkDevice device,
+tu_GetImageMemoryRequirements2(VkDevice _device,
                                const VkImageMemoryRequirementsInfo2 *pInfo,
                                VkMemoryRequirements2 *pMemoryRequirements)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    TU_FROM_HANDLE(tu_image, image, pInfo->image);

-   tu_get_image_memory_requirements(image, pMemoryRequirements);
+   tu_get_image_memory_requirements(device, image, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
@@ -810,7 +811,7 @@ tu_GetDeviceImageMemoryRequirements(
    tu_image_init(device, &image, pInfo->pCreateInfo, DRM_FORMAT_MOD_INVALID,
                  NULL);

-   tu_get_image_memory_requirements(&image, pMemoryRequirements);
+   tu_get_image_memory_requirements(device, &image, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
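For reference, the mask arithmetic the buffer and image requirement helpers now share: every advertised type lives in heap 0, so every resource accepts every type and the bits form a contiguous low mask. A worked example, with the count assumed for illustration:

/* type_count == 3 when both cached variants are supported:
 *   bit 0: DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT
 *   bit 1: the same, plus HOST_CACHED (cached-coherent)
 *   bit 2: DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED (cached, non-coherent)
 * (1 << 3) - 1 == 0x7, i.e. all three types are acceptable. */
uint32_t memory_type_bits = (1u << 3) - 1;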
diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc
index c953431bb14..549acefc9a7 100644
--- a/src/freedreno/vulkan/tu_knl.cc
+++ b/src/freedreno/vulkan/tu_knl.cc
@@ -27,9 +27,10 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
                              struct tu_bo **out_bo,
                              uint64_t size,
                              uint64_t client_iova,
+                             VkMemoryPropertyFlags mem_property,
                              enum tu_bo_alloc_flags flags,
                              const char *name)
 {
-   return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, flags, name);
+   return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, mem_property, flags, name);
 }

 VkResult
diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h
index ede292c744a..41a2bf80996 100644
--- a/src/freedreno/vulkan/tu_knl.h
+++ b/src/freedreno/vulkan/tu_knl.h
@@ -60,7 +60,8 @@ struct tu_knl {
    int (*submitqueue_new)(const struct tu_device *dev, int priority, uint32_t *queue_id);
    void (*submitqueue_close)(const struct tu_device *dev, uint32_t queue_id);
    VkResult (*bo_init)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
-                       uint64_t client_iova, enum tu_bo_alloc_flags flags, const char *name);
+                       uint64_t client_iova, VkMemoryPropertyFlags mem_property,
+                       enum tu_bo_alloc_flags flags, const char *name);
    VkResult (*bo_init_dmabuf)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
                               int prime_fd);
    int (*bo_export_dmabuf)(struct tu_device *dev, struct tu_bo *bo);
@@ -87,13 +88,20 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
                             struct tu_bo **out_bo,
                             uint64_t size,
                             uint64_t client_iova,
-                            enum tu_bo_alloc_flags flags, const char *name);
+                            VkMemoryPropertyFlags mem_property,
+                            enum tu_bo_alloc_flags flags,
+                            const char *name);

 static inline VkResult
 tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
                enum tu_bo_alloc_flags flags, const char *name)
 {
-   return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags, name);
+   return tu_bo_init_new_explicit_iova(
+      dev, out_bo, size, 0,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+      flags, name);
 }

 VkResult
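A note on the default above: driver-internal BOs keep the historical write-combined behavior because tu_bo_init_new() pins the coherent property trio; only vkAllocateMemory() forwards the user-selected type into the backend. A hypothetical internal allocation, for illustration:

VkResult
demo_alloc_internal(struct tu_device *dev, struct tu_bo **bo)
{
   /* Goes through tu_bo_init_new(), so the kernel backend sees
    * DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT regardless of which
    * memory types the physical device advertises to applications. */
   return tu_bo_init_new(dev, bo, 4096, TU_BO_ALLOC_NO_FLAGS, "demo");
}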
diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc
index c658592d348..f68341e7994 100644
--- a/src/freedreno/vulkan/tu_knl_drm_msm.cc
+++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc
@@ -133,6 +133,25 @@ tu_drm_get_priorities(const struct tu_physical_device *dev)
    return val;
 }

+static bool
+tu_drm_is_memory_type_supported(int fd, uint32_t flags)
+{
+   struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
+
+   int ret =
+      drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
+   if (ret) {
+      return false;
+   }
+
+   struct drm_gem_close req_close = {
+      .handle = req_alloc.handle,
+   };
+   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
+
+   return true;
+}
+
 static int
 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
 {
@@ -387,17 +406,21 @@ msm_bo_init(struct tu_device *dev,
             struct tu_bo **out_bo,
             uint64_t size,
             uint64_t client_iova,
+            VkMemoryPropertyFlags mem_property,
             enum tu_bo_alloc_flags flags,
             const char *name)
 {
-   /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
-    * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
-    */
    struct drm_msm_gem_new req = {
       .size = size,
-      .flags = MSM_BO_WC
+      .flags = 0
    };

+   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+      req.flags |= MSM_BO_CACHED_COHERENT;
+   } else {
+      req.flags |= MSM_BO_WC;
+   }
+
    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
       req.flags |= MSM_BO_GPU_READONLY;

@@ -559,6 +582,22 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
    u_rwlock_rdunlock(&dev->dma_bo_lock);
 }

+VkResult
+tu_FlushMappedMemoryRanges(VkDevice _device,
+                           uint32_t memoryRangeCount,
+                           const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_InvalidateMappedMemoryRanges(VkDevice _device,
+                                uint32_t memoryRangeCount,
+                                const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
 extern const struct vk_sync_type tu_timeline_sync_type;

 static inline bool
@@ -1252,6 +1291,12 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
     */
    device->has_set_iova = false;

+   /* Even if the kernel is new enough, the GPU itself may not support it. */
+   device->has_cached_coherent_memory =
+      (device->msm_minor_version >= 8) &&
+      tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
+   device->has_cached_non_coherent_memory = false;
+
    ret = tu_drm_get_param(device, MSM_PARAM_FAULTS, &device->fault_count);
    if (ret != 0) {
       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
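The msm mapping reduces to a single predicate; sketched below as a standalone helper that does not exist in the driver, just to make the behavior explicit:

static uint32_t
demo_msm_cache_flags(VkMemoryPropertyFlags props)
{
   /* msm never advertises a cached non-coherent type
    * (has_cached_non_coherent_memory is false above), so HOST_CACHED
    * always selects the cached-coherent kernel flag here. */
   return (props & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
             ? MSM_BO_CACHED_COHERENT : MSM_BO_WC;
}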
diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc
index 85e9f3d281d..796215f1f07 100644
--- a/src/freedreno/vulkan/tu_knl_kgsl.cc
+++ b/src/freedreno/vulkan/tu_knl_kgsl.cc
@@ -72,6 +72,7 @@ kgsl_bo_init(struct tu_device *dev,
              struct tu_bo **out_bo,
              uint64_t size,
              uint64_t client_iova,
+             VkMemoryPropertyFlags mem_property,
              enum tu_bo_alloc_flags flags,
              const char *name)
 {
@@ -81,6 +82,16 @@ kgsl_bo_init(struct tu_device *dev,
       .size = size,
    };

+   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
+         req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
+      }
+
+      req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
+   } else {
+      req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
+   }
+
    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
       req.flags |= KGSL_MEMFLAGS_GPUREADONLY;

@@ -209,6 +220,66 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
 }

+static VkResult
+kgsl_sync_cache(VkDevice _device,
+                uint32_t op,
+                uint32_t count,
+                const VkMappedMemoryRange *ranges)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   struct kgsl_gpuobj_sync_obj *sync_list =
+      (struct kgsl_gpuobj_sync_obj *) vk_zalloc(
+         &device->vk.alloc, sizeof(*sync_list) * count, 8,
+         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+
+   struct kgsl_gpuobj_sync gpuobj_sync = {
+      .objs = (uintptr_t) sync_list,
+      .obj_len = sizeof(*sync_list),
+      .count = count,
+   };
+
+   for (uint32_t i = 0; i < count; i++) {
+      TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
+
+      sync_list[i].op = op;
+      sync_list[i].id = mem->bo->gem_handle;
+      sync_list[i].offset = ranges[i].offset;
+      sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
+                               ? (mem->bo->size - ranges[i].offset)
+                               : ranges[i].size;
+   }
+
+   /* There are two other KGSL ioctls for flushing/invalidation:
+    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
+    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
+    *   has no way to specify ranges.
+    *
+    * IOCTL_KGSL_GPUOBJ_SYNC, on the other hand, maps exactly onto this
+    * VK entry point.
+    */
+   safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);
+
+   vk_free(&device->vk.alloc, sync_list);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_FlushMappedMemoryRanges(VkDevice device,
+                           uint32_t count,
+                           const VkMappedMemoryRange *ranges)
+{
+   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
+}
+
+VkResult
+tu_InvalidateMappedMemoryRanges(VkDevice device,
+                                uint32_t count,
+                                const VkMappedMemoryRange *ranges)
+{
+   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
+}
+
 static VkResult
 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
 {
@@ -223,6 +294,26 @@ get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
       : VK_SUCCESS;
 }

+static bool
+kgsl_is_memory_type_supported(int fd, uint32_t flags)
+{
+   struct kgsl_gpumem_alloc_id req_alloc = {
+      .flags = flags,
+      .size = 0x1000,
+   };
+
+   int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
+   if (ret) {
+      return false;
+   }
+
+   struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
+
+   safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
+
+   return true;
+}
+
 enum kgsl_syncobj_state {
    KGSL_SYNCOBJ_STATE_UNSIGNALED,
    KGSL_SYNCOBJ_STATE_SIGNALED,
@@ -1169,6 +1260,12 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
    device->heap.used = 0u;
    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

+   /* Even if the kernel is new enough, the GPU itself may not support it. */
+   device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
+      fd, KGSL_MEMFLAGS_IOCOHERENT |
+             (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
+   device->has_cached_non_coherent_memory = true;
+
    instance->knl = &kgsl_knl_funcs;

    result = tu_physical_device_init(device, instance);
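For completeness, the app-side contract that makes the kgsl cached non-coherent type usable, sketched with assumed handles (device, mem): CPU writes must be flushed before the GPU reads them, and CPU reads of GPU output must be preceded by an invalidate.

VkMappedMemoryRange range = {
   .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   .memory = mem,   /* allocated from a HOST_CACHED type without HOST_COHERENT */
   .offset = 0,
   .size = VK_WHOLE_SIZE,
};

/* After writing through the mapping, push dirty CPU cache lines out
 * (reaches kgsl_sync_cache() with KGSL_GPUMEM_CACHE_TO_GPU). */
vkFlushMappedMemoryRanges(device, 1, &range);

/* After GPU writes complete, drop stale CPU cache lines before reading
 * (reaches kgsl_sync_cache() with KGSL_GPUMEM_CACHE_FROM_GPU). */
vkInvalidateMappedMemoryRanges(device, 1, &range);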