turnip: add cached and cached-coherent memory types

vkd3d requires a cached memory type.

The MSM backend doesn't have a dedicated ioctl for memory
flushing/invalidation, so we'd have to use the cvac and civac
ARM assembly instructions (to be done in a following commit).
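
A minimal sketch of what such CPU-side cache maintenance could look like
(this is an assumption about the follow-up, not part of this commit; the
function name and structure are hypothetical):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* aarch64 only: Linux allows userspace to execute "dc cvac"/"dc civac"
 * and to read CTR_EL0, which is what the MSM path would rely on.
 */
static void
cpu_sync_cache_range(void *start, size_t size, bool invalidate)
{
   /* CTR_EL0[19:16] (DminLine) is log2 of the smallest D-cache line,
    * measured in 4-byte words.
    */
   uint64_t ctr;
   __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr));
   const size_t line = 4u << ((ctr >> 16) & 0xf);

   uintptr_t addr = (uintptr_t) start & ~(uintptr_t)(line - 1);
   const uintptr_t end = (uintptr_t) start + size;

   for (; addr < end; addr += line) {
      if (invalidate)
         __asm__ volatile("dc civac, %0" :: "r"(addr) : "memory");  /* clean + invalidate */
      else
         __asm__ volatile("dc cvac, %0" :: "r"(addr) : "memory");   /* clean (flush) */
   }
   __asm__ volatile("dsb sy" ::: "memory");
}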

KGSL has an ioctl for this, which is used in this commit.

Note: the CTS doesn't seem to be good at testing flushing and
invalidation; the tests I found passed on KGSL even with both
functions being no-ops.

Based on an old patch by Jonathan Marek.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7636

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20550>
Danylo Piliaiev, 2023-01-06 18:59:36 +01:00 (committed by Marge Bot)
parent bd816084c6, commit 5a59410962
7 changed files with 217 additions and 43 deletions

@@ -326,6 +326,29 @@ tu_physical_device_init(struct tu_physical_device *device,
goto fail_free_name;
}
device->memory.type_count = 1;
device->memory.types[0] =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (device->has_cached_coherent_memory) {
device->memory.types[device->memory.type_count] =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
device->memory.type_count++;
}
if (device->has_cached_non_coherent_memory) {
device->memory.types[device->memory.type_count] =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
device->memory.type_count++;
}
if (device->has_set_iova) {
mtx_init(&device->vma_mutex, mtx_plain);
util_vma_heap_init(&device->vma, device->va_start,
@@ -1645,12 +1668,13 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
props->memoryHeaps[0].size = physical_device->heap.size;
props->memoryHeaps[0].flags = physical_device->heap.flags;
props->memoryTypeCount = 1;
props->memoryTypes[0].propertyFlags =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
props->memoryTypes[0].heapIndex = 0;
props->memoryTypeCount = physical_device->memory.type_count;
for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
props->memoryTypes[i] = (VkMemoryType) {
.propertyFlags = physical_device->memory.types[i],
.heapIndex = 0,
};
}
vk_foreach_struct(ext, props2->pNext)
{
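
For context (not part of the diff): with the cached types advertised above,
a client such as vkd3d can pick one by matching property flags. A rough
sketch against the standard Vulkan API, with hypothetical names:

#include <vulkan/vulkan.h>

/* Illustration only: return the index of a HOST_VISIBLE | HOST_CACHED
 * memory type that is also allowed by a resource's memoryTypeBits,
 * or -1 if none exists (fall back to the write-combined type).
 */
static int32_t
find_cached_memory_type(VkPhysicalDevice pdev, uint32_t type_bits)
{
   VkPhysicalDeviceMemoryProperties props;
   vkGetPhysicalDeviceMemoryProperties(pdev, &props);

   const VkMemoryPropertyFlags wanted =
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

   for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props.memoryTypes[i].propertyFlags & wanted) == wanted)
         return (int32_t) i;
   }
   return -1;
}
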
@@ -2673,9 +2697,11 @@ tu_AllocateMemory(VkDevice _device,
if (device->bo_sizes)
snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
(long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
VkMemoryPropertyFlags mem_property =
device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
result = tu_bo_init_new_explicit_iova(
device, &mem->bo, pAllocateInfo->allocationSize, client_address,
alloc_flags, name);
mem_property, alloc_flags, name);
}
if (result == VK_SUCCESS) {
@@ -2761,30 +2787,14 @@ tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
/* TODO: unmap here instead of waiting for FreeMemory */
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_FlushMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_InvalidateMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
static void
tu_get_buffer_memory_requirements(uint64_t size,
tu_get_buffer_memory_requirements(struct tu_device *dev, uint64_t size,
VkMemoryRequirements2 *pMemoryRequirements)
{
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
.size = MAX2(align64(size, 64), size),
.alignment = 64,
.memoryTypeBits = 1,
.memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
};
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -2804,22 +2814,24 @@ tu_get_buffer_memory_requirements(uint64_t size,
VKAPI_ATTR void VKAPI_CALL
tu_GetBufferMemoryRequirements2(
VkDevice device,
VkDevice _device,
const VkBufferMemoryRequirementsInfo2 *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
tu_get_buffer_memory_requirements(buffer->vk.size, pMemoryRequirements);
tu_get_buffer_memory_requirements(device, buffer->vk.size, pMemoryRequirements);
}
VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceBufferMemoryRequirements(
VkDevice device,
VkDevice _device,
const VkDeviceBufferMemoryRequirements *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements);
TU_FROM_HANDLE(tu_device, device, _device);
tu_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pMemoryRequirements);
}
VKAPI_ATTR void VKAPI_CALL
@@ -3296,8 +3308,10 @@ tu_GetMemoryFdPropertiesKHR(VkDevice _device,
int fd,
VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
TU_FROM_HANDLE(tu_device, device, _device);
assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
pMemoryFdProperties->memoryTypeBits = 1;
pMemoryFdProperties->memoryTypeBits =
(1 << device->physical_device->memory.type_count) - 1;
return VK_SUCCESS;
}

@@ -91,6 +91,14 @@ struct tu_physical_device
uint64_t va_start;
uint64_t va_size;
bool has_cached_coherent_memory;
bool has_cached_non_coherent_memory;
struct {
uint32_t type_count;
VkMemoryPropertyFlags types[VK_MAX_MEMORY_TYPES];
} memory;
struct fd_dev_id dev_id;
const struct fd_dev_info *info;

@@ -752,13 +752,13 @@ tu_DestroyImage(VkDevice _device,
}
static void
tu_get_image_memory_requirements(struct tu_image *image,
tu_get_image_memory_requirements(struct tu_device *dev, struct tu_image *image,
VkMemoryRequirements2 *pMemoryRequirements)
{
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
.size = image->total_size,
.alignment = image->layout[0].base_align,
.memoryTypeBits = 1,
.memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
};
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -778,13 +778,14 @@ tu_get_image_memory_requirements(struct tu_image *image,
}
VKAPI_ATTR void VKAPI_CALL
tu_GetImageMemoryRequirements2(VkDevice device,
tu_GetImageMemoryRequirements2(VkDevice _device,
const VkImageMemoryRequirementsInfo2 *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_image, image, pInfo->image);
tu_get_image_memory_requirements(image, pMemoryRequirements);
tu_get_image_memory_requirements(device, image, pMemoryRequirements);
}
VKAPI_ATTR void VKAPI_CALL
@@ -810,7 +811,7 @@ tu_GetDeviceImageMemoryRequirements(
tu_image_init(device, &image, pInfo->pCreateInfo, DRM_FORMAT_MOD_INVALID,
NULL);
tu_get_image_memory_requirements(&image, pMemoryRequirements);
tu_get_image_memory_requirements(device, &image, pMemoryRequirements);
}
VKAPI_ATTR void VKAPI_CALL

@@ -27,9 +27,10 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
struct tu_bo **out_bo,
uint64_t size,
uint64_t client_iova,
VkMemoryPropertyFlags mem_property,
enum tu_bo_alloc_flags flags, const char *name)
{
return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, flags, name);
return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, mem_property, flags, name);
}
VkResult

@@ -60,7 +60,8 @@ struct tu_knl {
int (*submitqueue_new)(const struct tu_device *dev, int priority, uint32_t *queue_id);
void (*submitqueue_close)(const struct tu_device *dev, uint32_t queue_id);
VkResult (*bo_init)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
uint64_t client_iova, enum tu_bo_alloc_flags flags, const char *name);
uint64_t client_iova, VkMemoryPropertyFlags mem_property,
enum tu_bo_alloc_flags flags, const char *name);
VkResult (*bo_init_dmabuf)(struct tu_device *dev, struct tu_bo **out_bo,
uint64_t size, int prime_fd);
int (*bo_export_dmabuf)(struct tu_device *dev, struct tu_bo *bo);
@@ -87,13 +88,20 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
struct tu_bo **out_bo,
uint64_t size,
uint64_t client_iova,
enum tu_bo_alloc_flags flags, const char *name);
VkMemoryPropertyFlags mem_property,
enum tu_bo_alloc_flags flags,
const char *name);
static inline VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
enum tu_bo_alloc_flags flags, const char *name)
{
return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags, name);
return tu_bo_init_new_explicit_iova(
dev, out_bo, size, 0,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
flags, name);
}
VkResult

@@ -133,6 +133,25 @@ tu_drm_get_priorities(const struct tu_physical_device *dev)
return val;
}
static bool
tu_drm_is_memory_type_supported(int fd, uint32_t flags)
{
struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
int ret =
drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
if (ret) {
return false;
}
struct drm_gem_close req_close = {
.handle = req_alloc.handle,
};
drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
return true;
}
static int
msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
@@ -387,17 +406,21 @@ msm_bo_init(struct tu_device *dev,
struct tu_bo **out_bo,
uint64_t size,
uint64_t client_iova,
VkMemoryPropertyFlags mem_property,
enum tu_bo_alloc_flags flags,
const char *name)
{
/* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
* always sets `flags = MSM_BO_WC`, and we copy that behavior here.
*/
struct drm_msm_gem_new req = {
.size = size,
.flags = MSM_BO_WC
.flags = 0
};
if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
req.flags |= MSM_BO_CACHED_COHERENT;
} else {
req.flags |= MSM_BO_WC;
}
if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
req.flags |= MSM_BO_GPU_READONLY;
@@ -559,6 +582,22 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
u_rwlock_rdunlock(&dev->dma_bo_lock);
}
VkResult
tu_FlushMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
VkResult
tu_InvalidateMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
extern const struct vk_sync_type tu_timeline_sync_type;
static inline bool
@@ -1252,6 +1291,12 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
*/
device->has_set_iova = false;
/* Even if the kernel is new enough, the GPU itself may not support it. */
device->has_cached_coherent_memory =
(device->msm_minor_version >= 8) &&
tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
device->has_cached_non_coherent_memory = false;
ret = tu_drm_get_param(device, MSM_PARAM_FAULTS, &device->fault_count);
if (ret != 0) {
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,

@@ -72,6 +72,7 @@ kgsl_bo_init(struct tu_device *dev,
struct tu_bo **out_bo,
uint64_t size,
uint64_t client_iova,
VkMemoryPropertyFlags mem_property,
enum tu_bo_alloc_flags flags,
const char *name)
{
@@ -81,6 +82,16 @@ kgsl_bo_init(struct tu_device *dev,
.size = size,
};
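/* Pick the CPU cache mode for the allocation: HOST_CACHED together with
 * HOST_COHERENT maps to a write-back mapping plus KGSL's IO-coherency flag,
 * HOST_CACHED alone maps to plain write-back (flushed/invalidated later via
 * IOCTL_KGSL_GPUOBJ_SYNC), and everything else stays write-combined.
 */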
if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
}
req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
} else {
req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
}
if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
@@ -209,6 +220,66 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}
static VkResult
kgsl_sync_cache(VkDevice _device,
uint32_t op,
uint32_t count,
const VkMappedMemoryRange *ranges)
{
TU_FROM_HANDLE(tu_device, device, _device);
struct kgsl_gpuobj_sync_obj *sync_list =
(struct kgsl_gpuobj_sync_obj *) vk_zalloc(
&device->vk.alloc, sizeof(*sync_list) * count, 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
struct kgsl_gpuobj_sync gpuobj_sync = {
.objs = (uintptr_t) sync_list,
.obj_len = sizeof(*sync_list),
.count = count,
};
for (uint32_t i = 0; i < count; i++) {
TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
sync_list[i].op = op;
sync_list[i].id = mem->bo->gem_handle;
sync_list[i].offset = ranges[i].offset;
sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
? (mem->bo->size - ranges[i].offset)
: ranges[i].size;
}
/* There are two other KGSL ioctls for flushing/invalidation:
* - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
* - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
* has no way to specify ranges.
*
* IOCTL_KGSL_GPUOBJ_SYNC, on the other hand, maps exactly to these VK
* entry points.
*/
safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);
vk_free(&device->vk.alloc, sync_list);
return VK_SUCCESS;
}
VkResult
tu_FlushMappedMemoryRanges(VkDevice device,
uint32_t count,
const VkMappedMemoryRange *ranges)
{
return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
}
VkResult
tu_InvalidateMappedMemoryRanges(VkDevice device,
uint32_t count,
const VkMappedMemoryRange *ranges)
{
return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
}
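
/* Hypothetical usage (illustration only, not part of the diff): an
 * application writes through a cached, non-coherent mapping and flushes
 * it so the GPU sees the data; "device", "memory", "mapped_ptr", "data"
 * and "data_size" are assumed to exist.
 *
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    memcpy(mapped_ptr, data, data_size);
 *    vkFlushMappedMemoryRanges(device, 1, &range);
 *
 * On KGSL this reaches kgsl_sync_cache(..., KGSL_GPUMEM_CACHE_TO_GPU, ...)
 * above; the mirror call, vkInvalidateMappedMemoryRanges, is needed before
 * reading data the GPU wrote.
 */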
static VkResult
get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
{
@@ -223,6 +294,26 @@ get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
: VK_SUCCESS;
}
static bool
kgsl_is_memory_type_supported(int fd, uint32_t flags)
{
struct kgsl_gpumem_alloc_id req_alloc = {
.flags = flags,
.size = 0x1000,
};
int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
if (ret) {
return false;
}
struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
return true;
}
enum kgsl_syncobj_state {
KGSL_SYNCOBJ_STATE_UNSIGNALED,
KGSL_SYNCOBJ_STATE_SIGNALED,
@@ -1169,6 +1260,12 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
device->heap.used = 0u;
device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
/* Even if the kernel is new enough, the GPU itself may not support it. */
device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
fd, KGSL_MEMFLAGS_IOCOHERENT |
(KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
device->has_cached_non_coherent_memory = true;
instance->knl = &kgsl_knl_funcs;
result = tu_physical_device_init(device, instance);