diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index d8acc58b2d9..4d503e57073 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -326,6 +326,29 @@ tu_physical_device_init(struct tu_physical_device *device,
       goto fail_free_name;
    }

+   device->memory.type_count = 1;
+   device->memory.types[0] =
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+   if (device->has_cached_coherent_memory) {
+      device->memory.types[device->memory.type_count] =
+         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+      device->memory.type_count++;
+   }
+
+   if (device->has_cached_non_coherent_memory) {
+      device->memory.types[device->memory.type_count] =
+         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+      device->memory.type_count++;
+   }
+
    if (device->has_set_iova) {
       mtx_init(&device->vma_mutex, mtx_plain);
       util_vma_heap_init(&device->vma, device->va_start,
@@ -1645,12 +1668,13 @@ tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
    props->memoryHeaps[0].size = physical_device->heap.size;
    props->memoryHeaps[0].flags = physical_device->heap.flags;

-   props->memoryTypeCount = 1;
-   props->memoryTypes[0].propertyFlags =
-      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-   props->memoryTypes[0].heapIndex = 0;
+   props->memoryTypeCount = physical_device->memory.type_count;
+   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
+      props->memoryTypes[i] = (VkMemoryType) {
+         .propertyFlags = physical_device->memory.types[i],
+         .heapIndex = 0,
+      };
+   }

    vk_foreach_struct(ext, props2->pNext) {
@@ -2673,9 +2697,11 @@ tu_AllocateMemory(VkDevice _device,
       if (device->bo_sizes)
          snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
                   (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
+      VkMemoryPropertyFlags mem_property =
+         device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
       result = tu_bo_init_new_explicit_iova(
          device, &mem->bo, pAllocateInfo->allocationSize, client_address,
-         alloc_flags, name);
+         mem_property, alloc_flags, name);
    }

    if (result == VK_SUCCESS) {
@@ -2761,30 +2787,14 @@ tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
    /* TODO: unmap here instead of waiting for FreeMemory */
 }

-VKAPI_ATTR VkResult VKAPI_CALL
-tu_FlushMappedMemoryRanges(VkDevice _device,
-                           uint32_t memoryRangeCount,
-                           const VkMappedMemoryRange *pMemoryRanges)
-{
-   return VK_SUCCESS;
-}
-
-VKAPI_ATTR VkResult VKAPI_CALL
-tu_InvalidateMappedMemoryRanges(VkDevice _device,
-                                uint32_t memoryRangeCount,
-                                const VkMappedMemoryRange *pMemoryRanges)
-{
-   return VK_SUCCESS;
-}
-
 static void
-tu_get_buffer_memory_requirements(uint64_t size,
+tu_get_buffer_memory_requirements(struct tu_device *dev, uint64_t size,
                                   VkMemoryRequirements2 *pMemoryRequirements)
 {
    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = MAX2(align64(size, 64), size),
       .alignment = 64,
-      .memoryTypeBits = 1,
+      .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
    };

    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -2804,22 +2814,24 @@ tu_get_buffer_memory_requirements(uint64_t size,

 VKAPI_ATTR void VKAPI_CALL
 tu_GetBufferMemoryRequirements2(
-   VkDevice device,
+   VkDevice _device,
    const VkBufferMemoryRequirementsInfo2 *pInfo,
    VkMemoryRequirements2 *pMemoryRequirements)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

-   tu_get_buffer_memory_requirements(buffer->vk.size, pMemoryRequirements);
+   tu_get_buffer_memory_requirements(device, buffer->vk.size, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
 tu_GetDeviceBufferMemoryRequirements(
-   VkDevice device,
+   VkDevice _device,
    const VkDeviceBufferMemoryRequirements *pInfo,
    VkMemoryRequirements2 *pMemoryRequirements)
 {
-   tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements);
+   TU_FROM_HANDLE(tu_device, device, _device);
+   tu_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
@@ -3296,8 +3308,10 @@ tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
-   pMemoryFdProperties->memoryTypeBits = 1;
+   pMemoryFdProperties->memoryTypeBits =
+      (1 << device->physical_device->memory.type_count) - 1;
    return VK_SUCCESS;
 }
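As a usage sketch, not part of the patch: with the table above, an application picks the new cached type by filtering the output of vkGetPhysicalDeviceMemoryProperties() against a resource's memoryTypeBits. The helper name below is invented for illustration.

static uint32_t
demo_find_memory_type(VkPhysicalDevice pdev, uint32_t type_bits,
                      VkMemoryPropertyFlags wanted)
{
   VkPhysicalDeviceMemoryProperties props;
   vkGetPhysicalDeviceMemoryProperties(pdev, &props);

   /* Return the first type allowed by type_bits that carries all of the
    * requested property flags; turnip reports every type in heap 0. */
   for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props.memoryTypes[i].propertyFlags & wanted) == wanted)
         return i;
   }
   return UINT32_MAX; /* caller falls back to the plain coherent type */
}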
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 1cdf1209109..9dbc65a67a3 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -91,6 +91,14 @@ struct tu_physical_device
    uint64_t va_start;
    uint64_t va_size;

+   bool has_cached_coherent_memory;
+   bool has_cached_non_coherent_memory;
+
+   struct {
+      uint32_t type_count;
+      VkMemoryPropertyFlags types[VK_MAX_MEMORY_TYPES];
+   } memory;
+
    struct fd_dev_id dev_id;

    const struct fd_dev_info *info;
diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc
index 093f320d6e9..ca3cc60f7c5 100644
--- a/src/freedreno/vulkan/tu_image.cc
+++ b/src/freedreno/vulkan/tu_image.cc
@@ -752,13 +752,13 @@ tu_DestroyImage(VkDevice _device,
 }

 static void
-tu_get_image_memory_requirements(struct tu_image *image,
+tu_get_image_memory_requirements(struct tu_device *dev, struct tu_image *image,
                                  VkMemoryRequirements2 *pMemoryRequirements)
 {
    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = image->total_size,
       .alignment = image->layout[0].base_align,
-      .memoryTypeBits = 1,
+      .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
    };

    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
@@ -778,13 +778,14 @@ tu_get_image_memory_requirements(struct tu_image *image,
 }

 VKAPI_ATTR void VKAPI_CALL
-tu_GetImageMemoryRequirements2(VkDevice device,
+tu_GetImageMemoryRequirements2(VkDevice _device,
                                const VkImageMemoryRequirementsInfo2 *pInfo,
                                VkMemoryRequirements2 *pMemoryRequirements)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
    TU_FROM_HANDLE(tu_image, image, pInfo->image);

-   tu_get_image_memory_requirements(image, pMemoryRequirements);
+   tu_get_image_memory_requirements(device, image, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
@@ -810,7 +811,7 @@ tu_GetDeviceImageMemoryRequirements(
    tu_image_init(device, &image, pInfo->pCreateInfo, DRM_FORMAT_MOD_INVALID,
                  NULL);

-   tu_get_image_memory_requirements(&image, pMemoryRequirements);
+   tu_get_image_memory_requirements(device, &image, pMemoryRequirements);
 }

 VKAPI_ATTR void VKAPI_CALL
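For reference, the mask arithmetic the buffer and image requirement helpers now share: every advertised type lives in heap 0, so every resource accepts every type and the bits form a contiguous low mask. A worked example, with the count assumed for illustration:

/* type_count == 3 when both cached variants are supported:
 *   bit 0: DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT
 *   bit 1: the same, plus HOST_CACHED (cached-coherent)
 *   bit 2: DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED (cached, non-coherent)
 * (1 << 3) - 1 == 0x7, i.e. all three types are acceptable. */
uint32_t memory_type_bits = (1u << 3) - 1;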
diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc
index c953431bb14..549acefc9a7 100644
--- a/src/freedreno/vulkan/tu_knl.cc
+++ b/src/freedreno/vulkan/tu_knl.cc
@@ -27,9 +27,10 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
                              struct tu_bo **out_bo,
                              uint64_t size,
                              uint64_t client_iova,
+                             VkMemoryPropertyFlags mem_property,
                              enum tu_bo_alloc_flags flags,
                              const char *name)
 {
-   return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, flags, name);
+   return dev->instance->knl->bo_init(dev, out_bo, size, client_iova, mem_property, flags, name);
 }

 VkResult
diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h
index ede292c744a..41a2bf80996 100644
--- a/src/freedreno/vulkan/tu_knl.h
+++ b/src/freedreno/vulkan/tu_knl.h
@@ -60,7 +60,8 @@ struct tu_knl {
    int (*submitqueue_new)(const struct tu_device *dev, int priority, uint32_t *queue_id);
    void (*submitqueue_close)(const struct tu_device *dev, uint32_t queue_id);
    VkResult (*bo_init)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
-                       uint64_t client_iova, enum tu_bo_alloc_flags flags, const char *name);
+                       uint64_t client_iova, VkMemoryPropertyFlags mem_property,
+                       enum tu_bo_alloc_flags flags, const char *name);
    VkResult (*bo_init_dmabuf)(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
                               int prime_fd);
    int (*bo_export_dmabuf)(struct tu_device *dev, struct tu_bo *bo);
@@ -87,13 +88,20 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev,
                             struct tu_bo **out_bo,
                             uint64_t size,
                             uint64_t client_iova,
-                            enum tu_bo_alloc_flags flags, const char *name);
+                            VkMemoryPropertyFlags mem_property,
+                            enum tu_bo_alloc_flags flags,
+                            const char *name);

 static inline VkResult
 tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
                enum tu_bo_alloc_flags flags, const char *name)
 {
-   return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags, name);
+   return tu_bo_init_new_explicit_iova(
+      dev, out_bo, size, 0,
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+      flags, name);
 }

 VkResult
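A note on the default above: driver-internal BOs keep the historical write-combined behavior because tu_bo_init_new() pins the coherent property trio; only vkAllocateMemory() forwards the user-selected type into the backend. A hypothetical internal allocation, for illustration:

VkResult
demo_alloc_internal(struct tu_device *dev, struct tu_bo **bo)
{
   /* Goes through tu_bo_init_new(), so the kernel backend sees
    * DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT regardless of which
    * memory types the physical device advertises to applications. */
   return tu_bo_init_new(dev, bo, 4096, TU_BO_ALLOC_NO_FLAGS, "demo");
}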
diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc
index c658592d348..f68341e7994 100644
--- a/src/freedreno/vulkan/tu_knl_drm_msm.cc
+++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc
@@ -133,6 +133,25 @@ tu_drm_get_priorities(const struct tu_physical_device *dev)
    return val;
 }

+static bool
+tu_drm_is_memory_type_supported(int fd, uint32_t flags)
+{
+   struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };
+
+   int ret =
+      drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
+   if (ret) {
+      return false;
+   }
+
+   struct drm_gem_close req_close = {
+      .handle = req_alloc.handle,
+   };
+   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);
+
+   return true;
+}
+
 static int
 msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
 {
@@ -387,17 +406,21 @@ msm_bo_init(struct tu_device *dev,
             struct tu_bo **out_bo,
             uint64_t size,
             uint64_t client_iova,
+            VkMemoryPropertyFlags mem_property,
             enum tu_bo_alloc_flags flags,
             const char *name)
 {
-   /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c
-    * always sets `flags = MSM_BO_WC`, and we copy that behavior here.
-    */
    struct drm_msm_gem_new req = {
       .size = size,
-      .flags = MSM_BO_WC
+      .flags = 0
    };

+   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+      req.flags |= MSM_BO_CACHED_COHERENT;
+   } else {
+      req.flags |= MSM_BO_WC;
+   }
+
    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
       req.flags |= MSM_BO_GPU_READONLY;

@@ -559,6 +582,22 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
    u_rwlock_rdunlock(&dev->dma_bo_lock);
 }

+VkResult
+tu_FlushMappedMemoryRanges(VkDevice _device,
+                           uint32_t memoryRangeCount,
+                           const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_InvalidateMappedMemoryRanges(VkDevice _device,
+                                uint32_t memoryRangeCount,
+                                const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
 extern const struct vk_sync_type tu_timeline_sync_type;

 static inline bool
@@ -1252,6 +1291,12 @@ tu_knl_drm_msm_load(struct tu_instance *instance,
     */
    device->has_set_iova = false;

+   /* Even if the kernel is new enough, the GPU itself may not support it. */
+   device->has_cached_coherent_memory =
+      (device->msm_minor_version >= 8) &&
+      tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);
+   device->has_cached_non_coherent_memory = false;
+
    ret = tu_drm_get_param(device, MSM_PARAM_FAULTS, &device->fault_count);
    if (ret != 0) {
       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
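The msm mapping reduces to a single predicate; sketched below as a standalone helper that does not exist in the driver, just to make the behavior explicit:

static uint32_t
demo_msm_cache_flags(VkMemoryPropertyFlags props)
{
   /* msm never advertises a cached non-coherent type
    * (has_cached_non_coherent_memory is false above), so HOST_CACHED
    * always selects the cached-coherent kernel flag here. */
   return (props & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
             ? MSM_BO_CACHED_COHERENT : MSM_BO_WC;
}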
diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc
index 85e9f3d281d..796215f1f07 100644
--- a/src/freedreno/vulkan/tu_knl_kgsl.cc
+++ b/src/freedreno/vulkan/tu_knl_kgsl.cc
@@ -72,6 +72,7 @@ kgsl_bo_init(struct tu_device *dev,
              struct tu_bo **out_bo,
              uint64_t size,
              uint64_t client_iova,
+             VkMemoryPropertyFlags mem_property,
              enum tu_bo_alloc_flags flags,
              const char *name)
 {
@@ -81,6 +82,16 @@ kgsl_bo_init(struct tu_device *dev,
       .size = size,
    };

+   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
+      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
+         req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
+      }
+
+      req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
+   } else {
+      req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
+   }
+
    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
       req.flags |= KGSL_MEMFLAGS_GPUREADONLY;

@@ -209,6 +220,66 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
 }

+static VkResult
+kgsl_sync_cache(VkDevice _device,
+                uint32_t op,
+                uint32_t count,
+                const VkMappedMemoryRange *ranges)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   struct kgsl_gpuobj_sync_obj *sync_list =
+      (struct kgsl_gpuobj_sync_obj *) vk_zalloc(
+         &device->vk.alloc, sizeof(*sync_list) * count, 8,
+         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+
+   struct kgsl_gpuobj_sync gpuobj_sync = {
+      .objs = (uintptr_t) sync_list,
+      .obj_len = sizeof(*sync_list),
+      .count = count,
+   };
+
+   for (uint32_t i = 0; i < count; i++) {
+      TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
+
+      sync_list[i].op = op;
+      sync_list[i].id = mem->bo->gem_handle;
+      sync_list[i].offset = ranges[i].offset;
+      sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
+                               ? (mem->bo->size - ranges[i].offset)
+                               : ranges[i].size;
+   }
+
+   /* There are two other KGSL ioctls for flushing/invalidation:
+    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
+    * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
+    *   has no way to specify ranges.
+    *
+    * IOCTL_KGSL_GPUOBJ_SYNC, on the other hand, maps exactly onto this
+    * VK entry point.
+    */
+   safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);
+
+   vk_free(&device->vk.alloc, sync_list);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+tu_FlushMappedMemoryRanges(VkDevice device,
+                           uint32_t count,
+                           const VkMappedMemoryRange *ranges)
+{
+   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
+}
+
+VkResult
+tu_InvalidateMappedMemoryRanges(VkDevice device,
+                                uint32_t count,
+                                const VkMappedMemoryRange *ranges)
+{
+   return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
+}
+
 static VkResult
 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
 {
@@ -223,6 +294,26 @@ get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
       : VK_SUCCESS;
 }

+static bool
+kgsl_is_memory_type_supported(int fd, uint32_t flags)
+{
+   struct kgsl_gpumem_alloc_id req_alloc = {
+      .flags = flags,
+      .size = 0x1000,
+   };
+
+   int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
+   if (ret) {
+      return false;
+   }
+
+   struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
+
+   safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
+
+   return true;
+}
+
 enum kgsl_syncobj_state {
    KGSL_SYNCOBJ_STATE_UNSIGNALED,
    KGSL_SYNCOBJ_STATE_SIGNALED,
@@ -1169,6 +1260,12 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
    device->heap.used = 0u;
    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

+   /* Even if the kernel is new enough, the GPU itself may not support it. */
+   device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
+      fd, KGSL_MEMFLAGS_IOCOHERENT |
+             (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
+   device->has_cached_non_coherent_memory = true;
+
    instance->knl = &kgsl_knl_funcs;

    result = tu_physical_device_init(device, instance);
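For completeness, the app-side contract that makes the kgsl cached non-coherent type usable, sketched with assumed handles (device, mem): CPU writes must be flushed before the GPU reads them, and CPU reads of GPU output must be preceded by an invalidate.

VkMappedMemoryRange range = {
   .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   .memory = mem,   /* allocated from a HOST_CACHED type without HOST_COHERENT */
   .offset = 0,
   .size = VK_WHOLE_SIZE,
};

/* After writing through the mapping, push dirty CPU cache lines out
 * (reaches kgsl_sync_cache() with KGSL_GPUMEM_CACHE_TO_GPU). */
vkFlushMappedMemoryRanges(device, 1, &range);

/* After GPU writes complete, drop stale CPU cache lines before reading
 * (reaches kgsl_sync_cache() with KGSL_GPUMEM_CACHE_FROM_GPU). */
vkInvalidateMappedMemoryRanges(device, 1, &range);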