From 492761ab8d6ad94660d8465a93f41e244664da6e Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 2 Sep 2022 21:53:18 +0300 Subject: [PATCH] anv: add a new NO_LOCAL_MEM allocation flag We found a perf regression with 9027c5df4c51b ("anv: remove the LOCAL_MEM allocation bit") which seems to be because we oversubscribe local memory, leading i915 to swap things in/out too much. This change avoids putting buffers in local memory if they are not allocated from a DEVICE_LOCAL heap. Maybe we can revisit this later if i915 is better able to deal with more buffers in local memory. v2: Remove implicit_ccs from anv_bo when not in lmem (Ivan) Signed-off-by: Lionel Landwerlin Fixes: 9027c5df4c51b ("anv: remove the LOCAL_MEM allocation bit") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7188 Reviewed-by: Ivan Briano Part-of: --- src/intel/vulkan/anv_allocator.c | 13 +++++++++---- src/intel/vulkan/anv_device.c | 3 +++ src/intel/vulkan/anv_private.h | 3 +++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 80f96037427..4f637f4f424 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1463,15 +1463,19 @@ anv_device_alloc_bo(struct anv_device *device, /* This always try to put the object in local memory. Here * vram_non_mappable & vram_mappable actually are the same region. */ - regions[nregions++] = device->physical->vram_non_mappable.region; + if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) + regions[nregions++] = device->physical->sys.region; + else + regions[nregions++] = device->physical->vram_non_mappable.region; /* If the buffer is mapped on the host, add the system memory region. * This ensures that if the buffer cannot live in mappable local memory, * it can be spilled to system memory. 
*/ uint32_t flags = 0; - if ((alloc_flags & ANV_BO_ALLOC_MAPPED) || - (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE)) { + if (!(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) && + ((alloc_flags & ANV_BO_ALLOC_MAPPED) || + (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE))) { regions[nregions++] = device->physical->sys.region; if (device->physical->vram_non_mappable.size > 0) flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS; @@ -1497,7 +1501,8 @@ anv_device_alloc_bo(struct anv_device *device, .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL), .has_client_visible_address = (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, - .has_implicit_ccs = ccs_size > 0 || device->info->verx10 >= 125, + .has_implicit_ccs = ccs_size > 0 || + (device->info->verx10 >= 125 && !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)), }; if (alloc_flags & ANV_BO_ALLOC_MAPPED) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index db84230191e..87760eb9c52 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -3852,6 +3852,9 @@ VkResult anv_AllocateMemory( (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE; + if (!(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM; + if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT) alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8e6eff5b56e..deb0ce73a6d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1237,6 +1237,9 @@ enum anv_bo_alloc_flags { /** This buffer is allocated from local memory and should be cpu visible */ ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10), + + /** For non device local allocations */ + ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11), }; VkResult anv_device_alloc_bo(struct anv_device *device,