anv: remove the LOCAL_MEM allocation bit

We always want to use local memory if possible; we'll just add the
system memory heap if the buffer needs to be host visible.

v2: Drop some usages of ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17873>
This commit is contained in:
Lionel Landwerlin
2022-08-02 16:17:31 +03:00
parent a254aff643
commit 9027c5df4c
4 changed files with 20 additions and 36 deletions

View File

@@ -509,7 +509,6 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
pool->name, pool->name,
new_bo_size, new_bo_size,
bo_alloc_flags | bo_alloc_flags |
ANV_BO_ALLOC_LOCAL_MEM |
ANV_BO_ALLOC_FIXED_ADDRESS | ANV_BO_ALLOC_FIXED_ADDRESS |
ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED, ANV_BO_ALLOC_SNOOPED,
@@ -1383,7 +1382,6 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
VkResult result = anv_device_alloc_bo(pool->device, VkResult result = anv_device_alloc_bo(pool->device,
pool->name, pool->name,
pow2_size, pow2_size,
ANV_BO_ALLOC_LOCAL_MEM |
ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED | ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_CAPTURE, ANV_BO_ALLOC_CAPTURE,
@@ -1490,9 +1488,8 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
* *
* so nothing will ever touch the top page. * so nothing will ever touch the top page.
*/ */
enum anv_bo_alloc_flags alloc_flags = ANV_BO_ALLOC_LOCAL_MEM; const enum anv_bo_alloc_flags alloc_flags =
if (devinfo->verx10 < 125) devinfo->verx10 < 125 ? ANV_BO_ALLOC_32BIT_ADDRESS : 0;
alloc_flags |= ANV_BO_ALLOC_32BIT_ADDRESS;
VkResult result = anv_device_alloc_bo(device, "scratch", size, VkResult result = anv_device_alloc_bo(device, "scratch", size,
alloc_flags, alloc_flags,
0 /* explicit_address */, 0 /* explicit_address */,
@@ -1668,10 +1665,6 @@ anv_device_alloc_bo(struct anv_device *device,
uint64_t explicit_address, uint64_t explicit_address,
struct anv_bo **bo_out) struct anv_bo **bo_out)
{ {
if (!(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM))
anv_perf_warn(VK_LOG_NO_OBJS(&device->physical->instance->vk.base),
"system memory used");
if (!device->physical->has_implicit_ccs) if (!device->physical->has_implicit_ccs)
assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)); assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS));
@@ -1702,18 +1695,20 @@ anv_device_alloc_bo(struct anv_device *device,
struct drm_i915_gem_memory_class_instance regions[2]; struct drm_i915_gem_memory_class_instance regions[2];
uint32_t nregions = 0; uint32_t nregions = 0;
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) { /* This always tries to put the object in local memory. Here
/* vram_non_mappable & vram_mappable actually are the same region. */ * vram_non_mappable & vram_mappable actually are the same region.
*/
regions[nregions++] = device->physical->vram_non_mappable.region; regions[nregions++] = device->physical->vram_non_mappable.region;
} else {
regions[nregions++] = device->physical->sys.region;
}
/* If the buffer is mapped on the host, add the system memory region.
* This ensures that if the buffer cannot live in mappable local memory,
* it can be spilled to system memory.
*/
uint32_t flags = 0; uint32_t flags = 0;
if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE) { if ((alloc_flags & ANV_BO_ALLOC_MAPPED) ||
assert(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM); (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE)) {
/* We're required to add smem as a region when using mappable vram. */
regions[nregions++] = device->physical->sys.region; regions[nregions++] = device->physical->sys.region;
if (device->physical->vram_non_mappable.size > 0)
flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS; flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;
} }
@@ -1737,8 +1732,7 @@ anv_device_alloc_bo(struct anv_device *device,
.is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL), .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL),
.has_client_visible_address = .has_client_visible_address =
(alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
.has_implicit_ccs = ccs_size > 0 || (device->info->verx10 >= 125 && .has_implicit_ccs = ccs_size > 0 || device->info->verx10 >= 125,
(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM)),
}; };
if (alloc_flags & ANV_BO_ALLOC_MAPPED) { if (alloc_flags & ANV_BO_ALLOC_MAPPED) {

View File

@@ -329,7 +329,7 @@ anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
struct anv_bo *new_bo; struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT queries shadow", VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
ray_shadow_size, ray_shadow_size,
ANV_BO_ALLOC_LOCAL_MEM, /* alloc_flags */ 0, /* alloc_flags */
0, /* explicit_address */ 0, /* explicit_address */
&new_bo); &new_bo);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
@@ -1087,7 +1087,7 @@ void anv_CmdSetRayTracingPipelineStackSizeKHR(
struct anv_bo *new_bo; struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT scratch", VkResult result = anv_device_alloc_bo(device, "RT scratch",
rt->scratch.layout.total_size, rt->scratch.layout.total_size,
ANV_BO_ALLOC_LOCAL_MEM, 0, /* alloc_flags */
0, /* explicit_address */ 0, /* explicit_address */
&new_bo); &new_bo);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {

View File

@@ -3407,8 +3407,7 @@ VkResult anv_CreateDevice(
result = anv_device_alloc_bo(device, "workaround", 4096, result = anv_device_alloc_bo(device, "workaround", 4096,
ANV_BO_ALLOC_CAPTURE | ANV_BO_ALLOC_CAPTURE |
ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_MAPPED,
ANV_BO_ALLOC_LOCAL_MEM,
0 /* explicit_address */, 0 /* explicit_address */,
&device->workaround_bo); &device->workaround_bo);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@@ -3433,7 +3432,7 @@ VkResult anv_CreateDevice(
result = anv_device_alloc_bo(device, "ray queries", result = anv_device_alloc_bo(device, "ray queries",
ray_queries_size, ray_queries_size,
ANV_BO_ALLOC_LOCAL_MEM, 0,
0 /* explicit_address */, 0 /* explicit_address */,
&device->ray_query_bo); &device->ray_query_bo);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@@ -3997,12 +3996,6 @@ VkResult anv_AllocateMemory(
goto success; goto success;
} }
/* Set ALLOC_LOCAL_MEM flag if heap has device local bit set and requested
* memory property flag has DEVICE_LOCAL_BIT set.
*/
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM;
/* Regular allocate (not importing memory). */ /* Regular allocate (not importing memory). */
result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize, result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,

View File

@@ -1368,11 +1368,8 @@ enum anv_bo_alloc_flags {
/** This buffer has implicit CCS data attached to it */ /** This buffer has implicit CCS data attached to it */
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9), ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
/** This buffer is allocated from local memory */
ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
/** This buffer is allocated from local memory and should be cpu visible */ /** This buffer is allocated from local memory and should be cpu visible */
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 11), ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
}; };
VkResult anv_device_alloc_bo(struct anv_device *device, VkResult anv_device_alloc_bo(struct anv_device *device,