anv: Delete implicit CCS code

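Stop allocating an implicit CCS range at the end of some BOs; Anv no longer
uses that memory range. This also removes the bookkeeping that existed only
to support it: the ANV_BO_ALLOC_IMPLICIT_CCS flag, anv_bo::_ccs_size,
anv_bo::has_implicit_ccs, and anv_physical_device::has_implicit_ccs.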

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jianxun Zhang <jianxun.zhang@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25003>
This commit is contained in:
Nanley Chery
2023-08-28 18:46:38 -04:00
committed by Marge Bot
parent 4cdd3178fb
commit 9e402e93d2
5 changed files with 12 additions and 85 deletions

View File

@@ -1342,7 +1342,7 @@ anv_bo_vma_free(struct anv_device *device, struct anv_bo *bo)
{
if (bo->offset != 0 && !bo->has_fixed_address) {
assert(bo->vma_heap != NULL);
anv_vma_free(device, bo->vma_heap, bo->offset, bo->size + bo->_ccs_size);
anv_vma_free(device, bo->vma_heap, bo->offset, bo->size);
}
bo->vma_heap = NULL;
}
@@ -1384,16 +1384,15 @@ anv_bo_vma_alloc_or_close(struct anv_device *device,
*
* Only available on ICL+.
*/
if (device->info->ver >= 11 && (bo->size + bo->_ccs_size) >= 1 * 1024 * 1024)
if (device->info->ver >= 11 && bo->size >= 1 * 1024 * 1024)
align = MAX2(2 * 1024 * 1024, align);
if (alloc_flags & ANV_BO_ALLOC_FIXED_ADDRESS) {
bo->has_fixed_address = true;
bo->offset = intel_canonical_address(explicit_address);
} else {
bo->offset = anv_vma_alloc(device, bo->size + bo->_ccs_size,
align, alloc_flags, explicit_address,
&bo->vma_heap);
bo->offset = anv_vma_alloc(device, bo->size, align, alloc_flags,
explicit_address, &bo->vma_heap);
if (bo->offset == 0) {
anv_bo_unmap_close(device, bo);
return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
@@ -1412,27 +1411,12 @@ anv_device_alloc_bo(struct anv_device *device,
uint64_t explicit_address,
struct anv_bo **bo_out)
{
if (!device->physical->has_implicit_ccs)
assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS));
const uint32_t bo_flags =
device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);
/* The kernel is going to give us whole pages anyway. And we
* also need 4KB alignment for 1MB AUX buffer that follows
* the main region. The 4KB also covers 64KB AUX granularity
* that has 256B AUX mapping to the main.
*/
/* The kernel is going to give us whole pages anyway. */
size = align64(size, 4096);
uint64_t ccs_size = 0;
if (device->info->has_aux_map && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)) {
uint64_t aux_ratio =
intel_aux_get_main_to_aux_ratio(device->aux_map_ctx);
/* See anv_bo::_ccs_size */
ccs_size = align64(DIV_ROUND_UP(size, aux_ratio), 4096);
}
const struct intel_memory_class_instance *regions[2];
uint32_t nregions = 0;
@@ -1462,8 +1446,7 @@ anv_device_alloc_bo(struct anv_device *device,
uint64_t actual_size;
uint32_t gem_handle = device->kmd_backend->gem_create(device, regions,
nregions,
size + ccs_size,
nregions, size,
alloc_flags,
&actual_size);
if (gem_handle == 0)
@@ -1475,14 +1458,11 @@ anv_device_alloc_bo(struct anv_device *device,
.refcount = 1,
.offset = -1,
.size = size,
._ccs_size = ccs_size,
.actual_size = actual_size,
.flags = bo_flags,
.is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL),
.has_client_visible_address =
(alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
.has_implicit_ccs = ccs_size > 0 ||
(device->info->verx10 >= 125 && !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)),
.vram_only = nregions == 1 &&
regions[0] == device->physical->vram_non_mappable.region,
};
@@ -1566,9 +1546,6 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device,
ANV_BO_ALLOC_DEDICATED |
ANV_BO_ALLOC_FIXED_ADDRESS)));
assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS) ||
(device->physical->has_implicit_ccs && device->info->has_aux_map));
struct anv_bo_cache *cache = &device->bo_cache;
const uint32_t bo_flags =
device->kmd_backend->bo_alloc_flags_to_bo_flags(device, alloc_flags);
@@ -1670,9 +1647,6 @@ anv_device_import_bo(struct anv_device *device,
ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_FIXED_ADDRESS)));
assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS) ||
(device->physical->has_implicit_ccs && device->info->has_aux_map));
struct anv_bo_cache *cache = &device->bo_cache;
pthread_mutex_lock(&cache->mutex);
@@ -1731,7 +1705,6 @@ anv_device_import_bo(struct anv_device *device,
new_bo.size = size;
new_bo.actual_size = size;
assert(new_bo._ccs_size == 0);
VkResult result = anv_bo_vma_alloc_or_close(device, &new_bo,
alloc_flags,
client_address);

View File

@@ -1408,9 +1408,6 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
device->use_call_secondary =
!debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
device->has_implicit_ccs = device->info.has_aux_map ||
device->info.verx10 >= 125;
device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
device->uses_ex_bso = device->info.verx10 >= 125;
@@ -3971,10 +3968,6 @@ VkResult anv_AllocateMemory(
}
}
/* By default, we want all VkDeviceMemory objects to support CCS */
if (device->physical->has_implicit_ccs && device->info->has_aux_map)
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
/* If i915 reported a mappable/non_mappable vram regions and the
* application want lmem mappable, then we need to use the
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.

View File

@@ -386,8 +386,8 @@ enum anv_bo_alloc_flags {
/** Has an address which is visible to the client */
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
/** This buffer has implicit CCS data attached to it */
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
/** This BO will be dedicated to a buffer or an image */
ANV_BO_ALLOC_DEDICATED = (1 << 9),
/** This buffer is allocated from local memory and should be cpu visible */
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
@@ -400,9 +400,6 @@ enum anv_bo_alloc_flags {
/** For descriptor pools */
ANV_BO_ALLOC_DESCRIPTOR_POOL = (1 << 13),
/** This BO will be dedicated to a buffer or an image */
ANV_BO_ALLOC_DEDICATED = (1 << 14),
};
struct anv_bo {
@@ -434,7 +431,7 @@ struct anv_bo {
*/
uint64_t offset;
/** Size of the buffer not including implicit aux */
/** Size of the buffer */
uint64_t size;
/* Map for internally mapped BOs.
@@ -444,32 +441,8 @@ struct anv_bo {
*/
void *map;
/** Size of the implicit CCS range at the end of the buffer
*
* On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
* page of main surface data maps to a 256B chunk of CCS data and that
* mapping is provided on TGL-LP by the AUX table which maps virtual memory
* addresses in the main surface to virtual memory addresses for CCS data.
*
* Because we can't change these maps around easily and because Vulkan
* allows two VkImages to be bound to overlapping memory regions (as long
* as the app is careful), it's not feasible to make this mapping part of
* the image. (On Gfx11 and earlier, the mapping was provided via
* RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
* Instead, we attach the CCS data directly to the buffer object and setup
* the AUX table mapping at BO creation time.
*
* This field is for internal tracking use by the BO allocator only and
* should not be touched by other parts of the code. If something wants to
* know if a BO has implicit CCS data, it should instead look at the
* has_implicit_ccs boolean below.
*
* This data is not included in maps of this buffer.
*/
uint32_t _ccs_size;
/* The actual size of bo allocated by kmd, basically:
* align(size + _ccs_size, mem_alignment)
* align(size, mem_alignment)
*/
uint64_t actual_size;
@@ -488,9 +461,6 @@ struct anv_bo {
/** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
bool has_client_visible_address:1;
/** True if this BO has implicit CCS data attached to it */
bool has_implicit_ccs:1;
/** True if this BO can only live in VRAM */
bool vram_only:1;
};
@@ -908,13 +878,6 @@ struct anv_physical_device {
*/
bool has_reg_timestamp;
/** True if this device has implicit AUX
*
* If true, CCS is handled as an implicit attachment to the BO rather than
* as an explicitly bound surface.
*/
bool has_implicit_ccs;
/** True if we can create protected contexts. */
bool has_protected_contexts;

View File

@@ -422,7 +422,6 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
if ((initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) &&
cmd_buffer->device->physical->has_implicit_ccs &&
cmd_buffer->device->info->has_aux_map) {
/* If will_full_fast_clear is set, the caller promises to fast-clear the
* largest portion of the specified range as it can.

View File

@@ -636,12 +636,11 @@ anv_i915_debug_submit(const struct anv_execbuf *execbuf)
(float)total_vram_only_size_kb / 1024.0f);
for (uint32_t i = 0; i < execbuf->bo_count; i++) {
const struct anv_bo *bo = execbuf->bos[i];
uint64_t size = bo->size + bo->_ccs_size;
fprintf(stderr, " BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
"KB handle=%05u capture=%u vram_only=%u name=%s\n",
bo->offset, bo->offset + size - 1, size / 1024, bo->gem_handle,
(bo->flags & EXEC_OBJECT_CAPTURE) != 0,
bo->offset, bo->offset + bo->size - 1, bo->size / 1024,
bo->gem_handle, (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
bo->vram_only, bo->name);
}
}