anv: Use WC mapped local memory for block pool BO

Improve DG1 performance:

  Fallout: +7%
  Talos:  +15%

v2: Don't drop SNOOP (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18841>
This commit is contained in:
Mark Janes
2022-09-27 11:53:00 +03:00
committed by Marge Bot
parent 755b413ffa
commit 5c62ad34b6
2 changed files with 75 additions and 64 deletions

View File

@@ -385,6 +385,13 @@ anv_block_pool_init(struct anv_block_pool *pool,
pool->state.next = 0;
pool->state.end = 0;
pool->bo_alloc_flags =
ANV_BO_ALLOC_FIXED_ADDRESS |
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_CAPTURE |
(device->info->has_local_mem ? ANV_BO_ALLOC_WRITE_COMBINE : 0);
result = anv_block_pool_expand_range(pool, initial_size);
if (result != VK_SUCCESS)
return result;
@@ -438,17 +445,13 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t size)
* hard work for us. When using softpin, we're in control and the fixed
* addresses we choose are fine for base addresses.
*/
enum anv_bo_alloc_flags bo_alloc_flags = ANV_BO_ALLOC_CAPTURE;
uint32_t new_bo_size = size - pool->size;
struct anv_bo *new_bo = NULL;
VkResult result = anv_device_alloc_bo(pool->device,
pool->name,
new_bo_size,
bo_alloc_flags |
ANV_BO_ALLOC_FIXED_ADDRESS |
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED,
pool->bo_alloc_flags,
pool->start_address + pool->size,
&new_bo);
if (result != VK_SUCCESS)
@@ -1102,6 +1105,12 @@ anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
{
pool->name = name;
pool->device = device;
pool->bo_alloc_flags =
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_CAPTURE |
(device->info->has_local_mem ? ANV_BO_ALLOC_WRITE_COMBINE : 0);
for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
util_sparse_array_free_list_init(&pool->free_list[i],
&device->bo_cache.bo_map, 0,
@@ -1150,9 +1159,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
VkResult result = anv_device_alloc_bo(pool->device,
pool->name,
pow2_size,
ANV_BO_ALLOC_MAPPED |
ANV_BO_ALLOC_SNOOPED |
ANV_BO_ALLOC_CAPTURE,
pool->bo_alloc_flags,
0 /* explicit_address */,
&bo);
if (result != VK_SUCCESS)

View File

@@ -472,6 +472,62 @@ void __anv_perf_warn(struct anv_device *device,
#define anv_assert(x)
#endif
/** Bit flags describing how a BO should be allocated.
 *
 * Every value is a distinct single bit, so flags are combined with
 * bitwise OR and passed around as one mask of this enum type.
 */
enum anv_bo_alloc_flags {
/** Specifies that the BO must have a 32-bit address
*
* This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
*/
ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
/** Specifies that the BO may be shared externally */
ANV_BO_ALLOC_EXTERNAL = (1 << 1),
/** Specifies that the BO should be mapped */
ANV_BO_ALLOC_MAPPED = (1 << 2),
/** Specifies that the BO should be snooped so we get coherency */
ANV_BO_ALLOC_SNOOPED = (1 << 3),
/** Specifies that the BO should be captured in error states */
ANV_BO_ALLOC_CAPTURE = (1 << 4),
/** Specifies that the BO will have an address assigned by the caller
*
* Such BOs do not exist in any VMA heap.
*/
ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
/** Enables implicit synchronization on the BO
*
* This is the opposite of EXEC_OBJECT_ASYNC.
*/
ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
/** Write-side companion to implicit synchronization on the BO
*
* This is equivalent to EXEC_OBJECT_WRITE.
*
* NOTE(review): presumably this marks the BO as written for implicit
* sync purposes; the previous summary line was a verbatim copy of the
* ANV_BO_ALLOC_IMPLICIT_SYNC one. Confirm against the execbuf code.
*/
ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
/** Has an address which is visible to the client */
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
/** This buffer has implicit CCS data attached to it */
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
/** This buffer is allocated from local memory and should be CPU visible */
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
/** For allocations that are not device-local */
ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
/** For local memory, ensure that the writes are combined.
*
* Should be faster for bo pools, which write but do not read.
*/
ANV_BO_ALLOC_WRITE_COMBINE = (1 << 12),
};
struct anv_bo {
const char *name;
@@ -669,6 +725,8 @@ struct anv_block_pool {
uint32_t center_bo_offset;
struct anv_block_state state;
enum anv_bo_alloc_flags bo_alloc_flags;
};
/* Block pools are backed by a fixed-size 1GB memfd */
@@ -823,6 +881,8 @@ struct anv_bo_pool {
struct anv_device *device;
enum anv_bo_alloc_flags bo_alloc_flags;
struct util_sparse_array_free_list free_list[16];
};
@@ -1223,62 +1283,6 @@ anv_mocs(const struct anv_device *device,
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);
/** Bit flags describing how a BO should be allocated.
 *
 * Every value is a distinct single bit, so flags are combined with
 * bitwise OR and passed around as one mask of this enum type.
 */
enum anv_bo_alloc_flags {
/** Specifies that the BO must have a 32-bit address
*
* This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
*/
ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
/** Specifies that the BO may be shared externally */
ANV_BO_ALLOC_EXTERNAL = (1 << 1),
/** Specifies that the BO should be mapped */
ANV_BO_ALLOC_MAPPED = (1 << 2),
/** Specifies that the BO should be snooped so we get coherency */
ANV_BO_ALLOC_SNOOPED = (1 << 3),
/** Specifies that the BO should be captured in error states */
ANV_BO_ALLOC_CAPTURE = (1 << 4),
/** Specifies that the BO will have an address assigned by the caller
*
* Such BOs do not exist in any VMA heap.
*/
ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
/** Enables implicit synchronization on the BO
*
* This is the opposite of EXEC_OBJECT_ASYNC.
*/
ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
/** Write-side companion to implicit synchronization on the BO
*
* This is equivalent to EXEC_OBJECT_WRITE.
*
* NOTE(review): presumably this marks the BO as written for implicit
* sync purposes; the previous summary line was a verbatim copy of the
* ANV_BO_ALLOC_IMPLICIT_SYNC one. Confirm against the execbuf code.
*/
ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
/** Has an address which is visible to the client */
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
/** This buffer has implicit CCS data attached to it */
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
/** This buffer is allocated from local memory and should be CPU visible */
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
/** For allocations that are not device-local */
ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
/** For local memory, ensure that the writes are combined.
*
* Should be faster for bo pools, which write but do not read.
*/
ANV_BO_ALLOC_WRITE_COMBINE = (1 << 12),
};
VkResult anv_device_alloc_bo(struct anv_device *device,
const char *name, uint64_t size,
enum anv_bo_alloc_flags alloc_flags,