anv: Use WC mapped local memory for block pool BO
Improve DG1 performance:
  Fallout: +7%
  Talos: +15%

v2: Don't drop SNOOP (Lionel)

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18841>
This commit is contained in:
@@ -385,6 +385,13 @@ anv_block_pool_init(struct anv_block_pool *pool,
|
|||||||
pool->state.next = 0;
|
pool->state.next = 0;
|
||||||
pool->state.end = 0;
|
pool->state.end = 0;
|
||||||
|
|
||||||
|
pool->bo_alloc_flags =
|
||||||
|
ANV_BO_ALLOC_FIXED_ADDRESS |
|
||||||
|
ANV_BO_ALLOC_MAPPED |
|
||||||
|
ANV_BO_ALLOC_SNOOPED |
|
||||||
|
ANV_BO_ALLOC_CAPTURE |
|
||||||
|
(device->info->has_local_mem ? ANV_BO_ALLOC_WRITE_COMBINE : 0);
|
||||||
|
|
||||||
result = anv_block_pool_expand_range(pool, initial_size);
|
result = anv_block_pool_expand_range(pool, initial_size);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return result;
|
return result;
|
||||||
@@ -438,17 +445,13 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t size)
|
|||||||
* hard work for us. When using softpin, we're in control and the fixed
|
* hard work for us. When using softpin, we're in control and the fixed
|
||||||
* addresses we choose are fine for base addresses.
|
* addresses we choose are fine for base addresses.
|
||||||
*/
|
*/
|
||||||
enum anv_bo_alloc_flags bo_alloc_flags = ANV_BO_ALLOC_CAPTURE;
|
|
||||||
|
|
||||||
uint32_t new_bo_size = size - pool->size;
|
uint32_t new_bo_size = size - pool->size;
|
||||||
struct anv_bo *new_bo = NULL;
|
struct anv_bo *new_bo = NULL;
|
||||||
VkResult result = anv_device_alloc_bo(pool->device,
|
VkResult result = anv_device_alloc_bo(pool->device,
|
||||||
pool->name,
|
pool->name,
|
||||||
new_bo_size,
|
new_bo_size,
|
||||||
bo_alloc_flags |
|
pool->bo_alloc_flags,
|
||||||
ANV_BO_ALLOC_FIXED_ADDRESS |
|
|
||||||
ANV_BO_ALLOC_MAPPED |
|
|
||||||
ANV_BO_ALLOC_SNOOPED,
|
|
||||||
pool->start_address + pool->size,
|
pool->start_address + pool->size,
|
||||||
&new_bo);
|
&new_bo);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
@@ -1102,6 +1105,12 @@ anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
|
|||||||
{
|
{
|
||||||
pool->name = name;
|
pool->name = name;
|
||||||
pool->device = device;
|
pool->device = device;
|
||||||
|
pool->bo_alloc_flags =
|
||||||
|
ANV_BO_ALLOC_MAPPED |
|
||||||
|
ANV_BO_ALLOC_SNOOPED |
|
||||||
|
ANV_BO_ALLOC_CAPTURE |
|
||||||
|
(device->info->has_local_mem ? ANV_BO_ALLOC_WRITE_COMBINE : 0);
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
|
for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
|
||||||
util_sparse_array_free_list_init(&pool->free_list[i],
|
util_sparse_array_free_list_init(&pool->free_list[i],
|
||||||
&device->bo_cache.bo_map, 0,
|
&device->bo_cache.bo_map, 0,
|
||||||
@@ -1150,9 +1159,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
|
|||||||
VkResult result = anv_device_alloc_bo(pool->device,
|
VkResult result = anv_device_alloc_bo(pool->device,
|
||||||
pool->name,
|
pool->name,
|
||||||
pow2_size,
|
pow2_size,
|
||||||
ANV_BO_ALLOC_MAPPED |
|
pool->bo_alloc_flags,
|
||||||
ANV_BO_ALLOC_SNOOPED |
|
|
||||||
ANV_BO_ALLOC_CAPTURE,
|
|
||||||
0 /* explicit_address */,
|
0 /* explicit_address */,
|
||||||
&bo);
|
&bo);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
|
@@ -472,6 +472,62 @@ void __anv_perf_warn(struct anv_device *device,
|
|||||||
#define anv_assert(x)
|
#define anv_assert(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
enum anv_bo_alloc_flags {
|
||||||
|
/** Specifies that the BO must have a 32-bit address
|
||||||
|
*
|
||||||
|
* This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
|
||||||
|
*/
|
||||||
|
ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
|
||||||
|
|
||||||
|
/** Specifies that the BO may be shared externally */
|
||||||
|
ANV_BO_ALLOC_EXTERNAL = (1 << 1),
|
||||||
|
|
||||||
|
/** Specifies that the BO should be mapped */
|
||||||
|
ANV_BO_ALLOC_MAPPED = (1 << 2),
|
||||||
|
|
||||||
|
/** Specifies that the BO should be snooped so we get coherency */
|
||||||
|
ANV_BO_ALLOC_SNOOPED = (1 << 3),
|
||||||
|
|
||||||
|
/** Specifies that the BO should be captured in error states */
|
||||||
|
ANV_BO_ALLOC_CAPTURE = (1 << 4),
|
||||||
|
|
||||||
|
/** Specifies that the BO will have an address assigned by the caller
|
||||||
|
*
|
||||||
|
* Such BOs do not exist in any VMA heap.
|
||||||
|
*/
|
||||||
|
ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
|
||||||
|
|
||||||
|
/** Enables implicit synchronization on the BO
|
||||||
|
*
|
||||||
|
* This is the opposite of EXEC_OBJECT_ASYNC.
|
||||||
|
*/
|
||||||
|
ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
|
||||||
|
|
||||||
|
/** Enables implicit synchronization on the BO
|
||||||
|
*
|
||||||
|
* This is equivalent to EXEC_OBJECT_WRITE.
|
||||||
|
*/
|
||||||
|
ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
|
||||||
|
|
||||||
|
/** Has an address which is visible to the client */
|
||||||
|
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
|
||||||
|
|
||||||
|
/** This buffer has implicit CCS data attached to it */
|
||||||
|
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
|
||||||
|
|
||||||
|
/** This buffer is allocated from local memory and should be cpu visible */
|
||||||
|
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
|
||||||
|
|
||||||
|
/** For non device local allocations */
|
||||||
|
ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
|
||||||
|
|
||||||
|
/** For local memory, ensure that the writes are combined.
|
||||||
|
*
|
||||||
|
* Should be faster for bo pools, which write but do not read
|
||||||
|
*/
|
||||||
|
ANV_BO_ALLOC_WRITE_COMBINE = (1 << 12),
|
||||||
|
};
|
||||||
|
|
||||||
struct anv_bo {
|
struct anv_bo {
|
||||||
const char *name;
|
const char *name;
|
||||||
|
|
||||||
@@ -669,6 +725,8 @@ struct anv_block_pool {
|
|||||||
uint32_t center_bo_offset;
|
uint32_t center_bo_offset;
|
||||||
|
|
||||||
struct anv_block_state state;
|
struct anv_block_state state;
|
||||||
|
|
||||||
|
enum anv_bo_alloc_flags bo_alloc_flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Block pools are backed by a fixed-size 1GB memfd */
|
/* Block pools are backed by a fixed-size 1GB memfd */
|
||||||
@@ -823,6 +881,8 @@ struct anv_bo_pool {
|
|||||||
|
|
||||||
struct anv_device *device;
|
struct anv_device *device;
|
||||||
|
|
||||||
|
enum anv_bo_alloc_flags bo_alloc_flags;
|
||||||
|
|
||||||
struct util_sparse_array_free_list free_list[16];
|
struct util_sparse_array_free_list free_list[16];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -1223,62 +1283,6 @@ anv_mocs(const struct anv_device *device,
|
|||||||
void anv_device_init_blorp(struct anv_device *device);
|
void anv_device_init_blorp(struct anv_device *device);
|
||||||
void anv_device_finish_blorp(struct anv_device *device);
|
void anv_device_finish_blorp(struct anv_device *device);
|
||||||
|
|
||||||
enum anv_bo_alloc_flags {
|
|
||||||
/** Specifies that the BO must have a 32-bit address
|
|
||||||
*
|
|
||||||
* This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
|
|
||||||
*/
|
|
||||||
ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),
|
|
||||||
|
|
||||||
/** Specifies that the BO may be shared externally */
|
|
||||||
ANV_BO_ALLOC_EXTERNAL = (1 << 1),
|
|
||||||
|
|
||||||
/** Specifies that the BO should be mapped */
|
|
||||||
ANV_BO_ALLOC_MAPPED = (1 << 2),
|
|
||||||
|
|
||||||
/** Specifies that the BO should be snooped so we get coherency */
|
|
||||||
ANV_BO_ALLOC_SNOOPED = (1 << 3),
|
|
||||||
|
|
||||||
/** Specifies that the BO should be captured in error states */
|
|
||||||
ANV_BO_ALLOC_CAPTURE = (1 << 4),
|
|
||||||
|
|
||||||
/** Specifies that the BO will have an address assigned by the caller
|
|
||||||
*
|
|
||||||
* Such BOs do not exist in any VMA heap.
|
|
||||||
*/
|
|
||||||
ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
|
|
||||||
|
|
||||||
/** Enables implicit synchronization on the BO
|
|
||||||
*
|
|
||||||
* This is the opposite of EXEC_OBJECT_ASYNC.
|
|
||||||
*/
|
|
||||||
ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),
|
|
||||||
|
|
||||||
/** Enables implicit synchronization on the BO
|
|
||||||
*
|
|
||||||
* This is equivalent to EXEC_OBJECT_WRITE.
|
|
||||||
*/
|
|
||||||
ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
|
|
||||||
|
|
||||||
/** Has an address which is visible to the client */
|
|
||||||
ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
|
|
||||||
|
|
||||||
/** This buffer has implicit CCS data attached to it */
|
|
||||||
ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
|
|
||||||
|
|
||||||
/** This buffer is allocated from local memory and should be cpu visible */
|
|
||||||
ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE = (1 << 10),
|
|
||||||
|
|
||||||
/** For non device local allocations */
|
|
||||||
ANV_BO_ALLOC_NO_LOCAL_MEM = (1 << 11),
|
|
||||||
|
|
||||||
/** For local memory, ensure that the writes are combined.
|
|
||||||
*
|
|
||||||
* Should be faster for bo pools, which write but do not read
|
|
||||||
*/
|
|
||||||
ANV_BO_ALLOC_WRITE_COMBINE = (1 << 12),
|
|
||||||
};
|
|
||||||
|
|
||||||
VkResult anv_device_alloc_bo(struct anv_device *device,
|
VkResult anv_device_alloc_bo(struct anv_device *device,
|
||||||
const char *name, uint64_t size,
|
const char *name, uint64_t size,
|
||||||
enum anv_bo_alloc_flags alloc_flags,
|
enum anv_bo_alloc_flags alloc_flags,
|
||||||
|
Reference in New Issue
Block a user