anv: Add vma_heap allocators in anv_device
These will be used to assign virtual addresses to soft pinned buffers in a
later patch.

Two allocators are added for separate 'low' and 'high' virtual memory areas.
Another alternative would have been to add a double-sided allocator, which
wasn't done here just because it didn't appear to give any code complexity
advantages.

v2 (Scott Phillips):
 - rename has_exec_softpin to use_softpin (Jason)
 - Only remove bottom one page and top 4 GiB from virt (Jason)
 - refer to comment in anv_allocator about state address + size
   overflowing 48 bits (Jason)
 - Mention hi/lo allocators vs double-sided allocator in commit message
   (Chris)
 - assign state pool memory ranges statically (Jason)

v3 (Jason Ekstrand):
 - Use (LOW|HIGH)_HEAP_(MIN|MAX)_ADDRESS rather than (1 << 31) for
   determining which heap to use in anv_vma_free
 - Only return de-canonicalized addresses to the heap

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Scott D Phillips <scott.d.phillips@intel.com>
This commit is contained in:

committed by
Jason Ekstrand

parent
6e4672f881
commit
aaea46242d
@@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device *device,
|
|||||||
anv_gem_supports_syncobj_wait(fd);
|
anv_gem_supports_syncobj_wait(fd);
|
||||||
device->has_context_priority = anv_gem_has_context_priority(fd);
|
device->has_context_priority = anv_gem_has_context_priority(fd);
|
||||||
|
|
||||||
|
device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
|
||||||
|
&& device->supports_48bit_addresses;
|
||||||
|
|
||||||
bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
|
bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
|
||||||
|
|
||||||
/* Starting with Gen10, the timestamp frequency of the command streamer may
|
/* Starting with Gen10, the timestamp frequency of the command streamer may
|
||||||
@@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice(
|
|||||||
goto fail_fd;
|
goto fail_fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (physical_device->use_softpin) {
|
||||||
|
if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
|
||||||
|
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||||
|
goto fail_fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* keep the page with address zero out of the allocator */
|
||||||
|
util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
|
||||||
|
device->vma_lo_available =
|
||||||
|
physical_device->memory.heaps[physical_device->memory.heap_count - 1].size;
|
||||||
|
|
||||||
|
/* Leave the last 4GiB out of the high vma range, so that no state base
|
||||||
|
* address + size can overflow 48 bits. For more information see the
|
||||||
|
* comment about Wa32bitGeneralStateOffset in anv_allocator.c
|
||||||
|
*/
|
||||||
|
util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
|
||||||
|
HIGH_HEAP_SIZE);
|
||||||
|
device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 :
|
||||||
|
physical_device->memory.heaps[0].size;
|
||||||
|
}
|
||||||
|
|
||||||
/* As per spec, the driver implementation may deny requests to acquire
|
/* As per spec, the driver implementation may deny requests to acquire
|
||||||
* a priority above the default priority (MEDIUM) if the caller does not
|
* a priority above the default priority (MEDIUM) if the caller does not
|
||||||
* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
|
* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
|
||||||
@@ -1887,6 +1911,66 @@ VkResult anv_DeviceWaitIdle(
|
|||||||
return anv_device_submit_simple_batch(device, &batch);
|
return anv_device_submit_simple_batch(device, &batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
|
||||||
|
{
|
||||||
|
if (!(bo->flags & EXEC_OBJECT_PINNED))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
pthread_mutex_lock(&device->vma_mutex);
|
||||||
|
|
||||||
|
bo->offset = 0;
|
||||||
|
|
||||||
|
if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
|
||||||
|
device->vma_hi_available >= bo->size) {
|
||||||
|
uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096);
|
||||||
|
if (addr) {
|
||||||
|
bo->offset = gen_canonical_address(addr);
|
||||||
|
assert(addr == gen_48b_address(bo->offset));
|
||||||
|
device->vma_hi_available -= bo->size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
|
||||||
|
uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096);
|
||||||
|
if (addr) {
|
||||||
|
bo->offset = gen_canonical_address(addr);
|
||||||
|
assert(addr == gen_48b_address(bo->offset));
|
||||||
|
device->vma_lo_available -= bo->size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_unlock(&device->vma_mutex);
|
||||||
|
|
||||||
|
return bo->offset != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
anv_vma_free(struct anv_device *device, struct anv_bo *bo)
|
||||||
|
{
|
||||||
|
if (!(bo->flags & EXEC_OBJECT_PINNED))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const uint64_t addr_48b = gen_48b_address(bo->offset);
|
||||||
|
|
||||||
|
pthread_mutex_lock(&device->vma_mutex);
|
||||||
|
|
||||||
|
if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
|
||||||
|
addr_48b <= LOW_HEAP_MAX_ADDRESS) {
|
||||||
|
util_vma_heap_free(&device->vma_lo, addr_48b, bo->size);
|
||||||
|
device->vma_lo_available += bo->size;
|
||||||
|
} else {
|
||||||
|
assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS &&
|
||||||
|
addr_48b <= HIGH_HEAP_MAX_ADDRESS);
|
||||||
|
util_vma_heap_free(&device->vma_hi, addr_48b, bo->size);
|
||||||
|
device->vma_hi_available += bo->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_unlock(&device->vma_mutex);
|
||||||
|
|
||||||
|
bo->offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
|
anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
|
||||||
{
|
{
|
||||||
|
@@ -50,6 +50,7 @@
|
|||||||
#include "util/list.h"
|
#include "util/list.h"
|
||||||
#include "util/u_atomic.h"
|
#include "util/u_atomic.h"
|
||||||
#include "util/u_vector.h"
|
#include "util/u_vector.h"
|
||||||
|
#include "util/vma.h"
|
||||||
#include "vk_alloc.h"
|
#include "vk_alloc.h"
|
||||||
#include "vk_debug_report.h"
|
#include "vk_debug_report.h"
|
||||||
|
|
||||||
@@ -80,6 +81,55 @@ struct gen_l3_config;
|
|||||||
#include "common/intel_log.h"
|
#include "common/intel_log.h"
|
||||||
#include "wsi_common.h"
|
#include "wsi_common.h"
|
||||||
|
|
||||||
|
/* anv Virtual Memory Layout
 * =========================
 *
 * When the anv driver is determining the virtual graphics addresses of memory
 * objects itself using the softpin mechanism, the following memory ranges
 * will be used.
 *
 * Three special considerations to notice:
 *
 * (1) the dynamic state pool is located within the same 4 GiB as the low
 * heap. This is to work around a VF cache issue described in a comment in
 * anv_physical_device_init_heaps.
 *
 * (2) the binding table pool is located at lower addresses than the surface
 * state pool, within a 4 GiB range. This allows surface state base addresses
 * to cover both binding tables (16 bit offsets) and surface states (32 bit
 * offsets).
 *
 * (3) the last 4 GiB of the address space is withheld from the high
 * heap. Various hardware units will read past the end of an object for
 * various reasons. This healthy margin prevents reads from wrapping around
 * 48-bit addresses.
 */
#define LOW_HEAP_MIN_ADDRESS               0x000000001000ULL /* 4 KiB */
#define LOW_HEAP_MAX_ADDRESS               0x0000bfffffffULL
#define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
#define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
#define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
#define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
#define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
#define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define HIGH_HEAP_MIN_ADDRESS              0x0001c0000000ULL /* 7 GiB */
#define HIGH_HEAP_MAX_ADDRESS              0xfffeffffffffULL

/* Range sizes in bytes.  The MAX addresses above are inclusive, hence the
 * "+ 1".
 */
#define LOW_HEAP_SIZE               \
   (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
#define HIGH_HEAP_SIZE              \
   (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE     \
   (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE     \
   (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define SURFACE_STATE_POOL_SIZE     \
   (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
   (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
|
||||||
|
|
||||||
/* Allowing different clear colors requires us to perform a depth resolve at
|
/* Allowing different clear colors requires us to perform a depth resolve at
|
||||||
* the end of certain render passes. This is because while slow clears store
|
* the end of certain render passes. This is because while slow clears store
|
||||||
* the clear color in the HiZ buffer, fast clears (without a resolve) don't.
|
* the clear color in the HiZ buffer, fast clears (without a resolve) don't.
|
||||||
@@ -791,6 +841,7 @@ struct anv_physical_device {
|
|||||||
bool has_syncobj;
|
bool has_syncobj;
|
||||||
bool has_syncobj_wait;
|
bool has_syncobj_wait;
|
||||||
bool has_context_priority;
|
bool has_context_priority;
|
||||||
|
bool use_softpin;
|
||||||
|
|
||||||
struct anv_device_extension_table supported_extensions;
|
struct anv_device_extension_table supported_extensions;
|
||||||
|
|
||||||
@@ -884,6 +935,12 @@ struct anv_device {
|
|||||||
struct anv_device_extension_table enabled_extensions;
|
struct anv_device_extension_table enabled_extensions;
|
||||||
struct anv_dispatch_table dispatch;
|
struct anv_dispatch_table dispatch;
|
||||||
|
|
||||||
|
pthread_mutex_t vma_mutex;
|
||||||
|
struct util_vma_heap vma_lo;
|
||||||
|
struct util_vma_heap vma_hi;
|
||||||
|
uint64_t vma_lo_available;
|
||||||
|
uint64_t vma_hi_available;
|
||||||
|
|
||||||
struct anv_bo_pool batch_bo_pool;
|
struct anv_bo_pool batch_bo_pool;
|
||||||
|
|
||||||
struct anv_bo_cache bo_cache;
|
struct anv_bo_cache bo_cache;
|
||||||
@@ -977,6 +1034,9 @@ int anv_gem_syncobj_wait(struct anv_device *device,
|
|||||||
uint32_t *handles, uint32_t num_handles,
|
uint32_t *handles, uint32_t num_handles,
|
||||||
int64_t abs_timeout_ns, bool wait_all);
|
int64_t abs_timeout_ns, bool wait_all);
|
||||||
|
|
||||||
|
/* Assign a softpin virtual address to bo (stored in bo->offset).  Returns
 * true if the BO is not pinned or an address was assigned, false if neither
 * the high nor the low heap could supply one.
 */
bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);

/* Give a pinned BO's address range back to the heap it was allocated from
 * and reset bo->offset to 0.  BOs that are not pinned are ignored.
 */
void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
|
||||||
|
|
||||||
VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
|
VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
|
||||||
|
|
||||||
struct anv_reloc_list {
|
struct anv_reloc_list {
|
||||||
|
Reference in New Issue
Block a user