anv: make internal address space allocation more dynamic
We're about to manipulate these pools, and dealing with the fixed address ranges is painful. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22847>
This commit is contained in:
@@ -486,6 +486,8 @@ Intel driver environment variables
|
||||
dump shader assembly for fragment shaders
|
||||
``gs``
|
||||
dump shader assembly for geometry shaders
|
||||
``heaps``
|
||||
print information about the driver's heaps (Anv only)
|
||||
``hex``
|
||||
print instruction hex dump with the disassembly
|
||||
``l3``
|
||||
|
@@ -101,6 +101,7 @@ static const struct debug_control debug_control[] = {
|
||||
{ "capture-all", DEBUG_CAPTURE_ALL },
|
||||
{ "perf-symbol-names", DEBUG_PERF_SYMBOL_NAMES },
|
||||
{ "swsb-stall", DEBUG_SWSB_STALL },
|
||||
{ "heaps", DEBUG_HEAPS },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
|
@@ -91,6 +91,7 @@ extern uint64_t intel_debug;
|
||||
#define DEBUG_CAPTURE_ALL (1ull << 43)
|
||||
#define DEBUG_PERF_SYMBOL_NAMES (1ull << 44)
|
||||
#define DEBUG_SWSB_STALL (1ull << 45)
|
||||
#define DEBUG_HEAPS (1ull << 46)
|
||||
|
||||
#define DEBUG_ANY (~0ull)
|
||||
|
||||
|
@@ -1414,6 +1414,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_compiler;
|
||||
|
||||
anv_physical_device_init_va_ranges(device);
|
||||
|
||||
anv_physical_device_init_disk_cache(device);
|
||||
|
||||
if (instance->vk.enabled_extensions.KHR_display) {
|
||||
@@ -3064,9 +3066,9 @@ VkResult anv_CreateDevice(
|
||||
decode_get_bo, NULL, device);
|
||||
|
||||
decoder->engine = physical_device->queue.families[i].engine_class;
|
||||
decoder->dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
|
||||
decoder->surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
|
||||
decoder->instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS;
|
||||
decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
|
||||
decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
|
||||
decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3142,18 +3144,16 @@ VkResult anv_CreateDevice(
|
||||
|
||||
/* keep the page with address zero out of the allocator */
|
||||
util_vma_heap_init(&device->vma_lo,
|
||||
LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
|
||||
device->physical->va.low_heap.addr,
|
||||
device->physical->va.low_heap.size);
|
||||
|
||||
util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
|
||||
CLIENT_VISIBLE_HEAP_SIZE);
|
||||
util_vma_heap_init(&device->vma_cva,
|
||||
device->physical->va.client_visible_heap.addr,
|
||||
device->physical->va.client_visible_heap.size);
|
||||
|
||||
/* Leave the last 4GiB out of the high vma range, so that no state
|
||||
* base address + size can overflow 48 bits. For more information see
|
||||
* the comment about Wa32bitGeneralStateOffset in anv_allocator.c
|
||||
*/
|
||||
util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
|
||||
physical_device->gtt_size - (1ull << 32) -
|
||||
HIGH_HEAP_MIN_ADDRESS);
|
||||
util_vma_heap_init(&device->vma_hi,
|
||||
device->physical->va.high_heap.addr,
|
||||
device->physical->va.high_heap.size);
|
||||
|
||||
list_inithead(&device->memory_objects);
|
||||
|
||||
@@ -3191,13 +3191,13 @@ VkResult anv_CreateDevice(
|
||||
*/
|
||||
result = anv_state_pool_init(&device->general_state_pool, device,
|
||||
"general pool",
|
||||
0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
|
||||
0, device->physical->va.general_state_pool.addr, 16384);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_batch_bo_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->dynamic_state_pool, device,
|
||||
"dynamic pool",
|
||||
DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
|
||||
device->physical->va.dynamic_state_pool.addr, 0, 16384);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_general_state_pool;
|
||||
|
||||
@@ -3214,7 +3214,8 @@ VkResult anv_CreateDevice(
|
||||
|
||||
result = anv_state_pool_init(&device->instruction_state_pool, device,
|
||||
"instruction pool",
|
||||
INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
|
||||
device->physical->va.instruction_state_pool.addr,
|
||||
0, 16384);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_dynamic_state_pool;
|
||||
|
||||
@@ -3224,25 +3225,29 @@ VkResult anv_CreateDevice(
|
||||
*/
|
||||
result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
|
||||
"scratch surface state pool",
|
||||
SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
device->physical->va.scratch_surface_state_pool.addr,
|
||||
0, 4096);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_instruction_state_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
|
||||
"internal surface state pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
|
||||
SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
|
||||
device->physical->va.internal_surface_state_pool.addr,
|
||||
device->physical->va.scratch_surface_state_pool.size,
|
||||
4096);
|
||||
} else {
|
||||
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
|
||||
"internal surface state pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
device->physical->va.internal_surface_state_pool.addr,
|
||||
0, 4096);
|
||||
}
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_scratch_surface_state_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
|
||||
"bindless surface state pool",
|
||||
BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
device->physical->va.bindless_surface_state_pool.addr,
|
||||
0, 4096);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_internal_surface_state_pool;
|
||||
|
||||
@@ -3252,15 +3257,21 @@ VkResult anv_CreateDevice(
|
||||
*/
|
||||
result = anv_state_pool_init(&device->binding_table_pool, device,
|
||||
"binding table pool",
|
||||
BINDING_TABLE_POOL_MIN_ADDRESS, 0,
|
||||
device->physical->va.binding_table_pool.addr, 0,
|
||||
BINDING_TABLE_POOL_BLOCK_SIZE);
|
||||
} else {
|
||||
int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
|
||||
(int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
|
||||
/* The binding table should be in front of the surface states in virtual
|
||||
* address space so that all surface states can be express as relative
|
||||
* offsets from the binding table location.
|
||||
*/
|
||||
assert(device->physical->va.binding_table_pool.addr <
|
||||
device->physical->va.internal_surface_state_pool.addr);
|
||||
int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
|
||||
(int64_t)device->physical->va.internal_surface_state_pool.addr;
|
||||
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
|
||||
result = anv_state_pool_init(&device->binding_table_pool, device,
|
||||
"binding table pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
|
||||
device->physical->va.internal_surface_state_pool.addr,
|
||||
bt_pool_offset,
|
||||
BINDING_TABLE_POOL_BLOCK_SIZE);
|
||||
}
|
||||
|
@@ -115,9 +115,10 @@ anv_shader_bin_create(struct anv_device *device,
|
||||
memcpy(shader->kernel.map, kernel_data, kernel_size);
|
||||
shader->kernel_size = kernel_size;
|
||||
|
||||
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
|
||||
shader->kernel.offset +
|
||||
prog_data_in->const_data_offset;
|
||||
uint64_t shader_data_addr =
|
||||
device->physical->va.instruction_state_pool.addr +
|
||||
shader->kernel.offset +
|
||||
prog_data_in->const_data_offset;
|
||||
|
||||
int rv_count = 0;
|
||||
struct brw_shader_reloc_value reloc_values[5];
|
||||
@@ -125,10 +126,10 @@ anv_shader_bin_create(struct anv_device *device,
|
||||
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
|
||||
.value = shader_data_addr,
|
||||
};
|
||||
assert(shader_data_addr >> 32 == INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32);
|
||||
assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
|
||||
.value = INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32
|
||||
.value = device->physical->va.instruction_state_pool.addr >> 32,
|
||||
};
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
|
||||
@@ -137,9 +138,10 @@ anv_shader_bin_create(struct anv_device *device,
|
||||
if (brw_shader_stage_is_bindless(stage)) {
|
||||
const struct brw_bs_prog_data *bs_prog_data =
|
||||
brw_bs_prog_data_const(prog_data_in);
|
||||
uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
|
||||
shader->kernel.offset +
|
||||
bs_prog_data->resume_sbt_offset;
|
||||
uint64_t resume_sbt_addr =
|
||||
device->physical->va.instruction_state_pool.addr +
|
||||
shader->kernel.offset +
|
||||
bs_prog_data->resume_sbt_offset;
|
||||
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
|
||||
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
|
||||
.value = resume_sbt_addr,
|
||||
|
@@ -132,69 +132,7 @@ struct intel_perf_query_result;
|
||||
|
||||
#define NSEC_PER_SEC 1000000000ull
|
||||
|
||||
/* anv Virtual Memory Layout
|
||||
* =========================
|
||||
*
|
||||
* When the anv driver is determining the virtual graphics addresses of memory
|
||||
* objects itself using the softpin mechanism, the following memory ranges
|
||||
* will be used.
|
||||
*
|
||||
* Three special considerations to notice:
|
||||
*
|
||||
* (1) the dynamic state pool is located within the same 4 GiB as the low
|
||||
* heap. This is to work around a VF cache issue described in a comment in
|
||||
* anv_physical_device_init_heaps.
|
||||
*
|
||||
* (2) the binding table pool is located at lower addresses than the BT
|
||||
* (binding table) surface state pool, within a 4 GiB range which also
|
||||
* contains the bindless surface state pool. This allows surface state base
|
||||
* addresses to cover both binding tables (16 bit offsets), the internal
|
||||
* surface states (32 bit offsets) and the bindless surface states.
|
||||
*
|
||||
* (3) the last 4 GiB of the address space is withheld from the high
|
||||
* heap. Various hardware units will read past the end of an object for
|
||||
* various reasons. This healthy margin prevents reads from wrapping around
|
||||
* 48-bit addresses.
|
||||
*/
|
||||
#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */
|
||||
#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL
|
||||
#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */
|
||||
#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL
|
||||
#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
|
||||
#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
|
||||
#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
|
||||
#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
|
||||
#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
|
||||
#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
|
||||
#define SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB (8MiB overlaps surface state pool) */
|
||||
#define SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001407fffffULL
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB (64MiB) */
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001c3ffffffULL
|
||||
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */
|
||||
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL
|
||||
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */
|
||||
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x000a3fffffffULL
|
||||
#define HIGH_HEAP_MIN_ADDRESS 0x000a40000000ULL /* 41 GiB */
|
||||
|
||||
#define GENERAL_STATE_POOL_SIZE \
|
||||
(GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define LOW_HEAP_SIZE \
|
||||
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
|
||||
#define DYNAMIC_STATE_POOL_SIZE \
|
||||
(DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define BINDING_TABLE_POOL_SIZE \
|
||||
(BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
|
||||
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
|
||||
#define SCRATCH_SURFACE_STATE_POOL_SIZE \
|
||||
(SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define BINDLESS_SURFACE_STATE_POOL_SIZE \
|
||||
(BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define INTERNAL_SURFACE_STATE_POOL_SIZE \
|
||||
(INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define INSTRUCTION_STATE_POOL_SIZE \
|
||||
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define CLIENT_VISIBLE_HEAP_SIZE \
|
||||
(CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
|
||||
|
||||
/* Allowing different clear colors requires us to perform a depth resolve at
|
||||
* the end of certain render passes. This is because while slow clears store
|
||||
@@ -598,6 +536,12 @@ anv_address_map(struct anv_address addr)
|
||||
return addr.bo->map + addr.offset;
|
||||
}
|
||||
|
||||
/* Represents a virtual address range assigned to one of the driver's
 * internal pools or heaps.
 */
struct anv_va_range {
   uint64_t addr; /* start address of the range */
   uint64_t size; /* size of the range in bytes */
};
|
||||
|
||||
/* Represents a lock-free linked list of "free" things. This is used by
|
||||
* both the block pool and the state pools. Unfortunately, in order to
|
||||
* solve the ABA problem, we can't use a single uint32_t head.
|
||||
@@ -985,6 +929,19 @@ struct anv_physical_device {
|
||||
#endif
|
||||
} memory;
|
||||
|
||||
   /* Virtual address ranges carved out for the driver's internal state
    * pools and the two application heaps.  Filled in once at physical
    * device creation by anv_physical_device_init_va_ranges(); consumed by
    * anv_CreateDevice when the state pools and VMA heaps are initialized.
    */
   struct {
      struct anv_va_range general_state_pool;
      struct anv_va_range low_heap;
      struct anv_va_range dynamic_state_pool;
      struct anv_va_range binding_table_pool;
      struct anv_va_range internal_surface_state_pool;
      /* Overlaps the start of internal_surface_state_pool (see
       * anv_physical_device_init_va_ranges).
       */
      struct anv_va_range scratch_surface_state_pool;
      struct anv_va_range bindless_surface_state_pool;
      struct anv_va_range instruction_state_pool;
      struct anv_va_range client_visible_heap;
      struct anv_va_range high_heap;
   } va;
|
||||
|
||||
/* Either we have a single vram region and it's all mappable, or we have
|
||||
* both mappable & non-mappable parts. System memory is always available.
|
||||
*/
|
||||
@@ -1276,10 +1233,11 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
|
||||
}
|
||||
|
||||
static inline struct anv_state
|
||||
anv_bindless_state_for_binding_table(struct anv_state state)
|
||||
anv_bindless_state_for_binding_table(struct anv_device *device,
|
||||
struct anv_state state)
|
||||
{
|
||||
state.offset += BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS -
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
|
||||
state.offset += device->physical->va.bindless_surface_state_pool.addr -
|
||||
device->physical->va.internal_surface_state_pool.addr;
|
||||
return state;
|
||||
}
|
||||
|
||||
@@ -4401,6 +4359,7 @@ struct anv_performance_configuration_intel {
|
||||
uint64_t config_id;
|
||||
};
|
||||
|
||||
void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
|
||||
void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
|
||||
void anv_device_perf_init(struct anv_device *device);
|
||||
void anv_perf_write_pass_results(struct intel_perf_config *perf,
|
||||
|
138
src/intel/vulkan/anv_va.c
Normal file
138
src/intel/vulkan/anv_va.c
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
||||
static uint64_t
|
||||
va_add(struct anv_va_range *range, uint64_t addr, uint64_t size)
|
||||
{
|
||||
range->addr = addr;
|
||||
range->size = size;
|
||||
|
||||
return addr + size;
|
||||
}
|
||||
|
||||
static void
|
||||
va_at(struct anv_va_range *range, uint64_t addr, uint64_t size)
|
||||
{
|
||||
range->addr = addr;
|
||||
range->size = size;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_device_print_vas(struct anv_physical_device *device)
|
||||
{
|
||||
fprintf(stderr, "Driver heaps:\n");
|
||||
#define PRINT_HEAP(name) \
|
||||
fprintf(stderr, " 0x%016"PRIx64"-0x%016"PRIx64": %s\n", \
|
||||
device->va.name.addr, \
|
||||
device->va.name.addr + device->va.name.size, \
|
||||
#name);
|
||||
PRINT_HEAP(general_state_pool);
|
||||
PRINT_HEAP(low_heap);
|
||||
PRINT_HEAP(dynamic_state_pool);
|
||||
PRINT_HEAP(binding_table_pool);
|
||||
PRINT_HEAP(internal_surface_state_pool);
|
||||
PRINT_HEAP(bindless_surface_state_pool);
|
||||
PRINT_HEAP(instruction_state_pool);
|
||||
PRINT_HEAP(client_visible_heap);
|
||||
PRINT_HEAP(high_heap);
|
||||
}
|
||||
|
||||
/* Compute the virtual address ranges (device->va.*) used by the driver's
 * internal pools and the application heaps.  Called once at physical
 * device creation, before anv_CreateDevice initializes the state pools
 * and VMA allocators from these ranges.
 */
void
anv_physical_device_init_va_ranges(struct anv_physical_device *device)
{
   /* anv Virtual Memory Layout
    * =========================
    *
    * When the anv driver is determining the virtual graphics addresses of
    * memory objects itself using the softpin mechanism, the following memory
    * ranges will be used.
    *
    * Three special considerations to notice:
    *
    * (1) the dynamic state pool is located within the same 4 GiB as the low
    * heap. This is to work around a VF cache issue described in a comment in
    * anv_physical_device_init_heaps.
    *
    * (2) the binding table pool is located at lower addresses than the BT
    * (binding table) surface state pool, within a 4 GiB range which also
    * contains the bindless surface state pool. This allows surface state base
    * addresses to cover both binding tables (16 bit offsets), the internal
    * surface states (32 bit offsets) and the bindless surface states.
    *
    * (3) the last 4 GiB of the address space is withheld from the high heap.
    * Various hardware units will read past the end of an object for various
    * reasons. This healthy margin prevents reads from wrapping around 48-bit
    * addresses.
    */
   uint64_t _1Mb = 1ull * 1024 * 1024;
   uint64_t _1Gb = 1ull * 1024 * 1024 * 1024;
   uint64_t _4Gb = 4ull * 1024 * 1024 * 1024;

   /* Keep [0, 2MiB) unallocated so that address 0 (and the page around it)
    * is never handed out.
    */
   uint64_t address = 0x000000200000ULL; /* 2MiB */

   /* General state pool fills the remainder of the first 1Gb. */
   address = va_add(&device->va.general_state_pool, address,
                    _1Gb - address);

   address = va_add(&device->va.low_heap, address, _1Gb);
   address = va_add(&device->va.dynamic_state_pool, address, _1Gb);

   /* The following addresses have to be located in a 4Gb range so that the
    * binding tables can address internal surface states & bindless surface
    * states.
    */
   address = va_add(&device->va.binding_table_pool, address, _1Gb);
   address = va_add(&device->va.internal_surface_state_pool, address, 2 * _1Gb);
   /* Scratch surface state overlaps with the internal surface state */
   va_at(&device->va.scratch_surface_state_pool,
         device->va.internal_surface_state_pool.addr,
         8 * _1Mb);
   address = va_add(&device->va.bindless_surface_state_pool, address, _1Gb);

   /* We use a trick to compute constant data offsets in the shaders to avoid
    * unnecessary 64bit address computations (see lower_load_constant() in
    * anv_nir_apply_pipeline_layout.c). This assumes the instruction pool is
    * located at an address with the lower 32bits at 0.
    */
   address = align64(address, _4Gb);
   address = va_add(&device->va.instruction_state_pool, address, _1Gb);

   /* Whatever we have left we split in 2 for app allocations client-visible &
    * non-client-visible.
    *
    * Leave the last 4GiB out of the high vma range, so that no state
    * base address + size can overflow 48 bits. For more information see
    * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
    */
   /* NOTE(review): this subtraction assumes gtt_size comfortably exceeds
    * address + 4GiB; on a device with a very small GTT the unsigned math
    * would wrap — TODO confirm a minimum GTT size is guaranteed upstream.
    */
   uint64_t user_heaps_size = device->gtt_size - address - 4 * _1Gb;
   /* Whole GiB per heap (half of the remaining space each). */
   uint64_t heaps_size_Gb = user_heaps_size / _1Gb / 2 ;

   address = va_add(&device->va.client_visible_heap, address, heaps_size_Gb * _1Gb);
   address = va_add(&device->va.high_heap, address, heaps_size_Gb * _1Gb);

   /* INTEL_DEBUG=heaps dumps the computed layout to stderr. */
   if (INTEL_DEBUG(DEBUG_HEAPS))
      anv_device_print_vas(device);
}
|
@@ -117,7 +117,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
||||
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
|
||||
btpa.BindingTablePoolBaseAddress =
|
||||
anv_cmd_buffer_surface_base_address(cmd_buffer);
|
||||
btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096;
|
||||
btpa.BindingTablePoolBufferSize = device->physical->va.binding_table_pool.size / 4096;
|
||||
btpa.MOCS = mocs;
|
||||
}
|
||||
#else /* GFX_VERx10 < 125 */
|
||||
@@ -177,8 +177,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
sba.GeneralStateBufferSize = 0xfffff;
|
||||
sba.IndirectObjectBufferSize = 0xfffff;
|
||||
sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
|
||||
sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
|
||||
sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
|
||||
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
|
||||
sba.GeneralStateBufferSizeModifyEnable = true;
|
||||
sba.IndirectObjectBufferSizeModifyEnable = true;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
@@ -2018,6 +2018,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
||||
if (shader->push_desc_info.fully_promoted_ubo_descriptors & BITFIELD_BIT(desc_idx)) {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
break;
|
||||
}
|
||||
@@ -2045,11 +2046,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
desc->image_view->planes[binding->plane].general_sampler_surface_state :
|
||||
desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
|
||||
surface_state =
|
||||
anv_bindless_state_for_binding_table(sstate.state);
|
||||
anv_bindless_state_for_binding_table(cmd_buffer->device, sstate.state);
|
||||
assert(surface_state.alloc_size);
|
||||
} else {
|
||||
surface_state =
|
||||
anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
@@ -2059,10 +2061,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
if (desc->image_view) {
|
||||
struct anv_surface_state sstate =
|
||||
desc->image_view->planes[binding->plane].storage_surface_state;
|
||||
surface_state = anv_bindless_state_for_binding_table(sstate.state);
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device, sstate.state);
|
||||
assert(surface_state.alloc_size);
|
||||
} else {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
@@ -2075,6 +2079,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
assert(surface_state.alloc_size);
|
||||
} else {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
@@ -2082,10 +2087,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
if (desc->buffer_view) {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
desc->buffer_view->surface_state);
|
||||
assert(surface_state.alloc_size);
|
||||
} else {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
@@ -2126,6 +2133,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
} else {
|
||||
surface_state =
|
||||
anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
@@ -2134,10 +2142,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
if (desc->buffer_view) {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
desc->buffer_view->storage_surface_state);
|
||||
assert(surface_state.alloc_size);
|
||||
} else {
|
||||
surface_state = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state);
|
||||
}
|
||||
break;
|
||||
|
@@ -284,6 +284,7 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
uint32_t *bt_map = cmd_buffer->generation_bt_state.map;
|
||||
bt_map[0] = anv_bindless_state_for_binding_table(
|
||||
cmd_buffer->device,
|
||||
cmd_buffer->device->null_surface_state).offset + bt_offset;
|
||||
|
||||
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
@@ -593,11 +594,13 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
|
||||
* use the same area.
|
||||
*/
|
||||
if (start_generation_batch) {
|
||||
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0,
|
||||
(struct anv_address) {
|
||||
.offset = DYNAMIC_STATE_POOL_MIN_ADDRESS,
|
||||
},
|
||||
DYNAMIC_STATE_POOL_SIZE);
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
|
||||
cmd_buffer, 0,
|
||||
(struct anv_address) {
|
||||
.offset = device->physical->va.dynamic_state_pool.addr,
|
||||
},
|
||||
device->physical->va.dynamic_state_pool.size);
|
||||
}
|
||||
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
@@ -213,13 +213,17 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
|
||||
sba.StatelessDataPortAccessMOCS = mocs;
|
||||
|
||||
sba.SurfaceStateBaseAddress =
|
||||
(struct anv_address) { .offset = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS };
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.internal_surface_state_pool.addr,
|
||||
};
|
||||
sba.SurfaceStateMOCS = mocs;
|
||||
sba.SurfaceStateBaseAddressModifyEnable = true;
|
||||
|
||||
sba.DynamicStateBaseAddress =
|
||||
(struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS };
|
||||
sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.dynamic_state_pool.addr,
|
||||
};
|
||||
sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
|
||||
sba.DynamicStateMOCS = mocs;
|
||||
sba.DynamicStateBaseAddressModifyEnable = true;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
@@ -231,14 +235,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
|
||||
sba.IndirectObjectBufferSizeModifyEnable = true;
|
||||
|
||||
sba.InstructionBaseAddress =
|
||||
(struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS };
|
||||
sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.instruction_state_pool.addr,
|
||||
};
|
||||
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
|
||||
sba.InstructionMOCS = mocs;
|
||||
sba.InstructionBaseAddressModifyEnable = true;
|
||||
sba.InstructionBuffersizeModifyEnable = true;
|
||||
|
||||
sba.BindlessSurfaceStateBaseAddress =
|
||||
(struct anv_address) { .offset = BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS };
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.bindless_surface_state_pool.addr,
|
||||
};
|
||||
sba.BindlessSurfaceStateSize = (1 << 20) - 1;
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
|
||||
|
@@ -175,6 +175,7 @@ libanv_files = files(
|
||||
'anv_queue.c',
|
||||
'anv_util.c',
|
||||
'anv_utrace.c',
|
||||
'anv_va.c',
|
||||
'anv_video.c',
|
||||
'anv_wsi.c',
|
||||
)
|
||||
|
Reference in New Issue
Block a user