anv: make internal address space allocation more dynamic

We're about to manipulate these pools, and dealing with the fixed address
ranges is painful.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22847>
This commit is contained in:
Lionel Landwerlin
2023-02-23 09:59:45 +02:00
parent 843afd4c63
commit c60e94d61f
11 changed files with 250 additions and 114 deletions

View File

@@ -486,6 +486,8 @@ Intel driver environment variables
dump shader assembly for fragment shaders
``gs``
dump shader assembly for geometry shaders
``heaps``
print information about the driver's heaps (Anv only)
``hex``
print instruction hex dump with the disassembly
``l3``

View File

@@ -101,6 +101,7 @@ static const struct debug_control debug_control[] = {
{ "capture-all", DEBUG_CAPTURE_ALL },
{ "perf-symbol-names", DEBUG_PERF_SYMBOL_NAMES },
{ "swsb-stall", DEBUG_SWSB_STALL },
{ "heaps", DEBUG_HEAPS },
{ NULL, 0 }
};

View File

@@ -91,6 +91,7 @@ extern uint64_t intel_debug;
#define DEBUG_CAPTURE_ALL (1ull << 43)
#define DEBUG_PERF_SYMBOL_NAMES (1ull << 44)
#define DEBUG_SWSB_STALL (1ull << 45)
#define DEBUG_HEAPS (1ull << 46)
#define DEBUG_ANY (~0ull)

View File

@@ -1414,6 +1414,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
if (result != VK_SUCCESS)
goto fail_compiler;
anv_physical_device_init_va_ranges(device);
anv_physical_device_init_disk_cache(device);
if (instance->vk.enabled_extensions.KHR_display) {
@@ -3064,9 +3066,9 @@ VkResult anv_CreateDevice(
decode_get_bo, NULL, device);
decoder->engine = physical_device->queue.families[i].engine_class;
decoder->dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
decoder->surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
decoder->instruction_base = INSTRUCTION_STATE_POOL_MIN_ADDRESS;
decoder->dynamic_base = physical_device->va.dynamic_state_pool.addr;
decoder->surface_base = physical_device->va.internal_surface_state_pool.addr;
decoder->instruction_base = physical_device->va.instruction_state_pool.addr;
}
}
@@ -3142,18 +3144,16 @@ VkResult anv_CreateDevice(
/* keep the page with address zero out of the allocator */
util_vma_heap_init(&device->vma_lo,
LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
device->physical->va.low_heap.addr,
device->physical->va.low_heap.size);
util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
CLIENT_VISIBLE_HEAP_SIZE);
util_vma_heap_init(&device->vma_cva,
device->physical->va.client_visible_heap.addr,
device->physical->va.client_visible_heap.size);
/* Leave the last 4GiB out of the high vma range, so that no state
* base address + size can overflow 48 bits. For more information see
* the comment about Wa32bitGeneralStateOffset in anv_allocator.c
*/
util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
physical_device->gtt_size - (1ull << 32) -
HIGH_HEAP_MIN_ADDRESS);
util_vma_heap_init(&device->vma_hi,
device->physical->va.high_heap.addr,
device->physical->va.high_heap.size);
list_inithead(&device->memory_objects);
@@ -3191,13 +3191,13 @@ VkResult anv_CreateDevice(
*/
result = anv_state_pool_init(&device->general_state_pool, device,
"general pool",
0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
0, device->physical->va.general_state_pool.addr, 16384);
if (result != VK_SUCCESS)
goto fail_batch_bo_pool;
result = anv_state_pool_init(&device->dynamic_state_pool, device,
"dynamic pool",
DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
device->physical->va.dynamic_state_pool.addr, 0, 16384);
if (result != VK_SUCCESS)
goto fail_general_state_pool;
@@ -3214,7 +3214,8 @@ VkResult anv_CreateDevice(
result = anv_state_pool_init(&device->instruction_state_pool, device,
"instruction pool",
INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
device->physical->va.instruction_state_pool.addr,
0, 16384);
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
@@ -3224,25 +3225,29 @@ VkResult anv_CreateDevice(
*/
result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
"scratch surface state pool",
SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
device->physical->va.scratch_surface_state_pool.addr,
0, 4096);
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
"internal surface state pool",
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
device->physical->va.internal_surface_state_pool.addr,
device->physical->va.scratch_surface_state_pool.size,
4096);
} else {
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
"internal surface state pool",
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
device->physical->va.internal_surface_state_pool.addr,
0, 4096);
}
if (result != VK_SUCCESS)
goto fail_scratch_surface_state_pool;
result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
"bindless surface state pool",
BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
device->physical->va.bindless_surface_state_pool.addr,
0, 4096);
if (result != VK_SUCCESS)
goto fail_internal_surface_state_pool;
@@ -3252,15 +3257,21 @@ VkResult anv_CreateDevice(
*/
result = anv_state_pool_init(&device->binding_table_pool, device,
"binding table pool",
BINDING_TABLE_POOL_MIN_ADDRESS, 0,
device->physical->va.binding_table_pool.addr, 0,
BINDING_TABLE_POOL_BLOCK_SIZE);
} else {
int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
(int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
/* The binding table should be in front of the surface states in virtual
* address space so that all surface states can be express as relative
* offsets from the binding table location.
*/
assert(device->physical->va.binding_table_pool.addr <
device->physical->va.internal_surface_state_pool.addr);
int64_t bt_pool_offset = (int64_t)device->physical->va.binding_table_pool.addr -
(int64_t)device->physical->va.internal_surface_state_pool.addr;
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
result = anv_state_pool_init(&device->binding_table_pool, device,
"binding table pool",
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
device->physical->va.internal_surface_state_pool.addr,
bt_pool_offset,
BINDING_TABLE_POOL_BLOCK_SIZE);
}

View File

@@ -115,9 +115,10 @@ anv_shader_bin_create(struct anv_device *device,
memcpy(shader->kernel.map, kernel_data, kernel_size);
shader->kernel_size = kernel_size;
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
prog_data_in->const_data_offset;
uint64_t shader_data_addr =
device->physical->va.instruction_state_pool.addr +
shader->kernel.offset +
prog_data_in->const_data_offset;
int rv_count = 0;
struct brw_shader_reloc_value reloc_values[5];
@@ -125,10 +126,10 @@ anv_shader_bin_create(struct anv_device *device,
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
};
assert(shader_data_addr >> 32 == INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32);
assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = INSTRUCTION_STATE_POOL_MIN_ADDRESS >> 32
.value = device->physical->va.instruction_state_pool.addr >> 32,
};
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
@@ -137,9 +138,10 @@ anv_shader_bin_create(struct anv_device *device,
if (brw_shader_stage_is_bindless(stage)) {
const struct brw_bs_prog_data *bs_prog_data =
brw_bs_prog_data_const(prog_data_in);
uint64_t resume_sbt_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
bs_prog_data->resume_sbt_offset;
uint64_t resume_sbt_addr =
device->physical->va.instruction_state_pool.addr +
shader->kernel.offset +
bs_prog_data->resume_sbt_offset;
reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
.id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
.value = resume_sbt_addr,

View File

@@ -132,69 +132,7 @@ struct intel_perf_query_result;
#define NSEC_PER_SEC 1000000000ull
/* anv Virtual Memory Layout
* =========================
*
* When the anv driver is determining the virtual graphics addresses of memory
* objects itself using the softpin mechanism, the following memory ranges
* will be used.
*
* Three special considerations to notice:
*
* (1) the dynamic state pool is located within the same 4 GiB as the low
* heap. This is to work around a VF cache issue described in a comment in
* anv_physical_device_init_heaps.
*
* (2) the binding table pool is located at lower addresses than the BT
* (binding table) surface state pool, within a 4 GiB range which also
* contains the bindless surface state pool. This allows surface state base
* addresses to cover both binding tables (16 bit offsets), the internal
* surface states (32 bit offsets) and the bindless surface states.
*
* (3) the last 4 GiB of the address space is withheld from the high
* heap. Various hardware units will read past the end of an object for
* various reasons. This healthy margin prevents reads from wrapping around
* 48-bit addresses.
*/
#define GENERAL_STATE_POOL_MIN_ADDRESS 0x000000200000ULL /* 2 MiB */
#define GENERAL_STATE_POOL_MAX_ADDRESS 0x00003fffffffULL
#define LOW_HEAP_MIN_ADDRESS 0x000040000000ULL /* 1 GiB */
#define LOW_HEAP_MAX_ADDRESS 0x00007fffffffULL
#define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
#define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
#define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB (8MiB overlaps surface state pool) */
#define SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001407fffffULL
#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB (64MiB) */
#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001c3ffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x000a3fffffffULL
#define HIGH_HEAP_MIN_ADDRESS 0x000a40000000ULL /* 41 GiB */
#define GENERAL_STATE_POOL_SIZE \
(GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
#define LOW_HEAP_SIZE \
(LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE \
(DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE \
(BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
#define SCRATCH_SURFACE_STATE_POOL_SIZE \
(SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define BINDLESS_SURFACE_STATE_POOL_SIZE \
(BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INTERNAL_SURFACE_STATE_POOL_SIZE \
(INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
#define CLIENT_VISIBLE_HEAP_SIZE \
(CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
/* Allowing different clear colors requires us to perform a depth resolve at
* the end of certain render passes. This is because while slow clears store
@@ -598,6 +536,12 @@ anv_address_map(struct anv_address addr)
return addr.bo->map + addr.offset;
}
/* Represent a virtual address range */
struct anv_va_range {
uint64_t addr;
uint64_t size;
};
/* Represents a lock-free linked list of "free" things. This is used by
* both the block pool and the state pools. Unfortunately, in order to
* solve the ABA problem, we can't use a single uint32_t head.
@@ -985,6 +929,19 @@ struct anv_physical_device {
#endif
} memory;
struct {
struct anv_va_range general_state_pool;
struct anv_va_range low_heap;
struct anv_va_range dynamic_state_pool;
struct anv_va_range binding_table_pool;
struct anv_va_range internal_surface_state_pool;
struct anv_va_range scratch_surface_state_pool;
struct anv_va_range bindless_surface_state_pool;
struct anv_va_range instruction_state_pool;
struct anv_va_range client_visible_heap;
struct anv_va_range high_heap;
} va;
/* Either we have a single vram region and it's all mappable, or we have
* both mappable & non-mappable parts. System memory is always available.
*/
@@ -1276,10 +1233,11 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
}
static inline struct anv_state
anv_bindless_state_for_binding_table(struct anv_state state)
anv_bindless_state_for_binding_table(struct anv_device *device,
struct anv_state state)
{
state.offset += BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS -
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
state.offset += device->physical->va.bindless_surface_state_pool.addr -
device->physical->va.internal_surface_state_pool.addr;
return state;
}
@@ -4401,6 +4359,7 @@ struct anv_performance_configuration_intel {
uint64_t config_id;
};
void anv_physical_device_init_va_ranges(struct anv_physical_device *device);
void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
void anv_device_perf_init(struct anv_device *device);
void anv_perf_write_pass_results(struct intel_perf_config *perf,

138
src/intel/vulkan/anv_va.c Normal file
View File

@@ -0,0 +1,138 @@
/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "util/u_math.h"
static uint64_t
va_add(struct anv_va_range *range, uint64_t addr, uint64_t size)
{
range->addr = addr;
range->size = size;
return addr + size;
}
static void
va_at(struct anv_va_range *range, uint64_t addr, uint64_t size)
{
range->addr = addr;
range->size = size;
}
/* Dump every driver-reserved VA range to stderr as [start, end) pairs.
 * Triggered by INTEL_DEBUG=heaps (see anv_physical_device_init_va_ranges).
 */
static void
anv_device_print_vas(struct anv_physical_device *device)
{
   fprintf(stderr, "Driver heaps:\n");
#define PRINT_HEAP(name) \
   fprintf(stderr, "   0x%016"PRIx64"-0x%016"PRIx64": %s\n", \
           device->va.name.addr, \
           device->va.name.addr + device->va.name.size, \
           #name);
   PRINT_HEAP(general_state_pool);
   PRINT_HEAP(low_heap);
   PRINT_HEAP(dynamic_state_pool);
   PRINT_HEAP(binding_table_pool);
   PRINT_HEAP(internal_surface_state_pool);
   /* NOTE(review): previously omitted, presumably because it aliases the
    * start of internal_surface_state_pool — printing it anyway keeps the
    * dump in sync with the full struct anv_physical_device::va contents.
    */
   PRINT_HEAP(scratch_surface_state_pool);
   PRINT_HEAP(bindless_surface_state_pool);
   PRINT_HEAP(instruction_state_pool);
   PRINT_HEAP(client_visible_heap);
   PRINT_HEAP(high_heap);
#undef PRINT_HEAP
}
/* anv Virtual Memory Layout
 * =========================
 *
 * When the anv driver picks virtual graphics addresses for memory objects
 * itself (softpin), the ranges computed here are used.
 *
 * Three special considerations to notice:
 *
 * (1) the dynamic state pool is located within the same 4 GiB as the low
 * heap. This is to work around a VF cache issue described in a comment in
 * anv_physical_device_init_heaps.
 *
 * (2) the binding table pool is located at lower addresses than the BT
 * (binding table) surface state pool, within a 4 GiB range which also
 * contains the bindless surface state pool. This allows surface state base
 * addresses to cover both binding tables (16 bit offsets), the internal
 * surface states (32 bit offsets) and the bindless surface states.
 *
 * (3) the last 4 GiB of the address space is withheld from the high heap.
 * Various hardware units will read past the end of an object for various
 * reasons. This healthy margin prevents reads from wrapping around 48-bit
 * addresses.
 */
void
anv_physical_device_init_va_ranges(struct anv_physical_device *device)
{
   const uint64_t _1Mb = 1ull << 20;
   const uint64_t _1Gb = 1ull << 30;
   const uint64_t _4Gb = 1ull << 32;

   /* Leave the first 2MiB (including page 0) unallocated. */
   uint64_t va = 0x000000200000ULL;

   va = va_add(&device->va.general_state_pool, va, _1Gb - va);
   va = va_add(&device->va.low_heap, va, _1Gb);
   va = va_add(&device->va.dynamic_state_pool, va, _1Gb);

   /* The following pools must share a single 4Gb range so that binding
    * tables can address internal surface states & bindless surface states.
    */
   va = va_add(&device->va.binding_table_pool, va, _1Gb);
   va = va_add(&device->va.internal_surface_state_pool, va, 2 * _1Gb);
   /* Scratch surface states alias the first 8MiB of the internal surface
    * state pool rather than occupying their own range.
    */
   va_at(&device->va.scratch_surface_state_pool,
         device->va.internal_surface_state_pool.addr,
         8 * _1Mb);
   va = va_add(&device->va.bindless_surface_state_pool, va, _1Gb);

   /* We use a trick to compute constant data offsets in the shaders to avoid
    * unnecessary 64bit address computations (see lower_load_constant() in
    * anv_nir_apply_pipeline_layout.c). This assumes the instruction pool is
    * located at an address with the lower 32bits at 0.
    */
   va = align64(va, _4Gb);
   va = va_add(&device->va.instruction_state_pool, va, _1Gb);

   /* Whatever remains is split evenly between the client-visible and
    * non-client-visible application heaps, minus the last 4GiB of the
    * address space, so that no state base address + size can overflow
    * 48 bits. For more information see the comment about
    * Wa32bitGeneralStateOffset in anv_allocator.c.
    *
    * NOTE(review): assumes gtt_size comfortably exceeds the ~13GiB of
    * driver-internal ranges plus the 4GiB margin — TODO confirm for the
    * smallest supported GTT.
    */
   const uint64_t app_vas = device->gtt_size - va - 4 * _1Gb;
   const uint64_t heap_gbs = app_vas / _1Gb / 2;
   va = va_add(&device->va.client_visible_heap, va, heap_gbs * _1Gb);
   va = va_add(&device->va.high_heap, va, heap_gbs * _1Gb);

   if (INTEL_DEBUG(DEBUG_HEAPS))
      anv_device_print_vas(device);
}

View File

@@ -117,7 +117,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress =
anv_cmd_buffer_surface_base_address(cmd_buffer);
btpa.BindingTablePoolBufferSize = BINDING_TABLE_POOL_BLOCK_SIZE / 4096;
btpa.BindingTablePoolBufferSize = device->physical->va.binding_table_pool.size / 4096;
btpa.MOCS = mocs;
}
#else /* GFX_VERx10 < 125 */
@@ -177,8 +177,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
sba.GeneralStateBufferSize = 0xfffff;
sba.IndirectObjectBufferSize = 0xfffff;
sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
@@ -2018,6 +2018,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
if (shader->push_desc_info.fully_promoted_ubo_descriptors & BITFIELD_BIT(desc_idx)) {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
break;
}
@@ -2045,11 +2046,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
desc->image_view->planes[binding->plane].general_sampler_surface_state :
desc->image_view->planes[binding->plane].optimal_sampler_surface_state;
surface_state =
anv_bindless_state_for_binding_table(sstate.state);
anv_bindless_state_for_binding_table(cmd_buffer->device, sstate.state);
assert(surface_state.alloc_size);
} else {
surface_state =
anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;
@@ -2059,10 +2061,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
if (desc->image_view) {
struct anv_surface_state sstate =
desc->image_view->planes[binding->plane].storage_surface_state;
surface_state = anv_bindless_state_for_binding_table(sstate.state);
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device, sstate.state);
assert(surface_state.alloc_size);
} else {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;
@@ -2075,6 +2079,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
assert(surface_state.alloc_size);
} else {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;
@@ -2082,10 +2087,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
if (desc->buffer_view) {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
desc->buffer_view->surface_state);
assert(surface_state.alloc_size);
} else {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;
@@ -2126,6 +2133,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
} else {
surface_state =
anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;
@@ -2134,10 +2142,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
if (desc->buffer_view) {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
desc->buffer_view->storage_surface_state);
assert(surface_state.alloc_size);
} else {
surface_state = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state);
}
break;

View File

@@ -284,6 +284,7 @@ genX(cmd_buffer_emit_generate_draws_pipeline)(struct anv_cmd_buffer *cmd_buffer)
uint32_t *bt_map = cmd_buffer->generation_bt_state.map;
bt_map[0] = anv_bindless_state_for_binding_table(
cmd_buffer->device,
cmd_buffer->device->null_surface_state).offset + bt_offset;
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
@@ -593,11 +594,13 @@ genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer
* use the same area.
*/
if (start_generation_batch) {
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, 0,
(struct anv_address) {
.offset = DYNAMIC_STATE_POOL_MIN_ADDRESS,
},
DYNAMIC_STATE_POOL_SIZE);
struct anv_device *device = cmd_buffer->device;
genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
cmd_buffer, 0,
(struct anv_address) {
.offset = device->physical->va.dynamic_state_pool.addr,
},
device->physical->va.dynamic_state_pool.size);
}
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;

View File

@@ -213,13 +213,17 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
sba.StatelessDataPortAccessMOCS = mocs;
sba.SurfaceStateBaseAddress =
(struct anv_address) { .offset = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS };
(struct anv_address) { .offset =
device->physical->va.internal_surface_state_pool.addr,
};
sba.SurfaceStateMOCS = mocs;
sba.SurfaceStateBaseAddressModifyEnable = true;
sba.DynamicStateBaseAddress =
(struct anv_address) { .offset = DYNAMIC_STATE_POOL_MIN_ADDRESS };
sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096;
(struct anv_address) { .offset =
device->physical->va.dynamic_state_pool.addr,
};
sba.DynamicStateBufferSize = device->physical->va.dynamic_state_pool.size / 4096;
sba.DynamicStateMOCS = mocs;
sba.DynamicStateBaseAddressModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
@@ -231,14 +235,18 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.InstructionBaseAddress =
(struct anv_address) { .offset = INSTRUCTION_STATE_POOL_MIN_ADDRESS };
sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096;
(struct anv_address) { .offset =
device->physical->va.instruction_state_pool.addr,
};
sba.InstructionBufferSize = device->physical->va.instruction_state_pool.size / 4096;
sba.InstructionMOCS = mocs;
sba.InstructionBaseAddressModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
sba.BindlessSurfaceStateBaseAddress =
(struct anv_address) { .offset = BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS };
(struct anv_address) { .offset =
device->physical->va.bindless_surface_state_pool.addr,
};
sba.BindlessSurfaceStateSize = (1 << 20) - 1;
sba.BindlessSurfaceStateMOCS = mocs;
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;

View File

@@ -175,6 +175,7 @@ libanv_files = files(
'anv_queue.c',
'anv_util.c',
'anv_utrace.c',
'anv_va.c',
'anv_video.c',
'anv_wsi.c',
)