intel/fs: put scratch surface in the surface state heap
In4ceaed7839
we made scratch surface state allocations part of the internal heap (mapped to STATE_BASE_ADDRESS::SurfaceStateBaseAddress) so that it doesn't uses slots in the application's expected 1M descriptors (especially with vkd3d-proton). But all our compiler code relies on BSS (STATE_BASE_ADDRESS::BindlessSurfaceStateBaseAddress). The additional issue is that there is only 26bits of surface offset available in CS instruction (CFE_STATE, 3DSTATE_VS, etc...) for scratch surfaces. So we need the drivers to put the scratch surfaces in the first chunk of STATE_BASE_ADDRESS::SurfaceStateBaseAddress (hence all the driver changes). Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes:4ceaed7839
("anv: split internal surface states from descriptors") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7687 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19727>
This commit is contained in:

committed by
Marge Bot

parent
daab161535
commit
9c1c1888d9
@@ -709,6 +709,9 @@ init_state_base_address(struct iris_batch *batch)
|
||||
sba.IndirectObjectMOCS = mocs;
|
||||
sba.InstructionMOCS = mocs;
|
||||
sba.SurfaceStateMOCS = mocs;
|
||||
#if GFX_VER >= 9
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
#endif
|
||||
|
||||
sba.GeneralStateBaseAddressModifyEnable = true;
|
||||
sba.DynamicStateBaseAddressModifyEnable = true;
|
||||
@@ -717,12 +720,6 @@ init_state_base_address(struct iris_batch *batch)
|
||||
sba.GeneralStateBufferSizeModifyEnable = true;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
sba.SurfaceStateBaseAddressModifyEnable = true;
|
||||
#if GFX_VER >= 9
|
||||
sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SCRATCH_START);
|
||||
sba.BindlessSurfaceStateSize = (IRIS_SCRATCH_ZONE_SIZE >> 12) - 1;
|
||||
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
#endif
|
||||
#if GFX_VER >= 11
|
||||
sba.BindlessSamplerStateMOCS = mocs;
|
||||
#endif
|
||||
|
@@ -1567,6 +1567,11 @@ enum brw_message_target {
|
||||
#define GFX8_BTI_STATELESS_NON_COHERENT 253
|
||||
#define GFX9_BTI_BINDLESS 252
|
||||
|
||||
/* This ID doesn't map anything HW related value. It exists to inform the
|
||||
* lowering code to not use the bindless heap.
|
||||
*/
|
||||
#define GFX125_NON_BINDLESS (1u << 16)
|
||||
|
||||
/* Dataport atomic operations for Untyped Atomic Integer Operation message
|
||||
* (and others).
|
||||
*/
|
||||
|
@@ -5090,6 +5090,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
fs_reg handle = component(ubld.vgrf(BRW_REGISTER_TYPE_UD), 0);
|
||||
ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(~0x3ffu));
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX125_NON_BINDLESS);
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = handle;
|
||||
} else if (devinfo->ver >= 8) {
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
|
||||
@@ -5156,6 +5157,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
fs_reg handle = component(ubld.vgrf(BRW_REGISTER_TYPE_UD), 0);
|
||||
ubld.AND(handle, retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(~0x3ffu));
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX125_NON_BINDLESS);
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = handle;
|
||||
} else if (devinfo->ver >= 8) {
|
||||
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
|
||||
|
@@ -843,7 +843,7 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||
unspill_inst->sfid = GFX12_SFID_UGM;
|
||||
unspill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD,
|
||||
unspill_inst->exec_size,
|
||||
LSC_ADDR_SURFTYPE_BSS,
|
||||
LSC_ADDR_SURFTYPE_SS,
|
||||
LSC_ADDR_SIZE_A32,
|
||||
1 /* num_coordinates */,
|
||||
LSC_DATA_SIZE_D32,
|
||||
@@ -940,7 +940,7 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||
spill_inst->sfid = GFX12_SFID_UGM;
|
||||
spill_inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE,
|
||||
bld.dispatch_width(),
|
||||
LSC_ADDR_SURFTYPE_BSS,
|
||||
LSC_ADDR_SURFTYPE_SS,
|
||||
LSC_ADDR_SIZE_A32,
|
||||
1 /* num_coordinates */,
|
||||
LSC_DATA_SIZE_D32,
|
||||
|
@@ -1718,13 +1718,20 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
else
|
||||
inst->sfid = GFX12_SFID_UGM;
|
||||
|
||||
/* We must have exactly one of surface and surface_handle */
|
||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||
/* We should have exactly one of surface and surface_handle. For scratch
|
||||
* messages generated by brw_fs_nir.cpp we also allow a special value to
|
||||
* know what heap base we should use in STATE_BASE_ADDRESS (SS = Surface
|
||||
* State Offset, or BSS = Bindless Surface State Offset).
|
||||
*/
|
||||
bool non_bindless = surface.file == IMM && surface.ud == GFX125_NON_BINDLESS;
|
||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE) ||
|
||||
(non_bindless && surface_handle.file != BAD_FILE));
|
||||
|
||||
enum lsc_addr_surface_type surf_type;
|
||||
if (surface_handle.file != BAD_FILE)
|
||||
surf_type = LSC_ADDR_SURFTYPE_BSS;
|
||||
else if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
|
||||
if (surface_handle.file != BAD_FILE) {
|
||||
assert(surface.file == IMM && (surface.ud == 0 || surface.ud == GFX125_NON_BINDLESS));
|
||||
surf_type = non_bindless ? LSC_ADDR_SURFTYPE_SS : LSC_ADDR_SURFTYPE_BSS;
|
||||
} else if (surface.file == IMM && surface.ud == GFX7_BTI_SLM)
|
||||
surf_type = LSC_ADDR_SURFTYPE_FLAT;
|
||||
else
|
||||
surf_type = LSC_ADDR_SURFTYPE_BTI;
|
||||
@@ -1801,6 +1808,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
case LSC_ADDR_SURFTYPE_FLAT:
|
||||
inst->src[1] = brw_imm_ud(0);
|
||||
break;
|
||||
case LSC_ADDR_SURFTYPE_SS:
|
||||
case LSC_ADDR_SURFTYPE_BSS:
|
||||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
|
@@ -1197,7 +1197,7 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
|
||||
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
if (pool->surf_states[i].map != NULL) {
|
||||
anv_state_pool_free(&device->internal_surface_state_pool,
|
||||
anv_state_pool_free(&device->scratch_surface_state_pool,
|
||||
pool->surf_states[i]);
|
||||
}
|
||||
}
|
||||
@@ -1274,6 +1274,8 @@ anv_scratch_pool_get_surf(struct anv_device *device,
|
||||
struct anv_scratch_pool *pool,
|
||||
unsigned per_thread_scratch)
|
||||
{
|
||||
assert(device->info->verx10 >= 125);
|
||||
|
||||
if (per_thread_scratch == 0)
|
||||
return 0;
|
||||
|
||||
@@ -1290,7 +1292,7 @@ anv_scratch_pool_get_surf(struct anv_device *device,
|
||||
struct anv_address addr = { .bo = bo };
|
||||
|
||||
struct anv_state state =
|
||||
anv_state_pool_alloc(&device->internal_surface_state_pool,
|
||||
anv_state_pool_alloc(&device->scratch_surface_state_pool,
|
||||
device->isl_dev.ss.size, 64);
|
||||
|
||||
isl_buffer_fill_state(&device->isl_dev, state.map,
|
||||
@@ -1305,7 +1307,7 @@ anv_scratch_pool_get_surf(struct anv_device *device,
|
||||
uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
|
||||
0, state.offset);
|
||||
if (current) {
|
||||
anv_state_pool_free(&device->internal_surface_state_pool, state);
|
||||
anv_state_pool_free(&device->scratch_surface_state_pool, state);
|
||||
return current;
|
||||
} else {
|
||||
pool->surf_states[scratch_size_log2] = state;
|
||||
|
@@ -1443,7 +1443,7 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
|
||||
}
|
||||
|
||||
/* Add all the global BOs to the object list for softpin case. */
|
||||
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
|
||||
result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -1451,6 +1451,10 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
@@ -3063,10 +3063,12 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
|
||||
return ret_bo;
|
||||
if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
|
||||
return ret_bo;
|
||||
if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
|
||||
if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
|
||||
return ret_bo;
|
||||
if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
|
||||
return ret_bo;
|
||||
if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
|
||||
return ret_bo;
|
||||
|
||||
if (!device->cmd_buffer_being_decoded)
|
||||
return (struct intel_batch_decode_bo) { };
|
||||
@@ -3437,11 +3439,27 @@ VkResult anv_CreateDevice(
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_dynamic_state_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
|
||||
"internal surface state pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
if (device->info->verx10 >= 125) {
|
||||
/* Put the scratch surface states at the beginning of the internal
|
||||
* surface state pool.
|
||||
*/
|
||||
result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
|
||||
"scratch surface state pool",
|
||||
SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_instruction_state_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
|
||||
"internal surface state pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
|
||||
SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
|
||||
} else {
|
||||
result = anv_state_pool_init(&device->internal_surface_state_pool, device,
|
||||
"internal surface state pool",
|
||||
INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
|
||||
}
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_instruction_state_pool;
|
||||
goto fail_scratch_surface_state_pool;
|
||||
|
||||
result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
|
||||
"bindless surface state pool",
|
||||
@@ -3549,7 +3567,9 @@ VkResult anv_CreateDevice(
|
||||
* to zero and they have a valid descriptor.
|
||||
*/
|
||||
device->null_surface_state =
|
||||
anv_state_pool_alloc(&device->internal_surface_state_pool,
|
||||
anv_state_pool_alloc(device->info->verx10 >= 125 ?
|
||||
&device->scratch_surface_state_pool :
|
||||
&device->internal_surface_state_pool,
|
||||
device->isl_dev.ss.size,
|
||||
device->isl_dev.ss.align);
|
||||
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
|
||||
@@ -3650,6 +3670,9 @@ VkResult anv_CreateDevice(
|
||||
anv_state_pool_finish(&device->bindless_surface_state_pool);
|
||||
fail_internal_surface_state_pool:
|
||||
anv_state_pool_finish(&device->internal_surface_state_pool);
|
||||
fail_scratch_surface_state_pool:
|
||||
if (device->info->verx10 >= 125)
|
||||
anv_state_pool_finish(&device->scratch_surface_state_pool);
|
||||
fail_instruction_state_pool:
|
||||
anv_state_pool_finish(&device->instruction_state_pool);
|
||||
fail_dynamic_state_pool:
|
||||
@@ -3738,6 +3761,8 @@ void anv_DestroyDevice(
|
||||
}
|
||||
|
||||
anv_state_pool_finish(&device->binding_table_pool);
|
||||
if (device->info->verx10 >= 125)
|
||||
anv_state_pool_finish(&device->scratch_surface_state_pool);
|
||||
anv_state_pool_finish(&device->internal_surface_state_pool);
|
||||
anv_state_pool_finish(&device->bindless_surface_state_pool);
|
||||
anv_state_pool_finish(&device->instruction_state_pool);
|
||||
|
@@ -2679,6 +2679,8 @@ anv_CreateImageView(VkDevice _device,
|
||||
.h = image->vk.extent.height,
|
||||
.d = image->vk.extent.depth,
|
||||
});
|
||||
|
||||
iview->planes[vplane].lowered_surface_state_is_null = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -160,8 +160,10 @@ struct intel_perf_query_result;
|
||||
#define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
|
||||
#define INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
|
||||
#define INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
|
||||
#define SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB (8MiB overlaps surface state pool) */
|
||||
#define SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001407fffffULL
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB (64MiB) */
|
||||
#define BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS 0x0001c3ffffffULL
|
||||
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000200000000ULL /* 8 GiB */
|
||||
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x00023fffffffULL
|
||||
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x000240000000ULL /* 9 GiB */
|
||||
@@ -177,10 +179,12 @@ struct intel_perf_query_result;
|
||||
#define BINDING_TABLE_POOL_SIZE \
|
||||
(BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
|
||||
#define BINDING_TABLE_POOL_BLOCK_SIZE (65536)
|
||||
#define INTERNAL_SURFACE_STATE_POOL_SIZE \
|
||||
(INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define SCRATCH_SURFACE_STATE_POOL_SIZE \
|
||||
(SCRATCH_SURFACE_STATE_POOL_MAX_ADDRESS - SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define BINDLESS_SURFACE_STATE_POOL_SIZE \
|
||||
(BINDLESS_SURFACE_STATE_POOL_MAX_ADDRESS - BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define INTERNAL_SURFACE_STATE_POOL_SIZE \
|
||||
(INTERNAL_SURFACE_STATE_POOL_MAX_ADDRESS - INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define INSTRUCTION_STATE_POOL_SIZE \
|
||||
(INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
|
||||
#define CLIENT_VISIBLE_HEAP_SIZE \
|
||||
@@ -1158,6 +1162,7 @@ struct anv_device {
|
||||
struct anv_state_pool dynamic_state_pool;
|
||||
struct anv_state_pool instruction_state_pool;
|
||||
struct anv_state_pool binding_table_pool;
|
||||
struct anv_state_pool scratch_surface_state_pool;
|
||||
struct anv_state_pool internal_surface_state_pool;
|
||||
struct anv_state_pool bindless_surface_state_pool;
|
||||
|
||||
@@ -3831,6 +3836,8 @@ struct anv_image_view {
|
||||
*/
|
||||
struct anv_surface_state storage_surface_state;
|
||||
struct anv_surface_state lowered_storage_surface_state;
|
||||
|
||||
bool lowered_surface_state_is_null;
|
||||
} planes[3];
|
||||
};
|
||||
|
||||
|
@@ -181,7 +181,11 @@ static uint32_t
|
||||
blorp_binding_table_offset_to_pointer(struct blorp_batch *batch,
|
||||
uint32_t offset)
|
||||
{
|
||||
#if GFX_VERX10 >= 125
|
||||
return SCRATCH_SURFACE_STATE_POOL_SIZE + offset;
|
||||
#else
|
||||
return offset;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void *
|
||||
|
@@ -2421,7 +2421,12 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
continue;
|
||||
}
|
||||
const struct anv_descriptor *desc = &set->descriptors[binding->index];
|
||||
|
||||
/* Relative offset in the STATE_BASE_ADDRESS::SurfaceStateBaseAddress
|
||||
* heap. Depending on where the descriptor surface state is
|
||||
* allocated, they can either come from
|
||||
* device->internal_surface_state_pool or
|
||||
* device->bindless_surface_state_pool.
|
||||
*/
|
||||
switch (desc->type) {
|
||||
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
|
||||
case VK_DESCRIPTOR_TYPE_SAMPLER:
|
||||
@@ -2451,10 +2456,11 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
binding->lowered_storage_surface
|
||||
? desc->image_view->planes[binding->plane].lowered_storage_surface_state
|
||||
: desc->image_view->planes[binding->plane].storage_surface_state;
|
||||
surface_state =
|
||||
anv_bindless_state_for_binding_table(sstate.state);
|
||||
const bool lowered_surface_state_is_null =
|
||||
desc->image_view->planes[binding->plane].lowered_surface_state_is_null;
|
||||
surface_state = anv_bindless_state_for_binding_table(sstate.state);
|
||||
assert(surface_state.alloc_size);
|
||||
if (surface_state.offset == 0) {
|
||||
if (binding->lowered_storage_surface && lowered_surface_state_is_null) {
|
||||
mesa_loge("Bound a image to a descriptor where the "
|
||||
"descriptor does not have NonReadable "
|
||||
"set and the image does not have a "
|
||||
@@ -2752,7 +2758,12 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
|
||||
anv_batch_emit(&cmd_buffer->batch,
|
||||
GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), btp) {
|
||||
btp._3DCommandSubOpcode = binding_table_opcodes[s];
|
||||
#if GFX_VERX10 >= 125
|
||||
btp.PointertoVSBindingTable = SCRATCH_SURFACE_STATE_POOL_SIZE +
|
||||
cmd_buffer->state.binding_tables[s].offset;
|
||||
#else
|
||||
btp.PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user