anv: toggle extended bindless surface state on Gfx12.5+
We bump the max surfaces to ~16 million instead of ~1 million on Gfx9-12. We could do more but that'll come later.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21645>
This commit is contained in:

committed by
Marge Bot

parent
7fa0fceaba
commit
257bf9b6c3
@@ -36,11 +36,6 @@
|
||||
* Descriptor set layouts.
|
||||
*/
|
||||
|
||||
/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
|
||||
* and we can't put anything else there we use 64b.
|
||||
*/
|
||||
#define ANV_SURFACE_STATE_SIZE (64)
|
||||
|
||||
static enum anv_descriptor_data
|
||||
anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
||||
VkDescriptorType type)
|
||||
@@ -1418,15 +1413,21 @@ VkResult anv_FreeDescriptorSets(
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
anv_surface_state_to_handle(struct anv_state state)
|
||||
anv_surface_state_to_handle(struct anv_physical_device *device,
|
||||
struct anv_state state)
|
||||
{
|
||||
/* Bits 31:12 of the bindless surface offset in the extended message
|
||||
* descriptor is bits 25:6 of the byte-based address.
|
||||
*/
|
||||
assert(state.offset >= 0);
|
||||
uint32_t offset = state.offset;
|
||||
assert((offset & 0x3f) == 0 && offset < (1 << 26));
|
||||
return offset << 6;
|
||||
if (device->uses_ex_bso) {
|
||||
assert((offset & 0x3f) == 0);
|
||||
return offset;
|
||||
} else {
|
||||
assert((offset & 0x3f) == 0 && offset < (1 << 26));
|
||||
return offset << 6;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1505,7 +1506,8 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
||||
(desc->layout == VK_IMAGE_LAYOUT_GENERAL) ?
|
||||
image_view->planes[p].general_sampler_surface_state :
|
||||
image_view->planes[p].optimal_sampler_surface_state;
|
||||
desc_data[p].image = anv_surface_state_to_handle(sstate.state);
|
||||
desc_data[p].image =
|
||||
anv_surface_state_to_handle(device->physical, sstate.state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1529,7 +1531,8 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
||||
assert(image_view->n_planes == 1);
|
||||
struct anv_storage_image_descriptor desc_data = {
|
||||
.vanilla = anv_surface_state_to_handle(
|
||||
image_view->planes[0].storage_surface_state.state),
|
||||
device->physical,
|
||||
image_view->planes[0].storage_surface_state.state),
|
||||
};
|
||||
memcpy(desc_map, &desc_data, sizeof(desc_data));
|
||||
}
|
||||
@@ -1571,7 +1574,9 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
|
||||
|
||||
if (data & ANV_DESCRIPTOR_SAMPLED_IMAGE) {
|
||||
struct anv_sampled_image_descriptor desc_data = {
|
||||
.image = anv_surface_state_to_handle(buffer_view->surface_state),
|
||||
.image = anv_surface_state_to_handle(
|
||||
device->physical,
|
||||
buffer_view->surface_state),
|
||||
};
|
||||
memcpy(desc_map, &desc_data, sizeof(desc_data));
|
||||
}
|
||||
@@ -1579,7 +1584,8 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
|
||||
if (data & ANV_DESCRIPTOR_STORAGE_IMAGE) {
|
||||
struct anv_storage_image_descriptor desc_data = {
|
||||
.vanilla = anv_surface_state_to_handle(
|
||||
buffer_view->storage_surface_state),
|
||||
device->physical,
|
||||
buffer_view->storage_surface_state),
|
||||
};
|
||||
memcpy(desc_map, &desc_data, sizeof(desc_data));
|
||||
}
|
||||
|
@@ -1323,6 +1323,8 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
||||
|
||||
device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
|
||||
|
||||
device->uses_ex_bso = device->info.verx10 >= 125;
|
||||
|
||||
/* Check if we can read the GPU timestamp register from the CPU */
|
||||
uint64_t u64_ignore;
|
||||
device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
|
||||
@@ -1342,6 +1344,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance,
|
||||
device->compiler->constant_buffer_0_is_relative = false;
|
||||
device->compiler->supports_shader_constants = true;
|
||||
device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
|
||||
device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
|
||||
|
||||
isl_device_init(&device->isl_dev, &device->info);
|
||||
|
||||
@@ -1867,7 +1870,8 @@ anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
|
||||
* twice a bunch of times (or a bunch of null descriptors), we can safely
|
||||
* advertise a larger limit here.
|
||||
*/
|
||||
const unsigned max_bindless_views = 1 << 20;
|
||||
const unsigned max_bindless_views =
|
||||
anv_physical_device_bindless_heap_size(pdevice) / ANV_SURFACE_STATE_SIZE;
|
||||
p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
|
||||
p->shaderUniformBufferArrayNonUniformIndexingNative = false;
|
||||
p->shaderSampledImageArrayNonUniformIndexingNative = false;
|
||||
|
@@ -181,6 +181,11 @@ struct intel_perf_query_result;
|
||||
#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
|
||||
#define MAX_SAMPLE_LOCATIONS 16
|
||||
|
||||
/* RENDER_SURFACE_STATE is a bit smaller (48b) but since it is aligned to 64
|
||||
* and we can't put anything else there we use 64b.
|
||||
*/
|
||||
#define ANV_SURFACE_STATE_SIZE (64)
|
||||
|
||||
/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
|
||||
*
|
||||
* "The surface state model is used when a Binding Table Index (specified
|
||||
@@ -893,6 +898,9 @@ struct anv_physical_device {
|
||||
/** True if we can create protected contexts. */
|
||||
bool has_protected_contexts;
|
||||
|
||||
/** True if the extended bindless surface offset is used (Gfx12.5+). */
|
||||
bool uses_ex_bso;
|
||||
|
||||
bool always_flush_cache;
|
||||
|
||||
/**
|
||||
@@ -967,6 +975,14 @@ struct anv_physical_device {
|
||||
struct intel_measure_device measure_device;
|
||||
};
|
||||
|
||||
static inline uint32_t
|
||||
anv_physical_device_bindless_heap_size(const struct anv_physical_device *device)
|
||||
{
|
||||
return device->uses_ex_bso ?
|
||||
128 * 1024 * 1024 /* 128 MiB */ :
|
||||
64 * 1024 * 1024 /* 64 MiB */;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
anv_physical_device_has_vram(const struct anv_physical_device *device)
|
||||
{
|
||||
|
@@ -184,8 +184,11 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
sba.InstructionBuffersizeModifyEnable = true;
|
||||
sba.BindlessSurfaceStateBaseAddress =
|
||||
(struct anv_address) { device->bindless_surface_state_pool.block_pool.bo, 0 };
|
||||
sba.BindlessSurfaceStateSize = (1 << 20) - 1;
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.bindless_surface_state_pool.addr,
|
||||
};
|
||||
sba.BindlessSurfaceStateSize =
|
||||
anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1;
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
|
||||
#if GFX_VER >= 11
|
||||
|
@@ -263,9 +263,9 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
|
||||
|
||||
sba.BindlessSurfaceStateBaseAddress =
|
||||
(struct anv_address) { .offset =
|
||||
device->physical->va.bindless_surface_state_pool.addr,
|
||||
};
|
||||
sba.BindlessSurfaceStateSize = (1 << 20) - 1;
|
||||
device->physical->va.bindless_surface_state_pool.addr, };
|
||||
sba.BindlessSurfaceStateSize =
|
||||
anv_physical_device_bindless_heap_size(device->physical) / ANV_SURFACE_STATE_SIZE - 1;
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
|
||||
|
||||
|
Reference in New Issue
Block a user