anv: reduce BT emissions & surface state writes with push descriptors
Zink on Anv running Gfxbench gl_driver2 is significantly slower than Iris. The reason is simple: whereas Iris implements uniform updates using push constants and only has to emit 3DSTATE_CONSTANT_* packets, Zink uses push descriptors with a uniform buffer, which in our implementation use both push constants & binding tables. Anv ends up doing the following for each uniform update: - allocate 2 surface states: - one for the uniform buffer at the offset specified by Zink - one for the descriptor set buffer - pack the 2 RENDER_SURFACE_STATE structures - re-emit binding tables - re-emit push constants. Of all of those operations, only the last one ends up being useful in this benchmark because all the uniforms have been promoted to push constants. This change defers the first 3 operations to draw time and executes them only if the pipeline needs them. vkoverhead before / after: descriptor_template_1ubo_push: 40670 / 85786 descriptor_template_12ubo_push: 4050 / 13820 descriptor_template_1combined_sampler_push: 34410 / 34043 descriptor_template_16combined_sampler_push: 2746 / 2711 descriptor_template_1sampled_image_push: 34765 / 34089 descriptor_template_16sampled_image_push: 2794 / 2649 descriptor_template_1texelbuffer_push: 108537 / 111342 descriptor_template_16texelbuffer_push: 20619 / 20166 descriptor_template_1ssbo_push: 41506 / 85976 descriptor_template_8ssbo_push: 6036 / 18703 descriptor_template_1image_push: 88932 / 89610 descriptor_template_16image_push: 20937 / 20959 descriptor_template_1imagebuffer_push: 108407 / 113240 descriptor_template_16imagebuffer_push: 32661 / 34651 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19050>
This commit is contained in:

committed by
Marge Bot

parent
ff91c5ca42
commit
b49b18f0b7
@@ -590,7 +590,10 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buffer->state.descriptors_dirty |= dirty_stages;
|
||||
if (set->is_push)
|
||||
cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
|
||||
else
|
||||
cmd_buffer->state.descriptors_dirty |= dirty_stages;
|
||||
cmd_buffer->state.push_constants_dirty |= dirty_stages;
|
||||
}
|
||||
|
||||
@@ -895,6 +898,7 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
||||
anv_descriptor_set_layout_ref(layout);
|
||||
set->layout = layout;
|
||||
}
|
||||
set->is_push = true;
|
||||
set->size = anv_descriptor_set_layout_size(layout, 0);
|
||||
set->buffer_view_count = layout->buffer_view_count;
|
||||
set->descriptor_count = layout->descriptor_count;
|
||||
@@ -921,21 +925,6 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
||||
.bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo,
|
||||
.offset = set->desc_mem.offset,
|
||||
};
|
||||
|
||||
enum isl_format format =
|
||||
anv_isl_format_for_descriptor_type(cmd_buffer->device,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
|
||||
const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
|
||||
set->desc_surface_state =
|
||||
anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
|
||||
isl_dev->ss.size, isl_dev->ss.align);
|
||||
anv_fill_buffer_surface_state(cmd_buffer->device,
|
||||
set->desc_surface_state,
|
||||
format, ISL_SWIZZLE_IDENTITY,
|
||||
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
|
||||
set->desc_addr,
|
||||
layout->descriptor_buffer_size, 1);
|
||||
}
|
||||
|
||||
return set;
|
||||
@@ -1003,7 +992,6 @@ void anv_CmdPushDescriptorSetKHR(
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer);
|
||||
|
||||
anv_descriptor_set_write_buffer(cmd_buffer->device, set,
|
||||
&cmd_buffer->surface_state_stream,
|
||||
write->descriptorType,
|
||||
buffer,
|
||||
write->dstBinding,
|
||||
@@ -1061,7 +1049,6 @@ void anv_CmdPushDescriptorSetWithTemplateKHR(
|
||||
return;
|
||||
|
||||
anv_descriptor_set_write_template(cmd_buffer->device, set,
|
||||
&cmd_buffer->surface_state_stream,
|
||||
template,
|
||||
pData);
|
||||
|
||||
|
@@ -1110,6 +1110,7 @@ anv_descriptor_set_create(struct anv_device *device,
|
||||
anv_descriptor_set_layout_descriptor_buffer_size(layout, var_desc_count);
|
||||
|
||||
set->desc_surface_state = ANV_STATE_NULL;
|
||||
set->is_push = false;
|
||||
|
||||
if (descriptor_buffer_size) {
|
||||
uint64_t pool_vma_offset =
|
||||
@@ -1480,10 +1481,33 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
anv_descriptor_write_surface_state(struct anv_device *device,
|
||||
struct anv_descriptor *desc,
|
||||
struct anv_state surface_state)
|
||||
{
|
||||
struct anv_buffer_view *bview = desc->buffer_view;
|
||||
|
||||
bview->surface_state = surface_state;
|
||||
|
||||
assert(bview->surface_state.alloc_size);
|
||||
|
||||
isl_surf_usage_flags_t usage =
|
||||
(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
|
||||
desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ?
|
||||
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT :
|
||||
ISL_SURF_USAGE_STORAGE_BIT;
|
||||
|
||||
enum isl_format format =
|
||||
anv_isl_format_for_descriptor_type(device, desc->type);
|
||||
anv_fill_buffer_surface_state(device, bview->surface_state,
|
||||
format, ISL_SWIZZLE_IDENTITY,
|
||||
usage, bview->address, bview->range, 1);
|
||||
}
|
||||
|
||||
void
|
||||
anv_descriptor_set_write_buffer(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
struct anv_state_stream *alloc_stream,
|
||||
VkDescriptorType type,
|
||||
struct anv_buffer *buffer,
|
||||
uint32_t binding,
|
||||
@@ -1491,12 +1515,10 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
|
||||
VkDeviceSize offset,
|
||||
VkDeviceSize range)
|
||||
{
|
||||
assert(alloc_stream || set->pool);
|
||||
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&set->layout->binding[binding];
|
||||
struct anv_descriptor *desc =
|
||||
&set->descriptors[bind_layout->descriptor_index + element];
|
||||
const uint32_t descriptor_index = bind_layout->descriptor_index + element;
|
||||
struct anv_descriptor *desc = &set->descriptors[descriptor_index];
|
||||
|
||||
assert(type == bind_layout->type ||
|
||||
bind_layout->type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT);
|
||||
@@ -1549,30 +1571,15 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
|
||||
struct anv_buffer_view *bview =
|
||||
&set->buffer_views[bind_layout->buffer_view_index + element];
|
||||
|
||||
desc->set_buffer_view = bview;
|
||||
|
||||
bview->range = bind_range;
|
||||
bview->address = bind_addr;
|
||||
|
||||
/* If we're writing descriptors through a push command, we need to
|
||||
* allocate the surface state from the command buffer. Otherwise it will
|
||||
* be allocated by the descriptor pool when calling
|
||||
* vkAllocateDescriptorSets. */
|
||||
if (alloc_stream) {
|
||||
bview->surface_state = anv_state_stream_alloc(alloc_stream, 64, 64);
|
||||
}
|
||||
|
||||
assert(bview->surface_state.alloc_size);
|
||||
|
||||
isl_surf_usage_flags_t usage =
|
||||
(type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
|
||||
type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ?
|
||||
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT :
|
||||
ISL_SURF_USAGE_STORAGE_BIT;
|
||||
|
||||
enum isl_format format = anv_isl_format_for_descriptor_type(device, type);
|
||||
anv_fill_buffer_surface_state(device, bview->surface_state,
|
||||
format, ISL_SWIZZLE_IDENTITY,
|
||||
usage, bind_addr, bind_range, 1);
|
||||
desc->set_buffer_view = bview;
|
||||
if (set->is_push)
|
||||
set->generate_surface_states |= BITFIELD_BIT(descriptor_index);
|
||||
else
|
||||
anv_descriptor_write_surface_state(device, desc, bview->surface_state);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1676,7 +1683,6 @@ void anv_UpdateDescriptorSets(
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer);
|
||||
|
||||
anv_descriptor_set_write_buffer(device, set,
|
||||
NULL,
|
||||
write->descriptorType,
|
||||
buffer,
|
||||
write->dstBinding,
|
||||
@@ -1776,7 +1782,6 @@ void anv_UpdateDescriptorSets(
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
anv_descriptor_set_write_buffer(device, dst,
|
||||
NULL,
|
||||
src_desc[j].type,
|
||||
src_desc[j].buffer,
|
||||
copy->dstBinding,
|
||||
@@ -1808,7 +1813,6 @@ void anv_UpdateDescriptorSets(
|
||||
void
|
||||
anv_descriptor_set_write_template(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
struct anv_state_stream *alloc_stream,
|
||||
const struct vk_descriptor_update_template *template,
|
||||
const void *data)
|
||||
{
|
||||
@@ -1857,7 +1861,6 @@ anv_descriptor_set_write_template(struct anv_device *device,
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, info->buffer);
|
||||
|
||||
anv_descriptor_set_write_buffer(device, set,
|
||||
alloc_stream,
|
||||
entry->type,
|
||||
buffer,
|
||||
entry->binding,
|
||||
@@ -1904,5 +1907,5 @@ void anv_UpdateDescriptorSetWithTemplate(
|
||||
VK_FROM_HANDLE(vk_descriptor_update_template, template,
|
||||
descriptorUpdateTemplate);
|
||||
|
||||
anv_descriptor_set_write_template(device, set, NULL, template, pData);
|
||||
anv_descriptor_set_write_template(device, set, template, pData);
|
||||
}
|
||||
|
@@ -1795,6 +1795,14 @@ struct anv_descriptor_set {
|
||||
*/
|
||||
uint32_t size;
|
||||
|
||||
/* Is this descriptor set a push descriptor */
|
||||
bool is_push;
|
||||
|
||||
/* Bitfield of descriptors for which we need to generate surface states.
|
||||
* Only valid for push descriptors
|
||||
*/
|
||||
uint32_t generate_surface_states;
|
||||
|
||||
/* State relative to anv_descriptor_pool::bo */
|
||||
struct anv_state desc_mem;
|
||||
/* Surface state for the descriptor buffer */
|
||||
@@ -1908,7 +1916,6 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
|
||||
void
|
||||
anv_descriptor_set_write_buffer(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
struct anv_state_stream *alloc_stream,
|
||||
VkDescriptorType type,
|
||||
struct anv_buffer *buffer,
|
||||
uint32_t binding,
|
||||
@@ -1916,6 +1923,11 @@ anv_descriptor_set_write_buffer(struct anv_device *device,
|
||||
VkDeviceSize offset,
|
||||
VkDeviceSize range);
|
||||
|
||||
void
|
||||
anv_descriptor_write_surface_state(struct anv_device *device,
|
||||
struct anv_descriptor *desc,
|
||||
struct anv_state surface_state);
|
||||
|
||||
void
|
||||
anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
@@ -1934,7 +1946,6 @@ anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
|
||||
void
|
||||
anv_descriptor_set_write_template(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
struct anv_state_stream *alloc_stream,
|
||||
const struct vk_descriptor_update_template *template,
|
||||
const void *data);
|
||||
|
||||
@@ -2545,6 +2556,7 @@ struct anv_cmd_state {
|
||||
|
||||
enum anv_pipe_bits pending_pipe_bits;
|
||||
VkShaderStageFlags descriptors_dirty;
|
||||
VkShaderStageFlags push_descriptors_dirty;
|
||||
VkShaderStageFlags push_constants_dirty;
|
||||
|
||||
struct anv_vertex_binding vertex_bindings[MAX_VBS];
|
||||
|
@@ -2661,6 +2661,51 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
|
||||
return flushed;
|
||||
}
|
||||
|
||||
/* This function generates surface states used by a pipeline for push
|
||||
* descriptors. This is delayed to the draw/dispatch time to avoid allocation
|
||||
* and surface state generation when a pipeline is not going to use the
|
||||
* binding table to access any push descriptor data.
|
||||
*/
|
||||
static void
|
||||
flush_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_cmd_pipeline_state *state,
|
||||
struct anv_pipeline *pipeline)
|
||||
{
|
||||
const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
|
||||
struct anv_descriptor_set *set = &state->push_descriptor->set;
|
||||
struct anv_descriptor_set_layout *layout = set->layout;
|
||||
|
||||
if (pipeline->use_push_descriptor) {
|
||||
while (set->generate_surface_states) {
|
||||
int desc_idx = u_bit_scan(&set->generate_surface_states);
|
||||
struct anv_descriptor *desc = &set->descriptors[desc_idx];
|
||||
struct anv_buffer_view *bview = desc->set_buffer_view;
|
||||
|
||||
bview->surface_state =
|
||||
anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
|
||||
isl_dev->ss.size, isl_dev->ss.align);
|
||||
anv_descriptor_write_surface_state(cmd_buffer->device, desc,
|
||||
bview->surface_state);
|
||||
}
|
||||
}
|
||||
|
||||
if (pipeline->use_push_descriptor_buffer) {
|
||||
enum isl_format format =
|
||||
anv_isl_format_for_descriptor_type(cmd_buffer->device,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
|
||||
|
||||
set->desc_surface_state =
|
||||
anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
|
||||
isl_dev->ss.size, isl_dev->ss.align);
|
||||
anv_fill_buffer_surface_state(cmd_buffer->device,
|
||||
set->desc_surface_state,
|
||||
format, ISL_SWIZZLE_IDENTITY,
|
||||
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
|
||||
set->desc_addr,
|
||||
layout->descriptor_buffer_size, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
|
||||
uint32_t stages)
|
||||
@@ -3522,6 +3567,18 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
vk_dynamic_graphics_state_any_dirty(dyn);
|
||||
uint32_t descriptors_dirty = cmd_buffer->state.descriptors_dirty &
|
||||
pipeline->active_stages;
|
||||
|
||||
const uint32_t push_descriptor_dirty =
|
||||
cmd_buffer->state.push_descriptors_dirty &
|
||||
pipeline->base.use_push_descriptor;
|
||||
if (push_descriptor_dirty) {
|
||||
flush_push_descriptor_set(cmd_buffer,
|
||||
&cmd_buffer->state.gfx.base,
|
||||
&pipeline->base);
|
||||
descriptors_dirty |= push_descriptor_dirty;
|
||||
cmd_buffer->state.push_descriptors_dirty &= ~push_descriptor_dirty;
|
||||
}
|
||||
|
||||
if (!cmd_buffer->state.gfx.dirty && !descriptors_dirty &&
|
||||
!any_dynamic_state_dirty &&
|
||||
!cmd_buffer->state.push_constants_dirty)
|
||||
@@ -4993,6 +5050,17 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
}
|
||||
|
||||
const uint32_t push_descriptor_dirty =
|
||||
cmd_buffer->state.push_descriptors_dirty &
|
||||
pipeline->base.use_push_descriptor;
|
||||
if (push_descriptor_dirty) {
|
||||
flush_push_descriptor_set(cmd_buffer,
|
||||
&cmd_buffer->state.compute.base,
|
||||
&pipeline->base);
|
||||
cmd_buffer->state.descriptors_dirty |= push_descriptor_dirty;
|
||||
cmd_buffer->state.push_descriptors_dirty &= ~push_descriptor_dirty;
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
|
||||
cmd_buffer->state.compute.pipeline_dirty) {
|
||||
flush_descriptor_sets(cmd_buffer,
|
||||
|
Reference in New Issue
Block a user