radv: bind the static vertex input state from the pipeline

This removes one more pipeline dependency when writing VBO descriptors.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30916>
This commit is contained in:
Samuel Pitoiset
2024-08-29 09:25:14 +02:00
committed by Marge Bot
parent aea6cec434
commit b3834b7dcd
5 changed files with 40 additions and 71 deletions

View File

@@ -5149,13 +5149,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
uint8_t format_req = state->format_align_req_minus_1[index];
uint8_t component_req = state->component_align_req_minus_1[index];
uint64_t vb_offset = cmd_buffer->vertex_bindings[binding].offset;
uint64_t vb_stride;
if (cmd_buffer->state.uses_dynamic_vertex_binding_stride) {
vb_stride = cmd_buffer->vertex_bindings[binding].stride;
} else {
vb_stride = cmd_buffer->state.graphics_pipeline->binding_stride[binding];
}
uint64_t vb_stride = cmd_buffer->vertex_bindings[binding].stride;
VkDeviceSize offset = vb_offset + state->offsets[index];
@@ -6151,8 +6145,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag
}
void
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline,
bool full_null_descriptors, void *vb_ptr)
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, bool full_null_descriptors, void *vb_ptr)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -6162,18 +6155,18 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
unsigned desc_index = 0;
uint32_t mask = vs_shader->info.vs.vb_desc_usage_mask;
uint64_t va;
const struct radv_vs_input_state *vs_state =
vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
assert(!vs_state || vs_shader->info.vs.use_per_attribute_vb_descs);
const bool uses_dynamic_inputs = vs_shader->info.vs.dynamic_inputs;
const struct radv_vs_input_state *vs_state = &cmd_buffer->state.dynamic_vs_input;
const struct ac_vtx_format_info *vtx_info_table = vs_state ? ac_get_vtx_format_info_table(chip, family) : NULL;
const struct ac_vtx_format_info *vtx_info_table =
uses_dynamic_inputs ? ac_get_vtx_format_info_table(chip, family) : NULL;
while (mask) {
unsigned i = u_bit_scan(&mask);
uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4];
uint32_t offset, rsrc_word3;
if (vs_state && !(vs_state->attribute_mask & BITFIELD_BIT(i))) {
if (uses_dynamic_inputs && !(vs_state->attribute_mask & BITFIELD_BIT(i))) {
/* No vertex attribute description given: assume that the shader doesn't use this
* location (vb_desc_usage_mask can be larger than attribute usage) and use a null
* descriptor to avoid hangs (prologs load all attributes, even if there are holes).
@@ -6182,13 +6175,12 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
continue;
}
unsigned binding = vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
: (vs_shader->info.vs.use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i);
const unsigned binding = vs_state->bindings[i];
struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding];
unsigned num_records;
unsigned stride;
const unsigned stride = cmd_buffer->vertex_bindings[binding].stride;
if (vs_state && !(vs_state->nontrivial_formats & BITFIELD_BIT(i))) {
if (uses_dynamic_inputs && !(vs_state->nontrivial_formats & BITFIELD_BIT(i))) {
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vs_state->formats[i]];
unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1];
@@ -6208,12 +6200,6 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
if (cmd_buffer->state.uses_dynamic_vertex_binding_stride) {
stride = cmd_buffer->vertex_bindings[binding].stride;
} else {
stride = pipeline->binding_stride[binding];
}
if (!buffer) {
if (full_null_descriptors) {
/* Put all the info in for the DGC generation shader in case the VBO gets overridden. */
@@ -6221,7 +6207,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
desc[1] = S_008F04_STRIDE(stride);
desc[2] = 0;
desc[3] = rsrc_word3;
} else if (vs_state) {
} else if (uses_dynamic_inputs) {
/* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need
* to include the format/word3 so that the alpha channel is 1 for formats without an
* alpha channel.
@@ -6241,7 +6227,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
offset = cmd_buffer->vertex_bindings[binding].offset;
va += offset + buffer->offset;
if (vs_state)
if (uses_dynamic_inputs)
va += vs_state->offsets[i];
if (cmd_buffer->vertex_bindings[binding].size) {
@@ -6251,7 +6237,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
}
if (vs_shader->info.vs.use_per_attribute_vb_descs) {
uint32_t attrib_end = vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i];
const uint32_t attrib_end = vs_state->offsets[i] + vs_state->format_sizes[i];
if (num_records < attrib_end) {
num_records = 0; /* not enough space for one vertex */
@@ -6263,7 +6249,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
* attrib_offset/stride and decrease the offset by attrib_offset%stride. This is
* only allowed with static strides.
*/
num_records += pipeline ? pipeline->attrib_index_offset[i] : 0;
num_records += vs_state->attrib_index_offset[i];
}
/* GFX10 uses OOB_SELECT_RAW if stride==0, so convert num_records from elements into
@@ -6283,7 +6269,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
desc[1] = S_008F04_STRIDE(stride);
desc[2] = 0;
desc[3] = rsrc_word3;
} else if (vs_state) {
} else if (uses_dynamic_inputs) {
desc[0] = 0;
desc[1] = S_008F04_STRIDE(16);
desc[2] = 0;
@@ -6327,7 +6313,6 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer)
/* Mesh shaders don't have vertex descriptors. */
assert(!cmd_buffer->state.mesh_shading);
struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
unsigned vb_desc_alloc_size = util_bitcount(vs->info.vs.vb_desc_usage_mask) * 16;
unsigned vb_offset;
void *vb_ptr;
@@ -6337,7 +6322,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer)
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr))
return;
radv_write_vertex_descriptors(cmd_buffer, pipeline, false, vb_ptr);
radv_write_vertex_descriptors(cmd_buffer, false, vb_ptr);
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset;
@@ -7868,16 +7853,23 @@ radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_g
const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
const struct radv_vs_input_state *src = &pipeline->vs_input_state;
/* Bind the vertex input state from the pipeline when the VS has a prolog and the state isn't
* dynamic. This can happen when the pre-rasterization stages and the vertex input state are from
* two different libraries. Otherwise, if the VS has a prolog, the state is dynamic and there is
* nothing to bind.
*/
if (!vs_shader || !vs_shader->info.vs.has_prolog || (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT))
/* Bind the vertex input state from the pipeline when it's static. */
if (!vs_shader || !vs_shader->info.vs.vb_desc_usage_mask || (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT))
return;
cmd_buffer->state.dynamic_vs_input = *src;
if (!(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE)) {
for (uint32_t i = 0; i < MAX_VBS; i++)
cmd_buffer->vertex_bindings[i].stride = pipeline->binding_stride[i];
}
/* When the vertex input state is static but the VS has been compiled without it (GPL), the
* driver needs to compile a VS prolog.
*/
if (!vs_shader->info.vs.has_prolog)
return;
cmd_buffer->state.vbo_misaligned_mask = 0;
cmd_buffer->state.vbo_unaligned_mask = 0;
cmd_buffer->state.vbo_misaligned_mask_invalid = src->attribute_mask;
@@ -8384,8 +8376,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
cmd_buffer->state.uses_vrs = graphics_pipeline->uses_vrs;
cmd_buffer->state.uses_vrs_attachment = graphics_pipeline->uses_vrs_attachment;
cmd_buffer->state.uses_vrs_coarse_shading = graphics_pipeline->uses_vrs_coarse_shading;
cmd_buffer->state.uses_dynamic_vertex_binding_stride =
!!(graphics_pipeline->dynamic_states & (RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_VERTEX_INPUT));
break;
}
default:
@@ -11122,8 +11112,6 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {
cmd_buffer->state.uses_dynamic_patch_control_points = true;
}
cmd_buffer->state.uses_dynamic_vertex_binding_stride = true;
}
/* MUST inline this function to avoid massive perf loss in drawoverhead */
@@ -13854,7 +13842,6 @@ radv_reset_pipeline_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoin
cmd_buffer->state.rast_prim = 0;
cmd_buffer->state.uses_out_of_order_rast = false;
cmd_buffer->state.uses_vrs_attachment = false;
cmd_buffer->state.uses_dynamic_vertex_binding_stride = false;
}
if (cmd_buffer->state.emitted_graphics_pipeline) {
radv_bind_custom_blend_mode(cmd_buffer, 0);

View File

@@ -448,7 +448,6 @@ struct radv_cmd_state {
bool uses_vrs_attachment;
bool uses_vrs_coarse_shading;
bool uses_dynamic_patch_control_points;
bool uses_dynamic_vertex_binding_stride;
bool uses_fbfetch_output;
};
@@ -705,9 +704,7 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors,
void *vb_ptr);
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, bool full_null_descriptors, void *vb_ptr);
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
VkAccessFlags2 src_flags, const struct radv_image *image);

View File

@@ -2339,27 +2339,24 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
params->dynamic_vs_input = layout->bind_vbo_mask && vs->info.vs.dynamic_inputs;
if (layout->bind_vbo_mask) {
const struct radv_vs_input_state *vs_state =
vs->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
const struct radv_vs_input_state *vs_state = &cmd_buffer->state.dynamic_vs_input;
uint32_t mask = vs->info.vs.vb_desc_usage_mask;
unsigned vb_desc_alloc_size = util_bitcount(mask) * 16;
radv_write_vertex_descriptors(cmd_buffer, graphics_pipeline, true, *upload_data);
radv_write_vertex_descriptors(cmd_buffer, true, *upload_data);
uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size);
unsigned idx = 0;
while (mask) {
unsigned i = u_bit_scan(&mask);
const unsigned binding =
vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
: (vs->info.vs.use_per_attribute_vb_descs ? graphics_pipeline->attrib_bindings[i] : i);
const uint32_t attrib_end =
vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : graphics_pipeline->attrib_ends[i];
const unsigned binding = vs_state->bindings[i];
const uint32_t attrib_end = vs_state->offsets[i] + vs_state->format_sizes[i];
const uint32_t attrib_index_offset = vs_state->attrib_index_offset[i];
params->vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
vbo_info[2 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
vbo_info[2 * idx + 1] = graphics_pipeline->attrib_index_offset[i] | (attrib_end << 16);
vbo_info[2 * idx + 1] = attrib_index_offset | (attrib_end << 16);
++idx;
}
params->vbo_cnt = idx;

View File

@@ -2927,19 +2927,6 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct r
}
if (vs->info.vs.use_per_attribute_vb_descs) {
u_foreach_bit (i, state->vi->attributes_valid) {
uint32_t binding = state->vi->attributes[i].binding;
uint32_t offset = state->vi->attributes[i].offset;
VkFormat format = state->vi->attributes[i].format;
pipeline->attrib_ends[i] = offset + vk_format_get_blocksize(format);
pipeline->attrib_bindings[i] = binding;
if (state->vi->bindings[binding].stride) {
pipeline->attrib_index_offset[i] = offset / state->vi->bindings[binding].stride;
}
}
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum radeon_family family = pdev->info.family;
const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
@@ -2991,6 +2978,10 @@ radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct r
pipeline->vs_input_state.nontrivial_formats |= BITFIELD_BIT(i);
}
}
} else {
u_foreach_bit (i, vs->info.vs.vb_desc_usage_mask) {
pipeline->vs_input_state.bindings[i] = i;
}
}
}

View File

@@ -96,9 +96,6 @@ struct radv_graphics_pipeline {
struct radv_multisample_state ms;
struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
uint32_t binding_stride[MAX_VBS];
uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
uint32_t db_render_control;
/* Last pre-PS API stage */