From 9ddd296cd387bae3adfceb66af91e965fdf10f08 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 31 Jan 2023 22:15:11 +0100 Subject: [PATCH] anv: implement VK_EXT_vertex_input_dynamic_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Lionel Landwerlin Reviewed-by: Tapani Pälli Part-of: --- docs/features.txt | 2 +- src/intel/vulkan/anv_cmd_buffer.c | 3 ++- src/intel/vulkan/anv_device.c | 8 ++++++++ src/intel/vulkan/anv_pipeline.c | 7 +------ src/intel/vulkan/anv_private.h | 4 ++-- src/intel/vulkan/genX_cmd_buffer.c | 11 ++++++++--- src/intel/vulkan/genX_cmd_draw_helpers.h | 4 +++- src/intel/vulkan/genX_pipeline.c | 23 +++++++++++++++++------ src/intel/vulkan/gfx8_cmd_buffer.c | 13 +++++++++++-- 9 files changed, 53 insertions(+), 22 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 3823abe5681..08c8a5ec546 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -590,7 +590,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_shader_module_identifier DONE (anv, radv, tu, v3dv) VK_EXT_transform_feedback DONE (anv, lvp, radv, tu, vn) VK_EXT_vertex_attribute_divisor DONE (anv, dzn, panvk, radv, lvp, tu, v3dv, vn) - VK_EXT_vertex_input_dynamic_state DONE (lvp, radv, tu) + VK_EXT_vertex_input_dynamic_state DONE (anv, lvp, radv, tu) VK_EXT_ycbcr_image_arrays DONE (anv, radv) VK_ANDROID_external_memory_android_hardware_buffer DONE (anv, radv, vn) VK_ANDROID_native_buffer DONE (anv, radv, tu, v3dv, vn) diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index f7589b13a76..d5b74466090 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -102,6 +102,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool, cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations = &cmd_buffer->state.gfx.sample_locations; + cmd_buffer->vk.dynamic_graphics_state.vi = + &cmd_buffer->state.gfx.vertex_input; 
cmd_buffer->batch.status = VK_SUCCESS; cmd_buffer->generation_batch.status = VK_SUCCESS; @@ -384,7 +386,6 @@ void anv_CmdBindPipeline( return; cmd_buffer->state.gfx.pipeline = gfx_pipeline; - cmd_buffer->state.gfx.vb_dirty |= gfx_pipeline->vb_used; cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE; anv_foreach_stage(stage, gfx_pipeline->active_stages) { diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 170a0b8b76d..cc085fbdd20 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -349,6 +349,7 @@ get_device_extensions(const struct anv_physical_device *device, .EXT_tooling_info = true, .EXT_transform_feedback = true, .EXT_vertex_attribute_divisor = true, + .EXT_vertex_input_dynamic_state = true, .EXT_ycbcr_image_arrays = true, #ifdef ANDROID .ANDROID_external_memory_android_hardware_buffer = true, @@ -1804,6 +1805,13 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = + (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *) ext; + features->vertexInputDynamicState = true; + break; + } + default: anv_debug_ignored_stype(ext->sType); break; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 1250884aa09..ec8584243de 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -2261,6 +2261,7 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, assert(device->physical->vk.supported_extensions.NV_mesh_shader || device->physical->vk.supported_extensions.EXT_mesh_shader); + pipeline->dynamic_state.vi = &pipeline->vertex_input; pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations; vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state); @@ -2276,12 +2277,6 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, if (anv_pipeline_is_primitive(pipeline)) 
{ const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); - const uint64_t inputs_read = vs_prog_data->inputs_read; - - u_foreach_bit(a, state->vi->attributes_valid) { - if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a)) - pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding); - } /* The total number of vertex elements we need to program. We might need * a couple more to implement some of the draw parameters. diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a7ab99d39e6..f22cd789d47 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2533,6 +2533,7 @@ struct anv_cmd_graphics_state { uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */ uint32_t index_offset; + struct vk_vertex_input_state vertex_input; struct vk_sample_locations_state sample_locations; bool object_preemption; @@ -3069,6 +3070,7 @@ struct anv_graphics_pipeline { VkShaderStageFlags active_stages; + struct vk_vertex_input_state vertex_input; struct vk_sample_locations_state sample_locations; struct vk_dynamic_graphics_state dynamic_state; @@ -3085,8 +3087,6 @@ struct anv_graphics_pipeline { bool force_fragment_thread_dispatch; bool uses_xfb; - uint32_t vb_used; - /* Number of VERTEX_ELEMENT_STATE input elements used by the shader */ uint32_t vs_input_elements; diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 86d45d3f102..81c9d07e369 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -3297,9 +3297,14 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) */ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) - vb_emit |= pipeline->vb_used; + /* Check what vertex buffers have been rebound against the set of bindings + * being used by the current set of vertex 
attributes. + */ + uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & dyn->vi->bindings_valid; + /* If the pipeline changed, then we have to consider all the valid bindings. */ + if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) + vb_emit |= dyn->vi->bindings_valid; if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); diff --git a/src/intel/vulkan/genX_cmd_draw_helpers.h b/src/intel/vulkan/genX_cmd_draw_helpers.h index 25e199dc946..c30333588b0 100644 --- a/src/intel/vulkan/genX_cmd_draw_helpers.h +++ b/src/intel/vulkan/genX_cmd_draw_helpers.h @@ -109,10 +109,12 @@ update_dirty_vbs_for_gfx8_vb_flush(struct anv_cmd_buffer *cmd_buffer, uint32_t access_type) { #if GFX_VER == 9 + const struct vk_dynamic_graphics_state *dyn = + &cmd_buffer->vk.dynamic_graphics_state; struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); - uint64_t vb_used = pipeline->vb_used; + uint64_t vb_used = dyn->vi->bindings_valid; if (vs_prog_data->uses_firstvertex || vs_prog_data->uses_baseinstance) vb_used |= 1ull << ANV_SVGS_VB_INDEX; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index ed7f19b46f4..05949421de9 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -183,11 +183,17 @@ genX(emit_vertex_input)(struct anv_batch *batch, static void emit_vertex_input(struct anv_graphics_pipeline *pipeline, + const struct vk_graphics_pipeline_state *state, const struct vk_vertex_input_state *vi) { - genX(emit_vertex_input)(&pipeline->base.batch, - pipeline->vertex_input_data, - pipeline, vi); + /* Only pack the VERTEX_ELEMENT_STATE if not dynamic so we can just memcpy + * everything in gfx8_cmd_buffer.c + */ + if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI)) { + genX(emit_vertex_input)(&pipeline->base.batch, + pipeline->vertex_input_data, + 
pipeline, vi); + } const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); const bool needs_svgs_elem = pipeline->svgs_count > 1 || @@ -195,6 +201,9 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline, const uint32_t id_slot = pipeline->vs_input_elements; const uint32_t drawid_slot = id_slot + needs_svgs_elem; if (pipeline->svgs_count > 0) { + assert(pipeline->vertex_input_elems >= pipeline->svgs_count); + uint32_t slot_offset = + pipeline->vertex_input_elems - pipeline->svgs_count; if (needs_svgs_elem) { #if GFX_VER < 11 /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum: @@ -227,8 +236,9 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline, .Component3Control = VFCOMP_STORE_0, }; GENX(VERTEX_ELEMENT_STATE_pack)(NULL, - &pipeline->vertex_input_data[id_slot * 2], + &pipeline->vertex_input_data[slot_offset * 2], &element); + slot_offset++; anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) { vfi.VertexElementIndex = id_slot; @@ -251,8 +261,9 @@ emit_vertex_input(struct anv_graphics_pipeline *pipeline, .Component3Control = VFCOMP_STORE_0, }; GENX(VERTEX_ELEMENT_STATE_pack)(NULL, - &pipeline->vertex_input_data[drawid_slot * 2], + &pipeline->vertex_input_data[slot_offset * 2], &element); + slot_offset++; anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) { vfi.VertexElementIndex = drawid_slot; @@ -1872,7 +1883,7 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline, #endif if (anv_pipeline_is_primitive(pipeline)) { - emit_vertex_input(pipeline, state->vi); + emit_vertex_input(pipeline, state, state->vi); emit_3dstate_vs(pipeline); emit_3dstate_hs_ds(pipeline, state->ts); diff --git a/src/intel/vulkan/gfx8_cmd_buffer.c b/src/intel/vulkan/gfx8_cmd_buffer.c index 7d06e871f27..1b3bde66e3b 100644 --- a/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/src/intel/vulkan/gfx8_cmd_buffer.c @@ -392,7 +392,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) 
const struct vk_dynamic_graphics_state *dyn = &cmd_buffer->vk.dynamic_graphics_state; - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { + if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI)) { const uint32_t ve_count = pipeline->vs_input_elements + pipeline->svgs_count; const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count); @@ -403,12 +404,20 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) if (ve_count == 0) { memcpy(p + 1, cmd_buffer->device->empty_vs_input, sizeof(cmd_buffer->device->empty_vs_input)); - } else { + } else if (ve_count == pipeline->vertex_input_elems) { /* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so * everything is in pipeline->vertex_input_data and we can just * memcpy */ memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count); + } else { + /* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */ + genX(emit_vertex_input)(&cmd_buffer->batch, p + 1, + pipeline, dyn->vi); + /* Then append the VERTEX_ELEMENT_STATE for the draw parameters */ + memcpy(p + 1 + 2 * pipeline->vs_input_elements, + pipeline->vertex_input_data, + 4 * 2 * pipeline->vertex_input_elems); } } }