From fbb1ed66e42b577c089fdf02eb34152e8154202d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 28 Aug 2024 17:16:45 +0200 Subject: [PATCH] radv: stop storing DGC info as part of the VBO descriptors This is no longer needed and it's possible to just load and store the bindings that aren't updated by DGC. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 21 ++-------- src/amd/vulkan/radv_cmd_buffer.h | 2 +- .../vulkan/radv_device_generated_commands.c | 39 +++++++++---------- 3 files changed, 24 insertions(+), 38 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index cc5c39c5c6b..76d9038ed17 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6181,7 +6181,7 @@ radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct r void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, - bool full_null_descriptors, void *vb_ptr) + void *vb_ptr) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); @@ -6213,13 +6213,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i); if (!buffer) { - if (full_null_descriptors) { - /* Put all the info in for the DGC generation shader in case the VBO gets overridden. */ - desc[0] = 0; - desc[1] = S_008F04_STRIDE(stride); - desc[2] = 0; - desc[3] = rsrc_word3; - } else if (uses_dynamic_inputs) { + if (uses_dynamic_inputs) { /* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need * to include the format/word3 so that the alpha channel is 1 for formats without an * alpha channel. @@ -6274,14 +6268,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st * num_records and stride are zero. 
This doesn't seem necessary on GFX8, GFX10 and * GFX10.3 but it doesn't hurt. */ - if (full_null_descriptors) { - /* Put all the info in for the DGC generation shader in case the VBO gets overridden. - */ - desc[0] = 0; - desc[1] = S_008F04_STRIDE(stride); - desc[2] = 0; - desc[3] = rsrc_word3; - } else if (uses_dynamic_inputs) { + if (uses_dynamic_inputs) { desc[0] = 0; desc[1] = S_008F04_STRIDE(16); desc[2] = 0; @@ -6334,7 +6321,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer) if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr)) return; - radv_write_vertex_descriptors(cmd_buffer, vs, false, vb_ptr); + radv_write_vertex_descriptors(cmd_buffer, vs, vb_ptr); va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va += vb_offset; diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 0100e8f1078..e0273f5b978 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -705,7 +705,7 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs); void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, - bool full_null_descriptors, void *vb_ptr); + void *vb_ptr); enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages, VkAccessFlags2 src_flags, const struct radv_image *image); diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index fba15291ce4..909748625dc 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -1466,28 +1466,27 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), 
va_hi), nir_load_var(b, num_records), rsrc_word3}; nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); - } - nir_pop_if(b, NULL); - /* On GFX9, it seems bounds checking is disabled if both - * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and - * GFX10.3 but it doesn't hurt. - */ - nir_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2); - nir_def *buf_va = - nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull); - nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0))); - { - nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1); - nir_def *new_vbo_data[4]; + /* On GFX9, it seems bounds checking is disabled if both + * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and + * GFX10.3 but it doesn't hurt. + */ + nir_def *buf_va = + nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull); + nir_push_if(b, nir_ior(b, nir_ieq_imm(b, nir_load_var(b, num_records), 0), nir_ieq_imm(b, buf_va, 0))); + { + nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1); - new_vbo_data[0] = nir_imm_int(b, 0); - new_vbo_data[1] = nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0)); - new_vbo_data[2] = nir_imm_int(b, 0); - new_vbo_data[3] = - nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0)); + new_vbo_data[0] = nir_imm_int(b, 0); + new_vbo_data[1] = + nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0)); + new_vbo_data[2] = nir_imm_int(b, 0); + new_vbo_data[3] = + nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0)); - nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); + nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); + } 
+ nir_pop_if(b, NULL); } nir_pop_if(b, NULL); @@ -2345,7 +2344,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC uint32_t mask = vs->info.vs.vb_desc_usage_mask; unsigned vb_desc_alloc_size = util_bitcount(mask) * 16; - radv_write_vertex_descriptors(cmd_buffer, vs, true, *upload_data); + radv_write_vertex_descriptors(cmd_buffer, vs, *upload_data); uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size);