radv: stop storing DGC info as part of the VBO descriptors

This is no longer needed, and it's possible to just load and store the
bindings that aren't updated by DGC.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30918>
This commit is contained in:
Samuel Pitoiset
2024-08-28 17:16:45 +02:00
committed by Marge Bot
parent 57aa34a30d
commit fbb1ed66e4
3 changed files with 24 additions and 38 deletions

View File

@@ -6181,7 +6181,7 @@ radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct r
void void
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
bool full_null_descriptors, void *vb_ptr) void *vb_ptr)
{ {
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -6213,13 +6213,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i); uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
if (!buffer) { if (!buffer) {
if (full_null_descriptors) { if (uses_dynamic_inputs) {
/* Put all the info in for the DGC generation shader in case the VBO gets overridden. */
desc[0] = 0;
desc[1] = S_008F04_STRIDE(stride);
desc[2] = 0;
desc[3] = rsrc_word3;
} else if (uses_dynamic_inputs) {
/* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need /* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need
* to include the format/word3 so that the alpha channel is 1 for formats without an * to include the format/word3 so that the alpha channel is 1 for formats without an
* alpha channel. * alpha channel.
@@ -6274,14 +6268,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
* GFX10.3 but it doesn't hurt. * GFX10.3 but it doesn't hurt.
*/ */
if (full_null_descriptors) { if (uses_dynamic_inputs) {
/* Put all the info in for the DGC generation shader in case the VBO gets overridden.
*/
desc[0] = 0;
desc[1] = S_008F04_STRIDE(stride);
desc[2] = 0;
desc[3] = rsrc_word3;
} else if (uses_dynamic_inputs) {
desc[0] = 0; desc[0] = 0;
desc[1] = S_008F04_STRIDE(16); desc[1] = S_008F04_STRIDE(16);
desc[2] = 0; desc[2] = 0;
@@ -6334,7 +6321,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer)
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr)) if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr))
return; return;
radv_write_vertex_descriptors(cmd_buffer, vs, false, vb_ptr); radv_write_vertex_descriptors(cmd_buffer, vs, vb_ptr);
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
va += vb_offset; va += vb_offset;

View File

@@ -705,7 +705,7 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs); unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
bool full_null_descriptors, void *vb_ptr); void *vb_ptr);
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages, enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
VkAccessFlags2 src_flags, const struct radv_image *image); VkAccessFlags2 src_flags, const struct radv_image *image);

View File

@@ -1466,28 +1466,27 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
nir_load_var(b, num_records), rsrc_word3}; nir_load_var(b, num_records), rsrc_word3};
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
/* On GFX9, it seems bounds checking is disabled if both /* On GFX9, it seems bounds checking is disabled if both
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
* GFX10.3 but it doesn't hurt. * GFX10.3 but it doesn't hurt.
*/ */
nir_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2); nir_def *buf_va =
nir_def *buf_va = nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull); nir_push_if(b, nir_ior(b, nir_ieq_imm(b, nir_load_var(b, num_records), 0), nir_ieq_imm(b, buf_va, 0)));
nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0))); {
{ nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1);
nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1);
nir_def *new_vbo_data[4];
new_vbo_data[0] = nir_imm_int(b, 0); new_vbo_data[0] = nir_imm_int(b, 0);
new_vbo_data[1] = nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0)); new_vbo_data[1] =
new_vbo_data[2] = nir_imm_int(b, 0); nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0));
new_vbo_data[3] = new_vbo_data[2] = nir_imm_int(b, 0);
nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0)); new_vbo_data[3] =
nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0));
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf); nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
} }
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
@@ -2345,7 +2344,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
uint32_t mask = vs->info.vs.vb_desc_usage_mask; uint32_t mask = vs->info.vs.vb_desc_usage_mask;
unsigned vb_desc_alloc_size = util_bitcount(mask) * 16; unsigned vb_desc_alloc_size = util_bitcount(mask) * 16;
radv_write_vertex_descriptors(cmd_buffer, vs, true, *upload_data); radv_write_vertex_descriptors(cmd_buffer, vs, *upload_data);
uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size); uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size);