radv: stop storing DGC info as part of the VBO descriptors
This is no longer needed, and it's possible to just load and store the bindings that aren't updated by DGC.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30918>
This commit is contained in:

committed by
Marge Bot

parent
57aa34a30d
commit
fbb1ed66e4
@@ -6181,7 +6181,7 @@ radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct r
|
|||||||
|
|
||||||
void
|
void
|
||||||
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
||||||
bool full_null_descriptors, void *vb_ptr)
|
void *vb_ptr)
|
||||||
{
|
{
|
||||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||||
@@ -6213,13 +6213,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
|
|||||||
uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
|
uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
|
||||||
|
|
||||||
if (!buffer) {
|
if (!buffer) {
|
||||||
if (full_null_descriptors) {
|
if (uses_dynamic_inputs) {
|
||||||
/* Put all the info in for the DGC generation shader in case the VBO gets overridden. */
|
|
||||||
desc[0] = 0;
|
|
||||||
desc[1] = S_008F04_STRIDE(stride);
|
|
||||||
desc[2] = 0;
|
|
||||||
desc[3] = rsrc_word3;
|
|
||||||
} else if (uses_dynamic_inputs) {
|
|
||||||
/* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need
|
/* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need
|
||||||
* to include the format/word3 so that the alpha channel is 1 for formats without an
|
* to include the format/word3 so that the alpha channel is 1 for formats without an
|
||||||
* alpha channel.
|
* alpha channel.
|
||||||
@@ -6274,14 +6268,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
|
|||||||
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
|
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
|
||||||
* GFX10.3 but it doesn't hurt.
|
* GFX10.3 but it doesn't hurt.
|
||||||
*/
|
*/
|
||||||
if (full_null_descriptors) {
|
if (uses_dynamic_inputs) {
|
||||||
/* Put all the info in for the DGC generation shader in case the VBO gets overridden.
|
|
||||||
*/
|
|
||||||
desc[0] = 0;
|
|
||||||
desc[1] = S_008F04_STRIDE(stride);
|
|
||||||
desc[2] = 0;
|
|
||||||
desc[3] = rsrc_word3;
|
|
||||||
} else if (uses_dynamic_inputs) {
|
|
||||||
desc[0] = 0;
|
desc[0] = 0;
|
||||||
desc[1] = S_008F04_STRIDE(16);
|
desc[1] = S_008F04_STRIDE(16);
|
||||||
desc[2] = 0;
|
desc[2] = 0;
|
||||||
@@ -6334,7 +6321,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr))
|
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, vb_desc_alloc_size, &vb_offset, &vb_ptr))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
radv_write_vertex_descriptors(cmd_buffer, vs, false, vb_ptr);
|
radv_write_vertex_descriptors(cmd_buffer, vs, vb_ptr);
|
||||||
|
|
||||||
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||||
va += vb_offset;
|
va += vb_offset;
|
||||||
|
@@ -705,7 +705,7 @@ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const
|
|||||||
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
|
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
|
||||||
|
|
||||||
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
||||||
bool full_null_descriptors, void *vb_ptr);
|
void *vb_ptr);
|
||||||
|
|
||||||
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
|
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stages,
|
||||||
VkAccessFlags2 src_flags, const struct radv_image *image);
|
VkAccessFlags2 src_flags, const struct radv_image *image);
|
||||||
|
@@ -1466,28 +1466,27 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
|
|||||||
nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
|
nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
|
||||||
nir_load_var(b, num_records), rsrc_word3};
|
nir_load_var(b, num_records), rsrc_word3};
|
||||||
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
|
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
|
||||||
}
|
|
||||||
nir_pop_if(b, NULL);
|
|
||||||
|
|
||||||
/* On GFX9, it seems bounds checking is disabled if both
|
/* On GFX9, it seems bounds checking is disabled if both
|
||||||
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
|
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
|
||||||
* GFX10.3 but it doesn't hurt.
|
* GFX10.3 but it doesn't hurt.
|
||||||
*/
|
*/
|
||||||
nir_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
|
nir_def *buf_va =
|
||||||
nir_def *buf_va =
|
nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
|
||||||
nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
|
nir_push_if(b, nir_ior(b, nir_ieq_imm(b, nir_load_var(b, num_records), 0), nir_ieq_imm(b, buf_va, 0)));
|
||||||
nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0)));
|
{
|
||||||
{
|
nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1);
|
||||||
nir_def *has_dynamic_vs_input = nir_ieq_imm(b, load_param8(b, dynamic_vs_input), 1);
|
|
||||||
nir_def *new_vbo_data[4];
|
|
||||||
|
|
||||||
new_vbo_data[0] = nir_imm_int(b, 0);
|
new_vbo_data[0] = nir_imm_int(b, 0);
|
||||||
new_vbo_data[1] = nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0));
|
new_vbo_data[1] =
|
||||||
new_vbo_data[2] = nir_imm_int(b, 0);
|
nir_bcsel(b, has_dynamic_vs_input, nir_imm_int(b, S_008F04_STRIDE(16)), nir_imm_int(b, 0));
|
||||||
new_vbo_data[3] =
|
new_vbo_data[2] = nir_imm_int(b, 0);
|
||||||
nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0));
|
new_vbo_data[3] =
|
||||||
|
nir_bcsel(b, has_dynamic_vs_input, nir_channel(b, nir_load_var(b, vbo_data), 3), nir_imm_int(b, 0));
|
||||||
|
|
||||||
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
|
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
|
||||||
|
}
|
||||||
|
nir_pop_if(b, NULL);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, NULL);
|
nir_pop_if(b, NULL);
|
||||||
|
|
||||||
@@ -2345,7 +2344,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
|
|||||||
uint32_t mask = vs->info.vs.vb_desc_usage_mask;
|
uint32_t mask = vs->info.vs.vb_desc_usage_mask;
|
||||||
unsigned vb_desc_alloc_size = util_bitcount(mask) * 16;
|
unsigned vb_desc_alloc_size = util_bitcount(mask) * 16;
|
||||||
|
|
||||||
radv_write_vertex_descriptors(cmd_buffer, vs, true, *upload_data);
|
radv_write_vertex_descriptors(cmd_buffer, vs, *upload_data);
|
||||||
|
|
||||||
uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size);
|
uint32_t *vbo_info = (uint32_t *)((char *)*upload_data + vb_desc_alloc_size);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user