radv: pass more VBO info as parameters to the DGC prepare shader

Instead of storing the stride and rsrc_word3 as part of the VBO descriptors,
pass them as parameters to the DGC prepare shader. This is cleaner and will
allow us to simplify VBO handling in DGC.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30918>
This commit is contained in:
Samuel Pitoiset
2024-08-28 16:53:34 +02:00
committed by Marge Bot
parent a5dbd62267
commit 57aa34a30d
3 changed files with 54 additions and 33 deletions

View File

@@ -6144,6 +6144,41 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag
cmd_buffer->push_constant_stages |= dirty_stages;
}
uint32_t
radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, uint32_t vbo_idx)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input;
uint32_t rsrc_word3;
if (vs->info.vs.dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(vbo_idx))) {
const struct ac_vtx_format_info *vtx_info_table =
ac_get_vtx_format_info_table(pdev->info.gfx_level, pdev->info.family);
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[vbo_idx]];
unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1];
if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format);
} else {
rsrc_word3 =
vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf);
}
} else {
rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT);
} else {
rsrc_word3 |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
}
return rsrc_word3;
}
void
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
bool full_null_descriptors, void *vb_ptr)
@@ -6151,20 +6186,16 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
enum radeon_family family = pdev->info.family;
unsigned desc_index = 0;
uint32_t mask = vs->info.vs.vb_desc_usage_mask;
uint64_t va;
const bool uses_dynamic_inputs = vs->info.vs.dynamic_inputs;
const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input;
const struct ac_vtx_format_info *vtx_info_table =
uses_dynamic_inputs ? ac_get_vtx_format_info_table(chip, family) : NULL;
while (mask) {
unsigned i = u_bit_scan(&mask);
uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4];
uint32_t offset, rsrc_word3;
uint32_t offset;
if (uses_dynamic_inputs && !(vi_state->attribute_mask & BITFIELD_BIT(i))) {
/* No vertex attribute description given: assume that the shader doesn't use this
@@ -6179,26 +6210,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding];
unsigned num_records;
const unsigned stride = cmd_buffer->vertex_bindings[binding].stride;
if (uses_dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(i))) {
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[i]];
unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1];
if (chip >= GFX10) {
rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format);
} else {
rsrc_word3 =
vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf);
}
} else {
rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (chip >= GFX10)
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT);
else
rsrc_word3 |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
}
uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
if (!buffer) {
if (full_null_descriptors) {

View File

@@ -784,4 +784,7 @@ void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);
uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);
/* Returns the rsrc_word3 value of the VBO descriptor for vertex input slot vbo_idx, derived from
 * the command buffer's current vertex input state and the given vertex shader.
 */
uint32_t radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
uint32_t vbo_idx);
#endif /* RADV_CMD_BUFFER_H */

View File

@@ -15,6 +15,8 @@
#include "vk_common_entrypoints.h"
#include "vk_shader_module.h"
/* Size in bytes of the upload data reserved per used VBO for the DGC prepare shader. The per-VBO
 * parameter block is read at vbo_cnt * 16 + vbo_idx * (DGC_VBO_SIZE - 16).
 */
#define DGC_VBO_SIZE 32
/* The DGC command buffer layout is quite complex, here's some explanations:
*
* Without the DGC preamble, the default layout looks like:
@@ -1239,7 +1241,7 @@ dgc_get_pc_params(struct dgc_cmdbuf *cs)
nir_builder *b = cs->b;
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, DGC_VBO_SIZE);
params.buf = radv_meta_load_descriptor(b, 0, 0);
params.offset = nir_iadd_imm(b, param_offset, layout->bind_pipeline ? MAX_SETS * 4 : 0);
@@ -1382,8 +1384,8 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
nir_push_if(b, vbo_override);
{
nir_def *vbo_offset_offset =
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), 8));
nir_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), DGC_VBO_SIZE - 16));
nir_def *vbo_over_data = nir_load_ssbo(b, 4, 32, param_buf, vbo_offset_offset);
nir_def *stream_offset = nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF);
nir_def *stream_data = nir_build_load_global(b, 4, 32, nir_iadd(b, stream_addr, nir_u2u64(b, stream_offset)),
.access = ACCESS_NON_WRITEABLE);
@@ -1395,7 +1397,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
if (layout->vertex_dynamic_stride) {
stride = nir_channel(b, stream_data, 3);
} else {
stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
stride = nir_channel(b, vbo_over_data, 2);
}
nir_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
@@ -1451,7 +1453,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
}
nir_pop_if(b, NULL);
nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
nir_def *rsrc_word3 = nir_channel(b, vbo_over_data, 3);
if (pdev->info.gfx_level >= GFX10) {
nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
@@ -2295,7 +2297,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
struct radv_shader *vs = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * 24 : 0;
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * DGC_VBO_SIZE : 0;
*upload_size = MAX2(*upload_size + vb_size, 16);
@@ -2353,10 +2355,14 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
const unsigned binding = vi_state->bindings[i];
const uint32_t attrib_end = vi_state->offsets[i] + vi_state->format_sizes[i];
const uint32_t attrib_index_offset = vi_state->attrib_index_offset[i];
const uint32_t stride = cmd_buffer->vertex_bindings[binding].stride;
const uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
params->vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
vbo_info[2 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
vbo_info[2 * idx + 1] = attrib_index_offset | (attrib_end << 16);
vbo_info[4 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
vbo_info[4 * idx + 1] = attrib_index_offset | (attrib_end << 16);
vbo_info[4 * idx + 2] = stride;
vbo_info[4 * idx + 3] = rsrc_word3;
++idx;
}
params->vbo_cnt = idx;