radv: pass more VBO info as parameters to the DGC prepare shader
Instead of storing the stride and rsrc_word3 as part of the VBO descriptors, pass them to the DGC prepare shader as parameters. This is cleaner and will allow us to simplify VBO handling in DGC.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30918>
This commit is contained in:

committed by
Marge Bot

parent
a5dbd62267
commit
57aa34a30d
@@ -6144,6 +6144,41 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag
|
||||
cmd_buffer->push_constant_stages |= dirty_stages;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, uint32_t vbo_idx)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input;
|
||||
uint32_t rsrc_word3;
|
||||
|
||||
if (vs->info.vs.dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(vbo_idx))) {
|
||||
const struct ac_vtx_format_info *vtx_info_table =
|
||||
ac_get_vtx_format_info_table(pdev->info.gfx_level, pdev->info.family);
|
||||
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[vbo_idx]];
|
||||
unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1];
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format);
|
||||
} else {
|
||||
rsrc_word3 =
|
||||
vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf);
|
||||
}
|
||||
} else {
|
||||
rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT);
|
||||
} else {
|
||||
rsrc_word3 |=
|
||||
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
}
|
||||
}
|
||||
|
||||
return rsrc_word3;
|
||||
}
|
||||
|
||||
void
|
||||
radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
||||
bool full_null_descriptors, void *vb_ptr)
|
||||
@@ -6151,20 +6186,16 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
enum amd_gfx_level chip = pdev->info.gfx_level;
|
||||
enum radeon_family family = pdev->info.family;
|
||||
unsigned desc_index = 0;
|
||||
uint32_t mask = vs->info.vs.vb_desc_usage_mask;
|
||||
uint64_t va;
|
||||
const bool uses_dynamic_inputs = vs->info.vs.dynamic_inputs;
|
||||
const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input;
|
||||
|
||||
const struct ac_vtx_format_info *vtx_info_table =
|
||||
uses_dynamic_inputs ? ac_get_vtx_format_info_table(chip, family) : NULL;
|
||||
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4];
|
||||
uint32_t offset, rsrc_word3;
|
||||
uint32_t offset;
|
||||
|
||||
if (uses_dynamic_inputs && !(vi_state->attribute_mask & BITFIELD_BIT(i))) {
|
||||
/* No vertex attribute description given: assume that the shader doesn't use this
|
||||
@@ -6179,26 +6210,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st
|
||||
struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding];
|
||||
unsigned num_records;
|
||||
const unsigned stride = cmd_buffer->vertex_bindings[binding].stride;
|
||||
|
||||
if (uses_dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(i))) {
|
||||
const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[i]];
|
||||
unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1];
|
||||
|
||||
if (chip >= GFX10) {
|
||||
rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format);
|
||||
} else {
|
||||
rsrc_word3 =
|
||||
vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf);
|
||||
}
|
||||
} else {
|
||||
rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
if (chip >= GFX10)
|
||||
rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT);
|
||||
else
|
||||
rsrc_word3 |=
|
||||
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
}
|
||||
uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
|
||||
|
||||
if (!buffer) {
|
||||
if (full_null_descriptors) {
|
||||
|
@@ -784,4 +784,7 @@ void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);
|
||||
|
||||
uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);
|
||||
|
||||
/* Returns the rsrc_word3 value for the vertex buffer descriptor of VBO index
 * vbo_idx, derived from the command buffer's vertex input state and the
 * vertex shader vs.
 */
uint32_t radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs,
|
||||
uint32_t vbo_idx);
|
||||
|
||||
#endif /* RADV_CMD_BUFFER_H */
|
||||
|
@@ -15,6 +15,8 @@
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_shader_module.h"
|
||||
|
||||
/* Size in bytes of the per-VBO data uploaded for the DGC prepare shader.
 *
 * The shader addresses this data as vbo_cnt * 16 bytes followed by
 * (DGC_VBO_SIZE - 16) bytes of parameters per VBO. The parameters are four
 * dwords: offset/flags, attribute offsets, stride and rsrc_word3.
 * NOTE(review): the leading 16 bytes per VBO appear to be the VBO descriptor
 * itself -- confirm against the upload code.
 */
#define DGC_VBO_SIZE 32
|
||||
|
||||
/* The DGC command buffer layout is quite complex, here's some explanations:
|
||||
*
|
||||
* Without the DGC preamble, the default layout looks like:
|
||||
@@ -1239,7 +1241,7 @@ dgc_get_pc_params(struct dgc_cmdbuf *cs)
|
||||
nir_builder *b = cs->b;
|
||||
|
||||
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
|
||||
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, DGC_VBO_SIZE);
|
||||
|
||||
params.buf = radv_meta_load_descriptor(b, 0, 0);
|
||||
params.offset = nir_iadd_imm(b, param_offset, layout->bind_pipeline ? MAX_SETS * 4 : 0);
|
||||
@@ -1382,8 +1384,8 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
|
||||
nir_push_if(b, vbo_override);
|
||||
{
|
||||
nir_def *vbo_offset_offset =
|
||||
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), 8));
|
||||
nir_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
|
||||
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), DGC_VBO_SIZE - 16));
|
||||
nir_def *vbo_over_data = nir_load_ssbo(b, 4, 32, param_buf, vbo_offset_offset);
|
||||
nir_def *stream_offset = nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF);
|
||||
nir_def *stream_data = nir_build_load_global(b, 4, 32, nir_iadd(b, stream_addr, nir_u2u64(b, stream_offset)),
|
||||
.access = ACCESS_NON_WRITEABLE);
|
||||
@@ -1395,7 +1397,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
|
||||
if (layout->vertex_dynamic_stride) {
|
||||
stride = nir_channel(b, stream_data, 3);
|
||||
} else {
|
||||
stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
|
||||
stride = nir_channel(b, vbo_over_data, 2);
|
||||
}
|
||||
|
||||
nir_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
|
||||
@@ -1451,7 +1453,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
|
||||
nir_def *rsrc_word3 = nir_channel(b, vbo_over_data, 3);
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
|
||||
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
|
||||
@@ -2295,7 +2297,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
|
||||
struct radv_shader *vs = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX);
|
||||
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * 24 : 0;
|
||||
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * DGC_VBO_SIZE : 0;
|
||||
|
||||
*upload_size = MAX2(*upload_size + vb_size, 16);
|
||||
|
||||
@@ -2353,10 +2355,14 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
|
||||
const unsigned binding = vi_state->bindings[i];
|
||||
const uint32_t attrib_end = vi_state->offsets[i] + vi_state->format_sizes[i];
|
||||
const uint32_t attrib_index_offset = vi_state->attrib_index_offset[i];
|
||||
const uint32_t stride = cmd_buffer->vertex_bindings[binding].stride;
|
||||
const uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i);
|
||||
|
||||
params->vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx;
|
||||
vbo_info[2 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
|
||||
vbo_info[2 * idx + 1] = attrib_index_offset | (attrib_end << 16);
|
||||
vbo_info[4 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding];
|
||||
vbo_info[4 * idx + 1] = attrib_index_offset | (attrib_end << 16);
|
||||
vbo_info[4 * idx + 2] = stride;
|
||||
vbo_info[4 * idx + 3] = rsrc_word3;
|
||||
++idx;
|
||||
}
|
||||
params->vbo_cnt = idx;
|
||||
|
Reference in New Issue
Block a user