diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index df53c194d32..cc5c39c5c6b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6144,6 +6144,41 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stag cmd_buffer->push_constant_stages |= dirty_stages; } +uint32_t +radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, uint32_t vbo_idx) /* Builds rsrc_word3 (DST_SEL + format fields) for vertex input vbo_idx. */ +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input; + uint32_t rsrc_word3; + + if (vs->info.vs.dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(vbo_idx))) { /* dynamic inputs, format not flagged nontrivial: take dst_sel/hw_format from the format table */ + const struct ac_vtx_format_info *vtx_info_table = + ac_get_vtx_format_info_table(pdev->info.gfx_level, pdev->info.family); + const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[vbo_idx]]; + unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1]; + + if (pdev->info.gfx_level >= GFX10) { + rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format); + } else { /* below GFX10: hw_format bits 4-6 go to NUM_FORMAT, bits 0-3 to DATA_FORMAT */ + rsrc_word3 = + vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf); + } + } else { /* otherwise: X/Y/Z/W selects passed straight through with a 32-bit UINT format */ + rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + + if (pdev->info.gfx_level >= GFX10) { + rsrc_word3 |= S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT); + } else { + rsrc_word3 |= + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + } + } + + return rsrc_word3; +} + void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, bool full_null_descriptors, void *vb_ptr) @@ -6151,20 
+6186,16 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); const struct radv_physical_device *pdev = radv_device_physical(device); enum amd_gfx_level chip = pdev->info.gfx_level; - enum radeon_family family = pdev->info.family; unsigned desc_index = 0; uint32_t mask = vs->info.vs.vb_desc_usage_mask; uint64_t va; const bool uses_dynamic_inputs = vs->info.vs.dynamic_inputs; const struct radv_vertex_input_state *vi_state = &cmd_buffer->state.vertex_input; - const struct ac_vtx_format_info *vtx_info_table = - uses_dynamic_inputs ? ac_get_vtx_format_info_table(chip, family) : NULL; - while (mask) { unsigned i = u_bit_scan(&mask); uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4]; - uint32_t offset, rsrc_word3; + uint32_t offset; if (uses_dynamic_inputs && !(vi_state->attribute_mask & BITFIELD_BIT(i))) { /* No vertex attribute description given: assume that the shader doesn't use this @@ -6179,26 +6210,7 @@ radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const st struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding]; unsigned num_records; const unsigned stride = cmd_buffer->vertex_bindings[binding].stride; - - if (uses_dynamic_inputs && !(vi_state->nontrivial_formats & BITFIELD_BIT(i))) { - const struct ac_vtx_format_info *vtx_info = &vtx_info_table[vi_state->formats[i]]; - unsigned hw_format = vtx_info->hw_format[vtx_info->num_channels - 1]; - - if (chip >= GFX10) { - rsrc_word3 = vtx_info->dst_sel | S_008F0C_FORMAT_GFX10(hw_format); - } else { - rsrc_word3 = - vtx_info->dst_sel | S_008F0C_NUM_FORMAT((hw_format >> 4) & 0x7) | S_008F0C_DATA_FORMAT(hw_format & 0xf); - } - } else { - rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (chip >= GFX10) - rsrc_word3 |= 
S_008F0C_FORMAT_GFX10(V_008F0C_GFX10_FORMAT_32_UINT); - else - rsrc_word3 |= - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - } + uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i); if (!buffer) { if (full_null_descriptors) { diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 741ec8068b5..0100e8f1078 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -784,4 +784,7 @@ void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer); uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx); +uint32_t radv_get_rsrc3_vbo_desc(const struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs, + uint32_t vbo_idx); /* returns rsrc_word3 for vertex input vbo_idx; also called from radv_prepare_dgc_graphics */ + #endif /* RADV_CMD_BUFFER_H */ diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index 7bfc59e1fc6..fba15291ce4 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -15,6 +15,8 @@ #include "vk_common_entrypoints.h" #include "vk_shader_module.h" +#define DGC_VBO_SIZE 32 /* bytes per VBO in the DGC upload: 16 bytes (presumably the descriptor) + a 4-dword vbo_info entry */ + /* The DGC command buffer layout is quite complex, here's some explanations: * * Without the DGC preamble, the default layout looks like: @@ -1239,7 +1241,7 @@ dgc_get_pc_params(struct dgc_cmdbuf *cs) nir_builder *b = cs->b; nir_def *vbo_cnt = load_param8(b, vbo_cnt); - nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24); + nir_def *param_offset = nir_imul_imm(b, vbo_cnt, DGC_VBO_SIZE); params.buf = radv_meta_load_descriptor(b, 0, 0); params.offset = nir_iadd_imm(b, param_offset, layout->bind_pipeline ? 
MAX_SETS * 4 : 0); @@ -1382,8 +1384,8 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo nir_push_if(b, vbo_override); { nir_def *vbo_offset_offset = - nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), 8)); - nir_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset); + nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), DGC_VBO_SIZE - 16)); + nir_def *vbo_over_data = nir_load_ssbo(b, 4, 32, param_buf, vbo_offset_offset); nir_def *stream_offset = nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF); nir_def *stream_data = nir_build_load_global(b, 4, 32, nir_iadd(b, stream_addr, nir_u2u64(b, stream_offset)), .access = ACCESS_NON_WRITEABLE); @@ -1395,7 +1397,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo if (layout->vertex_dynamic_stride) { stride = nir_channel(b, stream_data, 3); } else { - stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14); + stride = nir_channel(b, vbo_over_data, 2); } nir_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31); @@ -1451,7 +1453,7 @@ dgc_emit_vertex_buffer(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *vbo } nir_pop_if(b, NULL); - nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3); + nir_def *rsrc_word3 = nir_channel(b, vbo_over_data, 3); if (pdev->info.gfx_level >= GFX10) { nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW), nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED)); @@ -2295,7 +2297,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); struct radv_shader *vs = radv_get_shader(graphics_pipeline->base.shaders, MESA_SHADER_VERTEX); - 
unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * 24 : 0; + unsigned vb_size = layout->bind_vbo_mask ? util_bitcount(vs->info.vs.vb_desc_usage_mask) * DGC_VBO_SIZE : 0; *upload_size = MAX2(*upload_size + vb_size, 16); @@ -2353,10 +2355,14 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC const unsigned binding = vi_state->bindings[i]; const uint32_t attrib_end = vi_state->offsets[i] + vi_state->format_sizes[i]; const uint32_t attrib_index_offset = vi_state->attrib_index_offset[i]; + const uint32_t stride = cmd_buffer->vertex_bindings[binding].stride; + const uint32_t rsrc_word3 = radv_get_rsrc3_vbo_desc(cmd_buffer, vs, i); params->vbo_bind_mask |= ((layout->bind_vbo_mask >> binding) & 1u) << idx; - vbo_info[2 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding]; - vbo_info[2 * idx + 1] = attrib_index_offset | (attrib_end << 16); + vbo_info[4 * idx] = ((vs->info.vs.use_per_attribute_vb_descs ? 1u : 0u) << 31) | layout->vbo_offsets[binding]; + vbo_info[4 * idx + 1] = attrib_index_offset | (attrib_end << 16); + vbo_info[4 * idx + 2] = stride; + vbo_info[4 * idx + 3] = rsrc_word3; ++idx; } params->vbo_cnt = idx;