From 10e2dbb63b9d1f8f35c4fc3f570cd19b3fc03b43 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 15 Jan 2024 18:19:56 +0100 Subject: [PATCH] radv: delay emitting streamout enable at draw time Since Vulkan 1.3.271, the spec allowed vkCmdBeginTransformFeedbackEXT to be called without an active graphics pipeline bound when using shader objects. That means that the last VGT shader would be NULL once VKCTS is updated accordingly. This change delays emitting streamout enable at draw time to make sure the last VGT shader is present, regarldess if ESO is enabled or not. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/meta/radv_meta.c | 4 +- src/amd/vulkan/radv_cmd_buffer.c | 66 +++++++++++++++++--------------- src/amd/vulkan/radv_private.h | 3 +- src/amd/vulkan/radv_query.c | 4 +- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c index 7277ae001a2..81d3d86e8d1 100644 --- a/src/amd/vulkan/meta/radv_meta.c +++ b/src/amd/vulkan/meta/radv_meta.c @@ -63,7 +63,7 @@ radv_suspend_queries(struct radv_meta_saved_state *state, struct radv_cmd_buffer /* Primitives generated queries (legacy). */ if (cmd_buffer->state.active_prims_gen_queries) { cmd_buffer->state.suspend_streamout = true; - radv_emit_streamout_enable(cmd_buffer); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; } /* Primitives generated queries (NGG). */ @@ -106,7 +106,7 @@ radv_resume_queries(const struct radv_meta_saved_state *state, struct radv_cmd_b /* Primitives generated queries (legacy). */ if (cmd_buffer->state.active_prims_gen_queries) { cmd_buffer->state.suspend_streamout = false; - radv_emit_streamout_enable(cmd_buffer); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; } /* Primitives generated queries (NGG). */ diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index bd0b976d349..4487423185d 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -8927,6 +8927,35 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DB_SHADER_CONTROL; } +static void +radv_emit_streamout_enable_state(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_streamout_state *so = &cmd_buffer->state.streamout; + const bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer); + uint32_t enabled_stream_buffers_mask = 0; + + if (streamout_enabled && cmd_buffer->state.last_vgt_shader) { + const struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info; + + enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; + + if (!cmd_buffer->device->physical_device->use_ngg_streamout) { + u_foreach_bit (i, so->enabled_mask) { + radeon_set_context_reg(cmd_buffer->cs, R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 + 16 * i, info->so.strides[i]); + } + } + } + + radeon_set_context_reg_seq(cmd_buffer->cs, R_028B94_VGT_STRMOUT_CONFIG, 2); + radeon_emit(cmd_buffer->cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) | + S_028B94_STREAMOUT_1_EN(streamout_enabled) | + S_028B94_STREAMOUT_2_EN(streamout_enabled) | + S_028B94_STREAMOUT_3_EN(streamout_enabled)); + radeon_emit(cmd_buffer->cs, so->hw_enabled_mask & enabled_stream_buffers_mask); + + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_ENABLE; +} + static void radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { @@ -9020,6 +9049,9 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r if (info->indexed && info->indirect && cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) radv_emit_index_buffer(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_ENABLE) + radv_emit_streamout_enable_state(cmd_buffer); + const uint64_t dynamic_states = cmd_buffer->state.dirty & cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state; if (dynamic_states) { @@ -10889,31 +10921,6 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; } -void -radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) -{ - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t enabled_stream_buffers_mask = 0; - - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); - - if (cmd_buffer->state.last_vgt_shader) { - enabled_stream_buffers_mask = cmd_buffer->state.last_vgt_shader->info.so.enabled_stream_buffers_mask; - } - - radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); - radeon_emit(cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) | - S_028B94_STREAMOUT_1_EN(streamout_enabled) | S_028B94_STREAMOUT_2_EN(streamout_enabled) | - S_028B94_STREAMOUT_3_EN(streamout_enabled)); - radeon_emit(cs, so->hw_enabled_mask & enabled_stream_buffers_mask); - - cmd_buffer->state.context_roll_without_scissor_emitted = true; - - assert(cs->cdw <= cdw_max); -} - static void radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) { @@ -10929,7 +10936,7 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) if (!cmd_buffer->device->physical_device->use_ngg_streamout && ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) || (old_hw_enabled_mask != so->hw_enabled_mask))) - radv_emit_streamout_enable(cmd_buffer); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; if (cmd_buffer->device->physical_device->use_ngg_streamout) { /* Re-emit streamout desciptors because with NGG streamout, a buffer size of 0 acts like a @@ -10985,7 +10992,6 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info; struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); @@ -11034,9 +11040,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC * VGT only counts primitives and tells the shader through * SGPRs what to do. */ - radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2); - radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ - radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */ + radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, sb[i].size >> 2); cmd_buffer->state.context_roll_without_scissor_emitted = true; @@ -11064,6 +11068,8 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC assert(cs->cdw <= cdw_max); radv_set_streamout_enable(cmd_buffer, true); + + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; } VKAPI_ATTR void VKAPI_CALL diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 1baea731ccf..82614a6aa6d 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1249,6 +1249,7 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_SHADER_QUERY = 1ull << 57, RADV_CMD_DIRTY_OCCLUSION_QUERY = 1ull << 58, RADV_CMD_DIRTY_DB_SHADER_CONTROL = 1ull << 59, + RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 60, }; enum radv_cmd_flush_bits { @@ -1800,8 +1801,6 @@ struct radv_image_view; bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer); -void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer); - void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs); void radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 0a215ba761d..13330ea5eb5 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -2122,7 +2122,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo cmd_buffer->state.active_prims_gen_queries++; if (old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) { - radv_emit_streamout_enable(cmd_buffer); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; } } else { cmd_buffer->state.active_prims_gen_queries++; @@ -2313,7 +2313,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, cmd_buffer->state.active_prims_gen_queries--; if (old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) { - radv_emit_streamout_enable(cmd_buffer); + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; } } else { cmd_buffer->state.active_prims_gen_queries--;