radv: move emitting the strmout buffer into CmdDrawIndirectByteCountEXT()
This doesn't need to be in the generic draw path because only one draw command uses it.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20299>
This commit is contained in:

committed by
Marge Bot

parent
cb0a17652d
commit
6aaba10c6e
@@ -5058,38 +5058,6 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
||||
}
|
||||
}
|
||||
|
||||
if (draw_info->strmout_buffer) {
|
||||
uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
|
||||
|
||||
va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset;
|
||||
|
||||
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride);
|
||||
|
||||
if (info->gfx_level >= GFX10) {
|
||||
/* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
|
||||
* (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
|
||||
*/
|
||||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
|
||||
radeon_emit(cs, 0);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, 1); /* 1 DWORD */
|
||||
} else {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
|
||||
COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
|
||||
radeon_emit(cs, 0); /* unused */
|
||||
}
|
||||
|
||||
radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
|
||||
}
|
||||
|
||||
/* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive
|
||||
* topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to
|
||||
* be applied for indexed and non-indexed draws.
|
||||
@@ -10859,6 +10827,42 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
|
||||
radv_set_streamout_enable(cmd_buffer, false);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
|
||||
{
|
||||
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
|
||||
uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
|
||||
va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset;
|
||||
|
||||
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride);
|
||||
|
||||
if (gfx_level >= GFX10) {
|
||||
/* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
|
||||
* (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
|
||||
*/
|
||||
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
|
||||
radeon_emit(cs, 0);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, 1); /* 1 DWORD */
|
||||
} else {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
|
||||
COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
|
||||
radeon_emit(cs, 0); /* unused */
|
||||
}
|
||||
|
||||
radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
|
||||
uint32_t firstInstance, VkBuffer _counterBuffer,
|
||||
@@ -10881,6 +10885,7 @@ radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanc
|
||||
if (!radv_before_draw(cmd_buffer, &info, 1))
|
||||
return;
|
||||
struct VkMultiDrawInfoEXT minfo = { 0, 0 };
|
||||
radv_emit_strmout_buffer(cmd_buffer, &info);
|
||||
radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0);
|
||||
radv_after_draw(cmd_buffer);
|
||||
}
|
||||
|
Reference in New Issue
Block a user