turnip: implement VK_EXT_multi_draw

vkoverhead running:
    * draw numbers are reported as thousands of operations per second
    * percentages for draw cases are relative to 'draw'
   0, draw,                                      29151,        100.0%
   1, draw_multi,                                35449,        121.6%
   2, draw_vertex,                               28907,        99.2%
   3, draw_multi_vertex,                         56658,        194.4%

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11502>
This commit is contained in:
Danylo Piliaiev
2022-09-05 11:12:01 +03:00
committed by Marge Bot
parent 2e93833224
commit 34109c8c10
4 changed files with 119 additions and 7 deletions

View File

@@ -564,7 +564,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_load_store_op_none DONE (radv, tu)
VK_EXT_memory_budget DONE (anv, radv, tu, v3dv)
VK_EXT_memory_priority DONE (radv)
VK_EXT_multi_draw DONE (anv, lvp, radv)
VK_EXT_multi_draw DONE (anv, lvp, radv, tu)
VK_EXT_multisampled_render_to_single_sampled DONE (lvp)
VK_EXT_non_seamless_cube_map DONE (anv, lvp, radv, tu)
VK_EXT_pci_bus_info DONE (anv, radv)

View File

@@ -4582,20 +4582,22 @@ tu6_emit_empty_vs_params(struct tu_cmd_buffer *cmd)
static void
tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
uint32_t draw_id,
uint32_t vertex_offset,
uint32_t first_instance)
{
uint32_t offset = vs_params_offset(cmd);
/* Besides re-emitting params when they change, we should also re-emit
* them after constants are invalidated via HLSQ_INVALIDATE_CMD.
*/
if (!(cmd->state.dirty & (TU_CMD_DIRTY_DRAW_STATE | TU_CMD_DIRTY_VS_PARAMS)) &&
(offset == 0 || draw_id == cmd->state.last_vs_params.draw_id) &&
vertex_offset == cmd->state.last_vs_params.vertex_offset &&
first_instance == cmd->state.last_vs_params.first_instance) {
return;
}
uint32_t offset = vs_params_offset(cmd);
struct tu_cs cs;
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
if (result != VK_SUCCESS) {
@@ -4617,7 +4619,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, 0);
tu_cs_emit(&cs, draw_id);
tu_cs_emit(&cs, vertex_offset);
tu_cs_emit(&cs, first_instance);
tu_cs_emit(&cs, 0);
@@ -4625,6 +4627,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
cmd->state.last_vs_params.vertex_offset = vertex_offset;
cmd->state.last_vs_params.first_instance = first_instance;
cmd->state.last_vs_params.draw_id = draw_id;
struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
cmd->state.vs_params = (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
@@ -4642,7 +4645,7 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_vs_params(cmd, firstVertex, firstInstance);
tu6_emit_vs_params(cmd, 0, firstVertex, firstInstance);
tu6_draw_common(cmd, cs, false, vertexCount);
@@ -4652,6 +4655,51 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, vertexCount);
}
/**
 * vkCmdDrawMultiEXT entry point: records one non-indexed draw packet into the
 * command buffer's draw_cs for each of the drawCount entries of pVertexInfo.
 *
 * @param commandBuffer  Command buffer being recorded.
 * @param drawCount      Number of entries to draw; 0 records nothing.
 * @param pVertexInfo    First draw entry (firstVertex / vertexCount).
 * @param instanceCount  Instance count emitted with every draw.
 * @param firstInstance  First-instance value forwarded to the VS params.
 * @param stride         Stride handed to vk_foreach_multi_draw to step from
 *                       one entry to the next.
 */
VKAPI_ATTR void VKAPI_CALL
tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
                   uint32_t drawCount,
                   const VkMultiDrawInfoEXT *pVertexInfo,
                   uint32_t instanceCount,
                   uint32_t firstInstance,
                   uint32_t stride)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *cs = &cmd->draw_cs;

   /* An empty batch records nothing at all. */
   if (drawCount == 0)
      return;

   /* The largest vertex count of the batch is only gathered when the bound
    * pipeline has a tessellation control stage; otherwise 0 is handed to
    * tu6_draw_common.
    */
   uint32_t largest_count = 0;
   if (cmd->state.pipeline->active_stages &
       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
      uint32_t scan_idx = 0;
      vk_foreach_multi_draw(info, scan_idx, pVertexInfo, drawCount, stride) {
         if (info->vertexCount > largest_count)
            largest_count = info->vertexCount;
      }
   }

   uint32_t draw_idx = 0;
   vk_foreach_multi_draw(info, draw_idx, pVertexInfo, drawCount, stride) {
      /* The position within the batch is passed as the draw id. */
      tu6_emit_vs_params(cmd, draw_idx, info->firstVertex, firstInstance);

      /* tu6_draw_common runs only for the first draw of the batch. */
      if (draw_idx == 0)
         tu6_draw_common(cmd, cs, false, largest_count);

      /* If the VS params are still flagged dirty, bind the current
       * vs_params draw state here and clear the flag.
       */
      if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
         tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS,
                               cmd->state.vs_params);
         cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
      }

      /* Draw packet payload: initiator, instance count, vertex count. */
      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
      tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
      tu_cs_emit(cs, instanceCount);
      tu_cs_emit(cs, info->vertexCount);
   }
}
VKAPI_ATTR void VKAPI_CALL
tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
uint32_t indexCount,
@@ -4663,7 +4711,7 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
struct tu_cs *cs = &cmd->draw_cs;
tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
tu6_emit_vs_params(cmd, 0, vertexOffset, firstInstance);
tu6_draw_common(cmd, cs, true, indexCount);
@@ -4676,6 +4724,56 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
tu_cs_emit(cs, cmd->state.max_index_count);
}
/**
 * vkCmdDrawMultiIndexedEXT entry point: records one indexed draw packet into
 * the command buffer's draw_cs for each of the drawCount entries of
 * pIndexInfo.
 *
 * @param commandBuffer  Command buffer being recorded.
 * @param drawCount      Number of entries to draw; 0 records nothing.
 * @param pIndexInfo     First draw entry (firstIndex / indexCount /
 *                       vertexOffset).
 * @param instanceCount  Instance count emitted with every draw.
 * @param firstInstance  First-instance value forwarded to the VS params.
 * @param stride         Stride handed to vk_foreach_multi_draw_indexed to
 *                       step from one entry to the next.
 * @param pVertexOffset  Optional; when non-NULL the pointed-to value is used
 *                       for every draw instead of the entry's vertexOffset.
 */
VKAPI_ATTR void VKAPI_CALL
tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
                          uint32_t drawCount,
                          const VkMultiDrawIndexedInfoEXT *pIndexInfo,
                          uint32_t instanceCount,
                          uint32_t firstInstance,
                          uint32_t stride,
                          const int32_t *pVertexOffset)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *cs = &cmd->draw_cs;

   /* An empty batch records nothing at all. */
   if (drawCount == 0)
      return;

   /* The largest index count of the batch is only gathered when the bound
    * pipeline has a tessellation control stage; otherwise 0 is handed to
    * tu6_draw_common.
    */
   uint32_t largest_count = 0;
   if (cmd->state.pipeline->active_stages &
       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
      uint32_t scan_idx = 0;
      vk_foreach_multi_draw_indexed(info, scan_idx, pIndexInfo, drawCount, stride) {
         if (info->indexCount > largest_count)
            largest_count = info->indexCount;
      }
   }

   uint32_t draw_idx = 0;
   vk_foreach_multi_draw_indexed(info, draw_idx, pIndexInfo, drawCount, stride) {
      /* A caller-supplied shared offset takes precedence over the entry's. */
      int32_t base_vertex;
      if (pVertexOffset)
         base_vertex = *pVertexOffset;
      else
         base_vertex = info->vertexOffset;

      /* The position within the batch is passed as the draw id. */
      tu6_emit_vs_params(cmd, draw_idx, base_vertex, firstInstance);

      /* tu6_draw_common runs only for the first draw of the batch. */
      if (draw_idx == 0)
         tu6_draw_common(cmd, cs, true, largest_count);

      /* If the VS params are still flagged dirty, bind the current
       * vs_params draw state here and clear the flag.
       */
      if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
         tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS,
                               cmd->state.vs_params);
         cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
      }

      /* Draw packet payload: initiator, instance count, index count, first
       * index, 64-bit index buffer address, max index count.
       */
      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
      tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
      tu_cs_emit(cs, instanceCount);
      tu_cs_emit(cs, info->indexCount);
      tu_cs_emit(cs, info->firstIndex);
      tu_cs_emit_qw(cs, cmd->state.index_va);
      tu_cs_emit(cs, cmd->state.max_index_count);
   }
}
/* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
* do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
* pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
@@ -4833,7 +4931,7 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
*/
draw_wfm(cmd);
tu6_emit_vs_params(cmd, 0, firstInstance);
tu6_emit_vs_params(cmd, 0, 0, firstInstance);
tu6_draw_common(cmd, cs, false, 0);

View File

@@ -249,6 +249,7 @@ struct tu_cache_state {
/* Set of per-draw values handed to the vertex shader.
 * NOTE(review): presumably the type of cmd->state.last_vs_params, which
 * tu6_emit_vs_params compares against to skip redundant re-emits - confirm
 * against the tu_cmd_state declaration.
 */
struct tu_vs_params {
/* Vertex offset of the draw (firstVertex / vertexOffset at the call sites). */
uint32_t vertex_offset;
/* firstInstance of the draw. */
uint32_t first_instance;
/* Draw id: the multi-draw entry points pass the draw's position within the
 * batch, the other visible callers pass 0.
 */
uint32_t draw_id;
};
/* This should be for state that is set inside a renderpass and used at

View File

@@ -218,6 +218,7 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_vertex_input_dynamic_state = true,
.EXT_attachment_feedback_loop_layout = true,
.EXT_rasterization_order_attachment_access = true,
.EXT_multi_draw = true,
#ifndef TU_USE_KGSL
.EXT_physical_device_drm = true,
#endif
@@ -896,6 +897,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->globalPriorityQuery = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
VkPhysicalDeviceMultiDrawFeaturesEXT *features =
(VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
features->multiDraw = true;
break;
}
default:
break;
@@ -1355,6 +1362,12 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
sizeof(props->shaderModuleIdentifierAlgorithmUUID));
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
VkPhysicalDeviceMultiDrawPropertiesEXT *properties =
(VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
properties->maxMultiDrawCount = 2048;
break;
}
default:
break;
}