turnip: implement VK_EXT_multi_draw
vkoverhead running:
 * draw numbers are reported as thousands of operations per second
 * percentages for draw cases are relative to 'draw'

   0, draw,              29151, 100.0%
   1, draw_multi,        35449, 121.6%
   2, draw_vertex,       28907,  99.2%
   3, draw_multi_vertex, 56658, 194.4%

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11502>
This commit is contained in:

committed by
Marge Bot

parent
2e93833224
commit
34109c8c10
@@ -564,7 +564,7 @@ Khronos extensions that are not part of any Vulkan version:
|
||||
VK_EXT_load_store_op_none DONE (radv, tu)
|
||||
VK_EXT_memory_budget DONE (anv, radv, tu, v3dv)
|
||||
VK_EXT_memory_priority DONE (radv)
|
||||
VK_EXT_multi_draw DONE (anv, lvp, radv)
|
||||
VK_EXT_multi_draw DONE (anv, lvp, radv, tu)
|
||||
VK_EXT_multisampled_render_to_single_sampled DONE (lvp)
|
||||
VK_EXT_non_seamless_cube_map DONE (anv, lvp, radv, tu)
|
||||
VK_EXT_pci_bus_info DONE (anv, radv)
|
||||
|
@@ -4582,20 +4582,22 @@ tu6_emit_empty_vs_params(struct tu_cmd_buffer *cmd)
|
||||
|
||||
static void
|
||||
tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
|
||||
uint32_t draw_id,
|
||||
uint32_t vertex_offset,
|
||||
uint32_t first_instance)
|
||||
{
|
||||
uint32_t offset = vs_params_offset(cmd);
|
||||
|
||||
/* Besides re-emitting params when they are changed, we should re-emit
|
||||
* them after constants are invalidated via HLSQ_INVALIDATE_CMD.
|
||||
*/
|
||||
if (!(cmd->state.dirty & (TU_CMD_DIRTY_DRAW_STATE | TU_CMD_DIRTY_VS_PARAMS)) &&
|
||||
(offset == 0 || draw_id == cmd->state.last_vs_params.draw_id) &&
|
||||
vertex_offset == cmd->state.last_vs_params.vertex_offset &&
|
||||
first_instance == cmd->state.last_vs_params.first_instance) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t offset = vs_params_offset(cmd);
|
||||
|
||||
struct tu_cs cs;
|
||||
VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
|
||||
if (result != VK_SUCCESS) {
|
||||
@@ -4617,7 +4619,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, 0);
|
||||
|
||||
tu_cs_emit(&cs, 0);
|
||||
tu_cs_emit(&cs, draw_id);
|
||||
tu_cs_emit(&cs, vertex_offset);
|
||||
tu_cs_emit(&cs, first_instance);
|
||||
tu_cs_emit(&cs, 0);
|
||||
@@ -4625,6 +4627,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
|
||||
|
||||
cmd->state.last_vs_params.vertex_offset = vertex_offset;
|
||||
cmd->state.last_vs_params.first_instance = first_instance;
|
||||
cmd->state.last_vs_params.draw_id = draw_id;
|
||||
|
||||
struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
|
||||
cmd->state.vs_params = (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
|
||||
@@ -4642,7 +4645,7 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_emit_vs_params(cmd, firstVertex, firstInstance);
|
||||
tu6_emit_vs_params(cmd, 0, firstVertex, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, false, vertexCount);
|
||||
|
||||
@@ -4652,6 +4655,51 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
|
||||
tu_cs_emit(cs, vertexCount);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
|
||||
uint32_t drawCount,
|
||||
const VkMultiDrawInfoEXT *pVertexInfo,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstInstance,
|
||||
uint32_t stride)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
if (!drawCount)
|
||||
return;
|
||||
|
||||
bool has_tess =
|
||||
cmd->state.pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
|
||||
uint32_t max_vertex_count = 0;
|
||||
if (has_tess) {
|
||||
uint32_t i = 0;
|
||||
vk_foreach_multi_draw(draw, i, pVertexInfo, drawCount, stride) {
|
||||
max_vertex_count = MAX2(max_vertex_count, draw->vertexCount);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t i = 0;
|
||||
vk_foreach_multi_draw(draw, i, pVertexInfo, drawCount, stride) {
|
||||
tu6_emit_vs_params(cmd, i, draw->firstVertex, firstInstance);
|
||||
|
||||
if (i == 0)
|
||||
tu6_draw_common(cmd, cs, false, max_vertex_count);
|
||||
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
|
||||
cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
|
||||
tu_cs_emit(cs, instanceCount);
|
||||
tu_cs_emit(cs, draw->vertexCount);
|
||||
}
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||||
uint32_t indexCount,
|
||||
@@ -4663,7 +4711,7 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
|
||||
tu6_emit_vs_params(cmd, 0, vertexOffset, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, true, indexCount);
|
||||
|
||||
@@ -4676,6 +4724,56 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
|
||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
|
||||
uint32_t drawCount,
|
||||
const VkMultiDrawIndexedInfoEXT *pIndexInfo,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstInstance,
|
||||
uint32_t stride,
|
||||
const int32_t *pVertexOffset)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
if (!drawCount)
|
||||
return;
|
||||
|
||||
bool has_tess =
|
||||
cmd->state.pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
|
||||
|
||||
uint32_t max_index_count = 0;
|
||||
if (has_tess) {
|
||||
uint32_t i = 0;
|
||||
vk_foreach_multi_draw_indexed(draw, i, pIndexInfo, drawCount, stride) {
|
||||
max_index_count = MAX2(max_index_count, draw->indexCount);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t i = 0;
|
||||
vk_foreach_multi_draw_indexed(draw, i, pIndexInfo, drawCount, stride) {
|
||||
int32_t vertexOffset = pVertexOffset ? *pVertexOffset : draw->vertexOffset;
|
||||
tu6_emit_vs_params(cmd, i, vertexOffset, firstInstance);
|
||||
|
||||
if (i == 0)
|
||||
tu6_draw_common(cmd, cs, true, max_index_count);
|
||||
|
||||
if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
|
||||
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
|
||||
tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
|
||||
cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
|
||||
tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
|
||||
tu_cs_emit(cs, instanceCount);
|
||||
tu_cs_emit(cs, draw->indexCount);
|
||||
tu_cs_emit(cs, draw->firstIndex);
|
||||
tu_cs_emit_qw(cs, cmd->state.index_va);
|
||||
tu_cs_emit(cs, cmd->state.max_index_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
|
||||
* do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
|
||||
* pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
|
||||
@@ -4833,7 +4931,7 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
|
||||
*/
|
||||
draw_wfm(cmd);
|
||||
|
||||
tu6_emit_vs_params(cmd, 0, firstInstance);
|
||||
tu6_emit_vs_params(cmd, 0, 0, firstInstance);
|
||||
|
||||
tu6_draw_common(cmd, cs, false, 0);
|
||||
|
||||
|
@@ -249,6 +249,7 @@ struct tu_cache_state {
|
||||
/* Draw parameters handed to tu6_emit_vs_params().  The command buffer keeps
 * the most recently emitted set in state.last_vs_params so that a draw with
 * identical values can skip re-emitting them.
 */
struct tu_vs_params {
   /* Vertex offset of the draw (firstVertex / vertexOffset). */
   uint32_t vertex_offset;
   /* First instance of the draw. */
   uint32_t first_instance;
   /* Index of the draw within a multi-draw call; 0 for the other draws. */
   uint32_t draw_id;
};
|
||||
|
||||
/* This should be for state that is set inside a renderpass and used at
|
||||
|
@@ -218,6 +218,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
||||
.EXT_vertex_input_dynamic_state = true,
|
||||
.EXT_attachment_feedback_loop_layout = true,
|
||||
.EXT_rasterization_order_attachment_access = true,
|
||||
.EXT_multi_draw = true,
|
||||
#ifndef TU_USE_KGSL
|
||||
.EXT_physical_device_drm = true,
|
||||
#endif
|
||||
@@ -896,6 +897,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
||||
features->globalPriorityQuery = true;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
|
||||
VkPhysicalDeviceMultiDrawFeaturesEXT *features =
|
||||
(VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
|
||||
features->multiDraw = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
@@ -1355,6 +1362,12 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
|
||||
sizeof(props->shaderModuleIdentifierAlgorithmUUID));
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
|
||||
VkPhysicalDeviceMultiDrawPropertiesEXT *properties =
|
||||
(VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
|
||||
properties->maxMultiDrawCount = 2048;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
Reference in New Issue
Block a user