# turnip (tu): implement VK_EXT_multi_draw
#
# Summary of what this patch changes:
#
# * docs/features.txt: adds "tu" to the drivers listed for VK_EXT_multi_draw.
#
# * tu_cmd_buffer.h: struct tu_vs_params gains a draw_id field so the last
#   emitted draw id is remembered next to vertex_offset / first_instance.
#
# * tu_cmd_buffer.c, tu6_emit_vs_params(): takes a new draw_id argument and
#   writes it into the constant dword that was previously hard-coded to 0.
#   The "nothing changed" early return now also requires draw_id to match
#   the cached value, except when vs_params_offset() returns 0.  The
#   vs_params_offset() call is moved to the top of the function so the
#   early-return test can use it.  Existing callers (tu_CmdDraw,
#   tu_CmdDrawIndexed, tu_CmdDrawIndirectByteCountEXT) pass draw_id = 0.
#
# * tu_cmd_buffer.c, tu_CmdDrawMultiEXT() / tu_CmdDrawMultiIndexedEXT():
#   new entry points.  Both return immediately when drawCount is 0.  When
#   the bound pipeline has a tessellation control stage, a first pass over
#   the draws computes the maximum vertexCount / indexCount; otherwise 0 is
#   used.  The main loop then, for each draw:
#     - calls tu6_emit_vs_params() with the loop counter i as draw_id,
#     - calls tu6_draw_common() for the first draw only (i == 0), passing
#       the maximum count computed above,
#     - emits TU_DRAW_STATE_VS_PARAMS via CP_SET_DRAW_STATE and clears
#       TU_CMD_DIRTY_VS_PARAMS if that dirty bit is set,
#     - emits a CP_DRAW_INDX_OFFSET packet for the draw.
#   In the indexed variant a non-NULL pVertexOffset supplies the vertex
#   offset for every draw; otherwise each draw's own vertexOffset is used.
#
# * tu_device.c: sets .EXT_multi_draw in the device extension table,
#   reports multiDraw = true for VkPhysicalDeviceMultiDrawFeaturesEXT and
#   maxMultiDrawCount = 2048 for VkPhysicalDeviceMultiDrawPropertiesEXT.
#
# NOTE(review): this copy of the patch was restored from a whitespace-
# collapsed form; line breaks and indentation of context lines were
# reconstructed and should be confirmed against the target tree (or the
# patch applied with whitespace-tolerant options) before use.
#
diff --git a/docs/features.txt b/docs/features.txt
index 97a435d0ead..d51d0dd7d0b 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -564,7 +564,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_EXT_load_store_op_none                             DONE (radv, tu)
   VK_EXT_memory_budget                                  DONE (anv, radv, tu, v3dv)
   VK_EXT_memory_priority                                DONE (radv)
-  VK_EXT_multi_draw                                     DONE (anv, lvp, radv)
+  VK_EXT_multi_draw                                     DONE (anv, lvp, radv, tu)
   VK_EXT_multisampled_render_to_single_sampled          DONE (lvp)
   VK_EXT_non_seamless_cube_map                          DONE (anv, lvp, radv, tu)
   VK_EXT_pci_bus_info                                   DONE (anv, radv)
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 70acc2497f2..365146cf6a1 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -4582,20 +4582,22 @@ tu6_emit_empty_vs_params(struct tu_cmd_buffer *cmd)
 
 static void
 tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
+                   uint32_t draw_id,
                    uint32_t vertex_offset,
                    uint32_t first_instance)
 {
+   uint32_t offset = vs_params_offset(cmd);
+
    /* Beside re-emitting params when they are changed, we should re-emit
     * them after constants are invalidated via HLSQ_INVALIDATE_CMD.
     */
    if (!(cmd->state.dirty & (TU_CMD_DIRTY_DRAW_STATE | TU_CMD_DIRTY_VS_PARAMS)) &&
+       (offset == 0 || draw_id == cmd->state.last_vs_params.draw_id) &&
        vertex_offset == cmd->state.last_vs_params.vertex_offset &&
        first_instance == cmd->state.last_vs_params.first_instance) {
       return;
    }
 
-   uint32_t offset = vs_params_offset(cmd);
-
    struct tu_cs cs;
    VkResult result = tu_cs_begin_sub_stream(&cmd->sub_cs, 3 + (offset ? 8 : 0), &cs);
    if (result != VK_SUCCESS) {
@@ -4617,7 +4619,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
       tu_cs_emit(&cs, 0);
       tu_cs_emit(&cs, 0);
 
-      tu_cs_emit(&cs, 0);
+      tu_cs_emit(&cs, draw_id);
       tu_cs_emit(&cs, vertex_offset);
       tu_cs_emit(&cs, first_instance);
       tu_cs_emit(&cs, 0);
@@ -4625,6 +4627,7 @@ tu6_emit_vs_params(struct tu_cmd_buffer *cmd,
 
    cmd->state.last_vs_params.vertex_offset = vertex_offset;
    cmd->state.last_vs_params.first_instance = first_instance;
+   cmd->state.last_vs_params.draw_id = draw_id;
 
    struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
    cmd->state.vs_params = (struct tu_draw_state) {entry.bo->iova + entry.offset, entry.size / 4};
@@ -4642,7 +4645,7 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
    struct tu_cs *cs = &cmd->draw_cs;
 
-   tu6_emit_vs_params(cmd, firstVertex, firstInstance);
+   tu6_emit_vs_params(cmd, 0, firstVertex, firstInstance);
 
    tu6_draw_common(cmd, cs, false, vertexCount);
 
@@ -4652,6 +4655,51 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
    tu_cs_emit(cs, vertexCount);
 }
 
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
+                   uint32_t drawCount,
+                   const VkMultiDrawInfoEXT *pVertexInfo,
+                   uint32_t instanceCount,
+                   uint32_t firstInstance,
+                   uint32_t stride)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   if (!drawCount)
+      return;
+
+   bool has_tess =
+      cmd->state.pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+   uint32_t max_vertex_count = 0;
+   if (has_tess) {
+      uint32_t i = 0;
+      vk_foreach_multi_draw(draw, i, pVertexInfo, drawCount, stride) {
+         max_vertex_count = MAX2(max_vertex_count, draw->vertexCount);
+      }
+   }
+
+   uint32_t i = 0;
+   vk_foreach_multi_draw(draw, i, pVertexInfo, drawCount, stride) {
+      tu6_emit_vs_params(cmd, i, draw->firstVertex, firstInstance);
+
+      if (i == 0)
+         tu6_draw_common(cmd, cs, false, max_vertex_count);
+
+      if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
+         tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+         cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
+      }
+
+      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
+      tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_AUTO_INDEX));
+      tu_cs_emit(cs, instanceCount);
+      tu_cs_emit(cs, draw->vertexCount);
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                   uint32_t indexCount,
@@ -4663,7 +4711,7 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
    struct tu_cs *cs = &cmd->draw_cs;
 
-   tu6_emit_vs_params(cmd, vertexOffset, firstInstance);
+   tu6_emit_vs_params(cmd, 0, vertexOffset, firstInstance);
 
    tu6_draw_common(cmd, cs, true, indexCount);
 
@@ -4676,6 +4724,56 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
    tu_cs_emit(cs, cmd->state.max_index_count);
 }
 
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
+                          uint32_t drawCount,
+                          const VkMultiDrawIndexedInfoEXT *pIndexInfo,
+                          uint32_t instanceCount,
+                          uint32_t firstInstance,
+                          uint32_t stride,
+                          const int32_t *pVertexOffset)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   struct tu_cs *cs = &cmd->draw_cs;
+
+   if (!drawCount)
+      return;
+
+   bool has_tess =
+      cmd->state.pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+   uint32_t max_index_count = 0;
+   if (has_tess) {
+      uint32_t i = 0;
+      vk_foreach_multi_draw_indexed(draw, i, pIndexInfo, drawCount, stride) {
+         max_index_count = MAX2(max_index_count, draw->indexCount);
+      }
+   }
+
+   uint32_t i = 0;
+   vk_foreach_multi_draw_indexed(draw, i, pIndexInfo, drawCount, stride) {
+      int32_t vertexOffset = pVertexOffset ? *pVertexOffset : draw->vertexOffset;
+      tu6_emit_vs_params(cmd, i, vertexOffset, firstInstance);
+
+      if (i == 0)
+         tu6_draw_common(cmd, cs, true, max_index_count);
+
+      if (cmd->state.dirty & TU_CMD_DIRTY_VS_PARAMS) {
+         tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+         tu_cs_emit_draw_state(cs, TU_DRAW_STATE_VS_PARAMS, cmd->state.vs_params);
+         cmd->state.dirty &= ~TU_CMD_DIRTY_VS_PARAMS;
+      }
+
+      tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7);
+      tu_cs_emit(cs, tu_draw_initiator(cmd, DI_SRC_SEL_DMA));
+      tu_cs_emit(cs, instanceCount);
+      tu_cs_emit(cs, draw->indexCount);
+      tu_cs_emit(cs, draw->firstIndex);
+      tu_cs_emit_qw(cs, cmd->state.index_va);
+      tu_cs_emit(cs, cmd->state.max_index_count);
+   }
+}
+
 /* Various firmware bugs/inconsistencies mean that some indirect draw opcodes
  * do not wait for WFI's to complete before executing. Add a WAIT_FOR_ME if
  * pending for these opcodes. This may result in a few extra WAIT_FOR_ME's
@@ -4833,7 +4931,7 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
     */
    draw_wfm(cmd);
 
-   tu6_emit_vs_params(cmd, 0, firstInstance);
+   tu6_emit_vs_params(cmd, 0, 0, firstInstance);
 
    tu6_draw_common(cmd, cs, false, 0);
 
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h
index ddba3002da9..bb20ec809b9 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.h
+++ b/src/freedreno/vulkan/tu_cmd_buffer.h
@@ -249,6 +249,7 @@ struct tu_cache_state {
 struct tu_vs_params {
    uint32_t vertex_offset;
    uint32_t first_instance;
+   uint32_t draw_id;
 };
 
 /* This should be for state that is set inside a renderpass and used at
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index ef4ddbd66fd..0f97ad13f65 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -218,6 +218,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .EXT_vertex_input_dynamic_state = true,
       .EXT_attachment_feedback_loop_layout = true,
       .EXT_rasterization_order_attachment_access = true,
+      .EXT_multi_draw = true,
 #ifndef TU_USE_KGSL
       .EXT_physical_device_drm = true,
 #endif
@@ -896,6 +897,12 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->globalPriorityQuery = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
+         VkPhysicalDeviceMultiDrawFeaturesEXT *features =
+            (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
+         features->multiDraw = true;
+         break;
+      }
 
       default:
          break;
@@ -1355,6 +1362,12 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
+         VkPhysicalDeviceMultiDrawPropertiesEXT *properties =
+            (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
+         properties->maxMultiDrawCount = 2048;
+         break;
+      }
       default:
          break;
       }