/*
 * Patch summary: expose VK_EXT_primitives_generated_query in the "tu" driver.
 *
 * docs/features.txt
 *   - Adds "tu" next to "lvp" on the VK_EXT_primitives_generated_query line.
 *
 * src/freedreno/vulkan/tu_device.c
 *   - get_device_extensions(): sets .EXT_primitives_generated_query = true.
 *   - tu_GetPhysicalDeviceFeatures2(): for
 *     VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT reports
 *     primitivesGeneratedQuery = true, while
 *     primitivesGeneratedQueryWithRasterizerDiscard and
 *     primitivesGeneratedQueryWithNonZeroStreams are reported as false.
 *
 * src/freedreno/vulkan/tu_query.c
 *   - Adds struct primitives_generated_query_slot: the common query_slot
 *     followed by three uint64_t fields (result, begin, end), and the
 *     primitives_generated_query_iova() macro, a thin wrapper around
 *     query_iova() for that slot type.
 *   - tu_CreateQueryPool(): uses sizeof(struct primitives_generated_query_slot)
 *     as the slot size for VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT.
 *   - get_result_count(): the new query type returns one value, via the same
 *     case group as occlusion and timestamp queries.
 *   - tu_GetQueryPoolResults(), tu_CmdCopyQueryPoolResults() and
 *     tu_CmdResetQueryPool(): the new query type is added to the existing
 *     case lists, so it goes through the same shared helpers as the other
 *     query types.
 *   - emit_begin_prim_generated_query(): picks draw_cs when
 *     cmdbuf->state.pass is set, otherwise cs; emits the
 *     START_PRIMITIVE_CTRS, RST_PIX_CNT and TILE_FLUSH events, a WFI, and a
 *     CP_REG_TO_MEM packet (CNT(2), 64B) that stores the register pair
 *     starting at REG_A6XX_RBBM_PRIMCTR_7_LO into the slot's "begin" field.
 *   - emit_end_prim_generated_query(): same command-stream selection; emits
 *     STOP_PRIMITIVE_CTRS, RST_VTX_CNT and STAT_EVENT, a WFI, and a
 *     CP_REG_TO_MEM of the same register pair into "end". It then emits
 *     CP_MEM_TO_MEM with DOUBLE | NEG_C | WAIT_FOR_MEM_WRITES and the operands
 *     (result, result, end, begin), followed by CP_WAIT_MEM_WRITES. Finally
 *     it writes 0x1 to the slot's availability field with CP_MEM_WRITE; when
 *     inside a render pass that write goes to draw_epilogue_cs instead.
 *   - tu_CmdBeginQuery(), tu_CmdBeginQueryIndexedEXT(), tu_CmdEndQuery() and
 *     tu_CmdEndQueryIndexedEXT(): dispatch the new query type to the two
 *     helpers above.
 *
 * NOTE(review): the CP_MEM_TO_MEM operand order presumably computes
 *   result = result + end - begin; confirm against the packet definition.
 * NOTE(review): which hardware statistic RBBM_PRIMCTR_7 counts is not visible
 *   in this patch; verify that it is the intended "primitives generated"
 *   counter.
 * NOTE(review): the indexed begin/end entry points do not forward "index" to
 *   the new helpers. This looks consistent with
 *   primitivesGeneratedQueryWithNonZeroStreams being reported as false, but
 *   confirm that a non-zero index can never reach these paths.
 * NOTE(review): this copy of the patch appears to have had its line breaks
 *   and indentation collapsed; it presumably will not apply as-is and should
 *   be regenerated from the original commit before use.
 */
diff --git a/docs/features.txt b/docs/features.txt index e688333fe54..c37784369ef 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -560,7 +560,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_physical_device_drm DONE (anv, radv, tu, v3dv) VK_EXT_post_depth_coverage DONE (anv/gfx10+, lvp, radv/gfx10+) VK_EXT_primitive_topology_list_restart DONE (anv, lvp, radv, tu) - VK_EXT_primitives_generated_query DONE (lvp) + VK_EXT_primitives_generated_query DONE (lvp, tu) VK_EXT_provoking_vertex DONE (anv, lvp, radv, tu, v3dv) VK_EXT_queue_family_foreign DONE (anv, radv, vn) VK_EXT_robustness2 DONE (anv, radv, tu) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index f0cc72de141..b12f49e2465 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -203,6 +203,7 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_line_rasterization = true, .EXT_subgroup_size_control = true, .EXT_image_robustness = true, + .EXT_primitives_generated_query = true, #ifndef TU_USE_KGSL .EXT_physical_device_drm = true, #endif @@ -838,6 +839,14 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, features->texelBufferAlignment = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: { + VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = + (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext; + features->primitivesGeneratedQuery = true; + features->primitivesGeneratedQueryWithRasterizerDiscard = false; + features->primitivesGeneratedQueryWithNonZeroStreams = false; + break; + } default: break; diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c index 81a67a05239..7bf710f5e30 100644 --- a/src/freedreno/vulkan/tu_query.c +++ b/src/freedreno/vulkan/tu_query.c @@ -108,6 +108,13 @@ struct PACKED perf_query_slot { struct perfcntr_query_slot perfcntr; }; +struct PACKED 
primitives_generated_query_slot { + struct query_slot common; + uint64_t result; + uint64_t begin; + uint64_t end; +}; + /* Returns the IOVA of a given uint64_t field in a given slot of a query * pool. */ #define query_iova(type, pool, query, field) \ @@ -130,6 +137,9 @@ struct PACKED perf_query_slot { sizeof(struct perfcntr_query_slot) * (i) + \ offsetof(struct perfcntr_query_slot, field) +#define primitives_generated_query_iova(pool, query, field) \ + query_iova(struct primitives_generated_query_slot, pool, query, field) + #define query_available_iova(pool, query) \ query_iova(struct query_slot, pool, query, available) @@ -239,6 +249,9 @@ tu_CreateQueryPool(VkDevice _device, case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: slot_size = sizeof(struct primitive_query_slot); break; + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + slot_size = sizeof(struct primitives_generated_query_slot); + break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { perf_query_info = vk_find_struct_const(pCreateInfo->pNext, @@ -364,6 +377,7 @@ get_result_count(struct tu_query_pool *pool) /* Occulusion and timestamp queries write one integer value */ case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_TIMESTAMP: + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: return 1; /* Transform feedback queries write two integer values */ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: @@ -548,6 +562,7 @@ tu_GetQueryPoolResults(VkDevice _device, case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_PIPELINE_STATISTICS: case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: return get_query_pool_results(device, pool, firstQuery, queryCount, @@ -689,6 +704,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: case 
VK_QUERY_TYPE_PIPELINE_STATISTICS: return emit_copy_query_pool_results(cmdbuf, cs, pool, firstQuery, queryCount, buffer, dstOffset, stride, flags); @@ -749,6 +765,7 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_TIMESTAMP: case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_PIPELINE_STATISTICS: case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount); @@ -936,6 +953,27 @@ emit_begin_xfb_query(struct tu_cmd_buffer *cmdbuf, tu6_emit_event_write(cmdbuf, cs, WRITE_PRIMITIVE_COUNTS); } +static void +emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf, + struct tu_query_pool *pool, + uint32_t query) +{ + struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs; + uint64_t begin_iova = primitives_generated_query_iova(pool, query, begin); + + tu6_emit_event_write(cmdbuf, cs, START_PRIMITIVE_CTRS); + tu6_emit_event_write(cmdbuf, cs, RST_PIX_CNT); + tu6_emit_event_write(cmdbuf, cs, TILE_FLUSH); + + tu_cs_emit_wfi(cs); + + tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) | + CP_REG_TO_MEM_0_CNT(2) | + CP_REG_TO_MEM_0_64B); + tu_cs_emit_qw(cs, begin_iova); +} + VKAPI_ATTR void VKAPI_CALL tu_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -957,6 +995,9 @@ tu_CmdBeginQuery(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: emit_begin_xfb_query(cmdbuf, pool, query, 0); break; + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + emit_begin_prim_generated_query(cmdbuf, pool, query); + break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: emit_begin_perf_query(cmdbuf, pool, query); break; @@ -985,6 +1026,9 @@ tu_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: emit_begin_xfb_query(cmdbuf, pool, query, index); break; + case 
VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + emit_begin_prim_generated_query(cmdbuf, pool, query); + break; default: assert(!"Invalid query type"); } @@ -1242,6 +1286,49 @@ emit_end_xfb_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_qw(cs, 0x1); } +static void +emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf, + struct tu_query_pool *pool, + uint32_t query) +{ + struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs; + + uint64_t begin_iova = primitives_generated_query_iova(pool, query, begin); + uint64_t end_iova = primitives_generated_query_iova(pool, query, end); + uint64_t result_iova = primitives_generated_query_iova(pool, query, result); + uint64_t available_iova = query_available_iova(pool, query); + + tu6_emit_event_write(cmdbuf, cs, STOP_PRIMITIVE_CTRS); + tu6_emit_event_write(cmdbuf, cs, RST_VTX_CNT); + tu6_emit_event_write(cmdbuf, cs, STAT_EVENT); + + tu_cs_emit_wfi(cs); + + tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PRIMCTR_7_LO) | + CP_REG_TO_MEM_0_CNT(2) | + CP_REG_TO_MEM_0_64B); + tu_cs_emit_qw(cs, end_iova); + + tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 9); + tu_cs_emit(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | + CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, result_iova); + tu_cs_emit_qw(cs, end_iova); + tu_cs_emit_qw(cs, begin_iova); + + tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); + + if (cmdbuf->state.pass) + cs = &cmdbuf->draw_epilogue_cs; + + /* Set the availability to 1 */ + tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 4); + tu_cs_emit_qw(cs, available_iova); + tu_cs_emit_qw(cs, 0x1); +} + /* Implement this bit of spec text from section 17.2 "Query Operation": * * If queries are used while executing a render pass instance that has @@ -1296,6 +1383,9 @@ tu_CmdEndQuery(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: emit_end_xfb_query(cmdbuf, pool, query, 0); break; + case 
VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + emit_end_prim_generated_query(cmdbuf, pool, query); + break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: emit_end_perf_query(cmdbuf, pool, query); break; @@ -1326,6 +1416,9 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, assert(index <= 4); emit_end_xfb_query(cmdbuf, pool, query, index); break; + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + emit_end_prim_generated_query(cmdbuf, pool, query); + break; default: assert(!"Invalid query type"); }