venus: switch to unconditionally deferred query feedback

All commands that make queries available have feedback cmds batched
and stored during recording. At submission time, for each batch
(SubmitInfo) these feedback cmds are recorded in a cmd buffer that is
appended after the last original cmd buffer (but before
semaphore/fence feedback).

Query reset cmds are deferred as well; each reset also removes any prior
feedback cmds, within the same batch, for the queries it is resetting.

Cc: 23.3 <mesa-stable>
Signed-off-by: Juston Li <justonli@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25413>
This commit is contained in:
Juston Li
2023-09-26 16:36:58 -07:00
committed by Marge Bot
parent 7fbf608f2d
commit 5b24ab91e4
5 changed files with 144 additions and 97 deletions

View File

@@ -506,20 +506,21 @@ vn_cmd_transfer_present_src_images(
count, img_barriers);
}
static bool
vn_cmd_query_batch_push(struct vn_command_buffer *cmd,
struct vn_query_pool *query_pool,
uint32_t query,
uint32_t query_count)
struct vn_feedback_query_batch *
vn_cmd_query_batch_alloc(struct vn_command_pool *pool,
struct vn_query_pool *query_pool,
uint32_t query,
uint32_t query_count,
bool copy)
{
struct vn_feedback_query_batch *batch;
if (list_is_empty(&cmd->pool->free_query_batches)) {
batch = vk_alloc(&cmd->pool->allocator, sizeof(*batch),
VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (list_is_empty(&pool->free_query_batches)) {
batch = vk_alloc(&pool->allocator, sizeof(*batch), VN_DEFAULT_ALIGN,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!batch)
return false;
return NULL;
} else {
batch = list_first_entry(&cmd->pool->free_query_batches,
batch = list_first_entry(&pool->free_query_batches,
struct vn_feedback_query_batch, head);
list_del(&batch->head);
}
@@ -527,29 +528,9 @@ vn_cmd_query_batch_push(struct vn_command_buffer *cmd,
batch->query_pool = query_pool;
batch->query = query;
batch->query_count = query_count;
list_add(&batch->head, &cmd->builder.query_batches);
batch->copy = copy;
return true;
}
static inline void
vn_cmd_query_batch_pop(struct vn_command_buffer *cmd,
struct vn_feedback_query_batch *batch)
{
list_move_to(&batch->head, &cmd->pool->free_query_batches);
}
static void
vn_cmd_record_batched_query_feedback(struct vn_command_buffer *cmd)
{
list_for_each_entry_safe(struct vn_feedback_query_batch, batch,
&cmd->builder.query_batches, head) {
vn_feedback_query_cmd_record(vn_command_buffer_to_handle(cmd),
vn_query_pool_to_handle(batch->query_pool),
batch->query, batch->query_count, true);
vn_cmd_query_batch_pop(cmd, batch);
}
return batch;
}
static inline void
@@ -558,12 +539,18 @@ vn_cmd_merge_batched_query_feedback(struct vn_command_buffer *primary_cmd,
{
list_for_each_entry_safe(struct vn_feedback_query_batch, secondary_batch,
&secondary_cmd->builder.query_batches, head) {
if (!vn_cmd_query_batch_push(primary_cmd, secondary_batch->query_pool,
secondary_batch->query,
secondary_batch->query_count)) {
struct vn_feedback_query_batch *batch = vn_cmd_query_batch_alloc(
primary_cmd->pool, secondary_batch->query_pool,
secondary_batch->query, secondary_batch->query_count,
secondary_batch->copy);
if (!batch) {
primary_cmd->state = VN_COMMAND_BUFFER_STATE_INVALID;
return;
}
list_addtail(&batch->head, &primary_cmd->builder.query_batches);
}
}
@@ -628,8 +615,6 @@ vn_cmd_end_render_pass(struct vn_command_buffer *cmd)
const struct vn_render_pass *pass = cmd->builder.render_pass;
const struct vn_image **images = cmd->builder.present_src_images;
vn_cmd_record_batched_query_feedback(cmd);
cmd->builder.render_pass = NULL;
cmd->builder.present_src_images = NULL;
cmd->builder.in_render_pass = false;
@@ -660,33 +645,13 @@ vn_cmd_begin_rendering(struct vn_command_buffer *cmd,
const VkRenderingInfo *rendering_info)
{
cmd->builder.in_render_pass = true;
cmd->builder.suspending =
rendering_info->flags & VK_RENDERING_SUSPENDING_BIT;
cmd->builder.view_mask = rendering_info->viewMask;
}
static inline void
vn_cmd_end_rendering(struct vn_command_buffer *cmd)
{
/* XXX query feedback is broken for suspended render pass instance
*
* If resume occurs in a different cmd (only needed for parallel render
* pass recording), the batched query feedbacks here are never recorded.
* Query result retrieval will end up with device lost.
*
* In practice, explicit query usages within the suspended render pass
* instance is a minor (not seeing any so far). Will fix once hit. The
* potential options are:
* - doing a synchronous query pool results retrieval as a fallback
* - store the deferred query feedback in a cmd and inject upon submit
*/
if (!cmd->builder.suspending)
vn_cmd_record_batched_query_feedback(cmd);
else if (VN_DEBUG(RESULT) && !list_is_empty(&cmd->builder.query_batches))
vn_log(cmd->pool->device->instance, "query dropped by suspended pass");
cmd->builder.in_render_pass = false;
cmd->builder.suspending = false;
cmd->builder.view_mask = 0;
}
@@ -801,7 +766,7 @@ vn_cmd_reset(struct vn_command_buffer *cmd)
list_for_each_entry_safe(struct vn_feedback_query_batch, batch,
&cmd->builder.query_batches, head)
vn_cmd_query_batch_pop(cmd, batch);
list_move_to(&batch->head, &cmd->pool->free_query_batches);
if (cmd->linked_query_feedback_cmd)
vn_recycle_query_feedback_cmd(cmd);
@@ -921,7 +886,7 @@ vn_FreeCommandBuffers(VkDevice device,
list_for_each_entry_safe(struct vn_feedback_query_batch, batch,
&cmd->builder.query_batches, head)
vn_cmd_query_batch_pop(cmd, batch);
list_move_to(&batch->head, &cmd->pool->free_query_batches);
if (cmd->linked_query_feedback_cmd)
vn_recycle_query_feedback_cmd(cmd);
@@ -1812,28 +1777,50 @@ vn_cmd_add_query_feedback(VkCommandBuffer cmd_handle,
uint32_t query)
{
struct vn_command_buffer *cmd = vn_command_buffer_from_handle(cmd_handle);
struct vn_query_pool *query_pool = vn_query_pool_from_handle(pool_handle);
/* Outside the render pass instance, vkCmdCopyQueryPoolResults can be
* directly appended. Otherwise, defer the copy cmd until outside.
*/
if (!cmd->builder.in_render_pass) {
vn_feedback_query_cmd_record(cmd_handle, pool_handle, query, 1, true);
return;
}
struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
if (!pool->feedback)
if (!query_pool->feedback)
return;
/* Per 1.3.255 spec "If queries are used while executing a render pass
* instance that has multiview enabled, the query uses N consecutive query
* indices in the query pool (starting at query) where N is the number of
* bits set in the view mask in the subpass the query is used in."
* instance that has multiview enabled, the query uses N consecutive
* query indices in the query pool (starting at query) where N is the
* number of bits set in the view mask in the subpass the query is used
* in."
*/
const uint32_t query_count =
cmd->builder.view_mask ? util_bitcount(cmd->builder.view_mask) : 1;
if (!vn_cmd_query_batch_push(cmd, pool, query, query_count))
uint32_t query_count =
(cmd->builder.in_render_pass && cmd->builder.view_mask)
? util_bitcount(cmd->builder.view_mask)
: 1;
struct vn_feedback_query_batch *batch = vn_cmd_query_batch_alloc(
cmd->pool, query_pool, query, query_count, true);
if (!batch) {
cmd->state = VN_COMMAND_BUFFER_STATE_INVALID;
return;
}
list_addtail(&batch->head, &cmd->builder.query_batches);
}
/* Defer a query reset as a feedback batch on the cmd buffer.
 *
 * The batch is stored with copy == false so that, at submission time, the
 * combining pass can drop earlier copy feedback for queries being reset
 * (replaying such a copy would hang waiting on a never-available query).
 */
static inline void
vn_cmd_add_query_reset_feedback(VkCommandBuffer cmd_handle,
                                VkQueryPool pool_handle,
                                uint32_t query,
                                uint32_t query_count)
{
   struct vn_command_buffer *cmd = vn_command_buffer_from_handle(cmd_handle);
   struct vn_query_pool *query_pool = vn_query_pool_from_handle(pool_handle);

   /* Pools without a feedback buffer have nothing to defer. */
   if (!query_pool->feedback)
      return;

   struct vn_feedback_query_batch *batch = vn_cmd_query_batch_alloc(
      cmd->pool, query_pool, query, query_count, false);
   if (!batch) {
      /* Bail out on allocation failure: falling through would pass a NULL
       * batch to list_addtail below and dereference it.
       */
      cmd->state = VN_COMMAND_BUFFER_STATE_INVALID;
      return;
   }

   list_addtail(&batch->head, &cmd->builder.query_batches);
}
void
@@ -1855,8 +1842,8 @@ vn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
VN_CMD_ENQUEUE(vkCmdResetQueryPool, commandBuffer, queryPool, firstQuery,
queryCount);
vn_feedback_query_cmd_record(commandBuffer, queryPool, firstQuery,
queryCount, false);
vn_cmd_add_query_reset_feedback(commandBuffer, queryPool, firstQuery,
queryCount);
}
void
@@ -1990,13 +1977,10 @@ vn_CmdExecuteCommands(VkCommandBuffer commandBuffer,
struct vn_command_buffer *primary_cmd =
vn_command_buffer_from_handle(commandBuffer);
if (primary_cmd->builder.in_render_pass) {
for (uint32_t i = 0; i < commandBufferCount; i++) {
struct vn_command_buffer *secondary_cmd =
vn_command_buffer_from_handle(pCommandBuffers[i]);
assert(secondary_cmd->builder.in_render_pass);
vn_cmd_merge_batched_query_feedback(primary_cmd, secondary_cmd);
}
for (uint32_t i = 0; i < commandBufferCount; i++) {
struct vn_command_buffer *secondary_cmd =
vn_command_buffer_from_handle(pCommandBuffers[i]);
vn_cmd_merge_batched_query_feedback(primary_cmd, secondary_cmd);
}
}

View File

@@ -14,6 +14,7 @@
#include "vn_common.h"
#include "vn_cs.h"
#include "vn_feedback.h"
struct vn_command_pool {
struct vn_object_base base;
@@ -61,8 +62,6 @@ struct vn_command_buffer_builder {
const struct vn_image **present_src_images;
/* track if inside a render pass instance */
bool in_render_pass;
/* track whether the active render pass instance is suspending */
bool suspending;
/* track the active subpass for view mask used in the subpass */
uint32_t subpass_index;
/* track the active view mask inside a render pass instance */
@@ -96,4 +95,10 @@ VK_DEFINE_HANDLE_CASTS(vn_command_buffer,
VkCommandBuffer,
VK_OBJECT_TYPE_COMMAND_BUFFER)
struct vn_feedback_query_batch *
vn_cmd_query_batch_alloc(struct vn_command_pool *pool,
struct vn_query_pool *query_pool,
uint32_t query,
uint32_t query_count,
bool copy);
#endif /* VN_COMMAND_BUFFER_H */

View File

@@ -513,7 +513,7 @@ vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
return vn_EndCommandBuffer(cmd_handle);
}
void
static void
vn_feedback_query_cmd_record(VkCommandBuffer cmd_handle,
VkQueryPool pool_handle,
uint32_t query,

View File

@@ -146,13 +146,6 @@ vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
VkResult status,
bool sync2);
void
vn_feedback_query_cmd_record(VkCommandBuffer cmd_handle,
VkQueryPool pool_handle,
uint32_t query,
uint32_t count,
bool copy);
VkResult
vn_feedback_query_batch_record(VkDevice dev_handle,
struct vn_feedback_cmd_pool *feedback_pool,

View File

@@ -21,6 +21,7 @@
#include "vn_device.h"
#include "vn_device_memory.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_renderer.h"
#include "vn_wsi.h"
@@ -508,6 +509,59 @@ vn_get_feedback_cmd_handle(struct vn_queue_submission *submit,
: &feedback_cmds->cmd_buffer_infos[cmd_index].commandBuffer;
}
/* Merge the deferred query feedback batches of every cmd buffer in a submit
 * batch into a single combined list, with entries allocated from the
 * feedback pool so they outlive the source cmd buffers.
 *
 * A reset batch (copy == false) cancels earlier copy batches targeting
 * queries inside the reset range [query, query + query_count): replaying
 * such a copy after the reset would hang waiting for a query result that
 * can never become available.
 *
 * src_cmd_handles may point into an array of larger structs (e.g.
 * VkCommandBufferSubmitInfo), so it is advanced by the byte stride rather
 * than by sizeof(VkCommandBuffer).
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure
 * (entries already placed on combined_query_batches are left for the caller
 * to recycle).
 */
static VkResult
vn_combine_query_feedback_batches(VkCommandBuffer *src_cmd_handles,
                                  uint32_t cmd_buffer_count,
                                  uint32_t stride,
                                  struct vn_feedback_cmd_pool *feedback_pool,
                                  struct list_head *combined_query_batches)
{
   struct vn_command_pool *cmd_pool =
      vn_command_pool_from_handle(feedback_pool->pool);

   uintptr_t cmd_handle_ptr = (uintptr_t)src_cmd_handles;
   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
      struct vn_command_buffer *cmd_buffer =
         vn_command_buffer_from_handle(*(VkCommandBuffer *)cmd_handle_ptr);

      list_for_each_entry_safe(struct vn_feedback_query_batch, new_batch,
                               &cmd_buffer->builder.query_batches, head) {
         if (!new_batch->copy) {
            list_for_each_entry_safe(struct vn_feedback_query_batch,
                                     combined_batch, combined_query_batches,
                                     head) {
               /* If we previously added a query feedback that is now getting
                * reset, remove it since it is now a no-op and the deferred
                * feedback copy will cause a hang waiting for the reset query
                * to become available.
                *
                * The reset covers indices [query, query + query_count), so
                * the upper bound is exclusive: a copy at exactly
                * query + query_count is unaffected and must be kept.
                */
               if (combined_batch->copy &&
                   (vn_query_pool_to_handle(combined_batch->query_pool) ==
                    vn_query_pool_to_handle(new_batch->query_pool)) &&
                   combined_batch->query >= new_batch->query &&
                   combined_batch->query <
                      new_batch->query + new_batch->query_count) {
                  list_move_to(&combined_batch->head,
                               &cmd_pool->free_query_batches);
               }
            }
         }

         struct vn_feedback_query_batch *batch = vn_cmd_query_batch_alloc(
            cmd_pool, new_batch->query_pool, new_batch->query,
            new_batch->query_count, new_batch->copy);
         if (!batch)
            return VK_ERROR_OUT_OF_HOST_MEMORY;

         list_addtail(&batch->head, combined_query_batches);
      }

      cmd_handle_ptr += stride;
   }

   return VK_SUCCESS;
}
static VkResult
vn_queue_submission_add_query_feedback(struct vn_queue_submission *submit,
uint32_t cmd_buffer_count,
@@ -530,10 +584,21 @@ vn_queue_submission_add_query_feedback(struct vn_queue_submission *submit,
if (dev->queue_families[pool_index] == queue_vk->queue_family_index)
break;
}
struct vn_feedback_cmd_pool *feedback_cmd_pool =
&dev->cmd_pools[pool_index];
result = vn_feedback_query_batch_record(
dev_handle, &dev->cmd_pools[pool_index], src_cmd_handles,
cmd_buffer_count, stride, feedback_cmd_handle);
struct list_head combined_query_batches;
list_inithead(&combined_query_batches);
result = vn_combine_query_feedback_batches(
src_cmd_handles, cmd_buffer_count, stride, feedback_cmd_pool,
&combined_query_batches);
if (result != VK_SUCCESS)
return result;
result = vn_feedback_query_batch_record(dev_handle, feedback_cmd_pool,
&combined_query_batches,
feedback_cmd_handle);
if (result != VK_SUCCESS)
return result;