anv: fix transfer barriers flushes with compute queue

Transfer operations are implemented differently on the compute engine
and require a different kind of cache flush.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
(cherry picked from commit 3b9466dd51)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27353>
Lionel Landwerlin, 2024-01-24 13:29:01 +02:00
committed by Eric Engestrom
parent 733dd5db5d
commit c395b341ec
2 changed files with 16 additions and 10 deletions
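For context, the bug shows up when an application records a transfer write followed by a barrier on a compute-only queue: the flush helper picked render target / depth cache flushes, which do nothing for writes that went through the data port. A minimal sketch of a recording that exercises the fixed path, using standard Vulkan 1.3 synchronization2 entry points; record_fill_then_barrier is a hypothetical name, and cmd/buf are assumed to come from the caller's setup on a compute-only queue family:

#include <vulkan/vulkan.h>

/* cmd must be allocated from a compute-only queue family for the compute
 * branch of the fix to be taken; buf is any device-local buffer. */
static void
record_fill_then_barrier(VkCommandBuffer cmd, VkBuffer buf)
{
   /* A transfer op: on the compute engine this write goes through the
    * data port, not the render target cache. */
   vkCmdFillBuffer(cmd, buf, 0, VK_WHOLE_SIZE, 0);

   /* TRANSFER_WRITE in srcAccessMask is what reaches
    * anv_pipe_flush_bits_for_access_flags() in the driver. */
   const VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2(cmd, &dep);
}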

--- a/.pick_status.json
+++ b/.pick_status.json
@@ -534,7 +534,7 @@
         "description": "anv: fix transfer barriers flushes with compute queue",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null

--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3750,7 +3750,7 @@ genX(CmdExecuteCommands)(
 }
 
 static inline enum anv_pipe_bits
-anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
+anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
                                      VkAccessFlags2 flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
@@ -3791,12 +3791,17 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
        * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
        *
        * Most of these operations are implemented using Blorp which writes
-       * through the render target, so flush that cache to make it visible
-       * to future operations. And for depth related operations we also
-       * need to flush the depth cache.
+       * through the render target cache or the depth cache on the graphics
+       * queue. On the compute queue, the writes are done through the data
+       * port.
        */
-      pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
-      pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+      if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
+         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+         pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+      } else {
+         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+      }
       break;
    case VK_ACCESS_2_MEMORY_WRITE_BIT:
       /* We're transitioning a buffer for generic write operations. Flush
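
The helper driving the new branch, anv_cmd_buffer_is_compute_queue(), already exists in anv; roughly, it checks which hardware engine the command buffer's queue family targets. A sketch of the idea, not the verbatim definition (field and enum names are my best reading of the anv code of this era):

static inline bool
anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
{
   /* Batches recorded for the compute engine (CCS) never go through the
    * render target or depth caches, so their transfer writes land in the
    * data port caches instead. */
   return cmd_buffer->queue_family->engine_class ==
          INTEL_ENGINE_CLASS_COMPUTE;
}
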
@@ -3833,9 +3838,10 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
 }
 
 static inline enum anv_pipe_bits
-anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
+anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
                                           VkAccessFlags2 flags)
 {
+   struct anv_device *device = cmd_buffer->device;
    enum anv_pipe_bits pipe_bits = 0;
 
    u_foreach_bit64(b, flags) {
@@ -4338,8 +4344,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
    }
 
    enum anv_pipe_bits bits =
-      anv_pipe_flush_bits_for_access_flags(device, src_flags) |
-      anv_pipe_invalidate_bits_for_access_flags(device, dst_flags);
+      anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags);
 
    /* Our HW implementation of the sparse feature lives in the GAM unit
     * (interface between all the GPU caches and external memory). As a result
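
The cmd_buffer_barrier() hunk above is the behavioral consumer of the change: passing the command buffer instead of the device lets both helpers key off the queue family. A hypothetical wrapper illustrating the new calling convention; example_apply_barrier_flushes is illustrative and not part of the patch, while anv_add_pending_pipe_bits() is, as I understand it, anv's existing deferred-flush entry point:

/* Hypothetical call site: the helpers take the command buffer so they can
 * tell which queue (and thus which HW engine) the barrier is recorded for. */
static void
example_apply_barrier_flushes(struct anv_cmd_buffer *cmd_buffer,
                              VkAccessFlags2 src_flags,
                              VkAccessFlags2 dst_flags)
{
   enum anv_pipe_bits bits =
      anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) |
      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags);

   /* anv defers pipe flushes; they are emitted as PIPE_CONTROLs the next
    * time the batch actually needs them. */
   anv_add_pending_pipe_bits(cmd_buffer, bits, "example barrier");
}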