anv: fix transfer barriers flushes with compute queue

Transfer operations are implemented differently on the compute engine
and require a different kind of cache flush.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
(cherry picked from commit 3b9466dd51)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27353>
Lionel Landwerlin, 2024-01-24 13:29:01 +02:00
committed by Eric Engestrom
parent 733dd5db5d
commit c395b341ec
2 changed files with 16 additions and 10 deletions
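For context, the bug shows up when an application records a transfer write followed by a barrier on a compute-only queue: the flush helper picked render target / depth cache flushes, which do nothing for writes that went through the data port. A minimal sketch of a recording that exercises the fixed path, using standard Vulkan 1.3 synchronization2 entry points; record_fill_then_barrier is a hypothetical name, and cmd/buf are assumed to come from the caller's setup on a compute-only queue family:

#include <vulkan/vulkan.h>

/* cmd must be allocated from a compute-only queue family for the compute
 * branch of the fix to be taken; buf is any device-local buffer. */
static void
record_fill_then_barrier(VkCommandBuffer cmd, VkBuffer buf)
{
   /* A transfer op: on the compute engine this write goes through the
    * data port, not the render target cache. */
   vkCmdFillBuffer(cmd, buf, 0, VK_WHOLE_SIZE, 0);

   /* TRANSFER_WRITE in srcAccessMask is what reaches
    * anv_pipe_flush_bits_for_access_flags() in the driver. */
   const VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2(cmd, &dep);
}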

--- a/.pick_status.json
+++ b/.pick_status.json
@@ -534,7 +534,7 @@
         "description": "anv: fix transfer barriers flushes with compute queue",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null

--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3750,7 +3750,7 @@ genX(CmdExecuteCommands)(
 }
 
 static inline enum anv_pipe_bits
-anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
+anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
                                      VkAccessFlags2 flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
@@ -3791,12 +3791,17 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
        * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
        *
        * Most of these operations are implemented using Blorp which writes
-       * through the render target, so flush that cache to make it visible
-       * to future operations. And for depth related operations we also
-       * need to flush the depth cache.
+       * through the render target cache or the depth cache on the graphics
+       * queue. On the compute queue, the writes are done through the data
+       * port.
        */
-      pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
-      pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+      if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
+         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+         pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+      } else {
+         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
+         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
+      }
       break;
    case VK_ACCESS_2_MEMORY_WRITE_BIT:
       /* We're transitioning a buffer for generic write operations. Flush
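
The helper driving the new branch, anv_cmd_buffer_is_compute_queue(), already exists in anv; roughly, it checks which hardware engine the command buffer's queue family targets. A sketch of the idea, not the verbatim definition (field and enum names are my best reading of the anv code of this era):

static inline bool
anv_cmd_buffer_is_compute_queue(const struct anv_cmd_buffer *cmd_buffer)
{
   /* Batches recorded for the compute engine (CCS) never go through the
    * render target or depth caches, so their transfer writes land in the
    * data port caches instead. */
   return cmd_buffer->queue_family->engine_class ==
          INTEL_ENGINE_CLASS_COMPUTE;
}
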
@@ -3833,9 +3838,10 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
 }
 
 static inline enum anv_pipe_bits
-anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
+anv_pipe_invalidate_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
                                           VkAccessFlags2 flags)
 {
+   struct anv_device *device = cmd_buffer->device;
    enum anv_pipe_bits pipe_bits = 0;
 
    u_foreach_bit64(b, flags) {
@@ -4338,8 +4344,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
    }
 
    enum anv_pipe_bits bits =
-      anv_pipe_flush_bits_for_access_flags(device, src_flags) |
-      anv_pipe_invalidate_bits_for_access_flags(device, dst_flags);
+      anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags);
 
    /* Our HW implementation of the sparse feature lives in the GAM unit
     * (interface between all the GPU caches and external memory). As a result
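
The cmd_buffer_barrier() hunk above is the behavioral consumer of the change: passing the command buffer instead of the device lets both helpers key off the queue family. A hypothetical wrapper illustrating the new calling convention; example_apply_barrier_flushes is illustrative and not part of the patch, while anv_add_pending_pipe_bits() is, as I understand it, anv's existing deferred-flush entry point:

/* Hypothetical call site: the helpers take the command buffer so they can
 * tell which queue (and thus which HW engine) the barrier is recorded for. */
static void
example_apply_barrier_flushes(struct anv_cmd_buffer *cmd_buffer,
                              VkAccessFlags2 src_flags,
                              VkAccessFlags2 dst_flags)
{
   enum anv_pipe_bits bits =
      anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) |
      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags);

   /* anv defers pipe flushes; they are emitted as PIPE_CONTROLs the next
    * time the batch actually needs them. */
   anv_add_pending_pipe_bits(cmd_buffer, bits, "example barrier");
}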