From 1537b9355aa112d2d83ed5787cb18a3c564c1107 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 23 Aug 2024 08:06:02 -0400
Subject: [PATCH] ac,radeonsi: update comments related to the L2 cache, use
 "L2", not "TC"

"GL2" is also OK. "TC-compatible" is also OK.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30869>
---
 src/amd/common/ac_surface.c                   |  4 ++--
 src/amd/common/ac_surface.h                   |  2 +-
 src/gallium/drivers/radeonsi/si_clear.c       |  2 +-
 src/gallium/drivers/radeonsi/si_compute.c     |  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h        |  8 ++++----
 src/gallium/drivers/radeonsi/si_state.c       | 19 +++++--------------
 .../drivers/radeonsi/si_state_draw.cpp        |  6 +++---
 .../drivers/radeonsi/si_state_streamout.c     |  7 ++++---
 8 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 492cba3c401..5616dc0663f 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1548,7 +1548,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
     */
    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
       /* The smallest miplevels that are never compressed by DCC
-       * still read the DCC buffer via TC if the base level uses DCC,
+       * still read the DCC buffer from memory if the base level uses DCC,
        * and for some reason the DCC buffer needs to be larger if
        * the miptree uses non-zero tile_swizzle. Otherwise there are
        * VM faults.
@@ -2192,7 +2192,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
           *
           * Alternative solutions that also work but are worse:
           * - Disable DCC entirely.
-          * - Flush TC L2 after rendering.
+          * - Flush the L2 cache after rendering.
           */
          for (unsigned i = 0; i < in->numMipLevels; i++) {
             surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 4e529af9a82..e8cbe929de2 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -153,7 +153,7 @@ enum gfx9_resource_type
 
 struct gfx9_surf_meta_flags {
    uint8_t rb_aligned : 1;   /* optimal for RBs */
-   uint8_t pipe_aligned : 1; /* optimal for TC */
+   uint8_t pipe_aligned : 1; /* optimal for L2 */
    uint8_t independent_64B_blocks : 1;
    uint8_t independent_128B_blocks : 1;
    uint8_t max_compressed_block_size : 2;
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 5beff4db8f4..aefe034bc38 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -66,7 +66,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
                                  sctx->framebuffer.DB_has_shader_readable_metadata);
    }
 
-   /* Flush caches in case we use compute. */
+   /* Invalidate the VMEM cache because we always use compute. */
    sctx->flags |= SI_CONTEXT_INV_VCACHE;
 
    /* GFX6-8: CB and DB don't use L2. */
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 22a64784fe4..2ff851e8be8 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -1213,7 +1213,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
    }
 
    if (info->indirect) {
-      /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
+      /* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
       if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
           si_resource(info->indirect)->TC_L2_dirty) {
          sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c07ba4276e1..f1480f376cd 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -331,14 +331,14 @@ struct si_resource {
    struct util_range valid_buffer_range;
 
    /* For buffers only. This indicates that a write operation has been
-    * performed by TC L2, but the cache hasn't been flushed.
-    * Any hw block which doesn't use or bypasses TC L2 should check this
+    * performed by L2, but the cache hasn't been flushed.
+    * Any hw block which doesn't use or bypasses L2 should check this
     * flag and flush the cache before using the buffer.
     *
-    * For example, TC L2 must be flushed if a buffer which has been
+    * For example, L2 must be flushed if a buffer which has been
     * modified by a shader store instruction is about to be used as
     * an index buffer. The reason is that VGT DMA index fetching doesn't
-    * use TC L2.
+    * use L2.
     */
    bool TC_L2_dirty;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 39730e76f69..8121f30fa47 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2627,20 +2627,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
       }
    }
 
-   /* Only flush TC when changing the framebuffer state, because
-    * the only client not using TC that can change textures is
-    * the framebuffer.
-    *
-    * Wait for compute shaders because of possible transitions:
-    * - FB write -> shader read
-    * - shader write -> FB read
-    *
-    * Wait for draws because of possible transitions:
-    * - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
-    *
-    * DB caches are flushed on demand (using si_decompress_textures).
-    *
-    * When MSAA is enabled, CB and TC caches are flushed on demand
+   /* When MSAA is enabled, CB and L2 caches are flushed on demand
     * (after FMASK decompression). Shader write -> FB read transitions
     * cannot happen for MSAA textures, because MSAA shader images are
     * not supported.
@@ -2653,9 +2640,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                                  sctx->framebuffer.all_DCC_pipe_aligned);
    }
 
+   /* Wait for CS because: shader write -> FB read
+    * Wait for PS because: texture -> render (e.g. glBlitFramebuffer(with src=dst) then glDraw*)
+    */
    sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
    si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
 
+   /* DB caches are flushed on demand (using si_decompress_textures) except in the cases below. */
    if (sctx->gfx_level >= GFX12) {
       si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false);
    } else if (sctx->generate_mipmap_for_depth) {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index bd2c936306b..6062f4f80af 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -594,7 +594,7 @@ static void si_prefetch_shaders(struct si_context *sctx)
    if (GFX_VERSION < GFX7 || !mask)
       return;
 
-   /* Prefetch shaders and VBO descriptors to TC L2. */
+   /* Prefetch shaders and VBO descriptors into L2. */
    if (GFX_VERSION >= GFX11) {
       if (HAS_TESS && mask & SI_PREFETCH_HS)
          si_prefetch_shader_async<GFX_VERSION>(sctx, sctx->queued.named.hs);
@@ -2134,7 +2134,7 @@ static void si_draw(struct pipe_context *ctx,
          index_offset -= start_offset;
       } else if ((GFX_VERSION <= GFX7 || GFX_VERSION == GFX12) &&
                  si_resource(indexbuf)->TC_L2_dirty) {
-         /* GFX8-GFX11 reads index buffers through TC L2, so it doesn't
+         /* GFX8-GFX11 reads index buffers through L2, so it doesn't
           * need this. */
          sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
          si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
@@ -2146,7 +2146,7 @@ static void si_draw(struct pipe_context *ctx,
    unsigned total_direct_count = 0;
 
    if (!IS_DRAW_VERTEX_STATE && indirect) {
-      /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
+      /* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
       if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
          if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
             sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 8047965ff84..8858a68c5fa 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -75,13 +75,13 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
       /* Stop streamout. */
       si_emit_streamout_end(sctx);
 
-      /* Since streamout uses vector writes which go through TC L2
-       * and most other clients can use TC L2 as well, we don't need
+      /* Since streamout uses vector writes which go through L2
+       * and most other clients can use L2 as well, we don't need
        * to flush it.
        *
        * The only cases which requires flushing it is VGT DMA index
        * fetching (on <= GFX7) and indirect draw data, which are rare
-       * cases. Thus, flag the TC L2 dirtiness in the resource and
+       * cases. Thus, flag the L2 dirtiness in the resource and
        * handle it at draw call time.
        */
       for (i = 0; i < old_num_targets; i++)
@@ -387,6 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
                          t[i]->buf_filled_size, t[i]->buf_filled_size_offset,
                          COPY_DATA_REG, NULL,
                          (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
+         /* For DrawTF reading buf_filled_size: */
          sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
          si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
       } else {