ac,radeonsi: update comments related to the L2 cache, use "L2", not "TC"

"GL2" is also OK. "TC-compatible" is also OK.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30869>
2024-08-23 08:06:02 -04:00
parent 1b94137039
commit 1537b9355a
8 changed files with 21 additions and 29 deletions
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1548,7 +1548,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
    */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
      /* The smallest miplevels that are never compressed by DCC
-       * still read the DCC buffer via TC if the base level uses DCC,
+       * still read the DCC buffer from memory if the base level uses DCC,
       * and for some reason the DCC buffer needs to be larger if
       * the miptree uses non-zero tile_swizzle. Otherwise there are
       * VM faults.
@@ -2192,7 +2192,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
          *
          * Alternative solutions that also work but are worse:
          * - Disable DCC entirely.
-          * - Flush TC L2 after rendering.
+          * - Flush the L2 cache after rendering.
          */
         for (unsigned i = 0; i < in->numMipLevels; i++) {
            surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -153,7 +153,7 @@ enum gfx9_resource_type

 struct gfx9_surf_meta_flags {
   uint8_t rb_aligned : 1;   /* optimal for RBs */
-   uint8_t pipe_aligned : 1; /* optimal for TC */
+   uint8_t pipe_aligned : 1; /* optimal for L2 */
   uint8_t independent_64B_blocks : 1;
   uint8_t independent_128B_blocks : 1;
   uint8_t max_compressed_block_size : 2;
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -66,7 +66,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
                                 sctx->framebuffer.DB_has_shader_readable_metadata);
   }

-   /* Flush caches in case we use compute. */
+   /* Invalidate the VMEM cache because we always use compute. */
   sctx->flags |= SI_CONTEXT_INV_VCACHE;

   /* GFX6-8: CB and DB don't use L2. */
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -1213,7 +1213,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
   }

   if (info->indirect) {
-      /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
+      /* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
      if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
          si_resource(info->indirect)->TC_L2_dirty) {
         sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -331,14 +331,14 @@ struct si_resource {
   struct util_range valid_buffer_range;

   /* For buffers only. This indicates that a write operation has been
-    * performed by TC L2, but the cache hasn't been flushed.
-    * Any hw block which doesn't use or bypasses TC L2 should check this
+    * performed by L2, but the cache hasn't been flushed.
+    * Any hw block which doesn't use or bypasses L2 should check this
    * flag and flush the cache before using the buffer.
    *
-    * For example, TC L2 must be flushed if a buffer which has been
+    * For example, L2 must be flushed if a buffer which has been
    * modified by a shader store instruction is about to be used as
    * an index buffer. The reason is that VGT DMA index fetching doesn't
-    * use TC L2.
+    * use L2.
    */
   bool TC_L2_dirty;

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2627,20 +2627,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
      }
   }

-   /* Only flush TC when changing the framebuffer state, because
-    * the only client not using TC that can change textures is
-    * the framebuffer.
-    *
-    * Wait for compute shaders because of possible transitions:
-    * - FB write -> shader read
-    * - shader write -> FB read
-    *
-    * Wait for draws because of possible transitions:
-    * - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
-    *
-    * DB caches are flushed on demand (using si_decompress_textures).
-    *
-    * When MSAA is enabled, CB and TC caches are flushed on demand
+   /* When MSAA is enabled, CB and L2 caches are flushed on demand
    * (after FMASK decompression). Shader write -> FB read transitions
    * cannot happen for MSAA textures, because MSAA shader images are
    * not supported.
@@ -2653,9 +2640,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                                 sctx->framebuffer.all_DCC_pipe_aligned);
   }

+   /* Wait for CS because: shader write -> FB read
+    * Wait for PS because: texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
+    */
   sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);

+   /* DB caches are flushed on demand (using si_decompress_textures) except in the cases below. */
   if (sctx->gfx_level >= GFX12) {
      si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false);
   } else if (sctx->generate_mipmap_for_depth) {
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -594,7 +594,7 @@ static void si_prefetch_shaders(struct si_context *sctx)
   if (GFX_VERSION < GFX7 || !mask)
      return;

-   /* Prefetch shaders and VBO descriptors to TC L2. */
+   /* Prefetch shaders and VBO descriptors into L2. */
   if (GFX_VERSION >= GFX11) {
      if (HAS_TESS && mask & SI_PREFETCH_HS)
         si_prefetch_shader_async<GFX_VERSION>(sctx, sctx->queued.named.hs);
@@ -2134,7 +2134,7 @@ static void si_draw(struct pipe_context *ctx,
         index_offset -= start_offset;
      } else if ((GFX_VERSION <= GFX7 || GFX_VERSION == GFX12) &&
                 si_resource(indexbuf)->TC_L2_dirty) {
-         /* GFX8-GFX11 reads index buffers through TC L2, so it doesn't
+         /* GFX8-GFX11 reads index buffers through L2, so it doesn't
          * need this. */
         sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
@@ -2146,7 +2146,7 @@ static void si_draw(struct pipe_context *ctx,
   unsigned total_direct_count = 0;

   if (!IS_DRAW_VERTEX_STATE && indirect) {
-      /* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
+      /* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
      if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
         if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
            sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -75,13 +75,13 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
      /* Stop streamout. */
      si_emit_streamout_end(sctx);

-      /* Since streamout uses vector writes which go through TC L2
-       * and most other clients can use TC L2 as well, we don't need
+      /* Since streamout uses vector writes which go through L2
+       * and most other clients can use L2 as well, we don't need
       * to flush it.
       *
       * The only cases which requires flushing it is VGT DMA index
       * fetching (on <= GFX7) and indirect draw data, which are rare
-       * cases. Thus, flag the TC L2 dirtiness in the resource and
+       * cases. Thus, flag the L2 dirtiness in the resource and
       * handle it at draw call time.
       */
      for (i = 0; i < old_num_targets; i++)
@@ -387,6 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
                         t[i]->buf_filled_size, t[i]->buf_filled_size_offset,
                         COPY_DATA_REG, NULL,
                         (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
+         /* For DrawTF reading buf_filled_size: */
         sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
      } else {