ac,radeonsi: update comments related to the L2 cache, use "L2", not "TC"

"GL2" is also OK. "TC-compatible" is also OK.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30869>
This commit is contained in:
Marek Olšák
2024-08-23 08:06:02 -04:00
committed by Marge Bot
parent 1b94137039
commit 1537b9355a
8 changed files with 21 additions and 29 deletions

View File

@@ -1548,7 +1548,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
*/
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
/* The smallest miplevels that are never compressed by DCC
* still read the DCC buffer via TC if the base level uses DCC,
* still read the DCC buffer from memory if the base level uses DCC,
* and for some reason the DCC buffer needs to be larger if
* the miptree uses non-zero tile_swizzle. Otherwise there are
* VM faults.
@@ -2192,7 +2192,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
*
* Alternative solutions that also work but are worse:
* - Disable DCC entirely.
* - Flush TC L2 after rendering.
* - Flush the L2 cache after rendering.
*/
for (unsigned i = 0; i < in->numMipLevels; i++) {
surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;

View File

@@ -153,7 +153,7 @@ enum gfx9_resource_type
struct gfx9_surf_meta_flags {
uint8_t rb_aligned : 1; /* optimal for RBs */
uint8_t pipe_aligned : 1; /* optimal for TC */
uint8_t pipe_aligned : 1; /* optimal for L2 */
uint8_t independent_64B_blocks : 1;
uint8_t independent_128B_blocks : 1;
uint8_t max_compressed_block_size : 2;

View File

@@ -66,7 +66,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
sctx->framebuffer.DB_has_shader_readable_metadata);
}
/* Flush caches in case we use compute. */
/* Invalidate the VMEM cache because we always use compute. */
sctx->flags |= SI_CONTEXT_INV_VCACHE;
/* GFX6-8: CB and DB don't use L2. */

View File

@@ -1213,7 +1213,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
}
if (info->indirect) {
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
/* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
si_resource(info->indirect)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;

View File

@@ -331,14 +331,14 @@ struct si_resource {
struct util_range valid_buffer_range;
/* For buffers only. This indicates that a write operation has been
* performed by TC L2, but the cache hasn't been flushed.
* Any hw block which doesn't use or bypasses TC L2 should check this
* performed by L2, but the cache hasn't been flushed.
* Any hw block which doesn't use or bypasses L2 should check this
* flag and flush the cache before using the buffer.
*
* For example, TC L2 must be flushed if a buffer which has been
* For example, L2 must be flushed if a buffer which has been
* modified by a shader store instruction is about to be used as
* an index buffer. The reason is that VGT DMA index fetching doesn't
* use TC L2.
* use L2.
*/
bool TC_L2_dirty;

View File

@@ -2627,20 +2627,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
}
}
/* Only flush TC when changing the framebuffer state, because
* the only client not using TC that can change textures is
* the framebuffer.
*
* Wait for compute shaders because of possible transitions:
* - FB write -> shader read
* - shader write -> FB read
*
* Wait for draws because of possible transitions:
* - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
*
* DB caches are flushed on demand (using si_decompress_textures).
*
* When MSAA is enabled, CB and TC caches are flushed on demand
/* When MSAA is enabled, CB and L2 caches are flushed on demand
* (after FMASK decompression). Shader write -> FB read transitions
* cannot happen for MSAA textures, because MSAA shader images are
* not supported.
@@ -2653,9 +2640,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
sctx->framebuffer.all_DCC_pipe_aligned);
}
/* Wait for CS because: shader write -> FB read
* Wait for PS because: texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
*/
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
/* DB caches are flushed on demand (using si_decompress_textures) except in the cases below. */
if (sctx->gfx_level >= GFX12) {
si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false);
} else if (sctx->generate_mipmap_for_depth) {

View File

@@ -594,7 +594,7 @@ static void si_prefetch_shaders(struct si_context *sctx)
if (GFX_VERSION < GFX7 || !mask)
return;
/* Prefetch shaders and VBO descriptors to TC L2. */
/* Prefetch shaders and VBO descriptors into L2. */
if (GFX_VERSION >= GFX11) {
if (HAS_TESS && mask & SI_PREFETCH_HS)
si_prefetch_shader_async<GFX_VERSION>(sctx, sctx->queued.named.hs);
@@ -2134,7 +2134,7 @@ static void si_draw(struct pipe_context *ctx,
index_offset -= start_offset;
} else if ((GFX_VERSION <= GFX7 || GFX_VERSION == GFX12) &&
si_resource(indexbuf)->TC_L2_dirty) {
/* GFX8-GFX11 reads index buffers through TC L2, so it doesn't
/* GFX8-GFX11 reads index buffers through L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
@@ -2146,7 +2146,7 @@ static void si_draw(struct pipe_context *ctx,
unsigned total_direct_count = 0;
if (!IS_DRAW_VERTEX_STATE && indirect) {
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
/* Indirect buffers use L2 on GFX9-GFX11, but not other hw. */
if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;

View File

@@ -75,13 +75,13 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
/* Stop streamout. */
si_emit_streamout_end(sctx);
/* Since streamout uses vector writes which go through TC L2
* and most other clients can use TC L2 as well, we don't need
/* Since streamout uses vector writes which go through L2
* and most other clients can use L2 as well, we don't need
* to flush it.
*
* The only cases which require flushing it are VGT DMA index
* fetching (on <= GFX7) and indirect draw data, which are rare
* cases. Thus, flag the TC L2 dirtiness in the resource and
* cases. Thus, flag the L2 dirtiness in the resource and
* handle it at draw call time.
*/
for (i = 0; i < old_num_targets; i++)
@@ -387,6 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
t[i]->buf_filled_size, t[i]->buf_filled_size_offset,
COPY_DATA_REG, NULL,
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
/* For DrawTF reading buf_filled_size: */
sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
} else {