ac,radeonsi: update comments related to the L2 cache, use "L2", not "TC"
"GL2" is also OK. "TC-compatible" is also OK.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30869>
This commit is contained in:
@@ -1548,7 +1548,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i
|
||||
*/
|
||||
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
|
||||
/* The smallest miplevels that are never compressed by DCC
|
||||
* still read the DCC buffer via TC if the base level uses DCC,
|
||||
* still read the DCC buffer from memory if the base level uses DCC,
|
||||
* and for some reason the DCC buffer needs to be larger if
|
||||
* the miptree uses non-zero tile_swizzle. Otherwise there are
|
||||
* VM faults.
|
||||
@@ -2192,7 +2192,7 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
|
||||
*
|
||||
* Alternative solutions that also work but are worse:
|
||||
* - Disable DCC entirely.
|
||||
* - Flush TC L2 after rendering.
|
||||
* - Flush the L2 cache after rendering.
|
||||
*/
|
||||
for (unsigned i = 0; i < in->numMipLevels; i++) {
|
||||
surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
|
||||
|
@@ -153,7 +153,7 @@ enum gfx9_resource_type
|
||||
|
||||
struct gfx9_surf_meta_flags {
|
||||
uint8_t rb_aligned : 1; /* optimal for RBs */
|
||||
uint8_t pipe_aligned : 1; /* optimal for TC */
|
||||
uint8_t pipe_aligned : 1; /* optimal for L2 */
|
||||
uint8_t independent_64B_blocks : 1;
|
||||
uint8_t independent_128B_blocks : 1;
|
||||
uint8_t max_compressed_block_size : 2;
|
||||
|
@@ -66,7 +66,7 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
|
||||
sctx->framebuffer.DB_has_shader_readable_metadata);
|
||||
}
|
||||
|
||||
/* Flush caches in case we use compute. */
|
||||
/* Invalidate the VMEM cache because we always use compute. */
|
||||
sctx->flags |= SI_CONTEXT_INV_VCACHE;
|
||||
|
||||
/* GFX6-8: CB and DB don't use L2. */
|
||||
|
@@ -1213,7 +1213,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
|
||||
}
|
||||
|
||||
if (info->indirect) {
|
||||
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
|
||||
/* Indirect buffers are read through L2 on GFX9-GFX11, but not other hw. */
|
||||
if ((sctx->gfx_level <= GFX8 || sctx->gfx_level == GFX12) &&
|
||||
si_resource(info->indirect)->TC_L2_dirty) {
|
||||
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
|
@@ -331,14 +331,14 @@ struct si_resource {
|
||||
struct util_range valid_buffer_range;
|
||||
|
||||
/* For buffers only. This indicates that a write operation has been
|
||||
* performed by TC L2, but the cache hasn't been flushed.
|
||||
* Any hw block which doesn't use or bypasses TC L2 should check this
|
||||
* performed by L2, but the cache hasn't been flushed.
|
||||
* Any hw block which doesn't use or bypasses L2 should check this
|
||||
* flag and flush the cache before using the buffer.
|
||||
*
|
||||
* For example, TC L2 must be flushed if a buffer which has been
|
||||
* For example, L2 must be flushed if a buffer which has been
|
||||
* modified by a shader store instruction is about to be used as
|
||||
* an index buffer. The reason is that VGT DMA index fetching doesn't
|
||||
* use TC L2.
|
||||
* use L2.
|
||||
*/
|
||||
bool TC_L2_dirty;
|
||||
|
||||
|
@@ -2627,20 +2627,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
/* Only flush TC when changing the framebuffer state, because
|
||||
* the only client not using TC that can change textures is
|
||||
* the framebuffer.
|
||||
*
|
||||
* Wait for compute shaders because of possible transitions:
|
||||
* - FB write -> shader read
|
||||
* - shader write -> FB read
|
||||
*
|
||||
* Wait for draws because of possible transitions:
|
||||
* - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
|
||||
*
|
||||
* DB caches are flushed on demand (using si_decompress_textures).
|
||||
*
|
||||
* When MSAA is enabled, CB and TC caches are flushed on demand
|
||||
/* When MSAA is enabled, CB and L2 caches are flushed on demand
|
||||
* (after FMASK decompression). Shader write -> FB read transitions
|
||||
* cannot happen for MSAA textures, because MSAA shader images are
|
||||
* not supported.
|
||||
@@ -2653,9 +2640,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||
sctx->framebuffer.all_DCC_pipe_aligned);
|
||||
}
|
||||
|
||||
/* Wait for CS because: shader write -> FB read
|
||||
* Wait for PS because: texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*)
|
||||
*/
|
||||
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
|
||||
|
||||
/* DB caches are flushed on demand (using si_decompress_textures) except the cases below. */
|
||||
if (sctx->gfx_level >= GFX12) {
|
||||
si_make_DB_shader_coherent(sctx, sctx->framebuffer.nr_samples, true, false);
|
||||
} else if (sctx->generate_mipmap_for_depth) {
|
||||
|
@@ -594,7 +594,7 @@ static void si_prefetch_shaders(struct si_context *sctx)
|
||||
if (GFX_VERSION < GFX7 || !mask)
|
||||
return;
|
||||
|
||||
/* Prefetch shaders and VBO descriptors to TC L2. */
|
||||
/* Prefetch shaders and VBO descriptors into L2. */
|
||||
if (GFX_VERSION >= GFX11) {
|
||||
if (HAS_TESS && mask & SI_PREFETCH_HS)
|
||||
si_prefetch_shader_async<GFX_VERSION>(sctx, sctx->queued.named.hs);
|
||||
@@ -2134,7 +2134,7 @@ static void si_draw(struct pipe_context *ctx,
|
||||
index_offset -= start_offset;
|
||||
} else if ((GFX_VERSION <= GFX7 || GFX_VERSION == GFX12) &&
|
||||
si_resource(indexbuf)->TC_L2_dirty) {
|
||||
/* GFX8-GFX11 reads index buffers through TC L2, so it doesn't
|
||||
/* GFX8-GFX11 reads index buffers through L2, so it doesn't
|
||||
* need this. */
|
||||
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
|
||||
@@ -2146,7 +2146,7 @@ static void si_draw(struct pipe_context *ctx,
|
||||
unsigned total_direct_count = 0;
|
||||
|
||||
if (!IS_DRAW_VERTEX_STATE && indirect) {
|
||||
/* Indirect buffers use TC L2 on GFX9-GFX11, but not other hw. */
|
||||
/* Indirect buffers use L2 on GFX9-GFX11, but not other hw. */
|
||||
if (GFX_VERSION <= GFX8 || GFX_VERSION == GFX12) {
|
||||
if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
|
||||
sctx->flags |= SI_CONTEXT_WB_L2 | SI_CONTEXT_PFP_SYNC_ME;
|
||||
|
@@ -75,13 +75,13 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
|
||||
/* Stop streamout. */
|
||||
si_emit_streamout_end(sctx);
|
||||
|
||||
/* Since streamout uses vector writes which go through TC L2
|
||||
* and most other clients can use TC L2 as well, we don't need
|
||||
/* Since streamout uses vector writes which go through L2
|
||||
* and most other clients can use L2 as well, we don't need
|
||||
* to flush it.
|
||||
*
|
||||
* The only cases which require flushing it are VGT DMA index
|
||||
* fetching (on <= GFX7) and indirect draw data, which are rare
|
||||
* cases. Thus, flag the TC L2 dirtiness in the resource and
|
||||
* cases. Thus, flag the L2 dirtiness in the resource and
|
||||
* handle it at draw call time.
|
||||
*/
|
||||
for (i = 0; i < old_num_targets; i++)
|
||||
@@ -387,6 +387,7 @@ void si_emit_streamout_end(struct si_context *sctx)
|
||||
t[i]->buf_filled_size, t[i]->buf_filled_size_offset,
|
||||
COPY_DATA_REG, NULL,
|
||||
(R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
/* For DrawTF reading buf_filled_size: */
|
||||
sctx->flags |= SI_CONTEXT_PFP_SYNC_ME;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user