radeonsi: enable TC-compatible HTILE on demand for best Z/S performance
I haven't measured this, but it can only help.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4866>
This commit is contained in:
@@ -359,6 +359,17 @@ static void si_decompress_depth(struct si_context *sctx, struct si_texture *tex,
|
||||
tex->stencil_dirty_level_mask &= ~levels_s;
|
||||
}
|
||||
|
||||
/* We just had to completely decompress Z/S for texturing. Enable
|
||||
* TC-compatible HTILE on the next clear, so that the decompression
|
||||
* doesn't have to be done for this texture ever again.
|
||||
*
|
||||
* TC-compatible HTILE might slightly reduce Z/S performance, but
|
||||
* the decompression is much worse.
|
||||
*/
|
||||
if (has_htile && !tc_compat_htile &&
|
||||
tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
|
||||
tex->enable_tc_compatible_htile_next_clear = true;
|
||||
|
||||
/* Only in-place decompression needs to flush DB caches, or
|
||||
* when we don't decompress but TC-compatible planes are dirty.
|
||||
*/
|
||||
|
@@ -570,6 +570,43 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
||||
|
||||
if (zstex && zsbuf->u.tex.first_layer == 0 &&
|
||||
zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
|
||||
/* See whether we should enable TC-compatible HTILE. */
|
||||
if (zstex->enable_tc_compatible_htile_next_clear &&
|
||||
!zstex->tc_compatible_htile &&
|
||||
si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS) &&
|
||||
/* If both depth and stencil are present, they must be cleared together. */
|
||||
((buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
|
||||
(buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
|
||||
zstex->htile_stencil_disabled)))) {
|
||||
/* Enable TC-compatible HTILE. */
|
||||
zstex->enable_tc_compatible_htile_next_clear = false;
|
||||
zstex->tc_compatible_htile = true;
|
||||
|
||||
/* Update the framebuffer state to reflect the change. */
|
||||
sctx->framebuffer.DB_has_shader_readable_metadata = true;
|
||||
sctx->framebuffer.dirty_zsbuf = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
|
||||
|
||||
/* Update all sampler views and shader images in all contexts. */
|
||||
p_atomic_inc(&sctx->screen->dirty_tex_counter);
|
||||
|
||||
/* Re-initialize HTILE, so that it doesn't contain values incompatible
|
||||
* with the new TC-compatible HTILE setting.
|
||||
*
|
||||
* 0xfffff30f = uncompressed Z + S
|
||||
* 0xfffc000f = uncompressed Z only
|
||||
*
|
||||
* GFX8 always uses the Z+S HTILE format for TC-compatible HTILE even
|
||||
* when stencil is not present.
|
||||
*/
|
||||
uint32_t clear_value = (zstex->surface.has_stencil &&
|
||||
!zstex->htile_stencil_disabled) ||
|
||||
sctx->chip_class == GFX8 ? 0xfffff30f : 0xfffc000f;
|
||||
si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.htile_offset,
|
||||
zstex->surface.htile_size, &clear_value, 4,
|
||||
SI_COHERENCY_DB_META, false);
|
||||
}
|
||||
|
||||
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
|
||||
if (buffers & PIPE_CLEAR_DEPTH && si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_Z) &&
|
||||
(!zstex->tc_compatible_htile || depth == 0 || depth == 1)) {
|
||||
|
@@ -33,7 +33,9 @@
|
||||
static enum si_cache_policy get_cache_policy(struct si_context *sctx, enum si_coherency coher,
|
||||
uint64_t size)
|
||||
{
|
||||
-if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) ||
+if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META ||
+coher == SI_COHERENCY_DB_META ||
+coher == SI_COHERENCY_CP)) ||
|
||||
(sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER))
|
||||
return size <= 256 * 1024 ? L2_LRU : L2_STREAM;
|
||||
|
||||
@@ -53,6 +55,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
|
||||
(cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0);
|
||||
case SI_COHERENCY_CB_META:
|
||||
return SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
case SI_COHERENCY_DB_META:
|
||||
return SI_CONTEXT_FLUSH_AND_INV_DB;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -239,6 +239,7 @@ enum si_coherency
|
||||
SI_COHERENCY_NONE, /* no cache flushes needed */
|
||||
SI_COHERENCY_SHADER,
|
||||
SI_COHERENCY_CB_META,
|
||||
SI_COHERENCY_DB_META,
|
||||
SI_COHERENCY_CP,
|
||||
};
|
||||
|
||||
@@ -336,6 +337,7 @@ struct si_texture {
|
||||
uint8_t stencil_clear_value;
|
||||
bool fmask_is_identity : 1;
|
||||
bool tc_compatible_htile : 1;
|
||||
bool enable_tc_compatible_htile_next_clear : 1;
|
||||
bool htile_stencil_disabled : 1;
|
||||
bool depth_cleared : 1; /* if it was cleared at least once */
|
||||
bool stencil_cleared : 1; /* if it was cleared at least once */
|
||||
|
@@ -1207,13 +1207,12 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
|
||||
/* don't include stencil-only formats which we don't support for rendering */
|
||||
tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
|
||||
tex->surface = *surface;
|
||||
-tex->tc_compatible_htile =
-tex->surface.htile_size != 0 && (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+tex->tc_compatible_htile = false; /* This will be enabled on demand. */
|
||||
|
||||
/* TC-compatible HTILE:
|
||||
* - GFX8 only supports Z32_FLOAT.
|
||||
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
|
||||
-if (tex->tc_compatible_htile) {
+if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
|
||||
if (sscreen->info.chip_class >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
|
||||
tex->db_render_format = base->format;
|
||||
else {
|
||||
|
Reference in New Issue
Block a user