ac,radv,radeonsi: add function to get the number of ZPLANES

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29349>
This commit is contained in:
Samuel Pitoiset
2024-05-23 13:51:44 +02:00
committed by Marge Bot
parent 709452b9d1
commit 1a08fa6150
4 changed files with 69 additions and 76 deletions

View File

@@ -798,3 +798,46 @@ ac_init_ds_surface(const struct radeon_info *info, const struct ac_ds_state *sta
ac_init_gfx6_ds_surface(info, state, db_format, stencil_format, ds);
}
}
/**
 * Compute the number of Z planes to program into DECOMPRESS_ON_N_ZPLANES.
 *
 * \param info                    GPU description; gfx_level and
 *                                has_two_planes_iterate256_bug are read.
 * \param format                  Depth format; only PIPE_FORMAT_Z16_UNORM is
 *                                treated specially.
 * \param num_samples             Sample count of the depth surface.
 * \param htile_stencil_disabled  Only consulted on the GFX9+ path, where it
 *                                gates the ITERATE_256 workaround.
 * \param no_d16_compression      Only consulted on the pre-GFX9 path: when set,
 *                                Z16_UNORM surfaces get a plane count of 1.
 * \return The Z plane count selected for the given configuration.
 */
unsigned
ac_get_decompress_on_z_planes(const struct radeon_info *info, enum pipe_format format, uint8_t num_samples,
                              bool htile_stencil_disabled, bool no_d16_compression)
{
   const bool is_z16 = format == PIPE_FORMAT_Z16_UNORM;

   if (info->gfx_level < GFX9) {
      /* Z plane compression is not enabled for 16-bit depth surfaces when the
       * caller asks for it: it isn't supported on GFX8, where only 32-bit
       * depth surfaces are handled by the hardware. This keeps shader
       * compatibility and reduces the number of depth decompressions.
       */
      if (is_z16 && no_d16_compression)
         return 1;

      /* 0 = full compression. N = only compress up to N-1 Z planes. */
      if (num_samples <= 1)
         return 5;

      return num_samples <= 4 ? 3 : 2;
   }

   const bool iterate256 = info->gfx_level >= GFX10 && num_samples >= 2;
   unsigned num_zplanes;

   if (info->has_two_planes_iterate256_bug && iterate256 && !htile_stencil_disabled && num_samples == 4) {
      /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects
       * 4X MSAA D/S images. It takes precedence over the format-based choice.
       */
      num_zplanes = 1;
   } else if (is_z16 && num_samples > 1) {
      num_zplanes = 2;
   } else {
      /* Default value for 32-bit depth surfaces. */
      num_zplanes = 4;
   }

   /* The GFX9+ path always returns the selected count plus one. */
   return num_zplanes + 1;
}

View File

@@ -186,6 +186,10 @@ struct ac_ds_surface {
void
ac_init_ds_surface(const struct radeon_info *info, const struct ac_ds_state *state, struct ac_ds_surface *ds);
/**
 * Return the Z plane count to program into DECOMPRESS_ON_N_ZPLANES.
 *
 * The result depends on the GPU generation (info->gfx_level), on whether the
 * depth format is PIPE_FORMAT_Z16_UNORM, and on the sample count.
 *
 * \param info                    GPU description.
 * \param format                  Depth format of the surface.
 * \param num_samples             Sample count of the surface.
 * \param htile_stencil_disabled  Used on GFX9+ only, as part of the
 *                                ITERATE_256 hang workaround condition.
 * \param no_d16_compression      Used before GFX9 only: when true, Z16_UNORM
 *                                surfaces get a plane count of 1.
 */
unsigned
ac_get_decompress_on_z_planes(const struct radeon_info *info, enum pipe_format format, uint8_t num_samples,
bool htile_stencil_disabled, bool no_d16_compression);
#ifdef __cplusplus
}
#endif

View File

@@ -1799,50 +1799,6 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
}
}
/**
 * Compute the DECOMPRESS_ON_N_ZPLANES value for a depth/stencil image view.
 *
 * The view's image must use TC-compatible HTILE (asserted). The result is
 * derived from the GPU generation, whether the view format is
 * VK_FORMAT_D16_UNORM, and the image sample count.
 */
static unsigned
radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   assert(radv_image_is_tc_compat_htile(iview->image));

   const bool is_d16 = iview->vk.format == VK_FORMAT_D16_UNORM;

   if (pdev->info.gfx_level < GFX9) {
      /* Z plane compression is not enabled for 16-bit depth surfaces because
       * it isn't supported on GFX8; only 32-bit depth surfaces are supported
       * by the hardware. This keeps shader compatibility and reduces the
       * number of depth decompressions.
       */
      if (is_d16)
         return 1;

      if (iview->image->vk.samples <= 1)
         return 5;

      return iview->image->vk.samples <= 4 ? 3 : 2;
   }

   /* 4 is the default for 32-bit depth surfaces; multisampled D16 uses 2. */
   unsigned num_zplanes = (is_d16 && iview->image->vk.samples > 1) ? 2 : 4;

   /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X
    * MSAA D/S images. It overrides the format-based choice above.
    */
   if (pdev->info.has_two_planes_iterate256_bug && radv_image_get_iterate256(device, iview->image) &&
       !radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4)
      num_zplanes = 1;

   /* The GFX9+ path always returns the selected count plus one. */
   return num_zplanes + 1;
}
void
radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds)
{
@@ -1905,10 +1861,15 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ac_init_ds_surface(&pdev->info, &ds_state, &ds->ac);
unsigned max_zplanes = 0;
if (radv_htile_enabled(iview->image, level) && radv_image_is_tc_compat_htile(iview->image)) {
max_zplanes = ac_get_decompress_on_z_planes(&pdev->info, vk_format_to_pipe_format(iview->vk.format),
iview->image->vk.samples,
radv_image_tile_stencil_disabled(device, iview->image), true);
}
if (pdev->info.gfx_level >= GFX9) {
if (radv_htile_enabled(iview->image, level) && radv_image_is_tc_compat_htile(iview->image)) {
unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
ds->ac.db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
if (pdev->info.gfx_level >= GFX10) {
@@ -1932,8 +1893,6 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->ac.u.gfx6.db_depth_info |= S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
if (radv_htile_enabled(iview->image, level) && radv_image_is_tc_compat_htile(iview->image)) {
unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
ds->ac.u.gfx6.db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
ds->ac.db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
}

View File

@@ -3528,11 +3528,10 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
/* Set fields dependent on tc_compatible_htile. */
if (sctx->gfx_level >= GFX9 && tc_compat_htile) {
unsigned max_zplanes = 4;
if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
max_zplanes = 2;
unsigned max_zplanes =
ac_get_decompress_on_z_planes(&sctx->screen->info, tex->db_render_format,
tex->buffer.b.b.nr_samples,
tex->htile_stencil_disabled, false);
if (sctx->gfx_level >= GFX10) {
bool iterate256 = tex->buffer.b.b.nr_samples >= 2;
db_z_info |= S_028040_ITERATE_FLUSH(1) |
@@ -3540,17 +3539,12 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
S_028044_ITERATE_256(iterate256);
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
!tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4) {
max_zplanes = 1;
}
} else {
db_z_info |= S_028038_ITERATE_FLUSH(1);
db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
}
db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
}
unsigned level = zb->base.u.tex.level;
@@ -3608,13 +3602,11 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
if (tex->tc_compatible_htile) {
db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
/* 0 = full compression. N = only compress up to N-1 Z planes. */
if (tex->buffer.b.b.nr_samples <= 1)
db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
else if (tex->buffer.b.b.nr_samples <= 4)
db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
else
db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
unsigned max_zplanes =
ac_get_decompress_on_z_planes(&sctx->screen->info, tex->db_render_format,
tex->buffer.b.b.nr_samples, false, false);
db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
}
}
@@ -3787,10 +3779,10 @@ static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned
/* Set fields dependent on tc_compatible_htile. */
if (tc_compat_htile) {
unsigned max_zplanes = 4;
if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
max_zplanes = 2;
unsigned max_zplanes =
ac_get_decompress_on_z_planes(&sctx->screen->info, tex->db_render_format,
tex->buffer.b.b.nr_samples,
tex->htile_stencil_disabled, false);
bool iterate256 = tex->buffer.b.b.nr_samples >= 2;
db_z_info |= S_028040_ITERATE_FLUSH(1) |
@@ -3798,12 +3790,7 @@ static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned
db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) |
S_028044_ITERATE_256(iterate256);
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 &&
!tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4)
max_zplanes = 1;
db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
}
unsigned level = zb->base.u.tex.level;