radv: implement compressed FMASK texture reads with RADV_PERFTEST=tccompatcmask
This allows us to disable the FMASK decompress pass when transitioning from CB writes to shader reads. This will likely be improved and enabled by default in the future. No CTS regressions on GFX8, but a small number of multisample CTS failures on GFX9 (they look related to the small hint). Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -1255,6 +1255,15 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
cb_color_info &= C_028C70_DCC_ENABLE;
|
cb_color_info &= C_028C70_DCC_ENABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (radv_image_is_tc_compat_cmask(image) &&
|
||||||
|
(radv_is_fmask_decompress_pipeline(cmd_buffer) ||
|
||||||
|
radv_is_dcc_decompress_pipeline(cmd_buffer))) {
|
||||||
|
/* If this bit is set, the FMASK decompression operation
|
||||||
|
* doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
|
||||||
|
*/
|
||||||
|
cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
|
||||||
|
}
|
||||||
|
|
||||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
|
radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
|
||||||
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
|
radeon_emit(cmd_buffer->cs, cb->cb_color_base);
|
||||||
|
@@ -62,6 +62,7 @@ enum {
|
|||||||
RADV_PERFTEST_DCC_MSAA = 0x10,
|
RADV_PERFTEST_DCC_MSAA = 0x10,
|
||||||
RADV_PERFTEST_BO_LIST = 0x20,
|
RADV_PERFTEST_BO_LIST = 0x20,
|
||||||
RADV_PERFTEST_SHADER_BALLOT = 0x40,
|
RADV_PERFTEST_SHADER_BALLOT = 0x40,
|
||||||
|
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
|
||||||
};
|
};
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@@ -482,6 +482,7 @@ static const struct debug_control radv_perftest_options[] = {
|
|||||||
{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
|
{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
|
||||||
{"bolist", RADV_PERFTEST_BO_LIST},
|
{"bolist", RADV_PERFTEST_BO_LIST},
|
||||||
{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
|
{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
|
||||||
|
{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
|
||||||
{NULL, 0}
|
{NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -4397,6 +4398,20 @@ radv_initialise_color_surface(struct radv_device *device,
|
|||||||
unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
|
unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
|
||||||
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (radv_image_is_tc_compat_cmask(iview->image)) {
|
||||||
|
/* Allow the texture block to read FMASK directly
|
||||||
|
* without decompressing it. This bit must be cleared
|
||||||
|
* when performing FMASK_DECOMPRESS or DCC_COMPRESS,
|
||||||
|
* otherwise the operation doesn't happen.
|
||||||
|
*/
|
||||||
|
cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
|
||||||
|
|
||||||
|
/* Set CMASK into a tiling format that allows the
|
||||||
|
* texture block to read it.
|
||||||
|
*/
|
||||||
|
cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (radv_image_has_cmask(iview->image) &&
|
if (radv_image_has_cmask(iview->image) &&
|
||||||
|
@@ -219,6 +219,29 @@ radv_use_dcc_for_image(struct radv_device *device,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decide whether TC-compatible CMASK should be used for an image.
 *
 * Returns true only when all of the following hold:
 *  - the RADV_PERFTEST_TC_COMPAT_CMASK bit is set in the instance's
 *    perftest_flags (the feature is opt-in),
 *  - the device's chip_class is GFX8 or newer,
 *  - the image usage does not include VK_IMAGE_USAGE_STORAGE_BIT,
 *  - radv_image_has_dcc() is false for the image,
 *  - radv_image_has_cmask() is true for the image.
 * Otherwise returns false.
 */
static bool
|
||||||
|
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
|
||||||
|
struct radv_image *image)
|
||||||
|
{
|
||||||
|
/* Opt-in only: the perftest flag must be set. */
if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* TC-compat CMASK is only available for GFX8+. */
|
||||||
|
if (device->physical_device->rad_info.chip_class < GFX8)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Images with storage usage are rejected. */
if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Images that have DCC are rejected. */
if (radv_image_has_dcc(image))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* A CMASK surface is required. */
if (!radv_image_has_cmask(image))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
radv_prefill_surface_from_metadata(struct radv_device *device,
|
radv_prefill_surface_from_metadata(struct radv_device *device,
|
||||||
struct radeon_surf *surface,
|
struct radeon_surf *surface,
|
||||||
@@ -729,11 +752,26 @@ si_make_texture_descriptor(struct radv_device *device,
|
|||||||
S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
|
S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
|
||||||
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
|
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
|
||||||
S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
|
S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
|
||||||
|
|
||||||
|
if (radv_image_is_tc_compat_cmask(image)) {
|
||||||
|
va = gpu_address + image->offset + image->cmask.offset;
|
||||||
|
|
||||||
|
fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
|
||||||
|
fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
|
||||||
|
fmask_state[7] |= va >> 8;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
|
fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
|
||||||
fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
|
fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
|
||||||
S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
|
S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
|
||||||
fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
|
fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
|
||||||
|
|
||||||
|
if (radv_image_is_tc_compat_cmask(image)) {
|
||||||
|
va = gpu_address + image->offset + image->cmask.offset;
|
||||||
|
|
||||||
|
fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
|
||||||
|
fmask_state[7] |= va >> 8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (fmask_state)
|
} else if (fmask_state)
|
||||||
memset(fmask_state, 0, 8 * 4);
|
memset(fmask_state, 0, 8 * 4);
|
||||||
@@ -1122,6 +1160,9 @@ radv_image_create(VkDevice _device,
|
|||||||
/* Try to enable FMASK for multisampled images. */
|
/* Try to enable FMASK for multisampled images. */
|
||||||
if (radv_image_can_enable_fmask(image)) {
|
if (radv_image_can_enable_fmask(image)) {
|
||||||
radv_image_alloc_fmask(device, image);
|
radv_image_alloc_fmask(device, image);
|
||||||
|
|
||||||
|
if (radv_use_tc_compat_cmask_for_image(device, image))
|
||||||
|
image->tc_compatible_cmask = true;
|
||||||
} else {
|
} else {
|
||||||
/* Otherwise, try to enable HTILE for depth surfaces. */
|
/* Otherwise, try to enable HTILE for depth surfaces. */
|
||||||
if (radv_image_can_enable_htile(image) &&
|
if (radv_image_can_enable_htile(image) &&
|
||||||
|
@@ -222,6 +222,32 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
struct radv_image *image,
|
struct radv_image *image,
|
||||||
const VkImageSubresourceRange *range, uint32_t value);
|
const VkImageSubresourceRange *range, uint32_t value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the bound pipeline is the FMASK decompress pass.
|
||||||
|
*
* The check compares the handle of cmd_buffer->state.pipeline with the
* fast_clear_flush.fmask_decompress_pipeline handle stored in the
* device's meta state.
*/
|
||||||
|
static inline bool
|
||||||
|
radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
|
||||||
|
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||||
|
|
||||||
|
return radv_pipeline_to_handle(pipeline) ==
|
||||||
|
meta_state->fast_clear_flush.fmask_decompress_pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the bound pipeline is the DCC decompress pass.
|
||||||
|
*
* The check compares the handle of cmd_buffer->state.pipeline with the
* fast_clear_flush.dcc_decompress_pipeline handle stored in the
* device's meta state.
*/
|
||||||
|
static inline bool
|
||||||
|
radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
||||||
|
{
|
||||||
|
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
|
||||||
|
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
|
||||||
|
|
||||||
|
return radv_pipeline_to_handle(pipeline) ==
|
||||||
|
meta_state->fast_clear_flush.dcc_decompress_pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
/* common nir builder helpers */
|
/* common nir builder helpers */
|
||||||
#include "nir/nir_builder.h"
|
#include "nir/nir_builder.h"
|
||||||
|
|
||||||
|
@@ -646,7 +646,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
|
if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
|
||||||
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
|
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
|
||||||
} else if (radv_image_has_fmask(image)) {
|
} else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
|
||||||
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
|
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
|
||||||
} else {
|
} else {
|
||||||
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
|
pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
|
||||||
|
@@ -1562,6 +1562,7 @@ struct radv_image {
|
|||||||
uint64_t dcc_offset;
|
uint64_t dcc_offset;
|
||||||
uint64_t htile_offset;
|
uint64_t htile_offset;
|
||||||
bool tc_compatible_htile;
|
bool tc_compatible_htile;
|
||||||
|
bool tc_compatible_cmask;
|
||||||
|
|
||||||
struct radv_fmask_info fmask;
|
struct radv_fmask_info fmask;
|
||||||
struct radv_cmask_info cmask;
|
struct radv_cmask_info cmask;
|
||||||
@@ -1635,6 +1636,15 @@ radv_image_has_dcc(const struct radv_image *image)
|
|||||||
return image->planes[0].surface.dcc_size;
|
return image->planes[0].surface.dcc_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the image is TC-compatible CMASK.
|
||||||
|
*
* True only when radv_image_has_fmask() holds for the image and its
* tc_compatible_cmask flag is set.
*/
|
||||||
|
static inline bool
|
||||||
|
radv_image_is_tc_compat_cmask(const struct radv_image *image)
|
||||||
|
{
|
||||||
|
return radv_image_has_fmask(image) && image->tc_compatible_cmask;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return whether DCC metadata is enabled for a level.
|
* Return whether DCC metadata is enabled for a level.
|
||||||
*/
|
*/
|
||||||
|
Reference in New Issue
Block a user