radv,aco: do not compact MRTs if the pipeline uses a PS epilog
We can't detect color attachments without exports when compiling a PS epilog, so we can't compact MRTs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18514>
This commit is contained in:

committed by
Marge Bot

parent
09a0fd6925
commit
19eec024d2
@@ -12354,7 +12354,6 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
|
|||||||
Builder bld(ctx.program, ctx.block);
|
Builder bld(ctx.program, ctx.block);
|
||||||
|
|
||||||
/* Export all color render targets */
|
/* Export all color render targets */
|
||||||
unsigned compacted_mrt_index = 0;
|
|
||||||
bool exported = false;
|
bool exported = false;
|
||||||
|
|
||||||
for (unsigned i = 0; i < 8; i++) {
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
@@ -12365,7 +12364,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
|
|||||||
|
|
||||||
struct mrt_color_export out;
|
struct mrt_color_export out;
|
||||||
|
|
||||||
out.slot = compacted_mrt_index;
|
out.slot = i;
|
||||||
out.write_mask = 0xf;
|
out.write_mask = 0xf;
|
||||||
out.col_format = col_format;
|
out.col_format = col_format;
|
||||||
out.is_int8 = (key->color_is_int8 >> i) & 1;
|
out.is_int8 = (key->color_is_int8 >> i) & 1;
|
||||||
@@ -12377,10 +12376,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
|
|||||||
out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1));
|
out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (export_fs_mrt_color(&ctx, &out, true)) {
|
exported |= export_fs_mrt_color(&ctx, &out, true);
|
||||||
compacted_mrt_index++;
|
|
||||||
exported = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!exported)
|
if (!exported)
|
||||||
|
@@ -576,7 +576,8 @@ radv_compact_spi_shader_col_format(const struct radv_shader *ps,
|
|||||||
static void
|
static void
|
||||||
radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline,
|
radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline,
|
||||||
struct radv_blend_state *blend,
|
struct radv_blend_state *blend,
|
||||||
const struct vk_graphics_pipeline_state *state)
|
const struct vk_graphics_pipeline_state *state,
|
||||||
|
bool has_ps_epilog)
|
||||||
{
|
{
|
||||||
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
|
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
|
||||||
|
|
||||||
@@ -611,6 +612,19 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip
|
|||||||
col_format |= V_028714_SPI_SHADER_32_AR;
|
col_format |= V_028714_SPI_SHADER_32_AR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (has_ps_epilog) {
|
||||||
|
/* Do not compact MRTs when the pipeline uses a PS epilog because we can't detect color
|
||||||
|
* attachments without exports. Without compaction and if the i-th target format is set, all
|
||||||
|
* previous target formats must be non-zero to avoid hangs.
|
||||||
|
*/
|
||||||
|
unsigned num_targets = (util_last_bit(col_format) + 3) / 4;
|
||||||
|
for (unsigned i = 0; i < num_targets; i++) {
|
||||||
|
if (!(col_format & (0xfu << (i * 4)))) {
|
||||||
|
col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* The output for dual source blending should have the same format as
|
/* The output for dual source blending should have the same format as
|
||||||
* the first output.
|
* the first output.
|
||||||
*/
|
*/
|
||||||
@@ -699,7 +713,8 @@ radv_blend_check_commutativity(struct radv_blend_state *blend, VkBlendOp op, VkB
|
|||||||
|
|
||||||
static struct radv_blend_state
|
static struct radv_blend_state
|
||||||
radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
|
radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
|
||||||
const struct vk_graphics_pipeline_state *state)
|
const struct vk_graphics_pipeline_state *state,
|
||||||
|
bool has_ps_epilog)
|
||||||
{
|
{
|
||||||
const struct radv_device *device = pipeline->base.device;
|
const struct radv_device *device = pipeline->base.device;
|
||||||
struct radv_blend_state blend = {0};
|
struct radv_blend_state blend = {0};
|
||||||
@@ -861,7 +876,7 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline,
|
|||||||
else
|
else
|
||||||
cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
|
cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
|
||||||
|
|
||||||
radv_pipeline_compute_spi_color_formats(pipeline, &blend, state);
|
radv_pipeline_compute_spi_color_formats(pipeline, &blend, state, has_ps_epilog);
|
||||||
|
|
||||||
pipeline->cb_color_control = cb_color_control;
|
pipeline->cb_color_control = cb_color_control;
|
||||||
|
|
||||||
@@ -6083,7 +6098,8 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
|||||||
|
|
||||||
radv_pipeline_layout_hash(&pipeline_layout);
|
radv_pipeline_layout_hash(&pipeline_layout);
|
||||||
|
|
||||||
struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state);
|
struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state,
|
||||||
|
pipeline->ps_epilog);
|
||||||
|
|
||||||
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
|
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
|
||||||
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
||||||
@@ -6121,7 +6137,9 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
|||||||
gfx103_pipeline_init_vrs_state(pipeline, &state);
|
gfx103_pipeline_init_vrs_state(pipeline, &state);
|
||||||
|
|
||||||
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
|
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
|
||||||
blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend);
|
if (!ps->info.ps.has_epilog) {
|
||||||
|
blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend);
|
||||||
|
}
|
||||||
|
|
||||||
/* Ensure that some export memory is always allocated, for two reasons:
|
/* Ensure that some export memory is always allocated, for two reasons:
|
||||||
*
|
*
|
||||||
@@ -6144,12 +6162,14 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
|||||||
blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* In presense of MRT holes (ie. the FS exports MRT1 but not MRT0), the compiler will remap them,
|
if (!ps->info.ps.has_epilog) {
|
||||||
* so that only MRT0 is exported and the driver will compact SPI_SHADER_COL_FORMAT to match what
|
/* In presence of MRT holes (i.e. the FS exports MRT1 but not MRT0), the compiler will remap
|
||||||
* the FS actually exports. Though, to make sure the hw remapping works as expected, we should
|
* them, so that only MRT0 is exported and the driver will compact SPI_SHADER_COL_FORMAT to
|
||||||
* also clear color attachments without exports in CB_SHADER_MASK.
|
* match what the FS actually exports. Though, to make sure the hw remapping works as
|
||||||
*/
|
* expected, we should also clear color attachments without exports in CB_SHADER_MASK.
|
||||||
blend.cb_shader_mask &= ps->info.ps.colors_written;
|
*/
|
||||||
|
blend.cb_shader_mask &= ps->info.ps.colors_written;
|
||||||
|
}
|
||||||
|
|
||||||
pipeline->col_format = blend.spi_shader_col_format;
|
pipeline->col_format = blend.spi_shader_col_format;
|
||||||
pipeline->cb_target_mask = blend.cb_target_mask;
|
pipeline->cb_target_mask = blend.cb_target_mask;
|
||||||
@@ -6275,7 +6295,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
|
|||||||
if ((imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
|
if ((imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) &&
|
||||||
!(imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
|
!(imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) {
|
||||||
struct radv_blend_state blend =
|
struct radv_blend_state blend =
|
||||||
radv_pipeline_init_blend_state(&pipeline->base, state);
|
radv_pipeline_init_blend_state(&pipeline->base, state, true);
|
||||||
|
|
||||||
struct radv_pipeline_key key =
|
struct radv_pipeline_key key =
|
||||||
radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend);
|
radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend);
|
||||||
@@ -6297,7 +6317,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
|
|||||||
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
||||||
|
|
||||||
struct radv_blend_state blend =
|
struct radv_blend_state blend =
|
||||||
radv_pipeline_init_blend_state(&pipeline->base, state);
|
radv_pipeline_init_blend_state(&pipeline->base, state, pipeline->base.ps_epilog);
|
||||||
|
|
||||||
struct radv_pipeline_key key =
|
struct radv_pipeline_key key =
|
||||||
radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend);
|
radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend);
|
||||||
|
@@ -953,8 +953,6 @@ void
|
|||||||
radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key,
|
radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key,
|
||||||
struct radv_shader_args *args)
|
struct radv_shader_args *args)
|
||||||
{
|
{
|
||||||
unsigned num_inputs = 0;
|
|
||||||
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
|
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
|
||||||
if (gfx_level < GFX11)
|
if (gfx_level < GFX11)
|
||||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||||
@@ -966,7 +964,6 @@ radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_e
|
|||||||
if (col_format == V_028714_SPI_SHADER_ZERO)
|
if (col_format == V_028714_SPI_SHADER_ZERO)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]);
|
ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[i]);
|
||||||
num_inputs++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user