tu: Disable FS in certain cases even if FS is not empty

The FS is disabled if it doesn't have side-effects and the color write mask is zero.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33735>
This commit is contained in:
Danylo Piliaiev
2025-03-05 16:57:22 +01:00
parent 71238fb4d8
commit be481e6615
5 changed files with 106 additions and 19 deletions

View File

@@ -3946,6 +3946,14 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
}
cmd->state.pipeline_blend_lrz = pipeline->lrz_blend.valid;
if (pipeline->disable_fs.valid) {
if (cmd->state.disable_fs != pipeline->disable_fs.disable_fs) {
cmd->state.disable_fs = pipeline->disable_fs.disable_fs;
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
}
}
cmd->state.pipeline_disable_fs = pipeline->disable_fs.valid;
if (pipeline->bandwidth.valid)
cmd->state.bandwidth = pipeline->bandwidth;
cmd->state.pipeline_bandwidth = pipeline->bandwidth.valid;
@@ -5804,7 +5812,7 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
zmode = A6XX_EARLY_Z;
/* FS bypass requires early Z */
if (fs->variant->empty)
if (cmd->state.disable_fs)
zmode = A6XX_EARLY_Z;
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);

View File

@@ -74,8 +74,9 @@ enum tu_cmd_dirty_bits
TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13),
TU_CMD_DIRTY_FS = BIT(14),
TU_CMD_DIRTY_SHADING_RATE = BIT(15),
TU_CMD_DIRTY_DISABLE_FS = BIT(16),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = BIT(16)
TU_CMD_DIRTY_DRAW_STATE = BIT(17)
};
/* There are only three cache domains we have to care about: the CCU, or
@@ -514,6 +515,7 @@ struct tu_cmd_state
bool predication_active;
bool msaa_disable;
bool blend_reads_dest;
bool disable_fs;
bool stencil_front_write;
bool stencil_back_write;
bool pipeline_sysmem_single_prim_mode;
@@ -527,7 +529,7 @@ struct tu_cmd_state
bool pipeline_reads_shading_rate;
bool pipeline_accesses_smask;
bool pipeline_blend_lrz, pipeline_bandwidth;
bool pipeline_blend_lrz, pipeline_bandwidth, pipeline_disable_fs;
uint32_t pipeline_draw_states;
/* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and

View File

@@ -2112,6 +2112,9 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
if (library->base.bandwidth.valid)
pipeline->bandwidth = library->base.bandwidth;
if (library->base.disable_fs.valid)
pipeline->disable_fs = library->base.disable_fs;
pipeline->set_state_mask |= library->base.set_state_mask;
u_foreach_bit (i, library->base.set_state_mask) {
@@ -2903,6 +2906,52 @@ tu_calc_bandwidth(struct tu_bandwidth *bandwidth,
bandwidth->valid = true;
}
/* Dynamic graphics states associated with the "disable FS" decision.
 *
 * The entries line up with the inputs tu_calc_disable_fs() reads: the color
 * blend attachment count, the per-attachment color write enables, the
 * per-attachment write masks, and the alpha-to-coverage enable.
 *
 * NOTE(review): presumably consumed by the EMIT_STATE(disable_fs, ...) macro
 * to decide when the state must be recomputed -- confirm against the macro
 * definition.
 */
static const enum mesa_vk_dynamic_graphics_state tu_disable_fs_state[] = {
MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT,
MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES,
MESA_VK_DYNAMIC_CB_WRITE_MASKS,
MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE,
};
/* Decide whether fragment shader execution can be skipped.
 *
 * Returns false when alpha-to-coverage is enabled, or when a fragment shader
 * is present whose variant is not flagged as writes_only_color. Otherwise
 * returns true when there is no fragment shader, when its variant is empty,
 * or when no color attachment can actually be written.
 *
 * An attachment counts as written only if its format in the render pass
 * state is not VK_FORMAT_UNDEFINED, its bit in cb->color_write_enables is
 * set, and its write mask is non-zero.
 */
static bool
tu_calc_disable_fs(const struct vk_color_blend_state *cb,
                   const struct vk_render_pass_state *rp,
                   bool alpha_to_coverage_enable,
                   const struct tu_shader *fs)
{
   if (alpha_to_coverage_enable)
      return false;

   if (fs) {
      if (!fs->variant->writes_only_color)
         return false;
   }

   /* Scan the attachments, stopping at the first one that is really written. */
   bool any_color_written = false;
   for (unsigned att_idx = 0;
        att_idx < cb->attachment_count && !any_color_written; att_idx++) {
      any_color_written =
         rp->color_attachment_formats[att_idx] != VK_FORMAT_UNDEFINED &&
         (cb->color_write_enables & (1u << att_idx)) != 0 &&
         cb->attachments[att_idx].write_mask != 0;
   }

   if (!fs || fs->variant->empty)
      return true;

   /* A non-NULL fs reaching this point was already checked to be
    * writes_only_color above, so only the attachment scan matters.
    */
   return !any_color_written;
}
/* Fill in a tu_disable_fs record.
 *
 * Stores the result of tu_calc_disable_fs() for the given blend state,
 * render pass state, alpha-to-coverage setting and fragment shader into
 * disable_fs->disable_fs, then marks the record as valid.
 */
static void
tu_emit_disable_fs(struct tu_disable_fs *disable_fs,
                   const struct vk_color_blend_state *cb,
                   const struct vk_render_pass_state *rp,
                   bool alpha_to_coverage_enable,
                   const struct tu_shader *fs)
{
   const bool can_skip_fs =
      tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs);

   disable_fs->disable_fs = can_skip_fs;
   disable_fs->valid = true;
}
/* Return true if the blend state reads the color attachments. */
static bool
tu6_calc_blend_lrz(const struct vk_color_blend_state *cb,
@@ -3124,14 +3173,14 @@ uint32_t
tu6_rast_size(struct tu_device *dev,
const struct vk_rasterization_state *rs,
const struct vk_viewport_state *vp,
const struct tu_shader *fs,
bool multiview,
bool per_view_viewport)
bool per_view_viewport,
bool disable_fs)
{
if (CHIP == A6XX) {
return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0);
} else {
return 25;
return 27;
}
}
@@ -3140,9 +3189,9 @@ void
tu6_emit_rast(struct tu_cs *cs,
const struct vk_rasterization_state *rs,
const struct vk_viewport_state *vp,
const struct tu_shader *fs,
bool multiview,
bool per_view_viewport)
bool per_view_viewport,
bool disable_fs)
{
enum a5xx_line_mode line_mode =
rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ?
@@ -3205,14 +3254,14 @@ tu6_emit_rast(struct tu_cs *cs,
* "The GPU has a special mode that writes Z-only pixels at twice
* the normal rate."
*/
bool disable_fs = !fs || fs->variant->empty;
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP,
.fs_disable = disable_fs,
.raster_mode = TYPE_TILED,
.raster_direction = LR_TB,
.conservativerasen = conservative_ras_en));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL(.fs_disable = disable_fs));
tu_cs_emit_regs(cs, A7XX_HLSQ_FS_UNKNOWN_A9AA(.fs_disable = disable_fs));
tu_cs_emit_regs(cs,
A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(conservative_ras_en));
@@ -3628,6 +3677,13 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
if (EMIT_STATE(bandwidth, attachments_valid))
tu_calc_bandwidth(&pipeline->bandwidth, cb,
builder->graphics_state.rp);
if (EMIT_STATE(
disable_fs,
attachments_valid && pipeline_contains_all_shader_state(pipeline)))
tu_emit_disable_fs(&pipeline->disable_fs, cb,
builder->graphics_state.rp,
builder->graphics_state.ms->alpha_to_coverage_enable,
pipeline->shaders[MESA_SHADER_FRAGMENT]);
DRAW_STATE(blend_constants, TU_DYNAMIC_STATE_BLEND_CONSTANTS, cb);
if (attachments_valid &&
@@ -3646,12 +3702,12 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS);
}
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
pipeline_contains_all_shader_state(pipeline),
builder->graphics_state.rs,
builder->graphics_state.vp,
pipeline->shaders[MESA_SHADER_FRAGMENT],
pipeline_contains_all_shader_state(pipeline) &&
pipeline->disable_fs.valid,
builder->graphics_state.rs, builder->graphics_state.vp,
builder->graphics_state.rp->view_mask != 0,
pipeline->program.per_view_viewport);
pipeline->program.per_view_viewport,
pipeline->disable_fs.disable_fs);
DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS,
attachments_valid,
builder->graphics_state.ds,
@@ -3866,6 +3922,21 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
(EMIT_STATE(bandwidth) || (cmd->state.dirty & TU_CMD_DIRTY_SUBPASS)))
tu_calc_bandwidth(&cmd->state.bandwidth, &cmd->vk.dynamic_graphics_state.cb,
&cmd->state.vk_rp);
if (!cmd->state.pipeline_disable_fs &&
(EMIT_STATE(disable_fs) ||
(cmd->state.dirty & TU_CMD_DIRTY_SUBPASS))) {
bool disable_fs = tu_calc_disable_fs(
&cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp,
cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable,
cmd->state.shaders[MESA_SHADER_FRAGMENT]);
if (disable_fs != cmd->state.disable_fs) {
cmd->state.disable_fs = disable_fs;
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
}
}
DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
&cmd->vk.dynamic_graphics_state.cb);
@@ -3882,12 +3953,12 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS |
TU_CMD_DIRTY_PER_VIEW_VIEWPORT |
TU_CMD_DIRTY_FS),
TU_CMD_DIRTY_DISABLE_FS),
&cmd->vk.dynamic_graphics_state.rs,
&cmd->vk.dynamic_graphics_state.vp,
cmd->state.shaders[MESA_SHADER_FRAGMENT],
cmd->state.vk_rp.view_mask != 0,
cmd->state.per_view_viewport);
cmd->state.per_view_viewport,
cmd->state.disable_fs);
DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS,
cmd->state.dirty & TU_CMD_DIRTY_SUBPASS,
&cmd->vk.dynamic_graphics_state.ds,

View File

@@ -52,6 +52,12 @@ struct tu_bandwidth
bool valid;
};
/* Precomputed "disable fragment shader" decision stored on a pipeline. */
struct tu_disable_fs
{
/* Result of tu_calc_disable_fs(): true when the FS can be disabled. */
bool disable_fs;
/* Set to true once disable_fs has been computed by tu_emit_disable_fs();
 * consumers check it before using disable_fs.
 */
bool valid;
};
struct tu_nir_shaders
{
struct vk_pipeline_cache_object base;
@@ -171,6 +177,7 @@ struct tu_pipeline
struct tu_lrz_blend lrz_blend;
struct tu_bandwidth bandwidth;
struct tu_disable_fs disable_fs;
void *executables_mem_ctx;
/* tu_pipeline_executable */

View File

@@ -2079,7 +2079,6 @@ tu6_emit_fs(struct tu_cs *cs,
if (CHIP >= A7XX) {
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
tu_cs_emit_regs(cs, A7XX_HLSQ_FS_UNKNOWN_A9AA(.fs_disable = !fs || fs->empty));
}
if (fs) {