tu: Disable FS in certain cases even if FS is not empty
If FS doesn't have side-effects and color write mask is zero. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33735>
This commit is contained in:
@@ -3946,6 +3946,14 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
||||
}
|
||||
cmd->state.pipeline_blend_lrz = pipeline->lrz_blend.valid;
|
||||
|
||||
if (pipeline->disable_fs.valid) {
|
||||
if (cmd->state.disable_fs != pipeline->disable_fs.disable_fs) {
|
||||
cmd->state.disable_fs = pipeline->disable_fs.disable_fs;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
|
||||
}
|
||||
}
|
||||
cmd->state.pipeline_disable_fs = pipeline->disable_fs.valid;
|
||||
|
||||
if (pipeline->bandwidth.valid)
|
||||
cmd->state.bandwidth = pipeline->bandwidth;
|
||||
cmd->state.pipeline_bandwidth = pipeline->bandwidth.valid;
|
||||
@@ -5804,7 +5812,7 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
zmode = A6XX_EARLY_Z;
|
||||
|
||||
/* FS bypass requires early Z */
|
||||
if (fs->variant->empty)
|
||||
if (cmd->state.disable_fs)
|
||||
zmode = A6XX_EARLY_Z;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
|
||||
|
@@ -74,8 +74,9 @@ enum tu_cmd_dirty_bits
|
||||
TU_CMD_DIRTY_FEEDBACK_LOOPS = BIT(13),
|
||||
TU_CMD_DIRTY_FS = BIT(14),
|
||||
TU_CMD_DIRTY_SHADING_RATE = BIT(15),
|
||||
TU_CMD_DIRTY_DISABLE_FS = BIT(16),
|
||||
/* all draw states were disabled and need to be re-enabled: */
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(16)
|
||||
TU_CMD_DIRTY_DRAW_STATE = BIT(17)
|
||||
};
|
||||
|
||||
/* There are only three cache domains we have to care about: the CCU, or
|
||||
@@ -514,6 +515,7 @@ struct tu_cmd_state
|
||||
bool predication_active;
|
||||
bool msaa_disable;
|
||||
bool blend_reads_dest;
|
||||
bool disable_fs;
|
||||
bool stencil_front_write;
|
||||
bool stencil_back_write;
|
||||
bool pipeline_sysmem_single_prim_mode;
|
||||
@@ -527,7 +529,7 @@ struct tu_cmd_state
|
||||
bool pipeline_reads_shading_rate;
|
||||
bool pipeline_accesses_smask;
|
||||
|
||||
bool pipeline_blend_lrz, pipeline_bandwidth;
|
||||
bool pipeline_blend_lrz, pipeline_bandwidth, pipeline_disable_fs;
|
||||
uint32_t pipeline_draw_states;
|
||||
|
||||
/* VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT and
|
||||
|
@@ -2112,6 +2112,9 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
|
||||
if (library->base.bandwidth.valid)
|
||||
pipeline->bandwidth = library->base.bandwidth;
|
||||
|
||||
if (library->base.disable_fs.valid)
|
||||
pipeline->disable_fs = library->base.disable_fs;
|
||||
|
||||
pipeline->set_state_mask |= library->base.set_state_mask;
|
||||
|
||||
u_foreach_bit (i, library->base.set_state_mask) {
|
||||
@@ -2903,6 +2906,52 @@ tu_calc_bandwidth(struct tu_bandwidth *bandwidth,
|
||||
bandwidth->valid = true;
|
||||
}
|
||||
|
||||
static const enum mesa_vk_dynamic_graphics_state tu_disable_fs_state[] = {
|
||||
MESA_VK_DYNAMIC_CB_ATTACHMENT_COUNT,
|
||||
MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES,
|
||||
MESA_VK_DYNAMIC_CB_WRITE_MASKS,
|
||||
MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE,
|
||||
};
|
||||
|
||||
static bool
|
||||
tu_calc_disable_fs(const struct vk_color_blend_state *cb,
|
||||
const struct vk_render_pass_state *rp,
|
||||
bool alpha_to_coverage_enable,
|
||||
const struct tu_shader *fs)
|
||||
{
|
||||
if (alpha_to_coverage_enable)
|
||||
return false;
|
||||
if (fs && !fs->variant->writes_only_color)
|
||||
return false;
|
||||
|
||||
bool has_enabled_attachments = false;
|
||||
for (unsigned i = 0; i < cb->attachment_count; i++) {
|
||||
if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
|
||||
continue;
|
||||
|
||||
const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
|
||||
if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) {
|
||||
has_enabled_attachments = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return !fs || fs->variant->empty ||
|
||||
(fs->variant->writes_only_color && !has_enabled_attachments);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_emit_disable_fs(struct tu_disable_fs *disable_fs,
|
||||
const struct vk_color_blend_state *cb,
|
||||
const struct vk_render_pass_state *rp,
|
||||
bool alpha_to_coverage_enable,
|
||||
const struct tu_shader *fs)
|
||||
{
|
||||
disable_fs->disable_fs =
|
||||
tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs);
|
||||
disable_fs->valid = true;
|
||||
}
|
||||
|
||||
/* Return true if the blend state reads the color attachments. */
|
||||
static bool
|
||||
tu6_calc_blend_lrz(const struct vk_color_blend_state *cb,
|
||||
@@ -3124,14 +3173,14 @@ uint32_t
|
||||
tu6_rast_size(struct tu_device *dev,
|
||||
const struct vk_rasterization_state *rs,
|
||||
const struct vk_viewport_state *vp,
|
||||
const struct tu_shader *fs,
|
||||
bool multiview,
|
||||
bool per_view_viewport)
|
||||
bool per_view_viewport,
|
||||
bool disable_fs)
|
||||
{
|
||||
if (CHIP == A6XX) {
|
||||
return 15 + (dev->physical_device->info->a6xx.has_legacy_pipeline_shading_rate ? 8 : 0);
|
||||
} else {
|
||||
return 25;
|
||||
return 27;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3140,9 +3189,9 @@ void
|
||||
tu6_emit_rast(struct tu_cs *cs,
|
||||
const struct vk_rasterization_state *rs,
|
||||
const struct vk_viewport_state *vp,
|
||||
const struct tu_shader *fs,
|
||||
bool multiview,
|
||||
bool per_view_viewport)
|
||||
bool per_view_viewport,
|
||||
bool disable_fs)
|
||||
{
|
||||
enum a5xx_line_mode line_mode =
|
||||
rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ?
|
||||
@@ -3205,14 +3254,14 @@ tu6_emit_rast(struct tu_cs *cs,
|
||||
* "The GPU has a special mode that writes Z-only pixels at twice
|
||||
* the normal rate."
|
||||
*/
|
||||
bool disable_fs = !fs || fs->variant->empty;
|
||||
|
||||
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP,
|
||||
.fs_disable = disable_fs,
|
||||
.raster_mode = TYPE_TILED,
|
||||
.raster_direction = LR_TB,
|
||||
.conservativerasen = conservative_ras_en));
|
||||
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL(.fs_disable = disable_fs));
|
||||
tu_cs_emit_regs(cs, A7XX_HLSQ_FS_UNKNOWN_A9AA(.fs_disable = disable_fs));
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(conservative_ras_en));
|
||||
|
||||
@@ -3628,6 +3677,13 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
|
||||
if (EMIT_STATE(bandwidth, attachments_valid))
|
||||
tu_calc_bandwidth(&pipeline->bandwidth, cb,
|
||||
builder->graphics_state.rp);
|
||||
if (EMIT_STATE(
|
||||
disable_fs,
|
||||
attachments_valid && pipeline_contains_all_shader_state(pipeline)))
|
||||
tu_emit_disable_fs(&pipeline->disable_fs, cb,
|
||||
builder->graphics_state.rp,
|
||||
builder->graphics_state.ms->alpha_to_coverage_enable,
|
||||
pipeline->shaders[MESA_SHADER_FRAGMENT]);
|
||||
DRAW_STATE(blend_constants, TU_DYNAMIC_STATE_BLEND_CONSTANTS, cb);
|
||||
|
||||
if (attachments_valid &&
|
||||
@@ -3646,12 +3702,12 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
|
||||
BITSET_CLEAR(remove, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS);
|
||||
}
|
||||
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
|
||||
pipeline_contains_all_shader_state(pipeline),
|
||||
builder->graphics_state.rs,
|
||||
builder->graphics_state.vp,
|
||||
pipeline->shaders[MESA_SHADER_FRAGMENT],
|
||||
pipeline_contains_all_shader_state(pipeline) &&
|
||||
pipeline->disable_fs.valid,
|
||||
builder->graphics_state.rs, builder->graphics_state.vp,
|
||||
builder->graphics_state.rp->view_mask != 0,
|
||||
pipeline->program.per_view_viewport);
|
||||
pipeline->program.per_view_viewport,
|
||||
pipeline->disable_fs.disable_fs);
|
||||
DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS,
|
||||
attachments_valid,
|
||||
builder->graphics_state.ds,
|
||||
@@ -3866,6 +3922,21 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
|
||||
(EMIT_STATE(bandwidth) || (cmd->state.dirty & TU_CMD_DIRTY_SUBPASS)))
|
||||
tu_calc_bandwidth(&cmd->state.bandwidth, &cmd->vk.dynamic_graphics_state.cb,
|
||||
&cmd->state.vk_rp);
|
||||
|
||||
if (!cmd->state.pipeline_disable_fs &&
|
||||
(EMIT_STATE(disable_fs) ||
|
||||
(cmd->state.dirty & TU_CMD_DIRTY_SUBPASS))) {
|
||||
bool disable_fs = tu_calc_disable_fs(
|
||||
&cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp,
|
||||
cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable,
|
||||
cmd->state.shaders[MESA_SHADER_FRAGMENT]);
|
||||
|
||||
if (disable_fs != cmd->state.disable_fs) {
|
||||
cmd->state.disable_fs = disable_fs;
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
|
||||
}
|
||||
}
|
||||
|
||||
DRAW_STATE(blend_constants, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
|
||||
&cmd->vk.dynamic_graphics_state.cb);
|
||||
|
||||
@@ -3882,12 +3953,12 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
|
||||
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
|
||||
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS |
|
||||
TU_CMD_DIRTY_PER_VIEW_VIEWPORT |
|
||||
TU_CMD_DIRTY_FS),
|
||||
TU_CMD_DIRTY_DISABLE_FS),
|
||||
&cmd->vk.dynamic_graphics_state.rs,
|
||||
&cmd->vk.dynamic_graphics_state.vp,
|
||||
cmd->state.shaders[MESA_SHADER_FRAGMENT],
|
||||
cmd->state.vk_rp.view_mask != 0,
|
||||
cmd->state.per_view_viewport);
|
||||
cmd->state.per_view_viewport,
|
||||
cmd->state.disable_fs);
|
||||
DRAW_STATE_COND(ds, TU_DYNAMIC_STATE_DS,
|
||||
cmd->state.dirty & TU_CMD_DIRTY_SUBPASS,
|
||||
&cmd->vk.dynamic_graphics_state.ds,
|
||||
|
@@ -52,6 +52,12 @@ struct tu_bandwidth
|
||||
bool valid;
|
||||
};
|
||||
|
||||
struct tu_disable_fs
|
||||
{
|
||||
bool disable_fs;
|
||||
bool valid;
|
||||
};
|
||||
|
||||
struct tu_nir_shaders
|
||||
{
|
||||
struct vk_pipeline_cache_object base;
|
||||
@@ -171,6 +177,7 @@ struct tu_pipeline
|
||||
|
||||
struct tu_lrz_blend lrz_blend;
|
||||
struct tu_bandwidth bandwidth;
|
||||
struct tu_disable_fs disable_fs;
|
||||
|
||||
void *executables_mem_ctx;
|
||||
/* tu_pipeline_executable */
|
||||
|
@@ -2079,7 +2079,6 @@ tu6_emit_fs(struct tu_cs *cs,
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_HLSQ_FS_UNKNOWN_A9AA(.fs_disable = !fs || fs->empty));
|
||||
}
|
||||
|
||||
if (fs) {
|
||||
|
Reference in New Issue
Block a user