anv: stop using 3DSTATE_WM::ForceThreadDispatchEnable

The documentation says this field should be left at its default value
(Normal). Instead, we set 3DSTATE_PS_EXTRA::PixelShaderHasUAV when we
see that a fragment shader has side effects.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30408>
This commit is contained in:
Lionel Landwerlin
2024-07-28 00:36:54 +03:00
committed by Marge Bot
parent c818de7360
commit eebb6cd236
3 changed files with 29 additions and 36 deletions

View File

@@ -1583,6 +1583,7 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_PS_EXTRA */
struct {
bool PixelShaderHasUAV;
bool PixelShaderIsPerSample;
bool PixelShaderKillsPixel;
bool PixelShaderIsPerCoarsePixel;
@@ -1711,7 +1712,6 @@ struct anv_gfx_dynamic_state {
/* 3DSTATE_WM */
struct {
uint32_t ForceThreadDispatchEnable;
bool LineStippleEnable;
uint32_t BarycentricInterpolationMode;
} wm;
@@ -4707,7 +4707,6 @@ struct anv_graphics_pipeline {
bool rp_has_ds_self_dep;
bool kill_pixel;
bool force_fragment_thread_dispatch;
bool uses_xfb;
bool sample_shading_enable;
float min_sample_shading;

View File

@@ -1012,19 +1012,6 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
* threads.
*/
bool force_thread_dispatch =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
anv_cmd_buffer_all_color_write_masked(cmd_buffer));
SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
@@ -1034,6 +1021,33 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
FRAGMENT);
}
#if GFX_VERx10 >= 125
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
wm_prog_data && wm_prog_data->has_side_effects,
FRAGMENT);
}
#else
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) {
/* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even
* if 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
* 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
* occlusion queries with 0 attachments. There are no CTS tests
* exercising this but zink+anv fails a bunch of tests like piglit
* arb_framebuffer_no_attachments-query.
*
* Here we choose to tweak the PixelShaderHasUAV to make sure the
* fragment shaders are run properly.
*/
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
wm_prog_data && (wm_prog_data->has_side_effects ||
(gfx->color_att_count == 0 &&
gfx->n_occlusion_queries > 0)),
FRAGMENT);
}
#endif
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
@@ -1759,6 +1773,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
pipeline, partial.ps_extra, pse) {
SET(pse, ps_extra, PixelShaderHasUAV);
SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
@@ -2136,7 +2151,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
pipeline, partial.wm, wm) {
SET(wm, wm, ForceThreadDispatchEnable);
SET(wm, wm, LineStippleEnable);
SET(wm, wm, BarycentricInterpolationMode);
}

View File

@@ -1642,26 +1642,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
} else {
wm.EarlyDepthStencilControl = EDSC_NORMAL;
}
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
* doesn't take into account KillPixels when no depth or stencil
* writes are enabled. In order for occlusion queries to work
* correctly with no attachments, we need to force-enable PS thread
* dispatch.
*
* The BDW docs are pretty clear that this bit isn't validated
* and probably shouldn't be used in production:
*
* "This must always be set to Normal. This field should not be
* tested for functional validation."
*
* Unfortunately, however, the other mechanism we have for doing this
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
* Given two bad options, we choose the one which works.
*/
pipeline->force_fragment_thread_dispatch =
wm_prog_data->has_side_effects ||
wm_prog_data->uses_kill;
}
}
}