anv: stop using 3DSTATE_WM::ForceThreadDispatchEnable
Documentation says we should leave this field at its default value (Normal). Instead, we set 3DSTATE_PS_EXTRA::PixelShaderHasUAV when we see that a fragment shader has side effects.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30408>
This commit is contained in:

committed by
Marge Bot

parent
c818de7360
commit
eebb6cd236
@@ -1583,6 +1583,7 @@ struct anv_gfx_dynamic_state {
|
||||
|
||||
/* 3DSTATE_PS_EXTRA */
|
||||
struct {
|
||||
bool PixelShaderHasUAV;
|
||||
bool PixelShaderIsPerSample;
|
||||
bool PixelShaderKillsPixel;
|
||||
bool PixelShaderIsPerCoarsePixel;
|
||||
@@ -1711,7 +1712,6 @@ struct anv_gfx_dynamic_state {
|
||||
|
||||
/* 3DSTATE_WM */
|
||||
struct {
|
||||
uint32_t ForceThreadDispatchEnable;
|
||||
bool LineStippleEnable;
|
||||
uint32_t BarycentricInterpolationMode;
|
||||
} wm;
|
||||
@@ -4707,7 +4707,6 @@ struct anv_graphics_pipeline {
|
||||
bool rp_has_ds_self_dep;
|
||||
|
||||
bool kill_pixel;
|
||||
bool force_fragment_thread_dispatch;
|
||||
bool uses_xfb;
|
||||
bool sample_shading_enable;
|
||||
float min_sample_shading;
|
||||
|
@@ -1012,19 +1012,6 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
|
||||
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
|
||||
|
||||
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
|
||||
/* 3DSTATE_WM in the hope we can avoid spawning fragment shader
|
||||
* threads.
|
||||
*/
|
||||
bool force_thread_dispatch =
|
||||
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
|
||||
(pipeline->force_fragment_thread_dispatch ||
|
||||
anv_cmd_buffer_all_color_write_masked(cmd_buffer));
|
||||
SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
|
||||
}
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
|
||||
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
|
||||
@@ -1034,6 +1021,33 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
FRAGMENT);
|
||||
}
|
||||
|
||||
#if GFX_VERx10 >= 125
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
|
||||
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
|
||||
wm_prog_data && wm_prog_data->has_side_effects,
|
||||
FRAGMENT);
|
||||
}
|
||||
#else
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) {
|
||||
/* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even
|
||||
* if 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
|
||||
* 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
|
||||
* occlusion queries with 0 attachments. There are no CTS tests
|
||||
* exercising this but zink+anv fails a bunch of tests like piglit
|
||||
* arb_framebuffer_no_attachments-query.
|
||||
*
|
||||
* Here we choose to tweak the PixelShaderHasUAV to make sure the
|
||||
* fragment shaders are run properly.
|
||||
*/
|
||||
SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
|
||||
wm_prog_data && (wm_prog_data->has_side_effects ||
|
||||
(gfx->color_att_count == 0 &&
|
||||
gfx->n_occlusion_queries > 0)),
|
||||
FRAGMENT);
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
|
||||
(gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
|
||||
@@ -1759,6 +1773,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
|
||||
pipeline, partial.ps_extra, pse) {
|
||||
SET(pse, ps_extra, PixelShaderHasUAV);
|
||||
SET(pse, ps_extra, PixelShaderIsPerSample);
|
||||
#if GFX_VER >= 11
|
||||
SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
|
||||
@@ -2136,7 +2151,6 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
|
||||
pipeline, partial.wm, wm) {
|
||||
SET(wm, wm, ForceThreadDispatchEnable);
|
||||
SET(wm, wm, LineStippleEnable);
|
||||
SET(wm, wm, BarycentricInterpolationMode);
|
||||
}
|
||||
|
@@ -1642,26 +1642,6 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
|
||||
} else {
|
||||
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
||||
}
|
||||
|
||||
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
|
||||
* doesn't take into account KillPixels when no depth or stencil
|
||||
* writes are enabled. In order for occlusion queries to work
|
||||
* correctly with no attachments, we need to force-enable PS thread
|
||||
* dispatch.
|
||||
*
|
||||
* The BDW docs are pretty clear that this bit isn't validated
|
||||
* and probably shouldn't be used in production:
|
||||
*
|
||||
* "This must always be set to Normal. This field should not be
|
||||
* tested for functional validation."
|
||||
*
|
||||
* Unfortunately, however, the other mechanism we have for doing this
|
||||
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
|
||||
* Given two bad options, we choose the one which works.
|
||||
*/
|
||||
pipeline->force_fragment_thread_dispatch =
|
||||
wm_prog_data->has_side_effects ||
|
||||
wm_prog_data->uses_kill;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user