anv: prepare pipeline for delayed emission of color writes
Namely, we want to be able to emit the following dynamically:
* On Gfx 7/7.5: 3DSTATE_WM, 3DSTATE_BLEND_STATE_POINTERS
* On Gfx 8+: 3DSTATE_WM, 3DSTATE_BLEND_STATE_POINTERS, 3DSTATE_PS_BLEND

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10206>
This commit is contained in:

committed by
Marge Bot

parent
fab08d65cb
commit
82eb7c04e7
@@ -2239,6 +2239,7 @@ enum anv_cmd_dirty_bits {
|
|||||||
ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
|
ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
|
||||||
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
|
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
|
||||||
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
|
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
|
||||||
|
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
|
||||||
};
|
};
|
||||||
typedef uint32_t anv_cmd_dirty_mask_t;
|
typedef uint32_t anv_cmd_dirty_mask_t;
|
||||||
|
|
||||||
@@ -3369,6 +3370,7 @@ struct anv_graphics_pipeline {
|
|||||||
bool sample_shading_enable;
|
bool sample_shading_enable;
|
||||||
bool kill_pixel;
|
bool kill_pixel;
|
||||||
bool depth_bounds_test_enable;
|
bool depth_bounds_test_enable;
|
||||||
|
bool force_fragment_thread_dispatch;
|
||||||
|
|
||||||
/* When primitive replication is used, subpass->view_mask will describe what
|
/* When primitive replication is used, subpass->view_mask will describe what
|
||||||
* views to replicate.
|
* views to replicate.
|
||||||
@@ -3389,12 +3391,17 @@ struct anv_graphics_pipeline {
|
|||||||
uint32_t depth_stencil_state[3];
|
uint32_t depth_stencil_state[3];
|
||||||
uint32_t clip[4];
|
uint32_t clip[4];
|
||||||
uint32_t xfb_bo_pitch[4];
|
uint32_t xfb_bo_pitch[4];
|
||||||
|
uint32_t wm[3];
|
||||||
|
uint32_t blend_state[MAX_RTS * 2];
|
||||||
} gfx7;
|
} gfx7;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
uint32_t sf[4];
|
uint32_t sf[4];
|
||||||
uint32_t raster[5];
|
uint32_t raster[5];
|
||||||
uint32_t wm_depth_stencil[3];
|
uint32_t wm_depth_stencil[3];
|
||||||
|
uint32_t wm[2];
|
||||||
|
uint32_t ps_blend[2];
|
||||||
|
uint32_t blend_state[1 + MAX_RTS * 2];
|
||||||
} gfx8;
|
} gfx8;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
@@ -1129,7 +1129,8 @@ is_dual_src_blend_factor(VkBlendFactor factor)
|
|||||||
static void
|
static void
|
||||||
emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
||||||
const VkPipelineColorBlendStateCreateInfo *info,
|
const VkPipelineColorBlendStateCreateInfo *info,
|
||||||
const VkPipelineMultisampleStateCreateInfo *ms_info)
|
const VkPipelineMultisampleStateCreateInfo *ms_info,
|
||||||
|
uint32_t dynamic_states)
|
||||||
{
|
{
|
||||||
struct anv_device *device = pipeline->base.device;
|
struct anv_device *device = pipeline->base.device;
|
||||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||||
@@ -1150,11 +1151,21 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||||||
|
|
||||||
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
|
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
|
||||||
GENX(BLEND_STATE_ENTRY_length) * surface_count;
|
GENX(BLEND_STATE_ENTRY_length) * surface_count;
|
||||||
pipeline->blend_state =
|
uint32_t *blend_state_start, *state_pos;
|
||||||
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
|
|
||||||
|
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
|
||||||
|
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||||
|
blend_state_start = devinfo->ver >= 8 ?
|
||||||
|
pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
|
||||||
|
pipeline->blend_state = ANV_STATE_NULL;
|
||||||
|
} else {
|
||||||
|
pipeline->blend_state =
|
||||||
|
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
|
||||||
|
blend_state_start = pipeline->blend_state.map;
|
||||||
|
}
|
||||||
|
state_pos = blend_state_start;
|
||||||
|
|
||||||
bool has_writeable_rt = false;
|
bool has_writeable_rt = false;
|
||||||
uint32_t *state_pos = pipeline->blend_state.map;
|
|
||||||
state_pos += GENX(BLEND_STATE_length);
|
state_pos += GENX(BLEND_STATE_length);
|
||||||
#if GFX_VER >= 8
|
#if GFX_VER >= 8
|
||||||
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
|
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
|
||||||
@@ -1285,29 +1296,38 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if GFX_VER >= 8
|
#if GFX_VER >= 8
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), blend) {
|
struct GENX(3DSTATE_PS_BLEND) blend = {
|
||||||
blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
|
GENX(3DSTATE_PS_BLEND_header),
|
||||||
blend.HasWriteableRT = has_writeable_rt;
|
};
|
||||||
blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
|
blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
|
||||||
blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
|
blend.HasWriteableRT = has_writeable_rt;
|
||||||
blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
|
blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
|
||||||
blend.SourceBlendFactor = bs0.SourceBlendFactor;
|
blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
|
||||||
blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
|
blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
|
||||||
blend.AlphaTestEnable = false;
|
blend.SourceBlendFactor = bs0.SourceBlendFactor;
|
||||||
blend.IndependentAlphaBlendEnable =
|
blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
|
||||||
blend_state.IndependentAlphaBlendEnable;
|
blend.AlphaTestEnable = false;
|
||||||
|
blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;
|
||||||
|
|
||||||
|
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
|
||||||
|
GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
|
||||||
|
} else {
|
||||||
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), _blend)
|
||||||
|
_blend = blend;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)has_writeable_rt;
|
(void)has_writeable_rt;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
|
GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
|
||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
|
if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)) {
|
||||||
bsp.BlendStatePointer = pipeline->blend_state.offset;
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
|
||||||
|
bsp.BlendStatePointer = pipeline->blend_state.offset;
|
||||||
#if GFX_VER >= 8
|
#if GFX_VER >= 8
|
||||||
bsp.BlendStatePointerValid = true;
|
bsp.BlendStatePointerValid = true;
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1906,87 +1926,110 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
|
|||||||
const VkPipelineRasterizationStateCreateInfo *raster,
|
const VkPipelineRasterizationStateCreateInfo *raster,
|
||||||
const VkPipelineColorBlendStateCreateInfo *blend,
|
const VkPipelineColorBlendStateCreateInfo *blend,
|
||||||
const VkPipelineMultisampleStateCreateInfo *multisample,
|
const VkPipelineMultisampleStateCreateInfo *multisample,
|
||||||
const VkPipelineRasterizationLineStateCreateInfoEXT *line)
|
const VkPipelineRasterizationLineStateCreateInfoEXT *line,
|
||||||
|
const uint32_t dynamic_states)
|
||||||
{
|
{
|
||||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), wm) {
|
struct GENX(3DSTATE_WM) wm = {
|
||||||
wm.StatisticsEnable = true;
|
GENX(3DSTATE_WM_header),
|
||||||
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
|
};
|
||||||
wm.LineAntialiasingRegionWidth = _10pixels;
|
wm.StatisticsEnable = true;
|
||||||
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
|
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
|
||||||
|
wm.LineAntialiasingRegionWidth = _10pixels;
|
||||||
|
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
|
||||||
|
|
||||||
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
|
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
|
||||||
if (wm_prog_data->early_fragment_tests) {
|
if (wm_prog_data->early_fragment_tests) {
|
||||||
wm.EarlyDepthStencilControl = EDSC_PREPS;
|
wm.EarlyDepthStencilControl = EDSC_PREPS;
|
||||||
} else if (wm_prog_data->has_side_effects) {
|
} else if (wm_prog_data->has_side_effects) {
|
||||||
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
|
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
|
||||||
} else {
|
} else {
|
||||||
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
wm.EarlyDepthStencilControl = EDSC_NORMAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if GFX_VER >= 8
|
#if GFX_VER >= 8
|
||||||
/* Gfx8 hardware tries to compute ThreadDispatchEnable for us but
|
/* Gen8 hardware tries to compute ThreadDispatchEnable for us but
|
||||||
* doesn't take into account KillPixels when no depth or stencil
|
* doesn't take into account KillPixels when no depth or stencil
|
||||||
* writes are enabled. In order for occlusion queries to work
|
* writes are enabled. In order for occlusion queries to work
|
||||||
* correctly with no attachments, we need to force-enable PS thread
|
* correctly with no attachments, we need to force-enable PS thread
|
||||||
* dispatch.
|
* dispatch.
|
||||||
*
|
*
|
||||||
* The BDW docs are pretty clear that this bit isn't validated
|
* The BDW docs are pretty clear that this bit isn't validated
|
||||||
* and probably shouldn't be used in production:
|
* and probably shouldn't be used in production:
|
||||||
*
|
*
|
||||||
* "This must always be set to Normal. This field should not be
|
* "This must always be set to Normal. This field should not be
|
||||||
* tested for functional validation."
|
* tested for functional validation."
|
||||||
*
|
*
|
||||||
* Unfortunately, however, the other mechanism we have for doing this
|
* Unfortunately, however, the other mechanism we have for doing this
|
||||||
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
|
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
|
||||||
* Given two bad options, we choose the one which works.
|
* Given two bad options, we choose the one which works.
|
||||||
*/
|
*/
|
||||||
if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
|
pipeline->force_fragment_thread_dispatch =
|
||||||
!has_color_buffer_write_enabled(pipeline, blend))
|
wm_prog_data->has_side_effects ||
|
||||||
wm.ForceThreadDispatchEnable = ForceON;
|
wm_prog_data->uses_kill;
|
||||||
|
|
||||||
|
if (pipeline->force_fragment_thread_dispatch ||
|
||||||
|
!has_color_buffer_write_enabled(pipeline, blend)) {
|
||||||
|
/* Only set this value in non dynamic mode. */
|
||||||
|
wm.ForceThreadDispatchEnable =
|
||||||
|
!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) ? ForceON : 0;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
wm.BarycentricInterpolationMode =
|
wm.BarycentricInterpolationMode =
|
||||||
wm_prog_data->barycentric_interp_modes;
|
wm_prog_data->barycentric_interp_modes;
|
||||||
|
|
||||||
#if GFX_VER < 8
|
#if GFX_VER < 8
|
||||||
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
|
||||||
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
|
||||||
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
|
||||||
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
|
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
|
||||||
|
|
||||||
/* If the subpass has a depth or stencil self-dependency, then we
|
/* If the subpass has a depth or stencil self-dependency, then we
|
||||||
* need to force the hardware to do the depth/stencil write *after*
|
* need to force the hardware to do the depth/stencil write *after*
|
||||||
* fragment shader execution. Otherwise, the writes may hit memory
|
* fragment shader execution. Otherwise, the writes may hit memory
|
||||||
* before we get around to fetching from the input attachment and we
|
* before we get around to fetching from the input attachment and we
|
||||||
* may get the depth or stencil value from the current draw rather
|
* may get the depth or stencil value from the current draw rather
|
||||||
* than the previous one.
|
* than the previous one.
|
||||||
*/
|
*/
|
||||||
wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
|
wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
|
||||||
wm_prog_data->uses_kill;
|
wm_prog_data->uses_kill;
|
||||||
|
|
||||||
if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
|
pipeline->force_fragment_thread_dispatch =
|
||||||
wm_prog_data->has_side_effects ||
|
wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
|
||||||
wm.PixelShaderKillsPixel ||
|
wm_prog_data->has_side_effects ||
|
||||||
has_color_buffer_write_enabled(pipeline, blend))
|
wm.PixelShaderKillsPixel;
|
||||||
wm.ThreadDispatchEnable = true;
|
|
||||||
|
|
||||||
if (multisample && multisample->rasterizationSamples > 1) {
|
if (pipeline->force_fragment_thread_dispatch ||
|
||||||
if (wm_prog_data->persample_dispatch) {
|
has_color_buffer_write_enabled(pipeline, blend)) {
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
/* Only set this value in non dynamic mode. */
|
||||||
} else {
|
wm.ThreadDispatchEnable = !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
}
|
||||||
}
|
|
||||||
} else {
|
if (multisample && multisample->rasterizationSamples > 1) {
|
||||||
|
if (wm_prog_data->persample_dispatch) {
|
||||||
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||||
|
} else {
|
||||||
|
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
|
||||||
}
|
}
|
||||||
wm.MultisampleRasterizationMode =
|
} else {
|
||||||
gfx7_ms_rast_mode(pipeline, ia, raster, multisample);
|
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
|
||||||
|
}
|
||||||
|
wm.MultisampleRasterizationMode =
|
||||||
|
gfx7_ms_rast_mode(pipeline, ia, raster, multisample);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
wm.LineStippleEnable = line && line->stippledLineEnable;
|
wm.LineStippleEnable = line && line->stippledLineEnable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
|
||||||
|
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||||
|
uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
|
||||||
|
GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
|
||||||
|
} else {
|
||||||
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), _wm)
|
||||||
|
_wm = wm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2307,7 +2350,7 @@ genX(graphics_pipeline_create)(
|
|||||||
urb_deref_block_size);
|
urb_deref_block_size);
|
||||||
emit_ms_state(pipeline, ms_info, dynamic_states);
|
emit_ms_state(pipeline, ms_info, dynamic_states);
|
||||||
emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass);
|
emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass);
|
||||||
emit_cb_state(pipeline, cb_info, ms_info);
|
emit_cb_state(pipeline, cb_info, ms_info, dynamic_states);
|
||||||
compute_kill_pixel(pipeline, ms_info, subpass);
|
compute_kill_pixel(pipeline, ms_info, subpass);
|
||||||
|
|
||||||
emit_3dstate_clip(pipeline,
|
emit_3dstate_clip(pipeline,
|
||||||
@@ -2347,7 +2390,7 @@ genX(graphics_pipeline_create)(
|
|||||||
emit_3dstate_wm(pipeline, subpass,
|
emit_3dstate_wm(pipeline, subpass,
|
||||||
pCreateInfo->pInputAssemblyState,
|
pCreateInfo->pInputAssemblyState,
|
||||||
pCreateInfo->pRasterizationState,
|
pCreateInfo->pRasterizationState,
|
||||||
cb_info, ms_info, line_info);
|
cb_info, ms_info, line_info, dynamic_states);
|
||||||
emit_3dstate_ps(pipeline, cb_info, ms_info);
|
emit_3dstate_ps(pipeline, cb_info, ms_info);
|
||||||
#if GFX_VER >= 8
|
#if GFX_VER >= 8
|
||||||
emit_3dstate_ps_extra(pipeline, subpass,
|
emit_3dstate_ps_extra(pipeline, subpass,
|
||||||
|
Reference in New Issue
Block a user