anv: prepare pipeline for delayed emission of color writes

Namely, we want to be able to emit the following packets dynamically:

  * On Gfx 7/7.5: 3DSTATE_WM, 3DSTATE_BLEND_STATE_POINTERS

  * On Gfx 8+: 3DSTATE_WM, 3DSTATE_BLEND_STATE_POINTERS,
    3DSTATE_PS_BLEND

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10206>
This commit is contained in:
Lionel Landwerlin
2021-03-31 18:50:00 +03:00
committed by Marge Bot
parent fab08d65cb
commit 82eb7c04e7
2 changed files with 134 additions and 84 deletions

View File

@@ -2239,6 +2239,7 @@ enum anv_cmd_dirty_bits {
ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */ ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */ ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */ ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
}; };
typedef uint32_t anv_cmd_dirty_mask_t; typedef uint32_t anv_cmd_dirty_mask_t;
@@ -3369,6 +3370,7 @@ struct anv_graphics_pipeline {
bool sample_shading_enable; bool sample_shading_enable;
bool kill_pixel; bool kill_pixel;
bool depth_bounds_test_enable; bool depth_bounds_test_enable;
bool force_fragment_thread_dispatch;
/* When primitive replication is used, subpass->view_mask will describe what /* When primitive replication is used, subpass->view_mask will describe what
* views to replicate. * views to replicate.
@@ -3389,12 +3391,17 @@ struct anv_graphics_pipeline {
uint32_t depth_stencil_state[3]; uint32_t depth_stencil_state[3];
uint32_t clip[4]; uint32_t clip[4];
uint32_t xfb_bo_pitch[4]; uint32_t xfb_bo_pitch[4];
uint32_t wm[3];
uint32_t blend_state[MAX_RTS * 2];
} gfx7; } gfx7;
struct { struct {
uint32_t sf[4]; uint32_t sf[4];
uint32_t raster[5]; uint32_t raster[5];
uint32_t wm_depth_stencil[3]; uint32_t wm_depth_stencil[3];
uint32_t wm[2];
uint32_t ps_blend[2];
uint32_t blend_state[1 + MAX_RTS * 2];
} gfx8; } gfx8;
struct { struct {

View File

@@ -1129,7 +1129,8 @@ is_dual_src_blend_factor(VkBlendFactor factor)
static void static void
emit_cb_state(struct anv_graphics_pipeline *pipeline, emit_cb_state(struct anv_graphics_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *info, const VkPipelineColorBlendStateCreateInfo *info,
const VkPipelineMultisampleStateCreateInfo *ms_info) const VkPipelineMultisampleStateCreateInfo *ms_info,
uint32_t dynamic_states)
{ {
struct anv_device *device = pipeline->base.device; struct anv_device *device = pipeline->base.device;
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
@@ -1150,11 +1151,21 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
const uint32_t num_dwords = GENX(BLEND_STATE_length) + const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * surface_count; GENX(BLEND_STATE_ENTRY_length) * surface_count;
uint32_t *blend_state_start, *state_pos;
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
const struct intel_device_info *devinfo = &pipeline->base.device->info;
blend_state_start = devinfo->ver >= 8 ?
pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
pipeline->blend_state = ANV_STATE_NULL;
} else {
pipeline->blend_state = pipeline->blend_state =
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64); anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
blend_state_start = pipeline->blend_state.map;
}
state_pos = blend_state_start;
bool has_writeable_rt = false; bool has_writeable_rt = false;
uint32_t *state_pos = pipeline->blend_state.map;
state_pos += GENX(BLEND_STATE_length); state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8 #if GFX_VER >= 8
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 }; struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
@@ -1285,7 +1296,9 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
} }
#if GFX_VER >= 8 #if GFX_VER >= 8
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), blend) { struct GENX(3DSTATE_PS_BLEND) blend = {
GENX(3DSTATE_PS_BLEND_header),
};
blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable; blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
blend.HasWriteableRT = has_writeable_rt; blend.HasWriteableRT = has_writeable_rt;
blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable; blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
@@ -1294,15 +1307,21 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
blend.SourceBlendFactor = bs0.SourceBlendFactor; blend.SourceBlendFactor = bs0.SourceBlendFactor;
blend.DestinationBlendFactor = bs0.DestinationBlendFactor; blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
blend.AlphaTestEnable = false; blend.AlphaTestEnable = false;
blend.IndependentAlphaBlendEnable = blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;
blend_state.IndependentAlphaBlendEnable;
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
} else {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_BLEND), _blend)
_blend = blend;
} }
#else #else
(void)has_writeable_rt; (void)has_writeable_rt;
#endif #endif
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state); GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
if (!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE)) {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = pipeline->blend_state.offset; bsp.BlendStatePointer = pipeline->blend_state.offset;
#if GFX_VER >= 8 #if GFX_VER >= 8
@@ -1310,6 +1329,7 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
#endif #endif
} }
} }
}
static void static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline, emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
@@ -1906,11 +1926,14 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
const VkPipelineRasterizationStateCreateInfo *raster, const VkPipelineRasterizationStateCreateInfo *raster,
const VkPipelineColorBlendStateCreateInfo *blend, const VkPipelineColorBlendStateCreateInfo *blend,
const VkPipelineMultisampleStateCreateInfo *multisample, const VkPipelineMultisampleStateCreateInfo *multisample,
const VkPipelineRasterizationLineStateCreateInfoEXT *line) const VkPipelineRasterizationLineStateCreateInfoEXT *line,
const uint32_t dynamic_states)
{ {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), wm) { struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
};
wm.StatisticsEnable = true; wm.StatisticsEnable = true;
wm.LineEndCapAntialiasingRegionWidth = _05pixels; wm.LineEndCapAntialiasingRegionWidth = _05pixels;
wm.LineAntialiasingRegionWidth = _10pixels; wm.LineAntialiasingRegionWidth = _10pixels;
@@ -1926,7 +1949,7 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
} }
#if GFX_VER >= 8 #if GFX_VER >= 8
/* Gfx8 hardware tries to compute ThreadDispatchEnable for us but /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
* doesn't take into account KillPixels when no depth or stencil * doesn't take into account KillPixels when no depth or stencil
* writes are enabled. In order for occlusion queries to work * writes are enabled. In order for occlusion queries to work
* correctly with no attachments, we need to force-enable PS thread * correctly with no attachments, we need to force-enable PS thread
@@ -1942,9 +1965,16 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
* is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW. * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
* Given two bad options, we choose the one which works. * Given two bad options, we choose the one which works.
*/ */
if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && pipeline->force_fragment_thread_dispatch =
!has_color_buffer_write_enabled(pipeline, blend)) wm_prog_data->has_side_effects ||
wm.ForceThreadDispatchEnable = ForceON; wm_prog_data->uses_kill;
if (pipeline->force_fragment_thread_dispatch ||
!has_color_buffer_write_enabled(pipeline, blend)) {
/* Only set this value in non dynamic mode. */
wm.ForceThreadDispatchEnable =
!(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) ? ForceON : 0;
}
#endif #endif
wm.BarycentricInterpolationMode = wm.BarycentricInterpolationMode =
@@ -1966,11 +1996,16 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
wm.PixelShaderKillsPixel = subpass->has_ds_self_dep || wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
wm_prog_data->uses_kill; wm_prog_data->uses_kill;
if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF || pipeline->force_fragment_thread_dispatch =
wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
wm_prog_data->has_side_effects || wm_prog_data->has_side_effects ||
wm.PixelShaderKillsPixel || wm.PixelShaderKillsPixel;
has_color_buffer_write_enabled(pipeline, blend))
wm.ThreadDispatchEnable = true; if (pipeline->force_fragment_thread_dispatch ||
has_color_buffer_write_enabled(pipeline, blend)) {
/* Only set this value in non dynamic mode. */
wm.ThreadDispatchEnable = !(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE);
}
if (multisample && multisample->rasterizationSamples > 1) { if (multisample && multisample->rasterizationSamples > 1) {
if (wm_prog_data->persample_dispatch) { if (wm_prog_data->persample_dispatch) {
@@ -1987,6 +2022,14 @@ emit_3dstate_wm(struct anv_graphics_pipeline *pipeline, struct anv_subpass *subp
wm.LineStippleEnable = line && line->stippledLineEnable; wm.LineStippleEnable = line && line->stippledLineEnable;
} }
if (dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
const struct intel_device_info *devinfo = &pipeline->base.device->info;
uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
} else {
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_WM), _wm)
_wm = wm;
} }
} }
@@ -2307,7 +2350,7 @@ genX(graphics_pipeline_create)(
urb_deref_block_size); urb_deref_block_size);
emit_ms_state(pipeline, ms_info, dynamic_states); emit_ms_state(pipeline, ms_info, dynamic_states);
emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass); emit_ds_state(pipeline, ds_info, dynamic_states, pass, subpass);
emit_cb_state(pipeline, cb_info, ms_info); emit_cb_state(pipeline, cb_info, ms_info, dynamic_states);
compute_kill_pixel(pipeline, ms_info, subpass); compute_kill_pixel(pipeline, ms_info, subpass);
emit_3dstate_clip(pipeline, emit_3dstate_clip(pipeline,
@@ -2347,7 +2390,7 @@ genX(graphics_pipeline_create)(
emit_3dstate_wm(pipeline, subpass, emit_3dstate_wm(pipeline, subpass,
pCreateInfo->pInputAssemblyState, pCreateInfo->pInputAssemblyState,
pCreateInfo->pRasterizationState, pCreateInfo->pRasterizationState,
cb_info, ms_info, line_info); cb_info, ms_info, line_info, dynamic_states);
emit_3dstate_ps(pipeline, cb_info, ms_info); emit_3dstate_ps(pipeline, cb_info, ms_info);
#if GFX_VER >= 8 #if GFX_VER >= 8
emit_3dstate_ps_extra(pipeline, subpass, emit_3dstate_ps_extra(pipeline, subpass,