anv: fix VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT state

First, there is a problem if you do the following

 vkCmdSetColorWriteEnableEXT(attachmentCount = 8)
 vkCmdBindPipeline(GFX, with attachmentCount = 4)
 vkCmdDraw()
 vkCmdBindPipeline(GFX, with attachmentCount = 8)
 vkCmdDraw()

Because in the dynamic state emission code we rely on the first
pipeline to figure the number of BLEND_STATE entries to prepare. This
is wrong, we should fill all entries so that the dynamic state works
regardless of the number of attachments in the pipeline. With regard
to the dynamic values, we should retain enable/disable values that do
not concern the current pipeline.

Second, 3DSTATE_WM was not always reemitted when the pipeline changed.
But since it is not emitted as part of the pipeline, this results in
inconsistent state being programmed.

Third, we end up disabling the fragment stage completely in some
cases. And that is programming the pipeline inconsistently and
triggering a hang on TGL.

v2: Fix comment (Tapani)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: b15bfe92f7 ("anv: implement VK_EXT_color_write_enable")
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15310>
This commit is contained in:
Lionel Landwerlin
2021-10-11 18:31:26 +03:00
committed by Marge Bot
parent f348103fce
commit a4f502de32
5 changed files with 91 additions and 152 deletions

View File

@@ -1600,9 +1600,16 @@ void anv_CmdSetColorWriteEnableEXT(
assert(attachmentCount <= MAX_RTS);
uint8_t color_writes = 0;
for (uint32_t i = 0; i < attachmentCount; i++)
color_writes |= pColorWriteEnables[i] ? (1 << i) : 0;
/* Keep the existing values outside the color attachments count of the
* current pipeline.
*/
uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
for (uint32_t i = 0; i < attachmentCount; i++) {
if (pColorWriteEnables[i])
color_writes |= BITFIELD_BIT(i);
else
color_writes &= ~BITFIELD_BIT(i);
}
if (cmd_buffer->state.gfx.dynamic.color_writes != color_writes) {
cmd_buffer->state.gfx.dynamic.color_writes = color_writes;

View File

@@ -487,12 +487,9 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
key->ignore_sample_mask_out = false;
assert(rendering_info->colorAttachmentCount <= MAX_RTS);
for (uint32_t i = 0; i < rendering_info->colorAttachmentCount; i++) {
if (rendering_info->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED)
key->color_outputs_valid |= (1 << i);
}
key->nr_color_regions = rendering_info->colorAttachmentCount;
/* Consider all inputs as valid until look at the NIR variables. */
key->color_outputs_valid = (1u << MAX_RTS) - 1;
key->nr_color_regions = MAX_RTS;
/* To reduce possible shader recompilations we would need to know if
* there is a SampleMask output variable to compute if we should emit
@@ -1119,6 +1116,26 @@ static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
struct anv_pipeline_stage *stage)
{
/* Initially the valid outputs value is set to all possible render targets
* valid (see populate_wm_prog_key()), before we look at the shader
* variables. Here we look at the output variables of the shader an compute
* a correct number of render target outputs.
*/
stage->key.wm.color_outputs_valid = 0;
nir_foreach_shader_out_variable_safe(var, stage->nir) {
if (var->data.location < FRAG_RESULT_DATA0)
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
const unsigned array_len =
glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
assert(rt + array_len <= MAX_RTS);
stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
}
stage->key.wm.nr_color_regions =
util_last_bit(stage->key.wm.color_outputs_valid);
unsigned num_rt_bindings;
struct anv_pipeline_binding rt_bindings[MAX_RTS];
if (stage->key.wm.nr_color_regions > 0) {
@@ -1152,71 +1169,6 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
typed_memcpy(stage->bind_map.surface_to_descriptor,
rt_bindings, num_rt_bindings);
stage->bind_map.surface_count += num_rt_bindings;
/* Now that we've set up the color attachments, we can go through and
* eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
* hopes that dead code can clean them up in this and any earlier shader
* stages.
*/
nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
bool deleted_output = false;
nir_foreach_shader_out_variable_safe(var, stage->nir) {
/* TODO: We don't delete depth/stencil writes. We probably could if the
* subpass doesn't have a depth/stencil attachment.
*/
if (var->data.location < FRAG_RESULT_DATA0)
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
/* If this is the RT at location 0 and we have alpha to coverage
* enabled we still need that write because it will affect the coverage
* mask even if it's never written to a color target.
*/
if (rt == 0 && stage->key.wm.alpha_to_coverage)
continue;
const unsigned array_len =
glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
assert(rt + array_len <= MAX_RTS);
if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
BITFIELD_RANGE(rt, array_len))) {
deleted_output = true;
var->data.mode = nir_var_function_temp;
exec_node_remove(&var->node);
exec_list_push_tail(&impl->locals, &var->node);
}
}
if (deleted_output)
nir_fixup_deref_modes(stage->nir);
/* Initially the valid outputs value is based off the renderpass color
* attachments (see populate_wm_prog_key()), now that we've potentially
* deleted variables that map to unused attachments, we need to update the
* valid outputs for the backend compiler based on what output variables
* are actually used. */
stage->key.wm.color_outputs_valid = 0;
nir_foreach_shader_out_variable_safe(var, stage->nir) {
if (var->data.location < FRAG_RESULT_DATA0)
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
const unsigned array_len =
glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
assert(rt + array_len <= MAX_RTS);
stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
}
/* We stored the number of subpass color attachments in nr_color_regions
* when calculating the key for caching. Now that we've computed the bind
* map, we can reduce this to the actual max before we go into the back-end
* compiler.
*/
stage->key.wm.nr_color_regions =
util_last_bit(stage->key.wm.color_outputs_valid);
}
static void
@@ -2368,10 +2320,12 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
if (color_write_info) {
dynamic->color_writes = 0;
dynamic->color_writes = (1u << MAX_RTS) - 1;
for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
dynamic->color_writes |=
color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
if (color_write_info->pColorWriteEnables[i])
dynamic->color_writes |= BITFIELD_BIT(i);
else
dynamic->color_writes &= ~BITFIELD_BIT(i);
}
}
}

View File

@@ -1434,10 +1434,18 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
.SourceAlphaBlendFactor = vk_to_intel_blend[a->srcAlphaBlendFactor],
.DestinationAlphaBlendFactor = vk_to_intel_blend[a->dstAlphaBlendFactor],
.AlphaBlendFunction = vk_to_intel_blend_op[a->alphaBlendOp],
.WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
.WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
.WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
.WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
.WriteDisableAlpha =
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
!(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
.WriteDisableRed =
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
!(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
.WriteDisableGreen =
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
!(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
.WriteDisableBlue =
(dynamic_states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) == 0 &&
!(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
};
if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
@@ -2253,7 +2261,8 @@ has_color_buffer_write_enabled(const struct anv_graphics_pipeline *pipeline,
if (binding->index == UINT32_MAX)
continue;
if (blend && blend->pAttachments[binding->index].colorWriteMask != 0)
if (blend && binding->index < blend->attachmentCount &&
blend->pAttachments[binding->index].colorWriteMask != 0)
return true;
}

View File

@@ -283,14 +283,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer,
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
bool dirty_color_blend =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
bool dirty_primitive_topology =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
VkPolygonMode dynamic_raster_mode;
VkPrimitiveTopology primitive_topology =
cmd_buffer->state.gfx.dynamic.primitive_topology;
@@ -298,21 +290,20 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
primitive_topology);
if (dirty_color_blend || dirty_primitive_topology) {
uint32_t dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
uint32_t dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
color_writes,
.MultisampleRasterizationMode =
genX(ms_rasterization_mode)(pipeline, dynamic_raster_mode),
};
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
}
.ThreadDispatchEnable = pipeline->force_fragment_thread_dispatch ||
color_writes,
.MultisampleRasterizationMode =
genX(ms_rasterization_mode)(pipeline,
dynamic_raster_mode),
};
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx7.wm);
}
if (anv_cmd_buffer_needs_dynamic_state(cmd_buffer,
@@ -329,14 +320,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
bool dirty_color_blend =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
/* Blend states of each RT */
uint32_t surface_count = 0;
struct anv_pipeline_bind_map *map;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
surface_count = map->surface_count;
}
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
uint32_t *dws = blend_dws;
@@ -348,10 +331,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
bool dirty_logic_op =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
for (uint32_t i = 0; i < surface_count; i++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
bool write_disabled =
dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
for (uint32_t i = 0; i < MAX_RTS; i++) {
bool write_disabled = dirty_color_blend &&
(color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled,
.WriteDisableRed = write_disabled,
@@ -365,7 +347,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * surface_count;
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,

View File

@@ -642,43 +642,32 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP)) {
const uint8_t color_writes = cmd_buffer->state.gfx.dynamic.color_writes;
/* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
* threads.
*/
bool dirty_color_blend =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
if (dirty_color_blend) {
uint32_t dwords[MAX2(GENX(3DSTATE_WM_length),
GENX(3DSTATE_PS_BLEND_length))];
struct GENX(3DSTATE_WM) wm = {
GENX(3DSTATE_WM_header),
.ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
!color_writes) ? ForceON : 0,
};
GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
.ForceThreadDispatchEnable = (pipeline->force_fragment_thread_dispatch ||
!color_writes) ? ForceON : 0,
};
GENX(3DSTATE_WM_pack)(NULL, dwords, &wm);
anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.wm);
/* 3DSTATE_PS_BLEND to be consistent with the rest of the
* BLEND_STATE_ENTRY.
*/
struct GENX(3DSTATE_PS_BLEND) ps_blend = {
GENX(3DSTATE_PS_BLEND_header),
.HasWriteableRT = color_writes != 0,
};
GENX(3DSTATE_PS_BLEND_pack)(NULL, dwords, &ps_blend);
anv_batch_emit_merge(&cmd_buffer->batch, dwords, pipeline->gfx8.ps_blend);
}
/* Blend states of each RT */
uint32_t surface_count = 0;
struct anv_pipeline_bind_map *map;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
surface_count = map->surface_count;
}
/* 3DSTATE_PS_BLEND to be consistent with the rest of the
* BLEND_STATE_ENTRY.
*/
uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
struct GENX(3DSTATE_PS_BLEND) ps_blend = {
GENX(3DSTATE_PS_BLEND_header),
.HasWriteableRT = color_writes != 0,
};
GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
pipeline->gfx8.ps_blend);
uint32_t blend_dws[GENX(BLEND_STATE_length) +
MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
@@ -691,10 +680,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
bool dirty_logic_op =
cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
for (uint32_t i = 0; i < surface_count; i++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
bool write_disabled =
dirty_color_blend && (color_writes & (1u << binding->index)) == 0;
for (uint32_t i = 0; i < MAX_RTS; i++) {
bool write_disabled = (color_writes & BITFIELD_BIT(i)) == 0;
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = write_disabled,
.WriteDisableRed = write_disabled,
@@ -708,7 +695,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * surface_count;
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,