third_party_mesa3d/src/intel/vulkan/genX_gfx_state.c
Tapani Pälli 524e8865ce iris/anv: move Wa_14018912822 as a drirc workaround
This should be toggled on only for applications that hit the issue.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9886
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25424>
2023-10-02 08:26:14 +00:00


/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_guardband.h"
#include "compiler/brw_prim.h"
const uint32_t genX(vk_to_intel_blend)[] = {
[VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
[VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
[VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
[VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
[VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
[VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
[VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
[VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
[VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
[VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};
static const uint32_t genX(vk_to_intel_blend_op)[] = {
[VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
[VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
[VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
[VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
[VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};
static void
genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VERx10 >= 120
/* Wa_16013994831 - Disable preemption during streamout and re-enable it
* when XFB is not used by the current pipeline.
*
* Although this workaround applies to Gfx12+, we already disable object
* level preemption for another reason in genX_state.c so we can skip this
* for Gfx12.
*/
if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
return;
if (cmd_buffer->state.gfx.pipeline->uses_xfb) {
genX(cmd_buffer_set_preemption)(cmd_buffer, false);
return;
}
if (!cmd_buffer->state.gfx.object_preemption)
genX(cmd_buffer_set_preemption)(cmd_buffer, true);
#endif
}
#if GFX_VER >= 12
static uint32_t
get_cps_state_offset(struct anv_device *device, bool cps_enabled,
const struct vk_fragment_shading_rate_state *fsr)
{
if (!cps_enabled)
return device->cps_states.offset;
uint32_t offset;
static const uint32_t size_index[] = {
[1] = 0,
[2] = 1,
[4] = 2,
};
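/* The device's cps_states buffer holds one block of MAX_VIEWPORTS
* CPS_STATE structures per fragment shading rate combination, with the
* "disabled" block in slot 0. As a worked example for Gfx12.5: both
* combiner ops 0 (KEEP) and a 4x2 fragment size give block index
* 1 + 0 + 0 + size_index[4] * 3 + size_index[2] = 8, which the code
* below converts to a byte offset.
*/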
#if GFX_VERx10 >= 125
offset =
1 + /* skip disabled */
fsr->combiner_ops[0] * 5 * 3 * 3 +
fsr->combiner_ops[1] * 3 * 3 +
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#else
offset =
1 + /* skip disabled */
size_index[fsr->fragment_size.width] * 3 +
size_index[fsr->fragment_size.height];
#endif
offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 */
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
if (GFX_VER > 9)
return false;
assert(GFX_VER == 9);
/* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
*
* Clearing this bit will force the STC cache to wait for pending
* retirement of pixels at the HZ-read stage and do the STC-test for
* Non-promoted, R-computed and Computed depth modes instead of
* postponing the STC-test to RCPFE.
*
* STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*
* STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*
* COMP_STC_EN = STC_TEST_EN &&
* 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
*
* SW parses the pipeline states to generate the following logical
* signal indicating if PMA FIX can be enabled.
*
* STC_PMA_OPT =
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
* 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
* !(3DSTATE_WM::EDSC_Mode == 2) &&
* 3DSTATE_PS_EXTRA::PixelShaderValid &&
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
* (COMP_STC_EN || STC_WRITE_EN) &&
* ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
/* These are always true:
* 3DSTATE_WM::ForceThreadDispatch != 1 &&
* !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
*/
/* We only enable the PMA fix if we know for certain that HiZ is enabled.
* If we don't know whether HiZ is enabled or not, we disable the PMA fix
* and there is no harm.
*
* (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
* 3DSTATE_DEPTH_BUFFER::HIZ Enable
*/
if (!cmd_buffer->state.hiz_enabled)
return false;
/* We can't possibly know if HiZ is enabled without the depth attachment */
ASSERTED const struct anv_image_view *d_iview =
cmd_buffer->state.gfx.depth_att.iview;
assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
/* 3DSTATE_PS_EXTRA::PixelShaderValid */
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
return false;
/* !(3DSTATE_WM::EDSC_Mode == 2) */
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->early_fragment_tests)
return false;
/* We never use anv_pipeline for HiZ ops so this is trivially true:
* !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
* 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
* 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
* 3DSTATE_WM_HZ_OP::StencilBufferClear)
*/
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*/
const bool stc_test_en = ds->stencil.test_enable;
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*/
const bool stc_write_en = ds->stencil.write_enable;
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
/* COMP_STC_EN || STC_WRITE_EN */
if (!(comp_stc_en || stc_write_en))
return false;
/* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_WM::ForceKillPix == ON ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
* (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
*/
return pipeline->kill_pixel ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
static void
genX(rasterization_mode)(VkPolygonMode raster_mode,
VkLineRasterizationModeEXT line_mode,
float line_width,
uint32_t *api_mode,
bool *msaa_rasterization_enable)
{
if (raster_mode == VK_POLYGON_MODE_LINE) {
/* Unfortunately, configuring our line rasterization hardware on gfx8
* and later is rather painful. Instead of giving us bits to tell the
* hardware what line mode to use like we had on gfx7, we now have an
* arcane combination of API Mode and MSAA enable bits which do things
* in a table which are expected to magically put the hardware into the
* right mode for your API. Sadly, Vulkan isn't any of the APIs the
* hardware people thought of so nothing works the way you want it to.
*
* Look at the table titled "Multisample Rasterization Modes" in Vol 7
* of the Skylake PRM for more details.
*/
switch (line_mode) {
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
*api_mode = DX101;
#if GFX_VER <= 9
/* Prior to ICL, the algorithm the HW uses to draw wide lines
* doesn't quite match what the CTS expects, at least for rectangular
* lines, so we set this to false here, making it draw parallelograms
* instead, which work well enough.
*/
*msaa_rasterization_enable = line_width < 1.0078125;
#else
*msaa_rasterization_enable = true;
#endif
break;
case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
*api_mode = DX9OGL;
*msaa_rasterization_enable = false;
break;
default:
unreachable("Unsupported line rasterization mode");
}
} else {
*api_mode = DX101;
*msaa_rasterization_enable = true;
}
}
/**
* This function takes the vulkan runtime values & dirty states and updates
* the values in anv_gfx_dynamic_state, flagging HW instructions for
* reemission if the values are changing.
*
* Nothing is emitted in the batch buffer.
*/
void
genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
{
UNUSED struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
const struct anv_graphics_pipeline *pipeline = gfx->pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
struct anv_instance *instance = cmd_buffer->device->physical->instance;
#define GET(field) hw_state->field
#define SET(bit, field, value) \
do { \
__typeof(hw_state->field) __v = value; \
if (hw_state->field != __v) { \
hw_state->field = __v; \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} \
} while (0)
#define SET_STAGE(bit, field, value, stage) \
do { \
__typeof(hw_state->field) __v = value; \
if (!anv_pipeline_has_stage(pipeline, \
MESA_SHADER_##stage)) { \
hw_state->field = __v; \
break; \
} \
if (hw_state->field != __v) { \
hw_state->field = __v; \
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
} \
} while (0)
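/* For example, SET(RASTER, raster.CullMode, value) updates
* hw_state->raster.CullMode only if the value actually changed and, in
* that case, marks ANV_GFX_STATE_RASTER dirty so that 3DSTATE_RASTER is
* reemitted by genX(cmd_buffer_flush_gfx_hw_state)().
*/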
#define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \
switch (mode) { \
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \
break; \
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2); \
SET(bit, cmd.LineStripListProvokingVertexSelect, 1); \
SET(bit, cmd.TriangleFanProvokingVertexSelect, 2); \
break; \
default: \
unreachable("Invalid provoking vertex mode"); \
} \
if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_XFB_ENABLE |
ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
#if INTEL_NEEDS_WA_14017076903
/* Wa_14017076903 :
*
* SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
*
* SOL_INT::Render_Enable =
* (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
* (
* (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
* !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
* !3DSTATE_STREAMOUT::API_Render_Disable &&
* (
* 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
* 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
* 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
* 3DSTATE_PS_EXTRA::PS_Valid ||
* 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
* 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
* 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
* )
* )
*
* If SOL_INT::Render_Enable is false, the SO stage will not forward any
* topologies down the pipeline, which is not what we want for occlusion
* queries.
*
* Here we force rendering to get SOL_INT::Render_Enable when occlusion
* queries are active.
*/
if (!GET(so.RenderingDisable) && cmd_buffer->state.gfx.n_occlusion_queries > 0)
SET(STREAMOUT, so.ForceRendering, Force_on);
#endif
switch (dyn->rs.provoking_vertex) {
case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
SET(STREAMOUT, so.ReorderMode, LEADING);
SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
break;
case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
SET(STREAMOUT, so.ReorderMode, TRAILING);
SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
break;
default:
unreachable("Invalid provoking vertex mode");
}
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
uint32_t topology;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
else
topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
cmd_buffer->state.gfx.primitive_topology = topology;
SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
#if GFX_VER >= 11
if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))) {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
const bool cps_enable = wm_prog_data &&
brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
#if GFX_VER == 11
SET(CPS, cps.CoarsePixelShadingMode,
cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE);
SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
#elif GFX_VER >= 12
SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
get_cps_state_offset(device, cps_enable, &dyn->fsr));
#endif
}
#endif /* GFX_VER >= 11 */
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);
if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
} else {
/* When the origin is upper-left, we have to flip the winding order */
if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
} else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
} else {
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
}
}
} else {
SET(TE, te.OutputTopology, OUTPUT_POINT);
}
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
SET(SF, sf.LineWidth, dyn->rs.line.width);
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
/**
* From the Vulkan Spec:
*
* "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth
* bias representation is a factor of constant r equal to 1."
*
* From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
*
* "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
*
* Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
*
* Where r is the minimum representable value > 0 in the depth
* buffer format, converted to float32 (note: If state bit Legacy
* Global Depth Bias Enable is set, the r term will be forced to
* 1.0)"
*
* When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
* LegacyGlobalDepthBiasEnable.
*/
SET(SF, sf.LegacyGlobalDepthBiasEnable,
dyn->rs.depth_bias.representation ==
VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
SET(CLIP, clip.APIMode, dyn->vp.depth_clip_negative_one_to_one ? APIMODE_OGL : APIMODE_D3D);
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE)) {
/* Take dynamic primitive topology in to account with
* 3DSTATE_RASTER::APIMode
* 3DSTATE_RASTER::DXMultisampleRasterizationEnable
* 3DSTATE_RASTER::AntialiasingEnable
*/
uint32_t api_mode = 0;
bool msaa_raster_enable = false;
const VkLineRasterizationModeEXT line_mode =
anv_line_rasterization_mode(dyn->rs.line.mode,
pipeline->rasterization_samples);
const VkPolygonMode dynamic_raster_mode =
genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
dyn->rs.polygon_mode,
dyn->ia.primitive_topology);
genX(rasterization_mode)(dynamic_raster_mode,
line_mode, dyn->rs.line.width,
&api_mode, &msaa_raster_enable);
/* From the Broadwell PRM, Volume 2, documentation for
* 3DSTATE_RASTER, "Antialiasing Enable":
*
* "This field must be disabled if any of the render targets
* have integer (UINT or SINT) surface format."
*
* Additionally internal documentation for Gfx12+ states:
*
* "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
* FORCED_SAMPLE_COUNT > 1."
*/
const bool aa_enable =
anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
!cmd_buffer->state.gfx.has_uint_rt &&
!(GFX_VER >= 12 && cmd_buffer->state.gfx.samples > 1);
const bool depth_clip_enable =
vk_rasterization_state_depth_clip_enable(&dyn->rs);
const bool xy_clip_test_enable =
(dynamic_raster_mode == VK_POLYGON_MODE_FILL);
SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);
SET(RASTER, raster.APIMode, api_mode);
SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
SET(RASTER, raster.AntialiasingEnable, aa_enable);
SET(RASTER, raster.CullMode, genX(vk_to_intel_cullmode)[dyn->rs.cull_mode]);
SET(RASTER, raster.FrontWinding, genX(vk_to_intel_front_face)[dyn->rs.front_face]);
SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant);
SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope);
SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
SET(RASTER, raster.FrontFaceFillMode, genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode]);
SET(RASTER, raster.BackFaceFillMode, genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode]);
SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
SET(RASTER, raster.ConservativeRasterizationEnable,
dyn->rs.conservative_mode !=
VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) {
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments.
*
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
*/
SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
#if GFX_VER == 9
/* For the PMA fix */
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
#endif
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
opt_ds.stencil.front.compare_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
opt_ds.stencil.front.write_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
opt_ds.stencil.front.reference & 0xff);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
opt_ds.stencil.back.reference & 0xff);
SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]);
SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable, opt_ds.stencil.write_enable);
SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]);
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]);
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]);
SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]);
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]);
#if GFX_VER == 9
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
SET(PMA_FIX, pma_fix, pma);
#endif
#if INTEL_NEEDS_WA_18019816803
if (intel_needs_workaround(cmd_buffer->device->info, 18019816803)) {
bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
if (cmd_buffer->state.gfx.ds_write_state != ds_write_state) {
cmd_buffer->state.gfx.ds_write_state = ds_write_state;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_18019816803);
}
}
#endif
}
#if GFX_VER >= 12
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
/* Only look at updating the bounds if testing is enabled */
if (dyn->ds.depth.bounds_test.enable) {
SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
}
}
#endif
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE)) {
SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
1.0f / MAX2(1, dyn->rs.line.stipple.factor));
SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);
SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RESTART_INDEX) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
SET(VF, vf.CutIndex, cmd_buffer->state.gfx.restart_index);
}
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER)
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
#if GFX_VERx10 >= 125
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
#endif
if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
(BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
/* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
* threads.
*/
bool force_thread_dispatch =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(pipeline->force_fragment_thread_dispatch ||
anv_cmd_buffer_all_color_write_masked(cmd_buffer));
SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
(cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
bool has_writeable_rt =
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(color_writes & ((1u << state->color_att_count) - 1)) != 0;
SET(BLEND_STATE, blend.AlphaToCoverageEnable,
dyn->ms.alpha_to_coverage_enable);
SET(BLEND_STATE, blend.AlphaToOneEnable,
dyn->ms.alpha_to_one_enable);
bool independent_alpha_blend = false;
/* Wa_14018912822, check if we set these during RT setup. */
bool color_blend_zero = false;
bool alpha_blend_zero = false;
for (uint32_t i = 0; i < MAX_RTS; i++) {
/* Disable anything above the current number of color attachments. */
bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
(color_writes & BITFIELD_BIT(i)) == 0;
SET(BLEND_STATE, blend.rts[i].WriteDisableAlpha,
write_disabled ||
(dyn->cb.attachments[i].write_mask &
VK_COLOR_COMPONENT_A_BIT) == 0);
SET(BLEND_STATE, blend.rts[i].WriteDisableRed,
write_disabled ||
(dyn->cb.attachments[i].write_mask &
VK_COLOR_COMPONENT_R_BIT) == 0);
SET(BLEND_STATE, blend.rts[i].WriteDisableGreen,
write_disabled ||
(dyn->cb.attachments[i].write_mask &
VK_COLOR_COMPONENT_G_BIT) == 0);
SET(BLEND_STATE, blend.rts[i].WriteDisableBlue,
write_disabled ||
(dyn->cb.attachments[i].write_mask &
VK_COLOR_COMPONENT_B_BIT) == 0);
/* Vulkan specification 1.2.168, VkLogicOp:
*
* "Logical operations are controlled by the logicOpEnable and
* logicOp members of VkPipelineColorBlendStateCreateInfo. If
* logicOpEnable is VK_TRUE, then a logical operation selected by
* logicOp is applied between each color attachment and the
* fragment's corresponding output value, and blending of all
* attachments is treated as if it were disabled."
*
* From the Broadwell PRM Volume 2d: Command Reference: Structures:
* BLEND_STATE_ENTRY:
*
* "Enabling LogicOp and Color Buffer Blending at the same time is
* UNDEFINED"
*/
SET(BLEND_STATE, blend.rts[i].LogicOpFunction,
genX(vk_to_intel_logic_op)[dyn->cb.logic_op]);
SET(BLEND_STATE, blend.rts[i].LogicOpEnable, dyn->cb.logic_op_enable);
SET(BLEND_STATE, blend.rts[i].ColorClampRange, COLORCLAMP_RTFORMAT);
SET(BLEND_STATE, blend.rts[i].PreBlendColorClampEnable, true);
SET(BLEND_STATE, blend.rts[i].PostBlendColorClampEnable, true);
/* Setup blend equation. */
SET(BLEND_STATE, blend.rts[i].ColorBlendFunction,
genX(vk_to_intel_blend_op)[
dyn->cb.attachments[i].color_blend_op]);
SET(BLEND_STATE, blend.rts[i].AlphaBlendFunction,
genX(vk_to_intel_blend_op)[
dyn->cb.attachments[i].alpha_blend_op]);
if (dyn->cb.attachments[i].src_color_blend_factor !=
dyn->cb.attachments[i].src_alpha_blend_factor ||
dyn->cb.attachments[i].dst_color_blend_factor !=
dyn->cb.attachments[i].dst_alpha_blend_factor ||
dyn->cb.attachments[i].color_blend_op !=
dyn->cb.attachments[i].alpha_blend_op) {
independent_alpha_blend = true;
}
/* The Dual Source Blending documentation says:
*
* "If SRC1 is included in a src/dst blend factor and
* a DualSource RT Write message is not used, results
* are UNDEFINED. (This reflects the same restriction in DX APIs,
* where undefined results are produced if “o1” is not written
* by a PS there are no default values defined)."
*
* There is no way to gracefully fix this undefined situation
* so we just disable the blending to prevent possible issues.
*/
if (wm_prog_data && !wm_prog_data->dual_src_blend &&
anv_is_dual_src_blend_equation(&dyn->cb.attachments[i])) {
SET(BLEND_STATE, blend.rts[i].ColorBufferBlendEnable, false);
} else {
SET(BLEND_STATE, blend.rts[i].ColorBufferBlendEnable,
!dyn->cb.logic_op_enable &&
dyn->cb.attachments[i].blend_enable);
}
/* Our hardware applies the blend factor prior to the blend function
* regardless of what function is used. Technically, this means the
* hardware can do MORE than GL or Vulkan specify. However, it also
* means that, for MIN and MAX, we have to stomp the blend factor to
* ONE to make it a no-op.
*/
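/* For example, Vulkan defines VK_BLEND_OP_MIN as min(Rs, Rd) with no
* blend factors applied, while the hardware computes
* min(Rs * SrcFactor, Rd * DstFactor), so forcing both factors to
* BLENDFACTOR_ONE recovers the API behavior.
*/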
uint32_t SourceBlendFactor;
uint32_t DestinationBlendFactor;
uint32_t SourceAlphaBlendFactor;
uint32_t DestinationAlphaBlendFactor;
if (dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MIN ||
dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MAX) {
SourceBlendFactor = BLENDFACTOR_ONE;
DestinationBlendFactor = BLENDFACTOR_ONE;
} else {
SourceBlendFactor = genX(vk_to_intel_blend)[
dyn->cb.attachments[i].src_color_blend_factor];
DestinationBlendFactor = genX(vk_to_intel_blend)[
dyn->cb.attachments[i].dst_color_blend_factor];
}
if (dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MIN ||
dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MAX) {
SourceAlphaBlendFactor = BLENDFACTOR_ONE;
DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
} else {
SourceAlphaBlendFactor = genX(vk_to_intel_blend)[
dyn->cb.attachments[i].src_alpha_blend_factor];
DestinationAlphaBlendFactor = genX(vk_to_intel_blend)[
dyn->cb.attachments[i].dst_alpha_blend_factor];
}
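/* Wa_14018912822 (opt-in through drirc): on affected multisampled
* configurations a destination blend factor of ZERO is replaced with
* the constant color/alpha factor here, and the blend constants are
* forced to 0.0 in CC_STATE/3DSTATE_PS_BLEND below, so the effective
* result still matches ZERO.
*/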
if (instance->intel_enable_wa_14018912822 &&
intel_needs_workaround(cmd_buffer->device->info, 14018912822) &&
pipeline->rasterization_samples > 1) {
if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
color_blend_zero = true;
}
if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
alpha_blend_zero = true;
}
}
SET(BLEND_STATE, blend.rts[i].SourceBlendFactor, SourceBlendFactor);
SET(BLEND_STATE, blend.rts[i].DestinationBlendFactor, DestinationBlendFactor);
SET(BLEND_STATE, blend.rts[i].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
SET(BLEND_STATE, blend.rts[i].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
}
cmd_buffer->state.gfx.color_blend_zero = color_blend_zero;
cmd_buffer->state.gfx.alpha_blend_zero = alpha_blend_zero;
SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);
/* 3DSTATE_PS_BLEND to be consistent with the rest of the
* BLEND_STATE_ENTRY.
*/
SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
SET(PS_BLEND, ps_blend.ColorBufferBlendEnable, GET(blend.rts[0].ColorBufferBlendEnable));
SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor, GET(blend.rts[0].SourceAlphaBlendFactor));
SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor, cmd_buffer->state.gfx.alpha_blend_zero ?
BLENDFACTOR_CONST_COLOR :
GET(blend.rts[0].DestinationAlphaBlendFactor));
SET(PS_BLEND, ps_blend.SourceBlendFactor, GET(blend.rts[0].SourceBlendFactor));
SET(PS_BLEND, ps_blend.DestinationBlendFactor, cmd_buffer->state.gfx.color_blend_zero ?
BLENDFACTOR_CONST_COLOR :
GET(blend.rts[0].DestinationBlendFactor));
SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable, GET(blend.IndependentAlphaBlendEnable));
SET(PS_BLEND, ps_blend.AlphaToCoverageEnable, dyn->ms.alpha_to_coverage_enable);
}
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
SET(CC_STATE, cc.BlendConstantColorRed,
cmd_buffer->state.gfx.color_blend_zero ? 0.0f :
dyn->cb.blend_constants[0]);
SET(CC_STATE, cc.BlendConstantColorGreen,
cmd_buffer->state.gfx.color_blend_zero ?
0.0f : dyn->cb.blend_constants[1]);
SET(CC_STATE, cc.BlendConstantColorBlue,
cmd_buffer->state.gfx.color_blend_zero ?
0.0f : dyn->cb.blend_constants[2]);
SET(CC_STATE, cc.BlendConstantColorAlpha,
cmd_buffer->state.gfx.alpha_blend_zero ?
0.0f : dyn->cb.blend_constants[3]);
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
struct anv_instance *instance = cmd_buffer->device->physical->instance;
const VkViewport *viewports = dyn->vp.viewports;
const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
const VkViewport *vp = &viewports[i];
/* The gfx7 state struct has just the matrix and guardband fields, the
* gfx8 struct adds the min/max viewport fields. */
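/* The matrix elements encode the usual viewport transform. For example,
* a viewport at (0, 0) of size WxH with depth range [0, 1] and
* negative_one_to_one disabled gives:
*
* x_screen = x_ndc * W/2 + W/2
* y_screen = y_ndc * H/2 + H/2
* z_screen = z_ndc * 1 + 0
*/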
struct GENX(SF_CLIP_VIEWPORT) sfv = {
.ViewportMatrixElementm00 = vp->width / 2,
.ViewportMatrixElementm11 = vp->height / 2,
.ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
.ViewportMatrixElementm30 = vp->x + vp->width / 2,
.ViewportMatrixElementm31 = vp->y + vp->height / 2,
.ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
(vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
.XMinClipGuardband = -1.0f,
.XMaxClipGuardband = 1.0f,
.YMinClipGuardband = -1.0f,
.YMaxClipGuardband = 1.0f,
.XMinViewPort = vp->x,
.XMaxViewPort = vp->x + vp->width - 1,
.YMinViewPort = MIN2(vp->y, vp->y + vp->height),
.YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
};
/* Fix depth test misrenderings by lowering translated depth range */
if (instance->lower_depth_range_rate != 1.0f)
sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
const uint32_t fb_size_max = 1 << 14;
uint32_t x_min = 0, x_max = fb_size_max;
uint32_t y_min = 0, y_max = fb_size_max;
/* If we have a valid renderArea, include that */
if (gfx->render_area.extent.width > 0 &&
gfx->render_area.extent.height > 0) {
x_min = MAX2(x_min, gfx->render_area.offset.x);
x_max = MIN2(x_max, gfx->render_area.offset.x +
gfx->render_area.extent.width);
y_min = MAX2(y_min, gfx->render_area.offset.y);
y_max = MIN2(y_max, gfx->render_area.offset.y +
gfx->render_area.extent.height);
}
/* The client is required to have enough scissors for whatever it
* sets as ViewportIndex but it's possible that they've got more
* viewports set from a previous command. Also, from the Vulkan
* 1.3.207:
*
* "The application must ensure (using scissor if necessary) that
* all rendering is contained within the render area."
*
* If the client doesn't set a scissor, that basically means it
* guarantees everything is in-bounds already. If we end up using a
* guardband of [-1, 1] in that case, there shouldn't be much loss.
* It's theoretically possible that they could do all their clipping
* with clip planes but that'd be a bit odd.
*/
if (i < dyn->vp.scissor_count) {
const VkRect2D *scissor = &dyn->vp.scissors[i];
x_min = MAX2(x_min, scissor->offset.x);
x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
y_min = MAX2(y_min, scissor->offset.y);
y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
}
/* Only bother calculating the guardband if our known render area is
* less than the maximum size. Otherwise, it will calculate [-1, 1]
* anyway but possibly with precision loss.
*/
if (x_min > 0 || x_max < fb_size_max ||
y_min > 0 || y_max < fb_size_max) {
intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
sfv.ViewportMatrixElementm00,
sfv.ViewportMatrixElementm11,
sfv.ViewportMatrixElementm30,
sfv.ViewportMatrixElementm31,
&sfv.XMinClipGuardband,
&sfv.XMaxClipGuardband,
&sfv.YMinClipGuardband,
&sfv.YMaxClipGuardband);
}
#define SET_VP(bit, state, field) \
do { \
if (hw_state->state.field != sfv.field) { \
hw_state->state.field = sfv.field; \
BITSET_SET(hw_state->dirty, \
ANV_GFX_STATE_##bit); \
} \
} while (0)
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
#undef SET_VP
float min_depth = dyn->rs.depth_clamp_enable ?
MIN2(vp->minDepth, vp->maxDepth) :
0.0f;
float max_depth = dyn->rs.depth_clamp_enable ?
MAX2(vp->minDepth, vp->maxDepth) :
1.0f;
SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
dyn->vp.viewport_count - 1 : 0);
}
/* If the HW state is already considered dirty or the previous
* programmed viewport count is smaller than what we need, update the
* viewport count and ensure the HW state is dirty. Otherwise, if the
* number of viewports programmed previously was larger than what we need
* now, there is no need to reemit; we can just keep the old programmed
* values.
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
hw_state->vp_cc.count < dyn->vp.viewport_count) {
hw_state->vp_cc.count = dyn->vp.viewport_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
}
}
if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS)) {
const VkRect2D *scissors = dyn->vp.scissors;
const VkViewport *viewports = dyn->vp.viewports;
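/* SCISSOR_RECT max coordinates are inclusive, so each rectangle below
* is the intersection of the API scissor, the viewport bounds and (for
* primary command buffers) the render area, with the maxima expressed
* as the last covered pixel (i.e. max - 1).
*/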
for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
const VkRect2D *s = &scissors[i];
const VkViewport *vp = &viewports[i];
const int max = 0xffff;
uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
uint32_t x_min = MAX2(s->offset.x, vp->x);
int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
MAX2(vp->y, vp->y + vp->height) - 1);
int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
vp->x + vp->width - 1);
y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
/* Do this math using int64_t so overflow gets clamped correctly. */
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
y_max = CLAMP((uint64_t) y_max, 0,
gfx->render_area.offset.y +
gfx->render_area.extent.height - 1);
x_max = CLAMP((uint64_t) x_max, 0,
gfx->render_area.offset.x +
gfx->render_area.extent.width - 1);
}
if (s->extent.width <= 0 || s->extent.height <= 0) {
/* Since xmax and ymax are inclusive, we have to have xmax < xmin
* or ymax < ymin for empty clips. In case clip x, y, width, and height
* are all 0, the clamps below produce 0 for xmin, ymin, xmax,
* ymax, which isn't what we want. Just special case empty clips
* and produce a canonical empty clip.
*/
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
} else {
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
}
}
/* If the HW state is already considered dirty or the previous
* programmed scissor count is smaller than what we need, update the
* scissor count and ensure the HW state is dirty. Otherwise, if the
* number of scissors programmed previously was larger than what we need
* now, there is no need to reemit; we can just keep the old programmed
* values.
*/
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
hw_state->scissor.count < dyn->vp.scissor_count) {
hw_state->scissor.count = dyn->vp.scissor_count;
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
}
}
#undef GET
#undef SET
#undef SET_STAGE
vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
}
/**
* This function emits the dirty instructions in the batch buffer.
*/
void
genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
struct anv_graphics_pipeline *pipeline = gfx->pipeline;
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
/* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
* it after.
*/
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ms);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);
#if GFX_VER >= 11
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
#endif
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vs);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.hs);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ds);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_statistics);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
/* Wa_16011773973:
* If SOL is enabled and SO_DECL state has to be programmed,
* 1. Send 3D State SOL state with SOL disabled
* 2. Send SO_DECL NP state
* 3. Send 3D State SOL with SOL Enabled
*/
if (intel_needs_workaround(device->info, 16011773973) &&
pipeline->uses_xfb)
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
final.so_decl_list);
#if GFX_VER >= 11
/* ICL PRMs, Volume 2a - Command Reference: Instructions,
* 3DSTATE_SO_DECL_LIST:
*
* "Workaround: This command must be followed by a PIPE_CONTROL with
* CS Stall bit set."
*
* On DG2+ also known as Wa_1509820217.
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
ANV_PIPE_CS_STALL_BIT);
#endif
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps_extra);
if (device->vk.enabled_extensions.EXT_mesh_shader) {
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_control);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_control);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
} else {
assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
}
#define INIT(category, name) \
.name = hw_state->category.name
#define SET(s, category, name) \
s.name = hw_state->category.name
/* Now the potentially dynamic instructions */
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
pipeline, partial.clip, clip) {
SET(clip, clip, APIMode);
SET(clip, clip, ViewportXYClipTestEnable);
SET(clip, clip, TriangleStripListProvokingVertexSelect);
SET(clip, clip, LineStripListProvokingVertexSelect);
SET(clip, clip, TriangleFanProvokingVertexSelect);
SET(clip, clip, MaximumVPIndex);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
genX(streamout_prologue)(cmd_buffer);
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
pipeline, partial.so, so) {
SET(so, so, RenderingDisable);
SET(so, so, RenderStreamSelect);
SET(so, so, ReorderMode);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
struct anv_state sf_clip_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->vp_sf_clip.count * 64, 64);
for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
struct GENX(SF_CLIP_VIEWPORT) sfv = {
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
INIT(vp_sf_clip.elem[i], XMinClipGuardband),
INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
INIT(vp_sf_clip.elem[i], YMinClipGuardband),
INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
INIT(vp_sf_clip.elem[i], XMinViewPort),
INIT(vp_sf_clip.elem[i], XMaxViewPort),
INIT(vp_sf_clip.elem[i], YMinViewPort),
INIT(vp_sf_clip.elem[i], YMaxViewPort),
};
GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
clip.SFClipViewportPointer = sf_clip_state.offset;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->vp_cc.count * 8, 32);
for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
struct GENX(CC_VIEWPORT) cc_viewport = {
INIT(vp_cc.elem[i], MinimumDepth),
INIT(vp_cc.elem[i], MaximumDepth),
};
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
cc.CCViewportPointer = cc_state.offset;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
/* Wa_1409725701:
*
* "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
* stored as an array of up to 16 elements. The location of first
* element of the array, as specified by Pointer to SCISSOR_RECT,
* should be aligned to a 64-byte boundary."
*/
struct anv_state scissor_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->scissor.count * 8, 64);
for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
struct GENX(SCISSOR_RECT) scissor = {
INIT(scissor.elem[i], ScissorRectangleYMin),
INIT(scissor.elem[i], ScissorRectangleXMin),
INIT(scissor.elem[i], ScissorRectangleYMax),
INIT(scissor.elem[i], ScissorRectangleXMax),
};
GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
ssp.ScissorRectPointer = scissor_state.offset;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
SET(vft, vft, PrimitiveTopologyType);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
const uint32_t ve_count =
pipeline->vs_input_elements + pipeline->svgs_count;
const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count);
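/* 3DSTATE_VERTEX_ELEMENTS is one header dword followed by a 2-dword
* VERTEX_ELEMENT_STATE per element; at least one element is always
* emitted (the device's empty_vs_input is copied when ve_count is 0) so
* the packet stays well formed.
*/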
uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
GENX(3DSTATE_VERTEX_ELEMENTS));
if (p) {
if (ve_count == 0) {
memcpy(p + 1, cmd_buffer->device->empty_vs_input,
sizeof(cmd_buffer->device->empty_vs_input));
} else if (ve_count == pipeline->vertex_input_elems) {
/* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so
* everything is in pipeline->vertex_input_data and we can just
* memcpy
*/
memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count);
anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
final.vf_instancing);
} else {
assert(pipeline->final.vf_instancing.len == 0);
/* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */
genX(emit_vertex_input)(&cmd_buffer->batch, p + 1,
pipeline, dyn->vi, false /* emit_in_pipeline */);
/* Then append the VERTEX_ELEMENT_STATE for the draw parameters */
memcpy(p + 1 + 2 * pipeline->vs_input_elements,
pipeline->vertex_input_data,
4 * 2 * pipeline->vertex_input_elems);
}
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
pipeline, partial.te, te) {
SET(te, te, OutputTopology);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_GS),
pipeline, partial.gs, gs) {
SET(gs, gs, ReorderMode);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
#if GFX_VER == 11
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
SET(cps, cps, CoarsePixelShadingMode);
SET(cps, cps, MinCPSizeX);
SET(cps, cps, MinCPSizeY);
}
#elif GFX_VER >= 12
/* TODO: we can optimize this flush in the following cases:
*
* In the case where the last geometry shader emits a value that is
* not constant, we can avoid this stall because we can synchronize
* the pixel shader internally with
* 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
*
* If we know that the previous pipeline and the current one are
* using the same fragment shading rate.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
pc.PSSStallSyncEnable = true;
#else
pc.PSDSyncEnable = true;
#endif
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
SET(cps, cps, CoarsePixelShadingStateArrayPointer);
}
#endif
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
pipeline, partial.sf, sf) {
SET(sf, sf, LineWidth);
SET(sf, sf, TriangleStripListProvokingVertexSelect);
SET(sf, sf, LineStripListProvokingVertexSelect);
SET(sf, sf, TriangleFanProvokingVertexSelect);
SET(sf, sf, LegacyGlobalDepthBiasEnable);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_RASTER),
pipeline, partial.raster, raster) {
SET(raster, raster, APIMode);
SET(raster, raster, DXMultisampleRasterizationEnable);
SET(raster, raster, AntialiasingEnable);
SET(raster, raster, CullMode);
SET(raster, raster, FrontWinding);
SET(raster, raster, GlobalDepthOffsetEnableSolid);
SET(raster, raster, GlobalDepthOffsetEnableWireframe);
SET(raster, raster, GlobalDepthOffsetEnablePoint);
SET(raster, raster, GlobalDepthOffsetConstant);
SET(raster, raster, GlobalDepthOffsetScale);
SET(raster, raster, GlobalDepthOffsetClamp);
SET(raster, raster, FrontFaceFillMode);
SET(raster, raster, BackFaceFillMode);
SET(raster, raster, ViewportZFarClipTestEnable);
SET(raster, raster, ViewportZNearClipTestEnable);
SET(raster, raster, ConservativeRasterizationEnable);
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
INIT(cc, BlendConstantColorRed),
INIT(cc, BlendConstantColorGreen),
INIT(cc, BlendConstantColorBlue),
INIT(cc, BlendConstantColorAlpha),
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
SET(sm, sm, SampleMask);
}
}
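   /* All depth/stencil state is dynamic, so 3DSTATE_WM_DEPTH_STENCIL is
    * built entirely from hw_state rather than merged with a pipeline
    * template.
    */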
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
SET(ds, ds, DoubleSidedStencilEnable);
SET(ds, ds, StencilTestMask);
SET(ds, ds, StencilWriteMask);
SET(ds, ds, BackfaceStencilTestMask);
SET(ds, ds, BackfaceStencilWriteMask);
SET(ds, ds, StencilReferenceValue);
SET(ds, ds, BackfaceStencilReferenceValue);
SET(ds, ds, DepthTestEnable);
SET(ds, ds, DepthBufferWriteEnable);
SET(ds, ds, DepthTestFunction);
SET(ds, ds, StencilTestEnable);
SET(ds, ds, StencilBufferWriteEnable);
SET(ds, ds, StencilFailOp);
SET(ds, ds, StencilPassDepthPassOp);
SET(ds, ds, StencilPassDepthFailOp);
SET(ds, ds, StencilTestFunction);
SET(ds, ds, BackfaceStencilFailOp);
SET(ds, ds, BackfaceStencilPassDepthPassOp);
SET(ds, ds, BackfaceStencilPassDepthFailOp);
SET(ds, ds, BackfaceStencilTestFunction);
}
}
#if GFX_VER >= 12
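   /* Depth bounds test state, only programmed on Gfx12+. */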
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
SET(db, db, DepthBoundsTestEnable);
SET(db, db, DepthBoundsTestMinValue);
SET(db, db, DepthBoundsTestMaxValue);
}
}
#endif
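   /* Line stipple pattern and repeat counts come straight from dynamic
    * state.
    */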
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
SET(ls, ls, LineStipplePattern);
SET(ls, ls, LineStippleInverseRepeatCount);
SET(ls, ls, LineStippleRepeatCount);
}
#if GFX_VER >= 11
/* ICL PRMs, Volume 2a - Command Reference: Instructions,
* 3DSTATE_LINE_STIPPLE:
*
* "Workaround: This command must be followed by a PIPE_CONTROL with
* CS Stall bit set."
*/
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT);
#endif
}
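   /* 3DSTATE_VF carries the primitive restart state (cut index enable and
    * value).
    */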
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
vf.GeometryDistributionEnable = true;
#endif
SET(vf, vf, IndexedDrawCutIndexEnable);
SET(vf, vf, CutIndex);
}
}
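   /* Rebind the index buffer: format, MOCS, starting address and size. */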
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
uint32_t offset = cmd_buffer->state.gfx.index_offset;
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
ib.IndexFormat = cmd_buffer->state.gfx.index_type;
ib.MOCS = anv_mocs(cmd_buffer->device,
buffer->address.bo,
ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
ib.L3BypassDisable = true;
#endif
ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
VK_WHOLE_SIZE);
}
}
#if GFX_VERx10 >= 125
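   /* Gfx12.5+: merge the dynamic list cut index (primitive restart) enable
    * into the pipeline's pre-packed 3DSTATE_VFG.
    */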
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
pipeline, partial.vfg, vfg) {
SET(vfg, vfg, ListCutIndexEnable);
}
}
#endif
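   /* Reprogram the sample positions; passing NULL selects the default
    * pattern when custom sample locations are disabled.
    */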
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
genX(emit_sample_pattern)(&cmd_buffer->batch,
dyn->ms.sample_locations_enable ?
dyn->ms.sample_locations : NULL);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
pipeline, partial.wm, wm) {
SET(wm, wm, ForceThreadDispatchEnable);
SET(wm, wm, LineStippleEnable);
}
}
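   /* 3DSTATE_PS_BLEND is a condensed view of the blend state used by the
    * pixel shader hardware; the full per-RT programming lives in
    * BLEND_STATE below.
    */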
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
SET(blend, ps_blend, HasWriteableRT);
SET(blend, ps_blend, ColorBufferBlendEnable);
SET(blend, ps_blend, SourceAlphaBlendFactor);
SET(blend, ps_blend, DestinationAlphaBlendFactor);
SET(blend, ps_blend, SourceBlendFactor);
SET(blend, ps_blend, DestinationBlendFactor);
SET(blend, ps_blend, AlphaTestEnable);
SET(blend, ps_blend, IndependentAlphaBlendEnable);
SET(blend, ps_blend, AlphaToCoverageEnable);
}
}
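   /* BLEND_STATE is a single header followed by MAX_RTS BLEND_STATE_ENTRYs,
    * all packed into one dynamic-state allocation. The pointer packet is
    * (re)emitted through ANV_GFX_STATE_BLEND_STATE_POINTERS below.
    */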
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
num_dwords * 4,
64);
uint32_t *dws = blend_states.map;
struct GENX(BLEND_STATE) blend_state = {
INIT(blend, AlphaToCoverageEnable),
INIT(blend, AlphaToOneEnable),
INIT(blend, IndependentAlphaBlendEnable),
};
GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state);
/* Jump to blend entries. */
dws += GENX(BLEND_STATE_length);
for (uint32_t i = 0; i < MAX_RTS; i++) {
struct GENX(BLEND_STATE_ENTRY) entry = {
INIT(blend.rts[i], WriteDisableAlpha),
INIT(blend.rts[i], WriteDisableRed),
INIT(blend.rts[i], WriteDisableGreen),
INIT(blend.rts[i], WriteDisableBlue),
INIT(blend.rts[i], LogicOpFunction),
INIT(blend.rts[i], LogicOpEnable),
INIT(blend.rts[i], ColorBufferBlendEnable),
INIT(blend.rts[i], ColorClampRange),
INIT(blend.rts[i], PreBlendColorClampEnable),
INIT(blend.rts[i], PostBlendColorClampEnable),
INIT(blend.rts[i], SourceBlendFactor),
INIT(blend.rts[i], DestinationBlendFactor),
INIT(blend.rts[i], ColorBlendFunction),
INIT(blend.rts[i], SourceAlphaBlendFactor),
INIT(blend.rts[i], DestinationAlphaBlendFactor),
INIT(blend.rts[i], AlphaBlendFunction),
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
dws += GENX(BLEND_STATE_ENTRY_length);
}
cmd_buffer->state.gfx.blend_states = blend_states;
/* Dirty the pointer state so 3DSTATE_BLEND_STATE_POINTERS is re-emitted below. */
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = cmd_buffer->state.gfx.blend_states.offset;
bsp.BlendStatePointerValid = true;
}
}
#if INTEL_NEEDS_WA_18019816803
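   /* Wa_18019816803: emit a PSS stall sync whenever the workaround's tracked
    * state has been dirtied.
    */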
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_PSS_STALL_SYNC_BIT);
}
#endif
#if GFX_VER == 9
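   /* Gfx9 only: apply the precomputed stencil PMA fix toggle, see
    * genX(cmd_buffer_enable_pma_fix) below.
    */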
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
#endif
#undef INIT
#undef SET
BITSET_ZERO(hw_state->dirty);
}
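
/* Toggle the Gfx9 stencil PMA optimization in CACHE_MODE_0. The register
 * write has to be bracketed by heavyweight PIPE_CONTROLs, so the function
 * early-outs when the requested value matches what is already programmed.
 */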
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
if (cmd_buffer->state.pma_fix_enabled == enable)
return;
cmd_buffer->state.pma_fix_enabled = enable;
/* According to the Broadwell PIPE_CONTROL documentation, software should
* emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
* prior to the LRI. If stencil buffer writes are enabled, then a Render
* Cache Flush is also necessary.
*
* The Skylake docs say to use a depth stall rather than a command
* streamer stall. However, the hardware seems to violently disagree.
* A full command streamer stall seems to be needed in both cases.
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT |
#if GFX_VER >= 12
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
#if GFX_VER == 9
uint32_t cache_mode;
anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
.STCPMAOptimizationEnable = enable,
.STCPMAOptimizationEnableMask = true);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
lri.RegisterOffset = GENX(CACHE_MODE_0_num);
lri.DataDWord = cache_mode;
}
#endif /* GFX_VER == 9 */
/* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
* Flush bits is often necessary. We do it regardless because it's easier.
* The render cache flush is also necessary if stencil writes are enabled.
*
* Again, the Skylake docs give a different set of flushes but the BDW
* flushes seem to work just as well.
*/
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DEPTH_STALL_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
#if GFX_VER >= 12
ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
}