anv: optimize emission of dynamic state with blorp

A few structures that live in the dynamic state heap are also emitted by
blorp. Instead of repacking them after a blorp operation, just re-emit the
pointers using the dynamic state heap offset of the previously packed state.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28368>
This commit is contained in:
Lionel Landwerlin
2024-03-25 15:44:33 +02:00
committed by Marge Bot
parent fe1baa6481
commit 65e2b37ade
4 changed files with 43 additions and 25 deletions

View File

@@ -1395,9 +1395,10 @@ enum anv_gfx_state_bits {
ANV_GFX_STATE_TASK_REDISTRIB,
/* Dynamic states */
ANV_GFX_STATE_BLEND_STATE, /* Just the dynamic state structure */
ANV_GFX_STATE_BLEND_STATE_POINTERS, /* The pointer to the dynamic state */
ANV_GFX_STATE_BLEND_STATE_PTR, /* The pointer to the dynamic state */
ANV_GFX_STATE_CLIP,
ANV_GFX_STATE_CC_STATE,
ANV_GFX_STATE_CC_STATE_PTR,
ANV_GFX_STATE_CPS,
ANV_GFX_STATE_DEPTH_BOUNDS,
ANV_GFX_STATE_INDEX_BUFFER,
@@ -1415,6 +1416,7 @@ enum anv_gfx_state_bits {
ANV_GFX_STATE_VF_TOPOLOGY,
ANV_GFX_STATE_VFG,
ANV_GFX_STATE_VIEWPORT_CC,
ANV_GFX_STATE_VIEWPORT_CC_PTR,
ANV_GFX_STATE_VIEWPORT_SF_CLIP,
ANV_GFX_STATE_WM,
ANV_GFX_STATE_WM_DEPTH_STENCIL,
@@ -1458,7 +1460,9 @@ struct anv_gfx_dynamic_state {
uint32_t DestinationAlphaBlendFactor;
uint32_t AlphaBlendFunction;
} rts[MAX_RTS];
} blend;
struct anv_state state;
} blend;
/* 3DSTATE_CC_STATE_POINTERS */
struct {
@@ -1466,6 +1470,8 @@ struct anv_gfx_dynamic_state {
float BlendConstantColorGreen;
float BlendConstantColorBlue;
float BlendConstantColorAlpha;
struct anv_state state;
} cc;
/* 3DSTATE_CLIP */
@@ -1607,6 +1613,8 @@ struct anv_gfx_dynamic_state {
float MinimumDepth;
float MaximumDepth;
} elem[MAX_VIEWPORTS];
struct anv_state state;
} vp_cc;
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP */
@@ -3610,11 +3618,6 @@ struct anv_cmd_graphics_state {
struct vk_vertex_input_state vertex_input;
struct vk_sample_locations_state sample_locations;
/**
* The latest BLEND_STATE structure packed in dynamic state heap
*/
struct anv_state blend_states;
bool object_preemption;
bool has_uint_rt;

View File

@@ -123,9 +123,10 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
NAME(TASK_CONTROL);
NAME(TASK_SHADER);
NAME(TASK_REDISTRIB);
NAME(BLEND_STATE_POINTERS);
NAME(BLEND_STATE_PTR);
NAME(CLIP);
NAME(CC_STATE);
NAME(CC_STATE_PTR);
NAME(CPS);
NAME(DEPTH_BOUNDS);
NAME(INDEX_BUFFER);
@@ -143,6 +144,7 @@ anv_gfx_state_bit_to_str(enum anv_gfx_state_bits state)
NAME(VF_TOPOLOGY);
NAME(VFG);
NAME(VIEWPORT_CC);
NAME(VIEWPORT_CC_PTR);
NAME(VIEWPORT_SF_CLIP);
NAME(WM);
NAME(WM_DEPTH_STENCIL);

View File

@@ -387,7 +387,7 @@ blorp_exec_on_render(struct blorp_batch *batch,
#if GFX_VER >= 12
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
#endif
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
@@ -406,13 +406,13 @@ blorp_exec_on_render(struct blorp_batch *batch,
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
if (batch->blorp->config.use_mesh_shading) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL);
}
if (params->wm_prog_data) {
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PS_BLEND);
}

View File

@@ -1607,7 +1607,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC)) {
struct anv_state cc_state =
hw_state->vp_cc.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
hw_state->vp_cc.count * 8, 32);
@@ -1616,12 +1616,19 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
INIT(vp_cc.elem[i], MinimumDepth),
INIT(vp_cc.elem[i], MaximumDepth),
};
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8,
&cc_viewport);
}
/* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below
*/
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
cc.CCViewportPointer = cc_state.offset;
cc.CCViewportPointer = hw_state->vp_cc.state.offset;
}
cmd_buffer->state.gfx.viewport_set = true;
}
@@ -1772,7 +1779,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
struct anv_state cc_state =
hw_state->cc.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
@@ -1782,10 +1789,16 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
INIT(cc, BlendConstantColorBlue),
INIT(cc, BlendConstantColorAlpha),
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc);
/* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below
*/
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
ccp.ColorCalcStatePointer = cc_state.offset;
ccp.ColorCalcStatePointer = hw_state->cc.state.offset;
ccp.ColorCalcStatePointerValid = true;
}
}
@@ -1919,19 +1932,19 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
struct anv_state blend_states =
hw_state->blend.state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
num_dwords * 4,
64);
uint32_t *dws = blend_states.map;
uint32_t *dws = hw_state->blend.state.map;
struct GENX(BLEND_STATE) blend_state = {
INIT(blend, AlphaToCoverageEnable),
INIT(blend, AlphaToOneEnable),
INIT(blend, IndependentAlphaBlendEnable),
};
GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state);
GENX(BLEND_STATE_pack)(NULL, dws, &blend_state);
/* Jump to blend entries. */
dws += GENX(BLEND_STATE_length);
@@ -1959,14 +1972,13 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
dws += GENX(BLEND_STATE_ENTRY_length);
}
gfx->blend_states = blend_states;
/* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS);
BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
}
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS)) {
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = gfx->blend_states.offset;
bsp.BlendStatePointer = hw_state->blend.state.offset;
bsp.BlendStatePointerValid = true;
}
}
@@ -2047,7 +2059,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
/* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */
if (intel_needs_workaround(device->info, 18020335297) &&
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) &&
(BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) &&
cmd_buffer->state.gfx.viewport_set) {
/* For mesh, we implement the WA using CS stall. This is for
* simplicity and takes care of possible interaction with Wa_16014390852.