anv: refactor to fix pipe control debugging

While earlier changes to pipe control emission allowed a debug dump of
each pipe control, they also changed the debug output to almost always
print the same reason/function for each PC. These changes fix the output
so that we print the original function name where the PC is emitted.

As an example:

pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_batch_emit_pipe_control_write
pc: emit PC=( ) reason: gfx11_batch_emit_pipe_control_write

changes back to:

pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_emit_apply_pipe_flushes
pc: emit PC=( ) reason: cmd_buffer_emit_depth_stencil

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25282>
This commit is contained in:
Tapani Pälli
2023-09-19 09:35:16 +03:00
committed by Marge Bot
parent 747c7042df
commit 8d2dcd55d7
7 changed files with 138 additions and 127 deletions

View File

@@ -224,7 +224,8 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value);
void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
enum anv_pipe_bits bits);
enum anv_pipe_bits bits,
const char *reason);
void
genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
@@ -232,7 +233,14 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
uint32_t post_sync_op,
struct anv_address address,
uint32_t imm_data,
enum anv_pipe_bits bits);
enum anv_pipe_bits bits,
const char *reason);
#define genx_batch_emit_pipe_control(a, b, c) \
genX(batch_emit_pipe_control) (a, b, c, __func__)
#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f) \
genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, __func__)
void genX(batch_emit_breakpoint)(struct anv_batch *batch,
struct anv_device *device,

View File

@@ -80,11 +80,11 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
return bits;
}
#define anv_debug_dump_pc(pc) \
#define anv_debug_dump_pc(pc, reason) \
if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) { \
fputs("pc: emit PC=( ", stdout); \
anv_dump_pipe_bits(convert_pc_to_bits(&(pc)), stdout); \
fprintf(stdout, ") reason: %s\n", __func__); \
fprintf(stdout, ") reason: %s\n", reason); \
}
ALWAYS_INLINE static void
@@ -108,9 +108,9 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.descriptors_dirty |= ~0;
#if GFX_VERx10 >= 125
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT);
anv_batch_emit(
&cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress =
@@ -126,7 +126,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
* this, we get GPU hangs when using multi-level command buffers which
* clear depth, reset state base address, and then go render stuff.
*/
genX(batch_emit_pipe_control)
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
#if GFX_VER >= 12
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
@@ -300,8 +300,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
if (cmd_buffer->state.current_pipeline == GPGPU)
bits |= ANV_PIPE_CS_STALL_BIT;
#endif
genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info,
bits);
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
bits);
}
static void
@@ -1429,9 +1429,9 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
* while the pipeline is completely drained and the caches are flushed,
* which involves a first PIPE_CONTROL flush which stalls the pipeline...
*/
genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
/* ...followed by a second pipelined PIPE_CONTROL that initiates
* invalidation of the relevant caches. Note that because RO invalidation
@@ -1447,18 +1447,18 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
* already guarantee that there is no concurrent GPGPU kernel execution
* (see SKL HSD 2132585).
*/
genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT);
/* Now send a third stalling flush to make sure that invalidation is
* complete when the L3 configuration registers are modified.
*/
genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DATA_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT);
genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg);
#endif /* GFX_VER >= 11 */
@@ -1687,8 +1687,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
}
/* Flush PC. */
genX(batch_emit_pipe_control_write)(batch, device->info, sync_op, addr,
0, flush_bits);
genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr,
0, flush_bits);
/* If the caller wants to know what flushes have been emitted,
* provide the bits based off the PIPE_CONTROL programmed bits.
@@ -1743,8 +1743,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch,
}
/* Invalidate PC. */
genX(batch_emit_pipe_control_write)(batch, device->info, sync_op, addr,
0, bits);
genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr,
0, bits);
#if GFX_VER == 12
if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) {
@@ -2914,14 +2914,16 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer,
ALWAYS_INLINE void
genX(batch_emit_pipe_control)(struct anv_batch *batch,
const struct intel_device_info *devinfo,
enum anv_pipe_bits bits)
enum anv_pipe_bits bits,
const char *reason)
{
genX(batch_emit_pipe_control_write)(batch,
devinfo,
NoWrite,
ANV_NULL_ADDRESS,
0,
bits);
bits,
reason);
}
ALWAYS_INLINE void
@@ -2930,7 +2932,8 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
uint32_t post_sync_op,
struct anv_address address,
uint32_t imm_data,
enum anv_pipe_bits bits)
enum anv_pipe_bits bits,
const char *reason)
{
/* XXX - insert all workarounds and GFX specific things below. */
@@ -2990,7 +2993,7 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
pipe.DestinationAddressType = DAT_PPGTT;
pipe.ImmediateData = imm_data;
anv_debug_dump_pc(pipe);
anv_debug_dump_pc(pipe, reason);
}
}
@@ -3007,7 +3010,7 @@ genX(batch_set_preemption)(struct anv_batch *batch,
}
/* Wa_16013994831 - we need to insert CS_STALL and 250 noops. */
genX(batch_emit_pipe_control)(batch, devinfo, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, devinfo, ANV_PIPE_CS_STALL_BIT);
for (unsigned i = 0; i < 250; i++)
anv_batch_emit(batch, GENX(MI_NOOP), noop);
@@ -3538,14 +3541,14 @@ genX(BeginCommandBuffer)(
static void
emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
{
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.IndirectStatePointersDisable = true;
pc.CommandStreamerStallEnable = true;
anv_debug_dump_pc(pc);
anv_debug_dump_pc(pc, __func__);
}
}
@@ -6130,15 +6133,15 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
*/
if (intel_device_info_is_atsm(device->info) &&
cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT |
ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT |
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
}
#endif
@@ -6812,9 +6815,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
* Emit depth flush after state that sends implicit depth flush.
*/
if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
}
if (info.depth_surf)
@@ -6835,7 +6838,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
*
* This also seems sufficient to handle Wa_14014097488.
*/
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
cmd_buffer->device->workaround_address, 0, 0);
}
@@ -6882,9 +6885,9 @@ cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer,
* Emit depth flush after state that sends implicit depth flush.
*/
if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT);
}
#endif /* GFX_VERx10 >= 125 */
}
@@ -7741,7 +7744,7 @@ void genX(CmdSetEvent2)(
pc_bits |= ANV_PIPE_CS_STALL_BIT;
}
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
event->state),
@@ -7776,7 +7779,7 @@ void genX(CmdResetEvent2)(
pc_bits |= ANV_PIPE_CS_STALL_BIT;
}
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData,
anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool,
event->state),
@@ -7924,12 +7927,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
}
case ANV_TIMESTAMP_CAPTURE_END_OF_PIPE:
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(batch, device->info, WriteTimestamp, addr, 0, 0);
break;
case ANV_TIMESTAMP_CAPTURE_AT_CS_STALL:
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(batch, device->info, WriteTimestamp, addr, 0,
ANV_PIPE_CS_STALL_BIT);
break;
@@ -7977,7 +7980,7 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
primitive_topology == _3DPRIM_LINESTRIP_BF ||
primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) &&
(vertex_count == 1 || vertex_count == 2)) {
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(batch, device->info, WriteImmediateData,
device->workaround_address, 0, 0);

View File

@@ -109,8 +109,8 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
/* Make sure the memcpy landed for the generating draw call to pick up
* the value.
*/
genX(batch_emit_pipe_control)(batch, cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT);
}
genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state,

View File

@@ -1204,8 +1204,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VERx10 == 125
/* Wa_14015946265: Send PC with CS stall after SO_DECL. */
genX(batch_emit_pipe_control)(&cmd_buffer->batch, device->info,
ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
ANV_PIPE_CS_STALL_BIT);
#endif
}
@@ -1664,8 +1664,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
#if INTEL_NEEDS_WA_18019816803
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_PSS_STALL_SYNC_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_PSS_STALL_SYNC_BIT);
}
#endif
@@ -1697,7 +1697,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
* streamer stall. However, the hardware seems to violently disagree.
* A full command streamer stall seems to be needed in both cases.
*/
genX(batch_emit_pipe_control)
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
ANV_PIPE_CS_STALL_BIT |
@@ -1725,7 +1725,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
* Again, the Skylake docs give a different set of flushes but the BDW
* flushes seem to work just as well.
*/
genX(batch_emit_pipe_control)
genx_batch_emit_pipe_control
(&cmd_buffer->batch, cmd_buffer->device->info,
ANV_PIPE_DEPTH_STALL_BIT |
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |

View File

@@ -174,7 +174,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
* state is not combined with other state changes.
*/
if (intel_needs_workaround(device->info, 16011411144))
genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) {
#if GFX_VER < 12
@@ -200,7 +200,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
/* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */
if (intel_needs_workaround(device->info, 16011411144))
genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST),
.StreamtoBufferSelects0 = (1 << 0),
@@ -216,7 +216,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device,
#if GFX_VERx10 == 125
/* Wa_14015946265: Send PC with CS stall after SO_DECL. */
genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT);
genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT);
#endif
anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) {

View File

@@ -206,9 +206,9 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch)
* always program PIPE_CONTROL either with CS Stall or PS sync stall. In
* both the cases set Render Target Cache Flush Enable".
*/
genX(batch_emit_pipe_control)
(batch, device->info, ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
genx_batch_emit_pipe_control(batch, device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
#endif
/* GEN:BUG:1607854226:
@@ -570,19 +570,19 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch)
anv_batch_emit(&batch, GENX(STATE_COMPUTE_MODE), zero);
anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, GPGPU);
anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * devinfo->subslice_total;
}
genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite,
ANV_NULL_ADDRESS,
0,
ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
genX(emit_pipeline_select)(&batch, _3D);
#endif
@@ -629,10 +629,10 @@ init_compute_queue_state(struct anv_queue *queue)
*/
if (intel_needs_workaround(devinfo, 14015782607) &&
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genX(batch_emit_pipe_control)(&batch, devinfo,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
genx_batch_emit_pipe_control(&batch, devinfo,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT |
ANV_PIPE_HDC_PIPELINE_FLUSH_BIT);
}
#if GFX_VERx10 >= 125
@@ -641,7 +641,7 @@ init_compute_queue_state(struct anv_queue *queue)
*/
if (intel_device_info_is_atsm(devinfo) &&
queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) {
genX(batch_emit_pipe_control)
genx_batch_emit_pipe_control
(&batch, devinfo,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STATE_CACHE_INVALIDATE_BIT |
@@ -1165,7 +1165,7 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer)
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
/* Issue 'nullprim' to commit the state. */
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info,
WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0);
#endif

View File

@@ -673,7 +673,7 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WritePSDepthCount, addr, 0,
ANV_PIPE_DEPTH_STALL_BIT | (cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0));
}
@@ -694,7 +694,7 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, addr,
available, ANV_PIPE_CS_STALL_BIT);
}
@@ -1022,20 +1022,20 @@ void genX(CmdBeginQueryIndexedEXT)(
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
mi_reg64(GENX(CL_INVOCATION_COUNT_num)));
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
/* TODO: This might only be necessary for certain stats */
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
uint32_t statistics = pool->vk.pipeline_statistics;
uint32_t offset = 8;
@@ -1048,10 +1048,10 @@ void genX(CmdBeginQueryIndexedEXT)(
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
break;
@@ -1107,10 +1107,10 @@ void genX(CmdBeginQueryIndexedEXT)(
const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
cmd_buffer->perf_query_pool = pool;
cmd_buffer->perf_reloc_idx = 0;
@@ -1169,10 +1169,10 @@ void genX(CmdBeginQueryIndexedEXT)(
}
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, false);
break;
}
@@ -1209,10 +1209,10 @@ void genX(CmdEndQueryIndexedEXT)(
/* Ensure previous commands have completed before capturing the register
* value.
*/
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
mi_store(&b, mi_mem64(anv_address_add(query_addr, 16)),
mi_reg64(GENX(CL_INVOCATION_COUNT_num)));
@@ -1221,10 +1221,10 @@ void genX(CmdEndQueryIndexedEXT)(
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
/* TODO: This might only be necessary for certain stats */
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
uint32_t statistics = pool->vk.pipeline_statistics;
uint32_t offset = 16;
@@ -1239,19 +1239,19 @@ void genX(CmdEndQueryIndexedEXT)(
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
emit_query_mi_availability(&b, query_addr, true);
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
cmd_buffer->perf_query_pool = pool;
if (!khr_perf_query_ensure_relocs(cmd_buffer))
@@ -1326,10 +1326,10 @@ void genX(CmdEndQueryIndexedEXT)(
}
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
genX(batch_emit_pipe_control)(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
genx_batch_emit_pipe_control(&cmd_buffer->batch,
cmd_buffer->device->info,
ANV_PIPE_CS_STALL_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
uint32_t marker_offset = intel_perf_marker_offset();
mi_store(&b, mi_mem64(anv_address_add(query_addr, marker_offset)),
mi_imm(cmd_buffer->intel_perf_marker));
@@ -1392,7 +1392,7 @@ void genX(CmdWriteTimestamp2)(
bool cs_stall_needed =
(GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
genX(batch_emit_pipe_control_write)
genx_batch_emit_pipe_control_write
(&cmd_buffer->batch, cmd_buffer->device->info, WriteTimestamp,
anv_address_add(query_addr, 8), 0,
cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0);