From 8d2dcd55d78cff504304e2c7aa8b76526c8be36b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?=
Date: Tue, 19 Sep 2023 09:35:16 +0300
Subject: [PATCH] anv: refactor to fix pipe control debugging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While earlier changes to pipe control emission allowed a debug dump of
each pipe control, they also changed the debug output to almost always
print the same reason/function for each PC. These changes fix the
output so that we print the original function name where the PC is
emitted. As an example:

  pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_batch_emit_pipe_control_write
  pc: emit PC=( ) reason: gfx11_batch_emit_pipe_control_write

changes back to:

  pc: emit PC=( +depth_flush +rt_flush +pb_stall +depth_stall ) reason: gfx11_emit_apply_pipe_flushes
  pc: emit PC=( ) reason: cmd_buffer_emit_depth_stencil

Signed-off-by: Tapani Pälli
Reviewed-by: José Roberto de Souza
Part-of:
---
 src/intel/vulkan/anv_genX.h                   |  12 +-
 src/intel/vulkan/genX_cmd_buffer.c            | 111 +++++++++---------
 .../vulkan/genX_cmd_draw_generated_indirect.h |   4 +-
 src/intel/vulkan/genX_gfx_state.c             |  12 +-
 src/intel/vulkan/genX_gpu_memcpy.c            |   6 +-
 src/intel/vulkan/genX_init_state.c            |  34 +++---
 src/intel/vulkan/genX_query.c                 |  86 +++++++-------
 7 files changed, 138 insertions(+), 127 deletions(-)

diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 43952a1f03c..0c2708cf8e5 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -224,7 +224,8 @@ genX(cmd_buffer_set_preemption)(struct anv_cmd_buffer *cmd_buffer, bool value);
 void
 genX(batch_emit_pipe_control)(struct anv_batch *batch,
                               const struct intel_device_info *devinfo,
-                              enum anv_pipe_bits bits);
+                              enum anv_pipe_bits bits,
+                              const char *reason);
 
 void
 genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
@@ -232,7 +233,14 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch,
                                     uint32_t post_sync_op,
                                     struct anv_address address,
                                     uint32_t imm_data,
-                                    enum anv_pipe_bits bits);
+                                    enum anv_pipe_bits bits,
+                                    const char *reason);
+
+#define genx_batch_emit_pipe_control(a, b, c) \
+genX(batch_emit_pipe_control) (a, b, c, __func__)
+
+#define genx_batch_emit_pipe_control_write(a, b, c, d, e, f) \
+genX(batch_emit_pipe_control_write) (a, b, c, d, e, f, __func__)
 
 void
 genX(batch_emit_breakpoint)(struct anv_batch *batch,
                             struct anv_device *device,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 2d7ce49060d..71a5d02c322 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -80,11 +80,11 @@ convert_pc_to_bits(struct GENX(PIPE_CONTROL) *pc) {
    return bits;
 }
 
-#define anv_debug_dump_pc(pc) \
+#define anv_debug_dump_pc(pc, reason) \
    if (INTEL_DEBUG(DEBUG_PIPE_CONTROL)) { \
       fputs("pc: emit PC=( ", stdout); \
       anv_dump_pipe_bits(convert_pc_to_bits(&(pc)), stdout); \
-      fprintf(stdout, ") reason: %s\n", __func__); \
+      fprintf(stdout, ") reason: %s\n", reason); \
    }
 
 ALWAYS_INLINE static void
@@ -108,9 +108,9 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.descriptors_dirty |= ~0;
 
 #if GFX_VERx10 >= 125
-   genX(batch_emit_pipe_control)(&cmd_buffer->batch,
-                                 cmd_buffer->device->info,
-                                 ANV_PIPE_CS_STALL_BIT);
+   genx_batch_emit_pipe_control(&cmd_buffer->batch,
+                                cmd_buffer->device->info,
+                                ANV_PIPE_CS_STALL_BIT);
    anv_batch_emit(
      &cmd_buffer->batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress = @@ -126,7 +126,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) * this, we get GPU hangs when using multi-level command buffers which * clear depth, reset state base address, and then go render stuff. */ - genX(batch_emit_pipe_control) + genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, #if GFX_VER >= 12 ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | @@ -300,8 +300,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.current_pipeline == GPGPU) bits |= ANV_PIPE_CS_STALL_BIT; #endif - genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info, - bits); + genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + bits); } static void @@ -1429,9 +1429,9 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, * while the pipeline is completely drained and the caches are flushed, * which involves a first PIPE_CONTROL flush which stalls the pipeline... */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info, - ANV_PIPE_DATA_CACHE_FLUSH_BIT | - ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_CS_STALL_BIT); /* ...followed by a second pipelined PIPE_CONTROL that initiates * invalidation of the relevant caches. Note that because RO invalidation @@ -1447,18 +1447,18 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, * already guarantee that there is no concurrent GPGPU kernel execution * (see SKL HSD 2132585). */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info, - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | - ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | - ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | + ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT); /* Now send a third stalling flush to make sure that invalidation is * complete when the L3 configuration registers are modified. */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info, - ANV_PIPE_DATA_CACHE_FLUSH_BIT | - ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_CS_STALL_BIT); genX(emit_l3_config)(&cmd_buffer->batch, cmd_buffer->device, cfg); #endif /* GFX_VER >= 11 */ @@ -1687,8 +1687,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, } /* Flush PC. */ - genX(batch_emit_pipe_control_write)(batch, device->info, sync_op, addr, - 0, flush_bits); + genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr, + 0, flush_bits); /* If the caller wants to know what flushes have been emitted, * provide the bits based off the PIPE_CONTROL programmed bits. @@ -1743,8 +1743,8 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, } /* Invalidate PC. 
*/ - genX(batch_emit_pipe_control_write)(batch, device->info, sync_op, addr, - 0, bits); + genx_batch_emit_pipe_control_write(batch, device->info, sync_op, addr, + 0, bits); #if GFX_VER == 12 if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) { @@ -2914,14 +2914,16 @@ cmd_buffer_flush_mesh_inline_data(struct anv_cmd_buffer *cmd_buffer, ALWAYS_INLINE void genX(batch_emit_pipe_control)(struct anv_batch *batch, const struct intel_device_info *devinfo, - enum anv_pipe_bits bits) + enum anv_pipe_bits bits, + const char *reason) { genX(batch_emit_pipe_control_write)(batch, devinfo, NoWrite, ANV_NULL_ADDRESS, 0, - bits); + bits, + reason); } ALWAYS_INLINE void @@ -2930,7 +2932,8 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch, uint32_t post_sync_op, struct anv_address address, uint32_t imm_data, - enum anv_pipe_bits bits) + enum anv_pipe_bits bits, + const char *reason) { /* XXX - insert all workarounds and GFX specific things below. */ @@ -2990,7 +2993,7 @@ genX(batch_emit_pipe_control_write)(struct anv_batch *batch, pipe.DestinationAddressType = DAT_PPGTT; pipe.ImmediateData = imm_data; - anv_debug_dump_pc(pipe); + anv_debug_dump_pc(pipe, reason); } } @@ -3007,7 +3010,7 @@ genX(batch_set_preemption)(struct anv_batch *batch, } /* Wa_16013994831 - we need to insert CS_STALL and 250 noops. */ - genX(batch_emit_pipe_control)(batch, devinfo, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, devinfo, ANV_PIPE_CS_STALL_BIT); for (unsigned i = 0; i < 250; i++) anv_batch_emit(batch, GENX(MI_NOOP), noop); @@ -3538,14 +3541,14 @@ genX(BeginCommandBuffer)( static void emit_isp_disable(struct anv_cmd_buffer *cmd_buffer) { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.IndirectStatePointersDisable = true; pc.CommandStreamerStallEnable = true; - anv_debug_dump_pc(pc); + anv_debug_dump_pc(pc, __func__); } } @@ -6130,15 +6133,15 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, */ if (intel_device_info_is_atsm(device->info) && cmd_buffer->queue_family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | - ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | - ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | - ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | - ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | + ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | + ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | + ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | + ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); } #endif @@ -6812,9 +6815,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * Emit depth flush after state that sends implicit depth flush. 
*/ if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); } if (info.depth_surf) @@ -6835,7 +6838,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) * * This also seems sufficient to handle Wa_14014097488. */ - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0); } @@ -6882,9 +6885,9 @@ cmd_buffer_emit_cps_control_buffer(struct anv_cmd_buffer *cmd_buffer, * Emit depth flush after state that sends implicit depth flush. */ if (intel_needs_workaround(cmd_buffer->device->info, 14016712196)) { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT); } #endif /* GFX_VERx10 >= 125 */ } @@ -7741,7 +7744,7 @@ void genX(CmdSetEvent2)( pc_bits |= ANV_PIPE_CS_STALL_BIT; } - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, event->state), @@ -7776,7 +7779,7 @@ void genX(CmdResetEvent2)( pc_bits |= ANV_PIPE_CS_STALL_BIT; } - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, anv_state_pool_state_address(&cmd_buffer->device->dynamic_state_pool, event->state), @@ -7924,12 +7927,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch, } case ANV_TIMESTAMP_CAPTURE_END_OF_PIPE: - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (batch, device->info, WriteTimestamp, addr, 0, 0); break; case ANV_TIMESTAMP_CAPTURE_AT_CS_STALL: - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (batch, device->info, WriteTimestamp, addr, 0, ANV_PIPE_CS_STALL_BIT); break; @@ -7977,7 +7980,7 @@ genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch, primitive_topology == _3DPRIM_LINESTRIP_BF || primitive_topology == _3DPRIM_LINESTRIP_CONT_BF) && (vertex_count == 1 || vertex_count == 2)) { - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (batch, device->info, WriteImmediateData, device->workaround_address, 0, 0); diff --git a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h index bf62f24cdf3..6a0cdf937fc 100644 --- a/src/intel/vulkan/genX_cmd_draw_generated_indirect.h +++ b/src/intel/vulkan/genX_cmd_draw_generated_indirect.h @@ -109,8 +109,8 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer, /* Make sure the memcpy landed for the generating draw call to pick up * the value. 
*/ - genX(batch_emit_pipe_control)(batch, cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT); } genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state, diff --git a/src/intel/vulkan/genX_gfx_state.c b/src/intel/vulkan/genX_gfx_state.c index c460a6ea46d..f318d3204d7 100644 --- a/src/intel/vulkan/genX_gfx_state.c +++ b/src/intel/vulkan/genX_gfx_state.c @@ -1204,8 +1204,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) #if GFX_VERx10 == 125 /* Wa_14015946265: Send PC with CS stall after SO_DECL. */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, device->info, - ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info, + ANV_PIPE_CS_STALL_BIT); #endif } @@ -1664,8 +1664,8 @@ genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer) #if INTEL_NEEDS_WA_18019816803 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, cmd_buffer->device->info, - ANV_PIPE_PSS_STALL_SYNC_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info, + ANV_PIPE_PSS_STALL_SYNC_BIT); } #endif @@ -1697,7 +1697,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) * streamer stall. However, the hardware seems to violently disagree. * A full command streamer stall seems to be needed in both cases. */ - genX(batch_emit_pipe_control) + genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT | @@ -1725,7 +1725,7 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable) * Again, the Skylake docs give a different set of flushes but the BDW * flushes seem to work just as well. */ - genX(batch_emit_pipe_control) + genx_batch_emit_pipe_control (&cmd_buffer->batch, cmd_buffer->device->info, ANV_PIPE_DEPTH_STALL_BIT | ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index f0a17df4baf..112f94fd2c0 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -174,7 +174,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, * state is not combined with other state changes. */ if (intel_needs_workaround(device->info, 16011411144)) - genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); anv_batch_emit(batch, GENX(3DSTATE_SO_BUFFER), sob) { #if GFX_VER < 12 @@ -200,7 +200,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, /* Wa_16011411144: also CS_STALL after touching SO_BUFFER change */ if (intel_needs_workaround(device->info, 16011411144)) - genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); dw = anv_batch_emitn(batch, 5, GENX(3DSTATE_SO_DECL_LIST), .StreamtoBufferSelects0 = (1 << 0), @@ -216,7 +216,7 @@ emit_so_memcpy(struct anv_batch *batch, struct anv_device *device, #if GFX_VERx10 == 125 /* Wa_14015946265: Send PC with CS stall after SO_DECL. 
*/ - genX(batch_emit_pipe_control)(batch, device->info, ANV_PIPE_CS_STALL_BIT); + genx_batch_emit_pipe_control(batch, device->info, ANV_PIPE_CS_STALL_BIT); #endif anv_batch_emit(batch, GENX(3DSTATE_STREAMOUT), so) { diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index c5f98dc351e..46bdf6fadae 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -206,9 +206,9 @@ init_common_queue_state(struct anv_queue *queue, struct anv_batch *batch) * always program PIPE_CONTROL either with CS Stall or PS sync stall. In * both the cases set Render Target Cache Flush Enable". */ - genX(batch_emit_pipe_control) - (batch, device->info, ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); + genx_batch_emit_pipe_control(batch, device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT); #endif /* GEN:BUG:1607854226: @@ -570,19 +570,19 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) anv_batch_emit(&batch, GENX(STATE_COMPUTE_MODE), zero); anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero); anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero); - genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite, - ANV_NULL_ADDRESS, - 0, - ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); + genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite, + ANV_NULL_ADDRESS, + 0, + ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); genX(emit_pipeline_select)(&batch, GPGPU); anv_batch_emit(&batch, GENX(CFE_STATE), cfe) { cfe.MaximumNumberofThreads = devinfo->max_cs_threads * devinfo->subslice_total; } - genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite, - ANV_NULL_ADDRESS, - 0, - ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); + genx_batch_emit_pipe_control_write(&batch, device->info, NoWrite, + ANV_NULL_ADDRESS, + 0, + ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS); genX(emit_pipeline_select)(&batch, _3D); #endif @@ -629,10 +629,10 @@ init_compute_queue_state(struct anv_queue *queue) */ if (intel_needs_workaround(devinfo, 14015782607) && queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - genX(batch_emit_pipe_control)(&batch, devinfo, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | - ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); + genx_batch_emit_pipe_control(&batch, devinfo, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT | + ANV_PIPE_HDC_PIPELINE_FLUSH_BIT); } #if GFX_VERx10 >= 125 @@ -641,7 +641,7 @@ init_compute_queue_state(struct anv_queue *queue) */ if (intel_device_info_is_atsm(devinfo) && queue->family->engine_class == INTEL_ENGINE_CLASS_COMPUTE) { - genX(batch_emit_pipe_control) + genx_batch_emit_pipe_control (&batch, devinfo, ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | @@ -1165,7 +1165,7 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer) anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_URB_ALLOC_TASK), zero); /* Issue 'nullprim' to commit the state. 
*/ - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0); #endif diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index b22f7e5a7f6..35612b95553 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -673,7 +673,7 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4); - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WritePSDepthCount, addr, 0, ANV_PIPE_DEPTH_STALL_BIT | (cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0)); } @@ -694,7 +694,7 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteImmediateData, addr, available, ANV_PIPE_CS_STALL_BIT); } @@ -1022,20 +1022,20 @@ void genX(CmdBeginQueryIndexedEXT)( break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)), mi_reg64(GENX(CL_INVOCATION_COUNT_num))); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { /* TODO: This might only be necessary for certain stats */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); uint32_t statistics = pool->vk.pipeline_statistics; uint32_t offset = 8; @@ -1048,10 +1048,10 @@ void genX(CmdBeginQueryIndexedEXT)( } case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_xfb_query(&b, index, anv_address_add(query_addr, 8)); break; @@ -1107,10 +1107,10 @@ void genX(CmdBeginQueryIndexedEXT)( const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class; mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]); - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); cmd_buffer->perf_query_pool = pool; cmd_buffer->perf_reloc_idx = 0; @@ -1169,10 +1169,10 @@ void genX(CmdBeginQueryIndexedEXT)( } case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + 
ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, false); break; } @@ -1209,10 +1209,10 @@ void genX(CmdEndQueryIndexedEXT)( /* Ensure previous commands have completed before capturing the register * value. */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); mi_store(&b, mi_mem64(anv_address_add(query_addr, 16)), mi_reg64(GENX(CL_INVOCATION_COUNT_num))); @@ -1221,10 +1221,10 @@ void genX(CmdEndQueryIndexedEXT)( case VK_QUERY_TYPE_PIPELINE_STATISTICS: { /* TODO: This might only be necessary for certain stats */ - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); uint32_t statistics = pool->vk.pipeline_statistics; uint32_t offset = 16; @@ -1239,19 +1239,19 @@ void genX(CmdEndQueryIndexedEXT)( } case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); emit_xfb_query(&b, index, anv_address_add(query_addr, 16)); emit_query_mi_availability(&b, query_addr, true); break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); cmd_buffer->perf_query_pool = pool; if (!khr_perf_query_ensure_relocs(cmd_buffer)) @@ -1326,10 +1326,10 @@ void genX(CmdEndQueryIndexedEXT)( } case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: { - genX(batch_emit_pipe_control)(&cmd_buffer->batch, - cmd_buffer->device->info, - ANV_PIPE_CS_STALL_BIT | - ANV_PIPE_STALL_AT_SCOREBOARD_BIT); + genx_batch_emit_pipe_control(&cmd_buffer->batch, + cmd_buffer->device->info, + ANV_PIPE_CS_STALL_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT); uint32_t marker_offset = intel_perf_marker_offset(); mi_store(&b, mi_mem64(anv_address_add(query_addr, marker_offset)), mi_imm(cmd_buffer->intel_perf_marker)); @@ -1392,7 +1392,7 @@ void genX(CmdWriteTimestamp2)( bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4); - genX(batch_emit_pipe_control_write) + genx_batch_emit_pipe_control_write (&cmd_buffer->batch, cmd_buffer->device->info, WriteTimestamp, anv_address_add(query_addr, 8), 0, cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0);
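
The fix works because __func__ expands at the macro's use site rather than
inside the shared PIPE_CONTROL helper, so the debug line reports the caller
instead of the helper. Below is a minimal standalone sketch of that pattern;
the names emit_pc, emit_pc_with_reason and EMIT_PC are illustrative only and
are not part of the driver.

#include <stdio.h>

/* Old shape: the helper prints its own __func__, so every call is
 * reported with the helper's name as the reason. */
static void
emit_pc(unsigned bits)
{
   printf("pc: emit PC=( 0x%x ) reason: %s\n", bits, __func__);
}

/* New shape: the helper takes the reason as a parameter... */
static void
emit_pc_with_reason(unsigned bits, const char *reason)
{
   printf("pc: emit PC=( 0x%x ) reason: %s\n", bits, reason);
}

/* ...and a wrapper macro expands __func__ at the call site, so the
 * output names the calling function instead of the helper. */
#define EMIT_PC(bits) emit_pc_with_reason((bits), __func__)

static void
flush_depth_cache(void)
{
   emit_pc(0x1);   /* prints: reason: emit_pc */
   EMIT_PC(0x1);   /* prints: reason: flush_depth_cache */
}

int
main(void)
{
   flush_depth_cache();
   return 0;
}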