From 04f6ba113c6765fa902847eb84fb595861bc9cbb Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 8 Dec 2020 11:06:48 +0100 Subject: [PATCH] ac/sqtt: add ac_thread_trace_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Acked-by: Samuel Pitoiset Part-of: --- src/amd/Makefile.sources | 3 +- src/amd/common/ac_sqtt.h | 40 +++++++++++ src/amd/common/meson.build | 1 + src/amd/vulkan/layers/radv_sqtt_layer.c | 22 +++--- src/amd/vulkan/radv_cmd_buffer.c | 2 +- src/amd/vulkan/radv_device.c | 10 +-- src/amd/vulkan/radv_private.h | 9 +-- src/amd/vulkan/radv_sqtt.c | 90 ++++++++++++------------- 8 files changed, 107 insertions(+), 70 deletions(-) create mode 100644 src/amd/common/ac_sqtt.h diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources index 2e467687ea5..3c408ff3816 100644 --- a/src/amd/Makefile.sources +++ b/src/amd/Makefile.sources @@ -51,7 +51,8 @@ AMD_COMMON_FILES = \ common/ac_shader_util.c \ common/ac_shader_util.h \ common/ac_shadowed_regs.c \ - common/ac_shadowed_regs.h + common/ac_shadowed_regs.h \ + common/ac_sqtt.h AMD_COMMON_LLVM_FILES = \ llvm/ac_llvm_build.c \ diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h new file mode 100644 index 00000000000..778850da209 --- /dev/null +++ b/src/amd/common/ac_sqtt.h @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * Copyright 2020 Valve Corporation + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef AC_SQTT_H +#define AC_SQTT_H +/* Thread trace (SQTT) state kept per device; embedded in struct radv_device as the thread_trace member. */ +struct ac_thread_trace_data { + struct radeon_cmdbuf *start_cs[2]; /* command streams that start tracing; RADV builds one per queue family (general, compute) */ + struct radeon_cmdbuf *stop_cs[2]; /* command streams that stop tracing, indexed the same way as start_cs */ + /* struct radeon_winsys_bo or struct pb_buffer */ + void *bo; /* trace buffer object; RADV treats a NULL bo as thread tracing being disabled */ + void *ptr; /* CPU mapping of bo (RADV stores the result of ws->buffer_map here) */ + uint32_t buffer_size; /* trace buffer size in bytes for one SE; NOTE(review): uint32_t is used but this header includes nothing itself, so it relies on the includer providing stdint.h */ + int start_frame; /* frame number at which a capture is triggered; RADV passes -1 as the default when reading RADV_THREAD_TRACE */ + char *trigger_file; /* path of a file that RADV checks with access() and unlinks to trigger a capture, or NULL; RADV strdup()s it and free()s it on device teardown */ +}; + +#endif diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 216506ff5b3..c734db5b009 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -80,6 +80,7 @@ amd_common_files = files( 'ac_debug.h', 'ac_shadowed_regs.c', 'ac_shadowed_regs.h', + 'ac_sqtt.h', ) libamd_common = static_library( diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 2a879b498a3..bf80dce1dd3 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -437,7 +437,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) struct rgp_sqtt_marker_cb_start marker = {0}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START; @@ -462,7 +462,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) struct rgp_sqtt_marker_cb_end marker = {0}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END; @@ -476,7 +476,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer) { - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, @@ -486,7 +486,7 @@ radv_describe_draw(struct radv_cmd_buffer *cmd_buffer) void radv_describe_dispatch(struct radv_cmd_buffer 
*cmd_buffer, int x, int y, int z) { - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; radv_write_event_with_dims_marker(cmd_buffer, @@ -514,7 +514,7 @@ radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer) struct rgp_sqtt_marker_barrier_end marker = {0}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo) || + if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end) return; @@ -571,7 +571,7 @@ radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, struct rgp_sqtt_marker_barrier_start marker = {0}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; radv_describe_barrier_end_delayed(cmd_buffer); @@ -597,7 +597,7 @@ radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, struct rgp_sqtt_marker_layout_transition marker = {0}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace.bo)) return; marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION; @@ -635,11 +635,11 @@ radv_handle_thread_trace(VkQueue _queue) if (radv_get_thread_trace(queue, &thread_trace)) radv_dump_thread_trace(queue->device, &thread_trace); } else { - bool frame_trigger = num_frames == queue->device->thread_trace_start_frame; + bool frame_trigger = num_frames == queue->device->thread_trace.start_frame; bool file_trigger = false; - if (queue->device->thread_trace_trigger_file && - access(queue->device->thread_trace_trigger_file, W_OK) == 0) { - if (unlink(queue->device->thread_trace_trigger_file) == 0) { + if (queue->device->thread_trace.trigger_file && + access(queue->device->thread_trace.trigger_file, W_OK) == 0) { + if (unlink(queue->device->thread_trace.trigger_file) == 0) { file_trigger = true; } else { /* Do not enable 
tracing if we cannot remove the file, diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c362b0f03c8..c4d1d1e7e2c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -627,7 +627,7 @@ static void radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags) { - if (unlikely(cmd_buffer->device->thread_trace_bo)) { + if (unlikely(cmd_buffer->device->thread_trace.bo)) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index b8a6fb026bb..efa197ccb14 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2908,13 +2908,13 @@ VkResult radv_CreateDevice( } /* Default buffer size set to 1MB per SE. */ - device->thread_trace_buffer_size = + device->thread_trace.buffer_size = radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024); - device->thread_trace_start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1); + device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1); const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER"); if (trigger_file) - device->thread_trace_trigger_file = strdup(trigger_file); + device->thread_trace.trigger_file = strdup(trigger_file); if (!radv_thread_trace_init(device)) goto fail; @@ -3013,7 +3013,7 @@ fail: radv_bo_list_finish(&device->bo_list); radv_thread_trace_finish(device); - free(device->thread_trace_trigger_file); + free(device->thread_trace.trigger_file); radv_trap_handler_finish(device); @@ -3073,7 +3073,7 @@ void radv_DestroyDevice( u_cnd_monotonic_destroy(&device->timeline_cond); radv_bo_list_finish(&device->bo_list); - free(device->thread_trace_trigger_file); + free(device->thread_trace.trigger_file); radv_thread_trace_finish(device); vk_free(&device->vk.alloc, device); diff 
--git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index df4a9e3c344..d2a674dfbff 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -67,6 +67,7 @@ #include "radv_descriptor_set.h" #include "radv_extensions.h" #include "sid.h" +#include "ac_sqtt.h" /* Pre-declarations needed for WSI entrypoints */ struct wl_surface; @@ -846,13 +847,7 @@ struct radv_device { struct u_cnd_monotonic timeline_cond; /* Thread trace. */ - struct radeon_cmdbuf *thread_trace_start_cs[2]; - struct radeon_cmdbuf *thread_trace_stop_cs[2]; - struct radeon_winsys_bo *thread_trace_bo; - void *thread_trace_ptr; - uint32_t thread_trace_buffer_size; - int thread_trace_start_frame; - char *thread_trace_trigger_file; + struct ac_thread_trace_data thread_trace; /* Trap handler. */ struct radv_shader_variant *trap_handler_shader; diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 57f8856264d..634efa5b78f 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -42,7 +42,7 @@ radv_thread_trace_get_data_offset(struct radv_device *device, unsigned se) data_offset = align64(sizeof(struct radv_thread_trace_info) * 4, 1 << SQTT_BUFFER_ALIGN_SHIFT); - data_offset += device->thread_trace_buffer_size * se; + data_offset += device->thread_trace.buffer_size * se; return data_offset; } @@ -50,14 +50,14 @@ radv_thread_trace_get_data_offset(struct radv_device *device, unsigned se) static uint64_t radv_thread_trace_get_info_va(struct radv_device *device, unsigned se) { - uint64_t va = radv_buffer_get_va(device->thread_trace_bo); + uint64_t va = radv_buffer_get_va(device->thread_trace.bo); return va + radv_thread_trace_get_info_offset(se); } static uint64_t radv_thread_trace_get_data_va(struct radv_device *device, unsigned se) { - uint64_t va = radv_buffer_get_va(device->thread_trace_bo); + uint64_t va = radv_buffer_get_va(device->thread_trace.bo); return va + radv_thread_trace_get_data_offset(device, se); } @@ -66,7 +66,7 @@ 
radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t queue_family_index) { - uint32_t shifted_size = device->thread_trace_buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; + uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; unsigned max_se = device->physical_device->rad_info.max_se; assert(device->physical_device->rad_info.chip_class >= GFX8); @@ -412,80 +412,80 @@ radv_thread_trace_init_cs(struct radv_device *device) /* Thread trace start CS. */ for (int family = 0; family < 2; ++family) { - device->thread_trace_start_cs[family] = ws->cs_create(ws, family); - if (!device->thread_trace_start_cs[family]) + device->thread_trace.start_cs[family] = ws->cs_create(ws, family); + if (!device->thread_trace.start_cs[family]) return; switch (family) { case RADV_QUEUE_GENERAL: - radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - radeon_emit(device->thread_trace_start_cs[family], CC0_UPDATE_LOAD_ENABLES(1)); - radeon_emit(device->thread_trace_start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1)); + radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + radeon_emit(device->thread_trace.start_cs[family], CC0_UPDATE_LOAD_ENABLES(1)); + radeon_emit(device->thread_trace.start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1)); break; case RADV_QUEUE_COMPUTE: - radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_NOP, 0, 0)); - radeon_emit(device->thread_trace_start_cs[family], 0); + radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_NOP, 0, 0)); + radeon_emit(device->thread_trace.start_cs[family], 0); break; } - radv_cs_add_buffer(ws, device->thread_trace_start_cs[family], - device->thread_trace_bo); + radv_cs_add_buffer(ws, device->thread_trace.start_cs[family], + device->thread_trace.bo); /* Make sure to wait-for-idle before starting SQTT. 
*/ radv_emit_wait_for_idle(device, - device->thread_trace_start_cs[family], + device->thread_trace.start_cs[family], family); /* Enable SQG events that collects thread trace data. */ radv_emit_spi_config_cntl(device, - device->thread_trace_start_cs[family], + device->thread_trace.start_cs[family], true); radv_emit_thread_trace_start(device, - device->thread_trace_start_cs[family], + device->thread_trace.start_cs[family], family); - result = ws->cs_finalize(device->thread_trace_start_cs[family]); + result = ws->cs_finalize(device->thread_trace.start_cs[family]); if (result != VK_SUCCESS) return; } /* Thread trace stop CS. */ for (int family = 0; family < 2; ++family) { - device->thread_trace_stop_cs[family] = ws->cs_create(ws, family); - if (!device->thread_trace_stop_cs[family]) + device->thread_trace.stop_cs[family] = ws->cs_create(ws, family); + if (!device->thread_trace.stop_cs[family]) return; switch (family) { case RADV_QUEUE_GENERAL: - radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - radeon_emit(device->thread_trace_stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1)); - radeon_emit(device->thread_trace_stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1)); + radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + radeon_emit(device->thread_trace.stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1)); + radeon_emit(device->thread_trace.stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1)); break; case RADV_QUEUE_COMPUTE: - radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_NOP, 0, 0)); - radeon_emit(device->thread_trace_stop_cs[family], 0); + radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_NOP, 0, 0)); + radeon_emit(device->thread_trace.stop_cs[family], 0); break; } - radv_cs_add_buffer(ws, device->thread_trace_stop_cs[family], - device->thread_trace_bo); + radv_cs_add_buffer(ws, device->thread_trace.stop_cs[family], + device->thread_trace.bo); /* Make sure to wait-for-idle before stopping SQTT. 
*/ radv_emit_wait_for_idle(device, - device->thread_trace_stop_cs[family], + device->thread_trace.stop_cs[family], family); radv_emit_thread_trace_stop(device, - device->thread_trace_stop_cs[family], + device->thread_trace.stop_cs[family], family); /* Restore previous state by disabling SQG events. */ radv_emit_spi_config_cntl(device, - device->thread_trace_stop_cs[family], + device->thread_trace.stop_cs[family], false); - result = ws->cs_finalize(device->thread_trace_stop_cs[family]); + result = ws->cs_finalize(device->thread_trace.stop_cs[family]); if (result != VK_SUCCESS) return; } @@ -500,25 +500,25 @@ radv_thread_trace_init_bo(struct radv_device *device) /* The buffer size and address need to be aligned in HW regs. Align the * size as early as possible so that we do all the allocation & addressing * correctly. */ - device->thread_trace_buffer_size = align64(device->thread_trace_buffer_size, + device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT); /* Compute total size of the thread trace BO for 4 SEs. 
*/ size = align64(sizeof(struct radv_thread_trace_info) * 4, 1 << SQTT_BUFFER_ALIGN_SHIFT); - size += device->thread_trace_buffer_size * 4; + size += device->thread_trace.buffer_size * 4; - device->thread_trace_bo = ws->buffer_create(ws, size, 4096, + device->thread_trace.bo = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, RADV_BO_PRIORITY_SCRATCH); - if (!device->thread_trace_bo) + if (!device->thread_trace.bo) return false; - device->thread_trace_ptr = ws->buffer_map(device->thread_trace_bo); - if (!device->thread_trace_ptr) + device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo); + if (!device->thread_trace.ptr) return false; return true; @@ -539,14 +539,14 @@ radv_thread_trace_finish(struct radv_device *device) { struct radeon_winsys *ws = device->ws; - if (unlikely(device->thread_trace_bo)) - ws->buffer_destroy(device->thread_trace_bo); + if (unlikely(device->thread_trace.bo)) + ws->buffer_destroy(device->thread_trace.bo); for (unsigned i = 0; i < 2; i++) { - if (device->thread_trace_start_cs[i]) - ws->cs_destroy(device->thread_trace_start_cs[i]); - if (device->thread_trace_stop_cs[i]) - ws->cs_destroy(device->thread_trace_stop_cs[i]); + if (device->thread_trace.start_cs[i]) + ws->cs_destroy(device->thread_trace.start_cs[i]); + if (device->thread_trace.stop_cs[i]) + ws->cs_destroy(device->thread_trace.stop_cs[i]); } } @@ -554,7 +554,7 @@ bool radv_begin_thread_trace(struct radv_queue *queue) { int family = queue->queue_family_index; - struct radeon_cmdbuf *cs = queue->device->thread_trace_start_cs[family]; + struct radeon_cmdbuf *cs = queue->device->thread_trace.start_cs[family]; return radv_queue_internal_submit(queue, cs); } @@ -562,7 +562,7 @@ bool radv_end_thread_trace(struct radv_queue *queue) { int family = queue->queue_family_index; - struct radeon_cmdbuf *cs = queue->device->thread_trace_stop_cs[family]; + struct radeon_cmdbuf *cs = 
queue->device->thread_trace.stop_cs[family]; return radv_queue_internal_submit(queue, cs); } @@ -602,7 +602,7 @@ radv_get_thread_trace(struct radv_queue *queue, { struct radv_device *device = queue->device; unsigned max_se = device->physical_device->rad_info.max_se; - void *thread_trace_ptr = device->thread_trace_ptr; + void *thread_trace_ptr = device->thread_trace.ptr; memset(thread_trace, 0, sizeof(*thread_trace)); thread_trace->num_traces = max_se;