ac/sqtt: add ac_thread_trace_data
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8002>
This commit is contained in:
@@ -51,7 +51,8 @@ AMD_COMMON_FILES = \
|
|||||||
common/ac_shader_util.c \
|
common/ac_shader_util.c \
|
||||||
common/ac_shader_util.h \
|
common/ac_shader_util.h \
|
||||||
common/ac_shadowed_regs.c \
|
common/ac_shadowed_regs.c \
|
||||||
common/ac_shadowed_regs.h
|
common/ac_shadowed_regs.h \
|
||||||
|
common/ac_sqtt.h
|
||||||
|
|
||||||
AMD_COMMON_LLVM_FILES = \
|
AMD_COMMON_LLVM_FILES = \
|
||||||
llvm/ac_llvm_build.c \
|
llvm/ac_llvm_build.c \
|
||||||
|
40
src/amd/common/ac_sqtt.h
Normal file
40
src/amd/common/ac_sqtt.h
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Advanced Micro Devices, Inc.
|
||||||
|
* Copyright 2020 Valve Corporation
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||||
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||||
|
* the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AC_SQTT_H
|
||||||
|
#define AC_SQTT_H
|
||||||
|
|
||||||
|
/*
 * Thread trace (SQTT) state shared through the common AMD code. RADV embeds
 * one instance in its device structure as radv_device::thread_trace.
 *
 * NOTE(review): uint32_t is used below, but the header as shown includes no
 * <stdint.h>; it appears to rely on the including file -- confirm, and
 * consider including <stdint.h> here so the header is self-contained.
 */
struct ac_thread_trace_data {
|
||||||
|
/* Command streams that start tracing, one per queue family. RADV builds
 * and finalizes them in radv_thread_trace_init_cs() and submits
 * start_cs[family] from radv_begin_thread_trace(). */
struct radeon_cmdbuf *start_cs[2];
|
||||||
|
/* Command streams that stop tracing, one per queue family. RADV submits
 * stop_cs[family] from radv_end_thread_trace(). */
struct radeon_cmdbuf *stop_cs[2];
|
||||||
|
/* Buffer object holding the trace; a non-NULL bo is what RADV tests to
 * decide whether thread tracing is enabled. Kept as void * because the
 * concrete type is one of the following: */
/* struct radeon_winsys_bo or struct pb_buffer */
|
||||||
|
void *bo;
|
||||||
|
/* CPU mapping of bo (RADV: the result of ws->buffer_map(bo)). */
void *ptr;
|
||||||
|
/* Trace buffer size per SE. RADV defaults it to 1MB and rounds it up to
 * 1 << SQTT_BUFFER_ALIGN_SHIFT before allocating the BO. */
uint32_t buffer_size;
|
||||||
|
/* Frame count at which a capture is triggered; RADV compares it against
 * its frame counter and defaults it to -1 (presumably "never" -- confirm). */
int start_frame;
|
||||||
|
/* Path of a trigger file. RADV strdup()s it from RADV_THREAD_TRACE_TRIGGER
 * and free()s it on device teardown; a capture is triggered when the file
 * is writable and can be unlinked. Presumably NULL when unset -- confirm. */
char *trigger_file;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@@ -80,6 +80,7 @@ amd_common_files = files(
|
|||||||
'ac_debug.h',
|
'ac_debug.h',
|
||||||
'ac_shadowed_regs.c',
|
'ac_shadowed_regs.c',
|
||||||
'ac_shadowed_regs.h',
|
'ac_shadowed_regs.h',
|
||||||
|
'ac_sqtt.h',
|
||||||
)
|
)
|
||||||
|
|
||||||
libamd_common = static_library(
|
libamd_common = static_library(
|
||||||
|
@@ -437,7 +437,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
struct rgp_sqtt_marker_cb_start marker = {0};
|
struct rgp_sqtt_marker_cb_start marker = {0};
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
|
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
|
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
|
||||||
@@ -462,7 +462,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
struct rgp_sqtt_marker_cb_end marker = {0};
|
struct rgp_sqtt_marker_cb_end marker = {0};
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
|
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
|
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
|
||||||
@@ -476,7 +476,7 @@ radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
void
|
void
|
||||||
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
|
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
|
||||||
{
|
{
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type,
|
radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type,
|
||||||
@@ -486,7 +486,7 @@ radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
void
|
void
|
||||||
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
|
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
|
||||||
{
|
{
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
radv_write_event_with_dims_marker(cmd_buffer,
|
radv_write_event_with_dims_marker(cmd_buffer,
|
||||||
@@ -514,7 +514,7 @@ radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
struct rgp_sqtt_marker_barrier_end marker = {0};
|
struct rgp_sqtt_marker_barrier_end marker = {0};
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
|
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo) ||
|
if (likely(!cmd_buffer->device->thread_trace.bo) ||
|
||||||
!cmd_buffer->state.pending_sqtt_barrier_end)
|
!cmd_buffer->state.pending_sqtt_barrier_end)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -571,7 +571,7 @@ radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
struct rgp_sqtt_marker_barrier_start marker = {0};
|
struct rgp_sqtt_marker_barrier_start marker = {0};
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
|
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
radv_describe_barrier_end_delayed(cmd_buffer);
|
radv_describe_barrier_end_delayed(cmd_buffer);
|
||||||
@@ -597,7 +597,7 @@ radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
struct rgp_sqtt_marker_layout_transition marker = {0};
|
struct rgp_sqtt_marker_layout_transition marker = {0};
|
||||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||||
|
|
||||||
if (likely(!cmd_buffer->device->thread_trace_bo))
|
if (likely(!cmd_buffer->device->thread_trace.bo))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
|
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
|
||||||
@@ -635,11 +635,11 @@ radv_handle_thread_trace(VkQueue _queue)
|
|||||||
if (radv_get_thread_trace(queue, &thread_trace))
|
if (radv_get_thread_trace(queue, &thread_trace))
|
||||||
radv_dump_thread_trace(queue->device, &thread_trace);
|
radv_dump_thread_trace(queue->device, &thread_trace);
|
||||||
} else {
|
} else {
|
||||||
bool frame_trigger = num_frames == queue->device->thread_trace_start_frame;
|
bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
|
||||||
bool file_trigger = false;
|
bool file_trigger = false;
|
||||||
if (queue->device->thread_trace_trigger_file &&
|
if (queue->device->thread_trace.trigger_file &&
|
||||||
access(queue->device->thread_trace_trigger_file, W_OK) == 0) {
|
access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
|
||||||
if (unlink(queue->device->thread_trace_trigger_file) == 0) {
|
if (unlink(queue->device->thread_trace.trigger_file) == 0) {
|
||||||
file_trigger = true;
|
file_trigger = true;
|
||||||
} else {
|
} else {
|
||||||
/* Do not enable tracing if we cannot remove the file,
|
/* Do not enable tracing if we cannot remove the file,
|
||||||
|
@@ -627,7 +627,7 @@ static void
|
|||||||
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
|
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
|
||||||
enum radv_cmd_flush_bits flags)
|
enum radv_cmd_flush_bits flags)
|
||||||
{
|
{
|
||||||
if (unlikely(cmd_buffer->device->thread_trace_bo)) {
|
if (unlikely(cmd_buffer->device->thread_trace.bo)) {
|
||||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||||
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
|
radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
|
||||||
}
|
}
|
||||||
|
@@ -2908,13 +2908,13 @@ VkResult radv_CreateDevice(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Default buffer size set to 1MB per SE. */
|
/* Default buffer size set to 1MB per SE. */
|
||||||
device->thread_trace_buffer_size =
|
device->thread_trace.buffer_size =
|
||||||
radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
|
radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
|
||||||
device->thread_trace_start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
|
device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
|
||||||
|
|
||||||
const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
|
const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
|
||||||
if (trigger_file)
|
if (trigger_file)
|
||||||
device->thread_trace_trigger_file = strdup(trigger_file);
|
device->thread_trace.trigger_file = strdup(trigger_file);
|
||||||
|
|
||||||
if (!radv_thread_trace_init(device))
|
if (!radv_thread_trace_init(device))
|
||||||
goto fail;
|
goto fail;
|
||||||
@@ -3013,7 +3013,7 @@ fail:
|
|||||||
radv_bo_list_finish(&device->bo_list);
|
radv_bo_list_finish(&device->bo_list);
|
||||||
|
|
||||||
radv_thread_trace_finish(device);
|
radv_thread_trace_finish(device);
|
||||||
free(device->thread_trace_trigger_file);
|
free(device->thread_trace.trigger_file);
|
||||||
|
|
||||||
radv_trap_handler_finish(device);
|
radv_trap_handler_finish(device);
|
||||||
|
|
||||||
@@ -3073,7 +3073,7 @@ void radv_DestroyDevice(
|
|||||||
u_cnd_monotonic_destroy(&device->timeline_cond);
|
u_cnd_monotonic_destroy(&device->timeline_cond);
|
||||||
radv_bo_list_finish(&device->bo_list);
|
radv_bo_list_finish(&device->bo_list);
|
||||||
|
|
||||||
free(device->thread_trace_trigger_file);
|
free(device->thread_trace.trigger_file);
|
||||||
radv_thread_trace_finish(device);
|
radv_thread_trace_finish(device);
|
||||||
|
|
||||||
vk_free(&device->vk.alloc, device);
|
vk_free(&device->vk.alloc, device);
|
||||||
|
@@ -67,6 +67,7 @@
|
|||||||
#include "radv_descriptor_set.h"
|
#include "radv_descriptor_set.h"
|
||||||
#include "radv_extensions.h"
|
#include "radv_extensions.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
#include "ac_sqtt.h"
|
||||||
|
|
||||||
/* Pre-declarations needed for WSI entrypoints */
|
/* Pre-declarations needed for WSI entrypoints */
|
||||||
struct wl_surface;
|
struct wl_surface;
|
||||||
@@ -846,13 +847,7 @@ struct radv_device {
|
|||||||
struct u_cnd_monotonic timeline_cond;
|
struct u_cnd_monotonic timeline_cond;
|
||||||
|
|
||||||
/* Thread trace. */
|
/* Thread trace. */
|
||||||
struct radeon_cmdbuf *thread_trace_start_cs[2];
|
struct ac_thread_trace_data thread_trace;
|
||||||
struct radeon_cmdbuf *thread_trace_stop_cs[2];
|
|
||||||
struct radeon_winsys_bo *thread_trace_bo;
|
|
||||||
void *thread_trace_ptr;
|
|
||||||
uint32_t thread_trace_buffer_size;
|
|
||||||
int thread_trace_start_frame;
|
|
||||||
char *thread_trace_trigger_file;
|
|
||||||
|
|
||||||
/* Trap handler. */
|
/* Trap handler. */
|
||||||
struct radv_shader_variant *trap_handler_shader;
|
struct radv_shader_variant *trap_handler_shader;
|
||||||
|
@@ -42,7 +42,7 @@ radv_thread_trace_get_data_offset(struct radv_device *device, unsigned se)
|
|||||||
|
|
||||||
data_offset = align64(sizeof(struct radv_thread_trace_info) * 4,
|
data_offset = align64(sizeof(struct radv_thread_trace_info) * 4,
|
||||||
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
||||||
data_offset += device->thread_trace_buffer_size * se;
|
data_offset += device->thread_trace.buffer_size * se;
|
||||||
|
|
||||||
return data_offset;
|
return data_offset;
|
||||||
}
|
}
|
||||||
@@ -50,14 +50,14 @@ radv_thread_trace_get_data_offset(struct radv_device *device, unsigned se)
|
|||||||
static uint64_t
|
static uint64_t
|
||||||
radv_thread_trace_get_info_va(struct radv_device *device, unsigned se)
|
radv_thread_trace_get_info_va(struct radv_device *device, unsigned se)
|
||||||
{
|
{
|
||||||
uint64_t va = radv_buffer_get_va(device->thread_trace_bo);
|
uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
|
||||||
return va + radv_thread_trace_get_info_offset(se);
|
return va + radv_thread_trace_get_info_offset(se);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t
|
static uint64_t
|
||||||
radv_thread_trace_get_data_va(struct radv_device *device, unsigned se)
|
radv_thread_trace_get_data_va(struct radv_device *device, unsigned se)
|
||||||
{
|
{
|
||||||
uint64_t va = radv_buffer_get_va(device->thread_trace_bo);
|
uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
|
||||||
return va + radv_thread_trace_get_data_offset(device, se);
|
return va + radv_thread_trace_get_data_offset(device, se);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,7 +66,7 @@ radv_emit_thread_trace_start(struct radv_device *device,
|
|||||||
struct radeon_cmdbuf *cs,
|
struct radeon_cmdbuf *cs,
|
||||||
uint32_t queue_family_index)
|
uint32_t queue_family_index)
|
||||||
{
|
{
|
||||||
uint32_t shifted_size = device->thread_trace_buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||||
unsigned max_se = device->physical_device->rad_info.max_se;
|
unsigned max_se = device->physical_device->rad_info.max_se;
|
||||||
|
|
||||||
assert(device->physical_device->rad_info.chip_class >= GFX8);
|
assert(device->physical_device->rad_info.chip_class >= GFX8);
|
||||||
@@ -412,80 +412,80 @@ radv_thread_trace_init_cs(struct radv_device *device)
|
|||||||
|
|
||||||
/* Thread trace start CS. */
|
/* Thread trace start CS. */
|
||||||
for (int family = 0; family < 2; ++family) {
|
for (int family = 0; family < 2; ++family) {
|
||||||
device->thread_trace_start_cs[family] = ws->cs_create(ws, family);
|
device->thread_trace.start_cs[family] = ws->cs_create(ws, family);
|
||||||
if (!device->thread_trace_start_cs[family])
|
if (!device->thread_trace.start_cs[family])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
switch (family) {
|
switch (family) {
|
||||||
case RADV_QUEUE_GENERAL:
|
case RADV_QUEUE_GENERAL:
|
||||||
radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
||||||
radeon_emit(device->thread_trace_start_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
|
radeon_emit(device->thread_trace.start_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
|
||||||
radeon_emit(device->thread_trace_start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
|
radeon_emit(device->thread_trace.start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
|
||||||
break;
|
break;
|
||||||
case RADV_QUEUE_COMPUTE:
|
case RADV_QUEUE_COMPUTE:
|
||||||
radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_NOP, 0, 0));
|
radeon_emit(device->thread_trace.start_cs[family], PKT3(PKT3_NOP, 0, 0));
|
||||||
radeon_emit(device->thread_trace_start_cs[family], 0);
|
radeon_emit(device->thread_trace.start_cs[family], 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
radv_cs_add_buffer(ws, device->thread_trace_start_cs[family],
|
radv_cs_add_buffer(ws, device->thread_trace.start_cs[family],
|
||||||
device->thread_trace_bo);
|
device->thread_trace.bo);
|
||||||
|
|
||||||
/* Make sure to wait-for-idle before starting SQTT. */
|
/* Make sure to wait-for-idle before starting SQTT. */
|
||||||
radv_emit_wait_for_idle(device,
|
radv_emit_wait_for_idle(device,
|
||||||
device->thread_trace_start_cs[family],
|
device->thread_trace.start_cs[family],
|
||||||
family);
|
family);
|
||||||
|
|
||||||
/* Enable SQG events that collects thread trace data. */
|
/* Enable SQG events that collects thread trace data. */
|
||||||
radv_emit_spi_config_cntl(device,
|
radv_emit_spi_config_cntl(device,
|
||||||
device->thread_trace_start_cs[family],
|
device->thread_trace.start_cs[family],
|
||||||
true);
|
true);
|
||||||
|
|
||||||
radv_emit_thread_trace_start(device,
|
radv_emit_thread_trace_start(device,
|
||||||
device->thread_trace_start_cs[family],
|
device->thread_trace.start_cs[family],
|
||||||
family);
|
family);
|
||||||
|
|
||||||
result = ws->cs_finalize(device->thread_trace_start_cs[family]);
|
result = ws->cs_finalize(device->thread_trace.start_cs[family]);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Thread trace stop CS. */
|
/* Thread trace stop CS. */
|
||||||
for (int family = 0; family < 2; ++family) {
|
for (int family = 0; family < 2; ++family) {
|
||||||
device->thread_trace_stop_cs[family] = ws->cs_create(ws, family);
|
device->thread_trace.stop_cs[family] = ws->cs_create(ws, family);
|
||||||
if (!device->thread_trace_stop_cs[family])
|
if (!device->thread_trace.stop_cs[family])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
switch (family) {
|
switch (family) {
|
||||||
case RADV_QUEUE_GENERAL:
|
case RADV_QUEUE_GENERAL:
|
||||||
radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
||||||
radeon_emit(device->thread_trace_stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
|
radeon_emit(device->thread_trace.stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
|
||||||
radeon_emit(device->thread_trace_stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
|
radeon_emit(device->thread_trace.stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
|
||||||
break;
|
break;
|
||||||
case RADV_QUEUE_COMPUTE:
|
case RADV_QUEUE_COMPUTE:
|
||||||
radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_NOP, 0, 0));
|
radeon_emit(device->thread_trace.stop_cs[family], PKT3(PKT3_NOP, 0, 0));
|
||||||
radeon_emit(device->thread_trace_stop_cs[family], 0);
|
radeon_emit(device->thread_trace.stop_cs[family], 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
radv_cs_add_buffer(ws, device->thread_trace_stop_cs[family],
|
radv_cs_add_buffer(ws, device->thread_trace.stop_cs[family],
|
||||||
device->thread_trace_bo);
|
device->thread_trace.bo);
|
||||||
|
|
||||||
/* Make sure to wait-for-idle before stopping SQTT. */
|
/* Make sure to wait-for-idle before stopping SQTT. */
|
||||||
radv_emit_wait_for_idle(device,
|
radv_emit_wait_for_idle(device,
|
||||||
device->thread_trace_stop_cs[family],
|
device->thread_trace.stop_cs[family],
|
||||||
family);
|
family);
|
||||||
|
|
||||||
radv_emit_thread_trace_stop(device,
|
radv_emit_thread_trace_stop(device,
|
||||||
device->thread_trace_stop_cs[family],
|
device->thread_trace.stop_cs[family],
|
||||||
family);
|
family);
|
||||||
|
|
||||||
/* Restore previous state by disabling SQG events. */
|
/* Restore previous state by disabling SQG events. */
|
||||||
radv_emit_spi_config_cntl(device,
|
radv_emit_spi_config_cntl(device,
|
||||||
device->thread_trace_stop_cs[family],
|
device->thread_trace.stop_cs[family],
|
||||||
false);
|
false);
|
||||||
|
|
||||||
result = ws->cs_finalize(device->thread_trace_stop_cs[family]);
|
result = ws->cs_finalize(device->thread_trace.stop_cs[family]);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -500,25 +500,25 @@ radv_thread_trace_init_bo(struct radv_device *device)
|
|||||||
/* The buffer size and address need to be aligned in HW regs. Align the
|
/* The buffer size and address need to be aligned in HW regs. Align the
|
||||||
* size as early as possible so that we do all the allocation & addressing
|
* size as early as possible so that we do all the allocation & addressing
|
||||||
* correctly. */
|
* correctly. */
|
||||||
device->thread_trace_buffer_size = align64(device->thread_trace_buffer_size,
|
device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size,
|
||||||
1u << SQTT_BUFFER_ALIGN_SHIFT);
|
1u << SQTT_BUFFER_ALIGN_SHIFT);
|
||||||
|
|
||||||
/* Compute total size of the thread trace BO for 4 SEs. */
|
/* Compute total size of the thread trace BO for 4 SEs. */
|
||||||
size = align64(sizeof(struct radv_thread_trace_info) * 4,
|
size = align64(sizeof(struct radv_thread_trace_info) * 4,
|
||||||
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
1 << SQTT_BUFFER_ALIGN_SHIFT);
|
||||||
size += device->thread_trace_buffer_size * 4;
|
size += device->thread_trace.buffer_size * 4;
|
||||||
|
|
||||||
device->thread_trace_bo = ws->buffer_create(ws, size, 4096,
|
device->thread_trace.bo = ws->buffer_create(ws, size, 4096,
|
||||||
RADEON_DOMAIN_VRAM,
|
RADEON_DOMAIN_VRAM,
|
||||||
RADEON_FLAG_CPU_ACCESS |
|
RADEON_FLAG_CPU_ACCESS |
|
||||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||||
RADEON_FLAG_ZERO_VRAM,
|
RADEON_FLAG_ZERO_VRAM,
|
||||||
RADV_BO_PRIORITY_SCRATCH);
|
RADV_BO_PRIORITY_SCRATCH);
|
||||||
if (!device->thread_trace_bo)
|
if (!device->thread_trace.bo)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
device->thread_trace_ptr = ws->buffer_map(device->thread_trace_bo);
|
device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
|
||||||
if (!device->thread_trace_ptr)
|
if (!device->thread_trace.ptr)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@@ -539,14 +539,14 @@ radv_thread_trace_finish(struct radv_device *device)
|
|||||||
{
|
{
|
||||||
struct radeon_winsys *ws = device->ws;
|
struct radeon_winsys *ws = device->ws;
|
||||||
|
|
||||||
if (unlikely(device->thread_trace_bo))
|
if (unlikely(device->thread_trace.bo))
|
||||||
ws->buffer_destroy(device->thread_trace_bo);
|
ws->buffer_destroy(device->thread_trace.bo);
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < 2; i++) {
|
||||||
if (device->thread_trace_start_cs[i])
|
if (device->thread_trace.start_cs[i])
|
||||||
ws->cs_destroy(device->thread_trace_start_cs[i]);
|
ws->cs_destroy(device->thread_trace.start_cs[i]);
|
||||||
if (device->thread_trace_stop_cs[i])
|
if (device->thread_trace.stop_cs[i])
|
||||||
ws->cs_destroy(device->thread_trace_stop_cs[i]);
|
ws->cs_destroy(device->thread_trace.stop_cs[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -554,7 +554,7 @@ bool
|
|||||||
radv_begin_thread_trace(struct radv_queue *queue)
|
radv_begin_thread_trace(struct radv_queue *queue)
|
||||||
{
|
{
|
||||||
int family = queue->queue_family_index;
|
int family = queue->queue_family_index;
|
||||||
struct radeon_cmdbuf *cs = queue->device->thread_trace_start_cs[family];
|
struct radeon_cmdbuf *cs = queue->device->thread_trace.start_cs[family];
|
||||||
return radv_queue_internal_submit(queue, cs);
|
return radv_queue_internal_submit(queue, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -562,7 +562,7 @@ bool
|
|||||||
radv_end_thread_trace(struct radv_queue *queue)
|
radv_end_thread_trace(struct radv_queue *queue)
|
||||||
{
|
{
|
||||||
int family = queue->queue_family_index;
|
int family = queue->queue_family_index;
|
||||||
struct radeon_cmdbuf *cs = queue->device->thread_trace_stop_cs[family];
|
struct radeon_cmdbuf *cs = queue->device->thread_trace.stop_cs[family];
|
||||||
return radv_queue_internal_submit(queue, cs);
|
return radv_queue_internal_submit(queue, cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -602,7 +602,7 @@ radv_get_thread_trace(struct radv_queue *queue,
|
|||||||
{
|
{
|
||||||
struct radv_device *device = queue->device;
|
struct radv_device *device = queue->device;
|
||||||
unsigned max_se = device->physical_device->rad_info.max_se;
|
unsigned max_se = device->physical_device->rad_info.max_se;
|
||||||
void *thread_trace_ptr = device->thread_trace_ptr;
|
void *thread_trace_ptr = device->thread_trace.ptr;
|
||||||
|
|
||||||
memset(thread_trace, 0, sizeof(*thread_trace));
|
memset(thread_trace, 0, sizeof(*thread_trace));
|
||||||
thread_trace->num_traces = max_se;
|
thread_trace->num_traces = max_se;
|
||||||
|
Reference in New Issue
Block a user