diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build
index a45e5298f16..07596a67c5d 100644
--- a/src/gallium/drivers/radeonsi/meson.build
+++ b/src/gallium/drivers/radeonsi/meson.build
@@ -87,6 +87,8 @@ files_libradeonsi = files(
   'si_test_image_copy_region.c',
   'si_test_dma_perf.c',
   'si_texture.c',
+  'si_utrace.c',
+  'si_utrace.h',
   'si_uvd.c',
   'pspdecryptionparam.h',
   'radeon_temporal.h',
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 42655e31d25..811d871b2c9 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -590,6 +590,21 @@ void si_trace_emit(struct si_context *sctx)
    u_log_flush(sctx->log);
 }
 
+/*
+ * Timestamp logging for u_trace: emit a bottom-of-pipe event that writes the
+ * GPU timestamp to buffer at the given byte offset.
+ * NOTE(review): NULL is passed as the buffer argument of si_cp_release_mem(),
+ * so this call does not hand the buffer to it - confirm the buffer is tracked
+ * by the command stream some other way.
+ */
+void si_emit_ts(struct si_context *sctx, struct si_resource *buffer, unsigned int offset)
+{
+   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
+   uint64_t va = buffer->gpu_address + offset;
+   si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+                     EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, PIPE_QUERY_TIMESTAMP);
+}
+
 void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl)
 {
    bool compute_ib = !sctx->has_graphics;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index c3950844acd..48bc6eed439 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -15,6 +15,7 @@
 #include "util/u_suballoc.h"
 #include "util/u_threaded_context.h"
 #include "util/u_vertex_state_cache.h"
+#include "util/perf/u_trace.h"
 #include "ac_sqtt.h"
 #include "ac_spm.h"
 #include "si_perfetto.h"
@@ -1363,7 +1364,13 @@ struct si_context {
    /* Only used for DCC MSAA clears with 4-8 fragments and 4-16 samples. */
    void *cs_clear_dcc_msaa[32][5][2][3][2]; /* [swizzle_mode][log2(bpe)][fragments == 8][log2(samples)-2][is_array] */
 
+   /* u_trace logging */
    struct si_ds_device ds;
+   /** Where tracepoints are recorded */
+   struct u_trace trace;
+   struct si_ds_queue ds_queue;
+   uint32_t *last_timestamp_cmd; /* gfx_cs.current.buf when the last timestamp was emitted */
+   unsigned int last_timestamp_cmd_cdw; /* gfx_cs.current.cdw when the last timestamp was emitted */
 };
 
 /* si_blit.c */
@@ -1556,6 +1563,7 @@ void si_allocate_gds(struct si_context *ctx);
 void si_set_tracked_regs_to_clear_state(struct si_context *ctx);
 void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs);
 void si_trace_emit(struct si_context *sctx);
+void si_emit_ts(struct si_context *sctx, struct si_resource *buffer, unsigned int offset);
 void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
                           unsigned cp_coher_cntl);
 void gfx10_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs);
diff --git a/src/gallium/drivers/radeonsi/si_utrace.c b/src/gallium/drivers/radeonsi/si_utrace.c
new file mode 100644
index 00000000000..9e1a1de860d
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_utrace.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "si_utrace.h"
+#include "si_perfetto.h"
+#include "amd/common/ac_gpu_info.h"
+
+#include "util/u_trace_gallium.h"
+#include "util/hash_table.h"
+
+
+/*
+ * u_trace record callback: ask the GPU to write a bottom-of-pipe timestamp
+ * into slot idx of the timestamps buffer. If the gfx command stream has not
+ * advanced since the previous timestamp was emitted, no new packet is emitted
+ * and the slot is marked U_TRACE_NO_TIMESTAMP instead. cs and end_of_pipe are
+ * unused.
+ */
+static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, unsigned idx,
+                                bool end_of_pipe)
+{
+   struct si_context *ctx = container_of(trace, struct si_context, trace);
+   struct pipe_resource *buffer = timestamps;
+   struct si_resource *ts_bo = si_resource(buffer);
+
+   if (ctx->gfx_cs.current.buf == ctx->last_timestamp_cmd &&
+       ctx->gfx_cs.current.cdw == ctx->last_timestamp_cmd_cdw) {
+      /* The CPU stores the marker, so map for writing. No timestamp packet
+       * targets this slot, hence the unsynchronized map.
+       */
+      uint64_t *ts = si_buffer_map(ctx, ts_bo, PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
+      if (ts)
+         ts[idx] = U_TRACE_NO_TIMESTAMP;
+      return;
+   }
+
+   unsigned ts_offset = idx * sizeof(uint64_t);
+
+   si_emit_ts(ctx, ts_bo, ts_offset);
+   /* Remember where the stream ended so a back-to-back request can be skipped. */
+   ctx->last_timestamp_cmd = ctx->gfx_cs.current.buf;
+   ctx->last_timestamp_cmd_cdw = ctx->gfx_cs.current.cdw;
+}
+
+/*
+ * u_trace read callback: return the timestamp stored in slot idx of the
+ * timestamps buffer, converted from crystal clock ticks. The
+ * U_TRACE_NO_TIMESTAMP marker is passed through untouched. flush_data is unused.
+ */
+static uint64_t si_utrace_read_ts(struct u_trace_context *utctx, void *timestamps, unsigned idx,
+                                  void *flush_data)
+{
+   struct si_context *ctx = container_of(utctx, struct si_context, ds.trace_context);
+   struct pipe_resource *buffer = timestamps;
+   const uint64_t freq = ctx->screen->info.clock_crystal_freq;
+
+   uint64_t *ts = si_buffer_map(ctx, si_resource(buffer), PIPE_MAP_READ);
+
+   /* Without a mapping or a clock rate there is nothing meaningful to report. */
+   if (!ts || !freq)
+      return U_TRACE_NO_TIMESTAMP;
+
+   /* Don't translate the no-timestamp marker: */
+   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
+      return U_TRACE_NO_TIMESTAMP;
+
+   /* Same value as (1000000 * ticks) / freq, but the quotient and remainder
+    * are scaled separately so the intermediate product cannot overflow for
+    * large tick counts. NOTE(review): assumes freq is in kHz, giving ns - confirm.
+    */
+   const uint64_t ticks = ts[idx];
+   return (ticks / freq) * 1000000 + ((ticks % freq) * 1000000) / freq;
+}
+
+/* u_trace callback: release the flush data allocated by si_utrace_flush(). */
+static void si_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
+{
+   free(flush_data);
+}
+
+/*
+ * Initialize tracing for sctx: derive a GPU id by hashing the PCI location and
+ * device id, then set up the ds device, the u_trace context and the "render"
+ * queue.
+ */
+void si_utrace_init(struct si_context *sctx)
+{
+   char buf[64];
+   snprintf(buf, sizeof(buf), "%u:%u:%u:%u:%u", sctx->screen->info.pci.domain,
+            sctx->screen->info.pci.bus, sctx->screen->info.pci.dev,
+            sctx->screen->info.pci.func, sctx->screen->info.pci_id);
+   uint32_t gpu_id = _mesa_hash_string(buf);
+
+   si_ds_device_init(&sctx->ds, &sctx->screen->info, gpu_id, AMD_DS_API_OPENGL);
+   u_trace_pipe_context_init(&sctx->ds.trace_context, &sctx->b, si_utrace_record_ts,
+                             si_utrace_read_ts, si_utrace_delete_flush_data);
+
+   si_ds_device_init_queue(&sctx->ds, &sctx->ds_queue, "%s", "render");
+}
+
+/* Tear down the ds device of sctx. */
+void si_utrace_fini(struct si_context *sctx)
+{
+   si_ds_device_fini(&sctx->ds);
+}
+
+/*
+ * Flush the tracepoints recorded in sctx->trace, attaching flush data that
+ * carries the queue and submission_id. The flush data is freed by
+ * si_utrace_delete_flush_data(). Nothing is flushed if allocation fails.
+ */
+void si_utrace_flush(struct si_context *sctx, uint64_t submission_id)
+{
+   struct si_ds_flush_data *flush_data = malloc(sizeof(*flush_data));
+   if (!flush_data)
+      return;
+
+   si_ds_flush_data_init(flush_data, &sctx->ds_queue, submission_id);
+   u_trace_flush(&sctx->trace, flush_data, false);
+}
diff --git a/src/gallium/drivers/radeonsi/si_utrace.h b/src/gallium/drivers/radeonsi/si_utrace.h
new file mode 100644
index 00000000000..b7609b42481
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_utrace.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef SI_UTRACE_H
+#define SI_UTRACE_H
+
+#include <stdint.h>
+#include "si_pipe.h"
+
+/* Set up the ds device, the u_trace context and the "render" queue of sctx. */
+void si_utrace_init(struct si_context *sctx);
+/* Tear down the ds device of sctx. */
+void si_utrace_fini(struct si_context *sctx);
+
+/* Flush the tracepoints recorded in sctx->trace, tagged with submission_id. */
+void si_utrace_flush(struct si_context *sctx, uint64_t submission_id);
+
+#endif