From 3e7bac80ce5cc1de7d13dfc6ae17526a03ddffcc Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 31 May 2021 16:42:36 +0200 Subject: [PATCH] ac/rgp: add support for dumping SPM data Signed-off-by: Samuel Pitoiset Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_rgp.c | 120 +++++++++++++++++++++++- src/amd/common/ac_rgp.h | 4 +- src/amd/vulkan/layers/radv_sqtt_layer.c | 2 +- src/gallium/drivers/radeonsi/si_sqtt.c | 2 +- 4 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c index 85fbff0f837..24ea488cd5b 100644 --- a/src/amd/common/ac_rgp.c +++ b/src/amd/common/ac_rgp.c @@ -29,6 +29,7 @@ #include "util/u_process.h" #include "util/u_math.h" +#include "ac_spm.h" #include "ac_sqtt.h" #include "ac_gpu_info.h" #ifdef _WIN32 @@ -892,8 +893,118 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi } } +/** + * SQTT SPM DB info. + */ +struct sqtt_spm_counter_info { + enum ac_pc_gpu_block block; + uint32_t instance; + uint32_t data_offset; /* offset of counter from the beginning of the chunk */ + uint32_t event_index; /* index of counter within the block */ +}; + +struct sqtt_file_chunk_spm_db { + struct sqtt_file_chunk_header header; + uint32_t flags; + uint32_t num_timestamps; + uint32_t num_spm_counter_info; + uint32_t sample_interval; +}; + +static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32, + "sqtt_file_chunk_spm_db doesn't match RGP spec"); + +static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace, + struct sqtt_file_chunk_spm_db *chunk, + uint32_t num_samples, + uint32_t chunk_size) +{ + chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB; + chunk->header.chunk_id.index = 0; + chunk->header.major_version = 1; + chunk->header.minor_version = 3; + chunk->header.size_in_bytes = chunk_size; + + chunk->flags = 0; + chunk->num_timestamps = num_samples; + chunk->num_spm_counter_info = spm_trace->num_counters; + 
chunk->sample_interval = spm_trace->sample_interval; +} + +static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace, + size_t file_offset, + FILE *output) +{ + uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace); + uint32_t num_samples = ac_spm_get_num_samples(spm_trace); + uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr; + struct sqtt_file_chunk_spm_db spm_db; + size_t file_spm_db_offset = file_offset; + + fseek(output, sizeof(struct sqtt_file_chunk_spm_db), SEEK_CUR); + file_offset += sizeof(struct sqtt_file_chunk_spm_db); + + /* Skip the reserved 32 bytes of data at beginning. */ + spm_data_ptr += 32; + + /* SPM timestamps. */ + uint32_t sample_size_in_qwords = sample_size_in_bytes / sizeof(uint64_t); + uint64_t *timestamp_ptr = (uint64_t *)spm_data_ptr; + + for (uint32_t s = 0; s < num_samples; s++) { + uint64_t index = s * sample_size_in_qwords; + uint64_t timestamp = timestamp_ptr[index]; + + file_offset += sizeof(timestamp); + fwrite(&timestamp, sizeof(timestamp), 1, output); + } + + /* SPM counter info. */ + uint64_t counter_values_size = num_samples * sizeof(uint16_t); + uint64_t counter_values_offset = num_samples * sizeof(uint64_t) + + spm_trace->num_counters * sizeof(struct sqtt_spm_counter_info); + + for (uint32_t c = 0; c < spm_trace->num_counters; c++) { + struct sqtt_spm_counter_info cntr_info = { + .block = spm_trace->counters[c].gpu_block, + .instance = spm_trace->counters[c].instance, + .data_offset = counter_values_offset, + .event_index = spm_trace->counters[c].event_id, + }; + + file_offset += sizeof(cntr_info); + fwrite(&cntr_info, sizeof(cntr_info), 1, output); + + counter_values_offset += counter_values_size; + } + + /* SPM counter values. 
*/ + uint32_t sample_size_in_hwords = sample_size_in_bytes / sizeof(uint16_t); + uint16_t *counter_values_ptr = (uint16_t *)spm_data_ptr; + + for (uint32_t c = 0; c < spm_trace->num_counters; c++) { + uint64_t offset = spm_trace->counters[c].offset; + + for (uint32_t s = 0; s < num_samples; s++) { + uint64_t index = offset + (s * sample_size_in_hwords); + uint16_t value = counter_values_ptr[index]; + + file_offset += sizeof(value); + fwrite(&value, sizeof(value), 1, output); + } + } + + /* SQTT SPM DB chunk. */ + ac_sqtt_fill_spm_db(spm_trace, &spm_db, num_samples, + file_offset - file_spm_db_offset); + fseek(output, file_spm_db_offset, SEEK_SET); + fwrite(&spm_db, sizeof(struct sqtt_file_chunk_spm_db), 1, output); + fseek(output, file_offset, SEEK_SET); +} + static void ac_sqtt_dump_data(struct radeon_info *rad_info, struct ac_thread_trace *thread_trace, + const struct ac_spm_trace_data *spm_trace, FILE *output) { struct ac_thread_trace_data *thread_trace_data = thread_trace->data; @@ -1071,10 +1182,15 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info, fwrite(se->data_ptr, size, 1, output); } } + + if (spm_trace) { + ac_sqtt_dump_spm(spm_trace, file_offset, output); + } } int ac_dump_rgp_capture(struct radeon_info *info, - struct ac_thread_trace *thread_trace) + struct ac_thread_trace *thread_trace, + const struct ac_spm_trace_data *spm_trace) { char filename[2048]; struct tm now; @@ -1092,7 +1208,7 @@ int ac_dump_rgp_capture(struct radeon_info *info, if (!f) return -1; - ac_sqtt_dump_data(info, thread_trace, f); + ac_sqtt_dump_data(info, thread_trace, spm_trace, f); fprintf(stderr, "RGP capture saved to '%s'\n", filename); diff --git a/src/amd/common/ac_rgp.h b/src/amd/common/ac_rgp.h index 9e38556a090..b53bb02de81 100644 --- a/src/amd/common/ac_rgp.h +++ b/src/amd/common/ac_rgp.h @@ -34,6 +34,7 @@ struct radeon_info; struct ac_thread_trace; struct ac_thread_trace_data; +struct ac_spm_trace_data; enum rgp_hardware_stages { RGP_HW_STAGE_VS = 0, @@ 
-189,7 +190,8 @@ struct rgp_clock_calibration { int ac_dump_rgp_capture(struct radeon_info *info, - struct ac_thread_trace *thread_trace); + struct ac_thread_trace *thread_trace, + const struct ac_spm_trace_data *spm_trace); void ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start, diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 2b9623a811f..5c8cda63809 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -363,7 +363,7 @@ radv_handle_thread_trace(VkQueue _queue) radv_QueueWaitIdle(_queue); if (radv_get_thread_trace(queue, &thread_trace)) { - ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace); + ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, NULL); } else { /* Trigger a new capture if the driver failed to get * the trace because the buffer was too small. diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index a5eb4f5851c..cad12160c14 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -710,7 +710,7 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs) /* Wait for SQTT to finish and read back the bo */ if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) && si_get_thread_trace(sctx, &thread_trace)) { - ac_dump_rgp_capture(&sctx->screen->info, &thread_trace); + ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, NULL); } else { fprintf(stderr, "Failed to read the trace\n"); }