freedreno: add fd_rd_output facilities for gzip-compressed RD dumps

Provide fd_rd_output facilities which enable constructing RD dumps that are
stored into gzip-compressed output. This matches the default behavior of
libwrap. The functionality is enabled, and its behavior adjusted, through
the FD_RD_DUMP environment variable.

Integration into Turnip's MSM backend is covered, replacing the previous
RD dump that was enabled through TU_DEBUG=rd. That debug option still
works and is the same as using FD_RD_DUMP=enable.

By default the dumps are created for each submission, using the provided
submit index. FD_RD_DUMP=combine enables gathering dumps for submissions
for the given logical device into a single file.

In the Turnip integration, FD_RD_DUMP=full will force dumping contents of
any buffer object. Additionally, with that option enabled any previous
submit will be waited on.

Specifying FD_RD_DUMP=trigger sets up a trigger file that can be used to
activate dumping manually. Writing zero or some non-integer value to the
file will disable dumping. Writing a positive integer value to it will
enable dumps for that many future submissions. Writing -1 to it will enable
dumps until disabled.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27230>
This commit is contained in:
Zan Dobersek
2024-01-24 07:55:56 +01:00
parent 0a97d1ebfa
commit f9c4e25483
8 changed files with 416 additions and 33 deletions

View File

@@ -369,9 +369,8 @@ Command Stream Capture
^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^
During Mesa development, it's often useful to look at the command streams we During Mesa development, it's often useful to look at the command streams we
send to the kernel. Mesa itself doesn't implement a way to stream them out send to the kernel. We have an interface for the kernel to capture all
(though it maybe should!). Instead, we have an interface for the kernel to submitted command streams:
capture all submitted command streams:
.. code-block:: sh .. code-block:: sh
@@ -391,6 +390,28 @@ probably want to cause a crash in the GPU during a frame of interest so that a
single GPU core dump is generated. Emitting ``0xdeadbeef`` in the CS should be single GPU core dump is generated. Emitting ``0xdeadbeef`` in the CS should be
enough to cause a fault. enough to cause a fault.
``fd_rd_output`` facilities provide support for generating the command stream
capture from inside Mesa. Different ``FD_RD_DUMP`` options are available:
- ``enable`` simply enables dumping the command stream on each submit for a
given logical device. Specifying any of the more advanced options below
implies ``enable``.
- ``combine`` will combine all dumps into a single file instead of writing the
dump for each submit into a standalone file.
- ``full`` will dump every buffer object, which is necessary for replays of
command streams (see below).
- ``trigger`` will establish a trigger file through which dumps can be better
controlled. Writing a positive integer value into the file will enable dumping
of that many subsequent submits. Writing -1 will enable dumping of submits
until disabled. Writing 0 (or any other value) will disable dumps.
Output dump files and trigger file (when enabled) are hard-coded to be placed
under ``/tmp``, or ``/data/local/tmp`` under Android.
Functionality is generic to any Freedreno-based backend, but is currently only
integrated in the MSM backend of Turnip. Using the existing ``TU_DEBUG=rd``
option will translate to ``FD_RD_DUMP=enable``.
Capturing Hang RD Capturing Hang RD
+++++++++++++++++ +++++++++++++++++

View File

@@ -0,0 +1,250 @@
/*
* Copyright © 2024 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "freedreno_rd_output.h"
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "c11/threads.h"
#include "util/log.h"
#include "util/u_atomic.h"
#include "util/u_debug.h"
/* Directory under which dump files and trigger files are created. */
#ifdef ANDROID
static const char *fd_rd_output_base_path = "/data/local/tmp";
#else
static const char *fd_rd_output_base_path = "/tmp";
#endif
/* Option names accepted in the FD_RD_DUMP environment variable, each mapped
 * to its fd_rd_dump_flags bit. The table is terminated by a NULL entry.
 */
static const struct debug_control fd_rd_dump_options[] = {
{ "enable", FD_RD_DUMP_ENABLE },
{ "combine", FD_RD_DUMP_COMBINE },
{ "full", FD_RD_DUMP_FULL },
{ "trigger", FD_RD_DUMP_TRIGGER },
{ NULL, 0 }
};
/* Global FD_RD_DUMP state; its flags are populated from the environment by
 * fd_rd_dump_env_init() and queried through the FD_RD_DUMP() macro.
 */
struct fd_rd_dump_env fd_rd_dump_env;
/* One-time initializer: parses the FD_RD_DUMP environment variable into
 * fd_rd_dump_env.flags.
 */
static void
fd_rd_dump_env_init_once(void)
{
   const char *env_value = os_get_option("FD_RD_DUMP");
   uint32_t parsed = parse_debug_string(env_value, fd_rd_dump_options);

   /* Every option other than "enable" refines how dumping works, so having
    * any of them set implies that dumping itself is enabled.
    */
   if ((parsed & ~FD_RD_DUMP_ENABLE) != 0)
      parsed |= FD_RD_DUMP_ENABLE;

   fd_rd_dump_env.flags = parsed;
}
/* Initializes the global FD_RD_DUMP state. The actual parsing is routed
 * through call_once(), so repeated calls only parse the environment once.
 */
void
fd_rd_dump_env_init(void)
{
   static once_flag init_guard = ONCE_FLAG_INIT;

   call_once(&init_guard, fd_rd_dump_env_init_once);
}
/* Sanitizes a null-terminated name in place so it is safe to embed in a file
 * name: every character that is not alphanumeric, a hyphen, an underscore or
 * a dot is replaced with an underscore.
 */
static void
fd_rd_output_sanitize_name(char *name)
{
   for (char *s = name; *s; ++s) {
      /* <ctype.h> functions require a value representable as unsigned char
       * (or EOF). Plain char may be signed, so bytes >= 0x80 (e.g. UTF-8 in
       * an application name) must be converted first to avoid undefined
       * behavior.
       */
      const unsigned char c = (unsigned char) *s;
      if (isalnum(c) || c == '-' || c == '_' || c == '.')
         continue;
      *s = '_';
   }
}
void
fd_rd_output_init(struct fd_rd_output *output, char* output_name)
{
snprintf(output->name, sizeof(output->name), "%s", output_name);
fd_rd_output_sanitize_name(output->name);
output->combine = false;
output->file = NULL;
output->trigger_fd = -1;
output->trigger_count = 0;
if (FD_RD_DUMP(COMBINE)) {
output->combine = true;
char file_path[256];
snprintf(file_path, sizeof(file_path), "%s/%s_combined.rd",
fd_rd_output_base_path, output->name);
output->file = gzopen(file_path, "w");
}
if (FD_RD_DUMP(TRIGGER)) {
char file_path[256];
snprintf(file_path, sizeof(file_path), "%s/%s_trigger",
fd_rd_output_base_path, output->name);
output->trigger_fd = open(file_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
}
}
/* Releases the resources held by an RD output object: closes the combined
 * gzip file if one is open, and closes and removes the trigger file if one
 * was created.
 */
void
fd_rd_output_fini(struct fd_rd_output *output)
{
   if (output->file) {
      /* A file that is still open at teardown can only be the combined one. */
      assert(output->combine);
      gzclose(output->file);
   }

   if (output->trigger_fd < 0)
      return;

   close(output->trigger_fd);

   /* The trigger file path is rebuilt from the name rather than being kept
    * around in the struct for the lifetime of the object.
    */
   char trigger_path[256];
   snprintf(trigger_path, sizeof(trigger_path), "%s/%s_trigger",
            fd_rd_output_base_path, output->name);
   unlink(trigger_path);
}
/* Polls the trigger file and, if something was written to it, updates
 * output->trigger_count from its contents and resets the file to empty.
 *
 * Value written to the file -> resulting trigger_count:
 *   -1               -> UINT_MAX (dump until disabled)
 *   positive integer -> that many submits (clamped to INT32_MAX)
 *   anything else    -> 0 (dumping disabled)
 *
 * On any I/O failure an error is logged and trigger_count is left untouched.
 */
static void
fd_rd_output_update_trigger_count(struct fd_rd_output *output)
{
   assert(FD_RD_DUMP(TRIGGER));

   /* Retrieve the trigger file size, only attempt to update the trigger
    * value if anything was actually written to that file.
    */
   struct stat st;
   if (fstat(output->trigger_fd, &st) != 0) {
      mesa_loge("[fd_rd_output] failed to access the %s trigger file",
                output->name);
      return;
   }

   if (st.st_size == 0)
      return;

   char trigger_data[32];
   ssize_t ret = read(output->trigger_fd, trigger_data, sizeof(trigger_data));
   if (ret < 0) {
      mesa_loge("[fd_rd_output] failed to read from the %s trigger file",
                output->name);
      return;
   }

   /* Leave room for the terminating null byte. */
   size_t num_read = MIN2((size_t) ret, sizeof(trigger_data) - 1);

   /* After reading from it, the trigger file should be reset, which means
    * moving the file offset to the start of the file as well as truncating
    * it to zero bytes.
    */
   if (lseek(output->trigger_fd, 0, SEEK_SET) < 0) {
      mesa_loge("[fd_rd_output] failed to reset the %s trigger file position",
                output->name);
      return;
   }

   if (ftruncate(output->trigger_fd, 0) < 0) {
      mesa_loge("[fd_rd_output] failed to truncate the %s trigger file",
                output->name);
      return;
   }

   /* Decode the count value through strtol. The result is kept as a long so
    * that large inputs are not truncated into unrelated values (on LP64,
    * narrowing 4294967295 to 32 bits would alias -1 and enable dumping
    * indefinitely).
    */
   trigger_data[num_read] = '\0';
   long value = strtol(trigger_data, NULL, 0);

   if (value == -1) {
      output->trigger_count = UINT_MAX;
      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps until disabled",
                output->name);
   } else if (value > 0) {
      /* Clamp so that a finite request can never reach the UINT_MAX
       * "until disabled" sentinel.
       */
      output->trigger_count =
         value < (long) INT32_MAX ? (uint32_t) value : (uint32_t) INT32_MAX;
      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps for next %u submissions",
                output->name, output->trigger_count);
   } else {
      output->trigger_count = 0;
      mesa_logi("[fd_rd_output] %s trigger disabling RD dumps", output->name);
   }
}
/* Prepares the output for dumping one submit.
 *
 * output:     RD output object set up by fd_rd_output_init().
 * submit_idx: index of the submit, used in the per-submit file name.
 *
 * Returns true when a dump should be written for this submit; the caller
 * then writes sections and finishes with fd_rd_output_end(). Returns false
 * when nothing must be written: trigger mode with no remaining dump count,
 * or the output file could not be opened.
 */
bool
fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx)
{
   /* In per-submit mode no file may be left open between submits. */
   assert(output->combine || output->file == NULL);

   if (FD_RD_DUMP(TRIGGER)) {
      fd_rd_output_update_trigger_count(output);

      if (output->trigger_count == 0)
         return false;

      /* UINT_MAX corresponds to generating dumps until disabled. */
      if (output->trigger_count != UINT_MAX)
         --output->trigger_count;
   }

   /* The combined file is opened once at init time. If that failed there is
    * nothing to write to, so report that instead of letting the caller write
    * to a NULL gzFile.
    */
   if (output->combine)
      return output->file != NULL;

   char file_path[256];
   /* submit_idx is unsigned, so format it with %u. */
   snprintf(file_path, sizeof(file_path), "%s/%s_%.5u.rd",
            fd_rd_output_base_path, output->name, submit_idx);
   output->file = gzopen(file_path, "w");
   if (output->file == NULL) {
      mesa_loge("[fd_rd_output] failed to open %s for writing", file_path);
      return false;
   }

   return true;
}
static void
fd_rd_output_write(struct fd_rd_output *output, const void *buffer, int size)
{
const uint8_t *pos = (uint8_t *) buffer;
while (size > 0) {
int ret = gzwrite(output->file, pos, size);
if (ret < 0) {
mesa_loge("[fd_rd_output] failed to write to compressed output: %s",
gzerror(output->file, NULL));
return;
}
pos += ret;
size -= ret;
}
}
/* Emits one RD section into the output: a 4-byte section type, a 4-byte
 * payload size, and then `size` bytes of payload taken from `buffer`.
 */
void
fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type,
                           const void *buffer, int size)
{
   /* Each of the two header fields occupies four bytes in the stream. */
   enum { header_field_size = 4 };

   /* Section header. */
   fd_rd_output_write(output, &type, header_field_size);
   fd_rd_output_write(output, &size, header_field_size);

   /* Section payload. */
   fd_rd_output_write(output, buffer, size);
}
/* Finishes the dump of one submit that was started with a successful
 * fd_rd_output_begin().
 */
void
fd_rd_output_end(struct fd_rd_output *output)
{
   assert(output->file != NULL);

   if (!output->combine) {
      /* Per-submit mode: the file belongs to this submit alone. */
      gzclose(output->file);
      output->file = NULL;
      return;
   }

   /* Combined mode: the file stays open for later submits, but the gzip
    * stream is flushed now so that the data is stored before any problem
    * during the submit itself can occur.
    */
   gzflush(output->file, Z_FINISH);
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright © 2024 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#ifndef __FREEDRENO_RD_OUTPUT_H__
#define __FREEDRENO_RD_OUTPUT_H__
#include <stdbool.h>
#include <stdint.h>
#include <zlib.h>
#include "redump.h"
#ifdef __cplusplus
extern "C" {
#endif
enum fd_rd_dump_flags {
FD_RD_DUMP_ENABLE = 1 << 0,
FD_RD_DUMP_COMBINE = 1 << 1,
FD_RD_DUMP_FULL = 1 << 2,
FD_RD_DUMP_TRIGGER = 1 << 3,
};
/* Global FD_RD_DUMP configuration. */
struct fd_rd_dump_env {
/* Bitmask of enum fd_rd_dump_flags values. */
uint32_t flags;
};
/* Single global instance, defined in freedreno_rd_output.c. */
extern struct fd_rd_dump_env fd_rd_dump_env;
/* Tests whether the FD_RD_DUMP_<name> flag is set, e.g. FD_RD_DUMP(ENABLE).
 * The result is wrapped in unlikely() since dumping is a debug facility.
 */
#define FD_RD_DUMP(name) unlikely(fd_rd_dump_env.flags & FD_RD_DUMP_##name)
/* Parses the FD_RD_DUMP environment variable into fd_rd_dump_env.flags.
 * The parsing is performed only once, no matter how often this is called.
 */
void
fd_rd_dump_env_init(void);
/* State of one RD dump output, e.g. one per logical device. */
struct fd_rd_output {
/* Sanitized base name used to build the dump and trigger file names. */
char name[128];
/* True when all submits are gathered into one combined file. */
bool combine;
/* Currently open gzip output, or NULL when no file is open. */
gzFile file;
/* File descriptor of the trigger file, or -1 when there is none. */
int trigger_fd;
/* Number of upcoming submits to dump in trigger mode; UINT_MAX means
 * dumping continues until disabled through the trigger file.
 */
uint32_t trigger_count;
};
/* Initializes `output`. `output_name` is copied and sanitized for use in
 * file names. Depending on the FD_RD_DUMP flags this also opens the combined
 * dump file and/or creates the trigger file.
 */
void
fd_rd_output_init(struct fd_rd_output *output, char* output_name);
/* Releases `output`: closes an open combined dump file, and closes and
 * removes the trigger file if there is one.
 */
void
fd_rd_output_fini(struct fd_rd_output *output);
/* Starts the dump for the submit identified by `submit_idx`. Returns true
 * when a dump should be written; the caller then emits sections and calls
 * fd_rd_output_end(). Returns false when no dump should be written for this
 * submit (e.g. trigger mode with no remaining dump count), in which case
 * neither sections nor fd_rd_output_end() must follow.
 */
bool
fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx);
/* Writes one RD section: the 4-byte `type`, the 4-byte `size`, and then
 * `size` bytes of payload from `buffer`.
 */
void
fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type,
const void *buffer, int size);
/* Finishes the dump started by fd_rd_output_begin(): flushes the gzip stream
 * in combined mode, otherwise closes the per-submit file.
 */
void
fd_rd_output_end(struct fd_rd_output *output);
#ifdef __cplusplus
}
#endif
#endif /* __FREEDRENO_RD_OUTPUT_H__ */

View File

@@ -38,6 +38,8 @@ libfreedreno_common = static_library(
'freedreno_dev_info.c', 'freedreno_dev_info.c',
'freedreno_dev_info.h', 'freedreno_dev_info.h',
'freedreno_pm4.h', 'freedreno_pm4.h',
'freedreno_rd_output.c',
'freedreno_rd_output.h',
'freedreno_uuid.c', 'freedreno_uuid.c',
'freedreno_uuid.h', 'freedreno_uuid.h',
'freedreno_guardband.h', 'freedreno_guardband.h',

View File

@@ -21,6 +21,7 @@
#include "util/hex.h" #include "util/hex.h"
#include "util/driconf.h" #include "util/driconf.h"
#include "util/os_misc.h" #include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_shader_module.h" #include "vk_shader_module.h"
#include "vk_sampler.h" #include "vk_sampler.h"
#include "vk_util.h" #include "vk_util.h"
@@ -2219,6 +2220,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->instance = physical_device->instance; device->instance = physical_device->instance;
device->physical_device = physical_device; device->physical_device = physical_device;
device->device_idx = device->physical_device->device_count++;
result = tu_drm_device_init(device); result = tu_drm_device_init(device);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
@@ -2492,6 +2494,26 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
tu_breadcrumbs_init(device); tu_breadcrumbs_init(device);
if (FD_RD_DUMP(ENABLE)) {
struct vk_app_info *app_info = &device->instance->vk.app_info;
const char *app_name_str = app_info->app_name ?
app_info->app_name : util_get_process_name();
const char *engine_name_str = app_info->engine_name ?
app_info->engine_name : "unknown-engine";
char app_name[64];
snprintf(app_name, sizeof(app_name), "%s", app_name_str);
char engine_name[32];
snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
char output_name[128];
snprintf(output_name, sizeof(output_name), "tu_%s.%s_device%u",
app_name, engine_name, device->device_idx);
fd_rd_output_init(&device->rd_output, output_name);
}
*pDevice = tu_device_to_handle(device); *pDevice = tu_device_to_handle(device);
return VK_SUCCESS; return VK_SUCCESS;
@@ -2547,6 +2569,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
if (!device) if (!device)
return; return;
if (FD_RD_DUMP(ENABLE))
fd_rd_output_fini(&device->rd_output);
tu_breadcrumbs_finish(device); tu_breadcrumbs_finish(device);
u_trace_context_fini(&device->trace_context); u_trace_context_fini(&device->trace_context);

View File

@@ -20,6 +20,7 @@
#include "tu_suballoc.h" #include "tu_suballoc.h"
#include "tu_util.h" #include "tu_util.h"
#include "common/freedreno_rd_output.h"
#include "util/vma.h" #include "util/vma.h"
#include "util/u_vector.h" #include "util/u_vector.h"
@@ -123,6 +124,8 @@ struct tu_physical_device
struct vk_sync_type syncobj_type; struct vk_sync_type syncobj_type;
struct vk_sync_timeline_type timeline_type; struct vk_sync_timeline_type timeline_type;
const struct vk_sync_type *sync_types[3]; const struct vk_sync_type *sync_types[3];
uint32_t device_count;
}; };
VK_DEFINE_HANDLE_CASTS(tu_physical_device, vk.base, VkPhysicalDevice, VK_DEFINE_HANDLE_CASTS(tu_physical_device, vk.base, VkPhysicalDevice,
VK_OBJECT_TYPE_PHYSICAL_DEVICE) VK_OBJECT_TYPE_PHYSICAL_DEVICE)
@@ -253,6 +256,7 @@ struct tu_device
int queue_count[TU_MAX_QUEUE_FAMILIES]; int queue_count[TU_MAX_QUEUE_FAMILIES];
struct tu_physical_device *physical_device; struct tu_physical_device *physical_device;
uint32_t device_idx;
int fd; int fd;
struct ir3_compiler *compiler; struct ir3_compiler *compiler;
@@ -397,6 +401,8 @@ struct tu_device
bool use_z24uint_s8uint; bool use_z24uint_s8uint;
bool use_lrz; bool use_lrz;
struct fd_rd_output rd_output;
}; };
VK_DEFINE_HANDLE_CASTS(tu_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) VK_DEFINE_HANDLE_CASTS(tu_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)

View File

@@ -867,7 +867,7 @@ tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
static VkResult static VkResult
tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit) tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
{ {
queue->device->submit_count++; uint32_t submit_idx = queue->device->submit_count++;
struct tu_cs *autotune_cs = NULL; struct tu_cs *autotune_cs = NULL;
if (submit->autotune_fence) { if (submit->autotune_fence) {
@@ -910,16 +910,20 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
.syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj), .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
}; };
if (TU_DEBUG(RD)) { if (FD_RD_DUMP(ENABLE) && fd_rd_output_begin(&queue->device->rd_output, submit_idx)) {
struct tu_device *device = queue->device; struct tu_device *device = queue->device;
static uint32_t submit_idx; struct fd_rd_output *rd_output = &device->rd_output;
char path[32];
sprintf(path, "%.5d.rd", p_atomic_inc_return(&submit_idx));
int rd = open(path, O_CLOEXEC | O_WRONLY | O_CREAT | O_TRUNC, 0777);
if (rd >= 0) {
rd_write_section(rd, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4);
rd_write_section(rd, RD_CMD, "tu-dump", 8); if (FD_RD_DUMP(FULL)) {
VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0);
if (result != VK_SUCCESS) {
mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u",
device->device_idx, queue->msm_queue_id, 0);
}
}
fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4);
fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);
for (unsigned i = 0; i < device->bo_count; i++) { for (unsigned i = 0; i < device->bo_count; i++) {
struct drm_msm_gem_submit_bo bo = device->bo_list[i]; struct drm_msm_gem_submit_bo bo = device->bo_list[i];
@@ -927,10 +931,10 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
uint64_t iova = bo.presumed; uint64_t iova = bo.presumed;
uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 }; uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 };
rd_write_section(rd, RD_GPUADDR, buf, 12); fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
if (bo.flags & MSM_SUBMIT_BO_DUMP) { if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) {
msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */ msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */
rd_write_section(rd, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size); fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
} }
} }
@@ -939,10 +943,10 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset; uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset;
uint32_t size = cmd->size >> 2; uint32_t size = cmd->size >> 2;
uint32_t buf[3] = { iova, size, iova >> 32 }; uint32_t buf[3] = { iova, size, iova >> 32 };
rd_write_section(rd, RD_CMDSTREAM_ADDR, buf, 12); fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
}
close(rd);
} }
fd_rd_output_end(rd_output);
} }
int ret = drmCommandWriteRead(queue->device->fd, int ret = drmCommandWriteRead(queue->device->fd,

View File

@@ -8,6 +8,7 @@
#include <errno.h> #include <errno.h>
#include <stdarg.h> #include <stdarg.h>
#include "common/freedreno_rd_output.h"
#include "util/u_math.h" #include "util/u_math.h"
#include "util/timespec.h" #include "util/timespec.h"
#include "vk_enum_to_str.h" #include "vk_enum_to_str.h"
@@ -54,11 +55,19 @@ tu_env_init_once(void)
if (TU_DEBUG(STARTUP)) if (TU_DEBUG(STARTUP))
mesa_logi("TU_DEBUG=0x%x", tu_env.debug); mesa_logi("TU_DEBUG=0x%x", tu_env.debug);
/* TU_DEBUG=rd functionality was moved to fd_rd_output. This debug option
* should translate to the basic-level FD_RD_DUMP_ENABLE option.
*/
if (TU_DEBUG(RD))
fd_rd_dump_env.flags |= FD_RD_DUMP_ENABLE;
} }
void void
tu_env_init(void) tu_env_init(void)
{ {
fd_rd_dump_env_init();
static once_flag once = ONCE_FLAG_INIT; static once_flag once = ONCE_FLAG_INIT;
call_once(&once, tu_env_init_once); call_once(&once, tu_env_init_once);
} }