freedreno: add fd_rd_output facilities for gzip-compressed RD dumps

Provide fd_rd_output facilities which enable constructing RD dumps that are
stored into gzip-compressed output. This matches the default behavior of
libwrap. The functionality is enabled, and its behavior adjusted, through
the FD_RD_DUMP environment variable.

Integration into Turnip's MSM backend is covered, replacing the previous
RD dump that was enabled through TU_DEBUG=rd. That debug option still
works and is the same as using FD_RD_DUMP=enable.

By default the dumps are created for each submission, using the provided
submit index. FD_RD_DUMP=combine enables gathering dumps for submissions
for the given logical device into a single file.

In the Turnip integration, FD_RD_DUMP=full will force dumping contents of
any buffer object. Additionally, with that option enabled any previous
submit will be waited on.

Specifying FD_RD_DUMP=trigger sets up a trigger file that can be used to
activate dumping manually. Writing zero or some non-integer value to the
file will disable dumping. Writing a positive integer value to it will
enable dumps for that many future submissions. Writing -1 to it will enable
dumps until disabled.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27230>
This commit is contained in:
Zan Dobersek
2024-01-24 07:55:56 +01:00
parent 0a97d1ebfa
commit f9c4e25483
8 changed files with 416 additions and 33 deletions

View File

@@ -369,9 +369,8 @@ Command Stream Capture
^^^^^^^^^^^^^^^^^^^^^^
During Mesa development, it's often useful to look at the command streams we
send to the kernel. Mesa itself doesn't implement a way to stream them out
(though it maybe should!). Instead, we have an interface for the kernel to
capture all submitted command streams:
send to the kernel. We have an interface for the kernel to capture all
submitted command streams:
.. code-block:: sh
@@ -391,6 +390,28 @@ probably want to cause a crash in the GPU during a frame of interest so that a
single GPU core dump is generated. Emitting ``0xdeadbeef`` in the CS should be
enough to cause a fault.
``fd_rd_output`` facilities provide support for generating the command stream
capture from inside Mesa. Different ``FD_RD_DUMP`` options are available:
- ``enable`` simply enables dumping the command stream on each submit for a
given logical device. When a more advanced option is specified, ``enable`` is
implied and does not need to be specified explicitly.
- ``combine`` will combine all dumps into a single file instead of writing the
dump for each submit into a standalone file.
- ``full`` will dump every buffer object, which is necessary for replays of
command streams (see below).
- ``trigger`` will establish a trigger file through which dumps can be better
controlled. Writing a positive integer value into the file will enable dumping
of that many subsequent submits. Writing -1 will enable dumping of submits
until disabled. Writing 0 (or any other value) will disable dumps.
Output dump files and trigger file (when enabled) are hard-coded to be placed
under ``/tmp``, or ``/data/local/tmp`` under Android.
Functionality is generic to any Freedreno-based backend, but is currently only
integrated in the MSM backend of Turnip. Using the existing ``TU_DEBUG=rd``
option will translate to ``FD_RD_DUMP=enable``.
Capturing Hang RD
+++++++++++++++++

View File

@@ -0,0 +1,250 @@
/*
* Copyright © 2024 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#include "freedreno_rd_output.h"
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include "c11/threads.h"
#include "util/log.h"
#include "util/u_atomic.h"
#include "util/u_debug.h"
/* Directory under which both the RD dump files and the trigger file are
 * created. The location is fixed at build time: /data/local/tmp when ANDROID
 * is defined, /tmp otherwise.
 */
#ifdef ANDROID
static const char *fd_rd_output_base_path = "/data/local/tmp";
#else
static const char *fd_rd_output_base_path = "/tmp";
#endif
/* Mapping from the option names accepted in the FD_RD_DUMP environment
 * variable to the corresponding fd_rd_dump_flags bits. The table is
 * terminated by a NULL-named entry and is consumed by parse_debug_string().
 */
static const struct debug_control fd_rd_dump_options[] = {
{ "enable", FD_RD_DUMP_ENABLE },
{ "combine", FD_RD_DUMP_COMBINE },
{ "full", FD_RD_DUMP_FULL },
{ "trigger", FD_RD_DUMP_TRIGGER },
{ NULL, 0 }
};
/* Process-wide RD dump configuration. Populated from FD_RD_DUMP by
 * fd_rd_dump_env_init() and queried through the FD_RD_DUMP() macro.
 */
struct fd_rd_dump_env fd_rd_dump_env;
/* Parses the FD_RD_DUMP environment variable into fd_rd_dump_env.flags.
 * Meant to run exactly once, through fd_rd_dump_env_init().
 */
static void
fd_rd_dump_env_init_once(void)
{
   const char *env_value = os_get_option("FD_RD_DUMP");
   uint32_t parsed_flags = parse_debug_string(env_value, fd_rd_dump_options);

   /* Every option other than "enable" implies "enable", and setting the bit
    * again when it is the only one present changes nothing, so any non-zero
    * flag set simply gets FD_RD_DUMP_ENABLE added.
    */
   if (parsed_flags != 0)
      parsed_flags |= FD_RD_DUMP_ENABLE;

   fd_rd_dump_env.flags = parsed_flags;
}
/* Initializes the global RD dump configuration from the environment.
 * May be called any number of times; the parsing itself is performed only
 * on the first call.
 */
void
fd_rd_dump_env_init(void)
{
   static once_flag env_parsed = ONCE_FLAG_INIT;

   call_once(&env_parsed, fd_rd_dump_env_init_once);
}
/* Sanitizes a null-terminated name in place so that it is safe to embed in
 * a file name: every character that is not alphanumeric, a hyphen, an
 * underscore or a dot is replaced with an underscore.
 */
static void
fd_rd_output_sanitize_name(char *name)
{
   for (char *s = name; *s; ++s) {
      /* The <ctype.h> classifiers require a value representable as unsigned
       * char (or EOF). Plain char may be signed, so bytes >= 0x80 (e.g. UTF-8
       * in an application name) must be converted before the call.
       */
      const unsigned char c = (unsigned char) *s;
      if (isalnum(c) || c == '-' || c == '_' || c == '.')
         continue;
      *s = '_';
   }
}
void
fd_rd_output_init(struct fd_rd_output *output, char* output_name)
{
snprintf(output->name, sizeof(output->name), "%s", output_name);
fd_rd_output_sanitize_name(output->name);
output->combine = false;
output->file = NULL;
output->trigger_fd = -1;
output->trigger_count = 0;
if (FD_RD_DUMP(COMBINE)) {
output->combine = true;
char file_path[256];
snprintf(file_path, sizeof(file_path), "%s/%s_combined.rd",
fd_rd_output_base_path, output->name);
output->file = gzopen(file_path, "w");
}
if (FD_RD_DUMP(TRIGGER)) {
char file_path[256];
snprintf(file_path, sizeof(file_path), "%s/%s_trigger",
fd_rd_output_base_path, output->name);
output->trigger_fd = open(file_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
}
}
/* Releases the resources held by an RD output: closes the combined dump
 * file if one is open, and closes and deletes the trigger file if one was
 * created.
 */
void
fd_rd_output_fini(struct fd_rd_output *output)
{
   if (output->file) {
      /* A file that is still open at this point can only be the combined one. */
      assert(output->combine);
      gzclose(output->file);
   }

   if (output->trigger_fd < 0)
      return;

   close(output->trigger_fd);

   /* The trigger file path is not stored in the struct; rebuild it from the
    * output name in order to remove the file.
    */
   char trigger_path[256];
   snprintf(trigger_path, sizeof(trigger_path), "%s/%s_trigger",
            fd_rd_output_base_path, output->name);
   unlink(trigger_path);
}
/* Polls the trigger file and, if something was written to it, updates
 * output->trigger_count accordingly:
 *   -1             -> UINT_MAX, i.e. dump every submit until disabled
 *   positive N     -> dump the next N submits
 *   anything else  -> 0, i.e. dumping disabled
 *
 * The trigger file is emptied after it has been read so that the same
 * request is not processed again. On any I/O failure an error is logged and
 * the current trigger count is left untouched.
 */
static void
fd_rd_output_update_trigger_count(struct fd_rd_output *output)
{
   assert(FD_RD_DUMP(TRIGGER));

   /* Retrieve the trigger file size, only attempt to update the trigger
    * value if anything was actually written to that file.
    */
   struct stat trigger_stat;
   if (fstat(output->trigger_fd, &trigger_stat) != 0) {
      mesa_loge("[fd_rd_output] failed to access the %s trigger file",
                output->name);
      return;
   }

   if (trigger_stat.st_size == 0)
      return;

   /* Leave room for the terminating null character. */
   char trigger_data[32];
   ssize_t num_read = read(output->trigger_fd, trigger_data,
                           sizeof(trigger_data) - 1);
   if (num_read < 0) {
      mesa_loge("[fd_rd_output] failed to read from the %s trigger file",
                output->name);
      return;
   }

   /* After reading from it, the trigger file should be reset, which means
    * moving the file offset to the start of the file as well as truncating
    * it to zero bytes.
    */
   if (lseek(output->trigger_fd, 0, SEEK_SET) < 0) {
      mesa_loge("[fd_rd_output] failed to reset the %s trigger file position",
                output->name);
      return;
   }

   if (ftruncate(output->trigger_fd, 0) < 0) {
      mesa_loge("[fd_rd_output] failed to truncate the %s trigger file",
                output->name);
      return;
   }

   /* Decode the count through strtol, keeping the full long result so that
    * large values are not truncated into -1 or 0 by a narrowing conversion.
    */
   trigger_data[num_read] = '\0';
   long value = strtol(trigger_data, NULL, 0);

   if (value == -1) {
      output->trigger_count = UINT_MAX;
      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps until disabled",
                output->name);
   } else if (value > 0) {
      /* UINT_MAX is reserved as the "until disabled" marker, so clamp finite
       * requests just below it.
       */
      output->trigger_count = (unsigned long) value < UINT_MAX ?
         (uint32_t) value : UINT_MAX - 1;
      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps for next %u submissions",
                output->name, output->trigger_count);
   } else {
      output->trigger_count = 0;
      mesa_logi("[fd_rd_output] %s trigger disabling RD dumps", output->name);
   }
}
/* Starts the RD dump for a single submit.
 *
 * Returns true when a dump should be written for this submit; the caller is
 * then expected to write its sections and finish with fd_rd_output_end().
 * Returns false when no dump is to be produced, either because the trigger
 * (FD_RD_DUMP=trigger) currently has no remaining count or because the
 * per-submit output file could not be opened.
 *
 * When not combining, the dump goes to "<name>_<submit_idx>.rd" under the
 * base path, with submit_idx zero-padded to at least five digits.
 */
bool
fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx)
{
   /* A file is open between submits if and only if output is combined. */
   assert(output->combine ^ (output->file == NULL));

   if (FD_RD_DUMP(TRIGGER)) {
      fd_rd_output_update_trigger_count(output);

      if (output->trigger_count == 0)
         return false;

      /* UINT_MAX corresponds to generating dumps until disabled. */
      if (output->trigger_count != UINT_MAX)
         --output->trigger_count;
   }

   if (output->combine)
      return true;

   char file_path[256];
   /* submit_idx is unsigned, so it is formatted with %u. */
   snprintf(file_path, sizeof(file_path), "%s/%s_%.5u.rd",
            fd_rd_output_base_path, output->name, submit_idx);
   output->file = gzopen(file_path, "w");
   if (output->file == NULL) {
      /* Without a file there is nothing to write into; report that no dump
       * is taking place so the caller skips the section writes and
       * fd_rd_output_end().
       */
      mesa_loge("[fd_rd_output] failed to open %s for writing", file_path);
      return false;
   }

   return true;
}
/* Writes size bytes from buffer into the currently open gzip output,
 * retrying on short writes. On failure an error is logged and the remaining
 * data is dropped.
 */
static void
fd_rd_output_write(struct fd_rd_output *output, const void *buffer, int size)
{
   const uint8_t *pos = (const uint8_t *) buffer;
   while (size > 0) {
      int ret = gzwrite(output->file, pos, size);
      /* gzwrite() signals an error by returning 0, so a non-positive result
       * must end the loop; otherwise neither pos nor size would advance and
       * the loop would never terminate.
       */
      if (ret <= 0) {
         /* gzerror() returns NULL for a NULL stream. */
         const char *reason = gzerror(output->file, NULL);
         mesa_loge("[fd_rd_output] failed to write to compressed output: %s",
                   reason ? reason : "unknown error");
         return;
      }

      pos += ret;
      size -= ret;
   }
}
/* Emits one RD section into the output: the section type, then the payload
 * size, then size bytes of payload taken from buffer. Type and size are
 * each written as 4 bytes.
 */
void
fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type,
                           const void *buffer, int size)
{
   const int header_field_size = 4;

   /* Section header: type tag followed by payload length. */
   fd_rd_output_write(output, &type, header_field_size);
   fd_rd_output_write(output, &size, header_field_size);

   /* Section payload. */
   fd_rd_output_write(output, buffer, size);
}
/* Finishes the dump of a single submit that was started with a successful
 * fd_rd_output_begin().
 */
void
fd_rd_output_end(struct fd_rd_output *output)
{
   assert(output->file != NULL);

   if (!output->combine) {
      /* Per-submit file: this dump is complete, close it. */
      gzclose(output->file);
      output->file = NULL;
      return;
   }

   /* Combined file: keep it open, but flush the gzip stream on each submit so
    * that all the data is stored before any problem during the submit itself
    * occurs.
    */
   gzflush(output->file, Z_FINISH);
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright © 2024 Igalia S.L.
* SPDX-License-Identifier: MIT
*/
#ifndef __FREEDRENO_RD_OUTPUT_H__
#define __FREEDRENO_RD_OUTPUT_H__
#include <stdbool.h>
#include <stdint.h>
#include <zlib.h>
#include "redump.h"
#ifdef __cplusplus
extern "C" {
#endif
enum fd_rd_dump_flags {
FD_RD_DUMP_ENABLE = 1 << 0,
FD_RD_DUMP_COMBINE = 1 << 1,
FD_RD_DUMP_FULL = 1 << 2,
FD_RD_DUMP_TRIGGER = 1 << 3,
};
/* Process-wide RD dump configuration. */
struct fd_rd_dump_env {
/* Bitmask of enum fd_rd_dump_flags values. */
uint32_t flags;
};
/* The single global configuration instance, filled in by
 * fd_rd_dump_env_init().
 */
extern struct fd_rd_dump_env fd_rd_dump_env;
/* Tests whether the given option is set, e.g. FD_RD_DUMP(COMBINE) checks
 * FD_RD_DUMP_COMBINE. The check is hinted as unlikely to be true.
 */
#define FD_RD_DUMP(name) unlikely(fd_rd_dump_env.flags & FD_RD_DUMP_##name)
/* Parses the FD_RD_DUMP environment variable into fd_rd_dump_env. Safe to
 * call repeatedly; the parsing is performed only once.
 */
void
fd_rd_dump_env_init(void);
/* State of one RD dump output, e.g. one per logical device. */
struct fd_rd_output {
/* Sanitized name used as the prefix of the dump and trigger file names. */
char name[128];
/* True when all submits are written into one combined file. */
bool combine;
/* Currently open gzip output. When combining it stays open for the lifetime
 * of the output; otherwise it is only open between fd_rd_output_begin() and
 * fd_rd_output_end() and NULL the rest of the time.
 */
gzFile file;
/* File descriptor of the trigger file, or -1 when there is none. */
int trigger_fd;
/* Number of upcoming submits to dump when the trigger is in use. UINT_MAX
 * means dumping until disabled, 0 means dumping is disabled.
 */
uint32_t trigger_count;
};
/* Initializes the output. output_name is copied and sanitized for use in
 * file names. Depending on the FD_RD_DUMP options, the combined dump file
 * and/or the trigger file are created here.
 */
void
fd_rd_output_init(struct fd_rd_output *output, char* output_name);
/* Closes the combined dump file if one is open, and closes and removes the
 * trigger file if one was created.
 */
void
fd_rd_output_fini(struct fd_rd_output *output);
/* Starts the dump for the submit with the given index. Returns true when a
 * dump is to be written, in which case the caller writes its sections and
 * then calls fd_rd_output_end(). Returns false when the trigger currently
 * does not allow dumping.
 */
bool
fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx);
/* Writes one section: the 4-byte type, the 4-byte size and then size bytes
 * of data from buffer.
 */
void
fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type,
const void *buffer, int size);
/* Ends the dump for the current submit: flushes the combined file, or closes
 * the per-submit file.
 */
void
fd_rd_output_end(struct fd_rd_output *output);
#ifdef __cplusplus
}
#endif
#endif /* __FREEDRENO_RD_OUTPUT_H__ */

View File

@@ -38,6 +38,8 @@ libfreedreno_common = static_library(
'freedreno_dev_info.c',
'freedreno_dev_info.h',
'freedreno_pm4.h',
'freedreno_rd_output.c',
'freedreno_rd_output.h',
'freedreno_uuid.c',
'freedreno_uuid.h',
'freedreno_guardband.h',

View File

@@ -21,6 +21,7 @@
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"
@@ -2219,6 +2220,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->instance = physical_device->instance;
device->physical_device = physical_device;
device->device_idx = device->physical_device->device_count++;
result = tu_drm_device_init(device);
if (result != VK_SUCCESS) {
@@ -2492,6 +2494,26 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
tu_breadcrumbs_init(device);
if (FD_RD_DUMP(ENABLE)) {
struct vk_app_info *app_info = &device->instance->vk.app_info;
const char *app_name_str = app_info->app_name ?
app_info->app_name : util_get_process_name();
const char *engine_name_str = app_info->engine_name ?
app_info->engine_name : "unknown-engine";
char app_name[64];
snprintf(app_name, sizeof(app_name), "%s", app_name_str);
char engine_name[32];
snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
char output_name[128];
snprintf(output_name, sizeof(output_name), "tu_%s.%s_device%u",
app_name, engine_name, device->device_idx);
fd_rd_output_init(&device->rd_output, output_name);
}
*pDevice = tu_device_to_handle(device);
return VK_SUCCESS;
@@ -2547,6 +2569,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
if (!device)
return;
if (FD_RD_DUMP(ENABLE))
fd_rd_output_fini(&device->rd_output);
tu_breadcrumbs_finish(device);
u_trace_context_fini(&device->trace_context);

View File

@@ -20,6 +20,7 @@
#include "tu_suballoc.h"
#include "tu_util.h"
#include "common/freedreno_rd_output.h"
#include "util/vma.h"
#include "util/u_vector.h"
@@ -123,6 +124,8 @@ struct tu_physical_device
struct vk_sync_type syncobj_type;
struct vk_sync_timeline_type timeline_type;
const struct vk_sync_type *sync_types[3];
uint32_t device_count;
};
VK_DEFINE_HANDLE_CASTS(tu_physical_device, vk.base, VkPhysicalDevice,
VK_OBJECT_TYPE_PHYSICAL_DEVICE)
@@ -253,6 +256,7 @@ struct tu_device
int queue_count[TU_MAX_QUEUE_FAMILIES];
struct tu_physical_device *physical_device;
uint32_t device_idx;
int fd;
struct ir3_compiler *compiler;
@@ -397,6 +401,8 @@ struct tu_device
bool use_z24uint_s8uint;
bool use_lrz;
struct fd_rd_output rd_output;
};
VK_DEFINE_HANDLE_CASTS(tu_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)

View File

@@ -867,7 +867,7 @@ tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
static VkResult
tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
{
queue->device->submit_count++;
uint32_t submit_idx = queue->device->submit_count++;
struct tu_cs *autotune_cs = NULL;
if (submit->autotune_fence) {
@@ -910,16 +910,20 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
.syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
};
if (TU_DEBUG(RD)) {
if (FD_RD_DUMP(ENABLE) && fd_rd_output_begin(&queue->device->rd_output, submit_idx)) {
struct tu_device *device = queue->device;
static uint32_t submit_idx;
char path[32];
sprintf(path, "%.5d.rd", p_atomic_inc_return(&submit_idx));
int rd = open(path, O_CLOEXEC | O_WRONLY | O_CREAT | O_TRUNC, 0777);
if (rd >= 0) {
rd_write_section(rd, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4);
struct fd_rd_output *rd_output = &device->rd_output;
rd_write_section(rd, RD_CMD, "tu-dump", 8);
if (FD_RD_DUMP(FULL)) {
VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0);
if (result != VK_SUCCESS) {
mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u",
device->device_idx, queue->msm_queue_id, 0);
}
}
fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4);
fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);
for (unsigned i = 0; i < device->bo_count; i++) {
struct drm_msm_gem_submit_bo bo = device->bo_list[i];
@@ -927,10 +931,10 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
uint64_t iova = bo.presumed;
uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 };
rd_write_section(rd, RD_GPUADDR, buf, 12);
if (bo.flags & MSM_SUBMIT_BO_DUMP) {
fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) {
msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */
rd_write_section(rd, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
}
}
@@ -939,10 +943,10 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset;
uint32_t size = cmd->size >> 2;
uint32_t buf[3] = { iova, size, iova >> 32 };
rd_write_section(rd, RD_CMDSTREAM_ADDR, buf, 12);
}
close(rd);
fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
}
fd_rd_output_end(rd_output);
}
int ret = drmCommandWriteRead(queue->device->fd,

View File

@@ -8,6 +8,7 @@
#include <errno.h>
#include <stdarg.h>
#include "common/freedreno_rd_output.h"
#include "util/u_math.h"
#include "util/timespec.h"
#include "vk_enum_to_str.h"
@@ -54,11 +55,19 @@ tu_env_init_once(void)
if (TU_DEBUG(STARTUP))
mesa_logi("TU_DEBUG=0x%x", tu_env.debug);
/* TU_DEBUG=rd functionality was moved to fd_rd_output. This debug option
* should translate to the basic-level FD_RD_DUMP_ENABLE option.
*/
if (TU_DEBUG(RD))
fd_rd_dump_env.flags |= FD_RD_DUMP_ENABLE;
}
/* Initializes the environment-derived debug configuration. Safe to call
 * repeatedly; tu_env_init_once() runs only on the first call.
 */
void
tu_env_init(void)
{
/* FD_RD_DUMP must be parsed before tu_env_init_once() runs: that function
 * ORs FD_RD_DUMP_ENABLE into fd_rd_dump_env.flags for TU_DEBUG=rd, while the
 * FD_RD_DUMP parsing assigns the flags outright and would discard that bit
 * if it ran afterwards.
 */
fd_rd_dump_env_init();
static once_flag once = ONCE_FLAG_INIT;
call_once(&once, tu_env_init_once);
}