radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang

The trace file will be dumped as part of the hang report into
$HOME/radv_dumps_<pid>/trace.log if a GPU hang is detected.

The old and famous RADV_TRACE_FILE envvar is now deprecated: setting it prints a warning pointing to RADV_DEBUG=hang and aborts.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7233>
This commit is contained in:
Samuel Pitoiset
2020-10-19 18:37:26 +02:00
committed by Marge Bot
parent 9516f9369e
commit 33c9d4bf31
4 changed files with 24 additions and 18 deletions

View File

@@ -554,6 +554,9 @@ RADV driver environment variables
``forcecompress`` ``forcecompress``
Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it
but normally does not deem it beneficial. but normally does not deem it beneficial.
``hang``
enable GPU hang detection and dump a report to $HOME/radv_dumps_<pid>
if a GPU hang is detected
``info`` ``info``
show GPU-related information show GPU-related information
``metashaders`` ``metashaders``
@@ -624,8 +627,6 @@ RADV driver environment variables
``RADV_TEX_ANISO`` ``RADV_TEX_ANISO``
force anisotropy filter (up to 16) force anisotropy filter (up to 16)
``RADV_TRACE_FILE``
generate cmdbuffer tracefiles when a GPU hang is detected
``ACO_DEBUG`` ``ACO_DEBUG``
a comma-separated list of named flags, which do various things: a comma-separated list of named flags, which do various things:

View File

@@ -83,19 +83,10 @@ radv_init_trace(struct radv_device *device)
} }
static void static void
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs) radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{ {
const char *filename = getenv("RADV_TRACE_FILE");
FILE *f = fopen(filename, "w");
if (!f) {
fprintf(stderr, "Failed to write trace dump to %s\n", filename);
return;
}
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2); device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
fclose(f);
} }
static void static void
@@ -625,8 +616,6 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
fprintf(stderr, "radv: GPU hang detected...\n"); fprintf(stderr, "radv: GPU hang detected...\n");
radv_dump_trace(queue->device, cs);
/* Create a directory into $HOME/radv_dumps_<pid> to save various /* Create a directory into $HOME/radv_dumps_<pid> to save various
* debugging info about that GPU hang. * debugging info about that GPU hang.
*/ */
@@ -638,6 +627,14 @@ radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
abort(); abort();
} }
/* Dump trace file. */
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
f = fopen(dump_path, "w+");
if (f) {
radv_dump_trace(queue->device, cs, f);
fclose(f);
}
/* Dump pipeline state. */ /* Dump pipeline state. */
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log"); snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
f = fopen(dump_path, "w+"); f = fopen(dump_path, "w+");

View File

@@ -57,6 +57,7 @@ enum {
RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26, RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26,
RADV_DEBUG_LLVM = 1 << 27, RADV_DEBUG_LLVM = 1 << 27,
RADV_DEBUG_FORCE_COMPRESS = 1 << 28, RADV_DEBUG_FORCE_COMPRESS = 1 << 28,
RADV_DEBUG_HANG = 1 << 29,
}; };
enum { enum {

View File

@@ -530,6 +530,7 @@ static const struct debug_control radv_debug_options[] = {
{"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE}, {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
{"llvm", RADV_DEBUG_LLVM}, {"llvm", RADV_DEBUG_LLVM},
{"forcecompress", RADV_DEBUG_FORCE_COMPRESS}, {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
{"hang", RADV_DEBUG_HANG},
{NULL, 0} {NULL, 0}
}; };
@@ -2794,19 +2795,25 @@ VkResult radv_CreateDevice(
device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192; device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
if (getenv("RADV_TRACE_FILE")) { if (getenv("RADV_TRACE_FILE")) {
const char *filename = getenv("RADV_TRACE_FILE"); fprintf(stderr, "***********************************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
fprintf(stderr, "***********************************************************************************\n");
abort();
}
if (device->instance->debug_flags & RADV_DEBUG_HANG) {
/* Enable GPU hang detection and dump logs if a GPU hang is
* detected.
*/
keep_shader_info = true; keep_shader_info = true;
if (!radv_init_trace(device)) if (!radv_init_trace(device))
goto fail; goto fail;
fprintf(stderr, "*****************************************************************************\n"); fprintf(stderr, "*****************************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n"); fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
fprintf(stderr, "*****************************************************************************\n"); fprintf(stderr, "*****************************************************************************\n");
fprintf(stderr, "Trace file will be dumped to %s\n", filename);
/* Wait for idle after every draw/dispatch to identify the /* Wait for idle after every draw/dispatch to identify the
* first bad call. * first bad call.
*/ */