ddebug: rewrite to always use a threaded approach

This patch has multiple goals:

1. Off-load the writing of records in 'always' mode to another thread
   for performance.

2. Allow using ddebug with threaded contexts. This really forces us to
   move some of the "after_draw" handling into another thread.

3. Simplify the different modes of ddebug, both in the code and in
   the user interface, i.e. GALLIUM_DDEBUG. In particular, there's
   no 'pipelined' anymore, since we're always pipelined; and 'noflush'
   is replaced by 'flush', since we no longer flush by default.

4. Fix the fences in pipelining mode. They previously relied on writes
   via pipe_context::clear_buffer. However, on radeonsi, those could
   (quite reasonably) end up in the SDMA buffer. So we use the newly
   added PIPE_FLUSH_{TOP,BOTTOM}_OF_PIPE fences instead.

5. Improve pipelined mode overall, using the finer grained information
   provided by the new fences.

Overall, the result is that pipelined mode should be more useful, and
using ddebug in default mode is much less invasive, in the sense that
it changes the overall driver behavior less (which is kind of crucial
for a driver debugging tool).

An example of the new hang debug output:

  Gallium debugger active.
  Hang detection timeout is 1000ms.
  GPU hang detected, collecting information...

  Draw #   driver  prev BOP  TOP  BOP  dump file
  -------------------------------------------------------------
  2          YES      YES    YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000000
  3          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000001
  4          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000002
  5          YES      NO     YES  NO   /home/nha/ddebug_dumps/shader_runner_19919_00000003

  Done.

We can see that there were almost certainly 4 draws in flight when
the hang happened: the top-of-pipe fence was signaled for all 4 draws,
the bottom-of-pipe fence for none of them. In virtually all cases,
we'd expect the first draw in the list to be at fault, but due to the
GPU parallelism, it's possible (though highly unlikely) that one of
the later draws causes a component to get stuck in a way that prevents
the earlier draws from making progress as well.

(In the above example, there were actually only 3 draws truly in flight:
the last draw is a blit that waits for the earlier draws; however, its
top-of-pipe fence is emitted before the cache flush and wait, and so
the fact that the draw hasn't truly started yet can only be seen from a
closer inspection of GPU state.)

Acked-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Nicolai Hähnle
2017-10-22 17:38:59 +02:00
parent e8bb8758dd
commit c9fefa062b
4 changed files with 545 additions and 514 deletions

View File

@@ -564,30 +564,33 @@ dd_context_set_stream_output_targets(struct pipe_context *_pipe,
pipe->set_stream_output_targets(pipe, num_targets, tgs, offsets);
}
/**
 * Ask the context's worker thread to exit and wait until it has done so.
 *
 * Sets dctx->kill_thread, signals dctx->cond, and then joins dctx->thread.
 * The thread's exit code is discarded (NULL result pointer). The mutex and
 * the condition variable are not destroyed here.
 *
 * \param dctx  context whose worker thread should be stopped
 */
void
dd_thread_join(struct dd_context *dctx)
{
/* The flag is written and the condition signalled while holding the mutex.
 * NOTE(review): presumably the worker tests kill_thread under the same
 * mutex before waiting on cond, so the wake-up cannot be missed -- confirm
 * against the thread's main function. */
mtx_lock(&dctx->mutex);
dctx->kill_thread = true;
cnd_signal(&dctx->cond);
mtx_unlock(&dctx->mutex);
/* Join only after dropping the lock. */
thrd_join(dctx->thread, NULL);
}
static void
dd_context_destroy(struct pipe_context *_pipe)
{
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
if (dctx->thread) {
mtx_lock(&dctx->mutex);
dctx->kill_thread = 1;
mtx_unlock(&dctx->mutex);
thrd_join(dctx->thread, NULL);
mtx_destroy(&dctx->mutex);
assert(!dctx->records);
}
dd_thread_join(dctx);
mtx_destroy(&dctx->mutex);
cnd_destroy(&dctx->cond);
if (dctx->fence) {
pipe->transfer_unmap(pipe, dctx->fence_transfer);
pipe_resource_reference(&dctx->fence, NULL);
}
assert(list_empty(&dctx->records));
assert(!dctx->record_pending);
if (pipe->set_log_context) {
pipe->set_log_context(pipe, NULL);
if (dd_screen(dctx->base.screen)->mode == DD_DUMP_ALL_CALLS) {
if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {
FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
if (f) {
fprintf(f, "Remainder of driver log:\n\n");
@@ -921,39 +924,19 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
dctx->draw_state.sample_mask = ~0;
if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, 4);
if (!dctx->fence)
goto fail;
dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
PIPE_TRANSFER_READ_WRITE |
PIPE_TRANSFER_PERSISTENT |
PIPE_TRANSFER_COHERENT,
&dctx->fence_transfer);
if (!dctx->mapped_fence)
goto fail;
*dctx->mapped_fence = 0;
(void) mtx_init(&dctx->mutex, mtx_plain);
dctx->thread = u_thread_create(dd_thread_pipelined_hang_detect, dctx);
if (!dctx->thread) {
mtx_destroy(&dctx->mutex);
goto fail;
}
list_inithead(&dctx->records);
(void) mtx_init(&dctx->mutex, mtx_plain);
(void) cnd_init(&dctx->cond);
dctx->thread = u_thread_create(dd_thread_main, dctx);
if (!dctx->thread) {
mtx_destroy(&dctx->mutex);
goto fail;
}
return &dctx->base;
fail:
if (dctx) {
if (dctx->mapped_fence)
pipe_transfer_unmap(pipe, dctx->fence_transfer);
pipe_resource_reference(&dctx->fence, NULL);
FREE(dctx);
}
FREE(dctx);
pipe->destroy(pipe);
return NULL;
}

File diff suppressed because it is too large Load Diff

View File

@@ -33,11 +33,14 @@
#include "pipe/p_screen.h"
#include "dd_util.h"
#include "os/os_thread.h"
#include "util/list.h"
#include "util/u_log.h"
#include "util/u_queue.h"
enum dd_mode {
DD_DETECT_HANGS,
DD_DETECT_HANGS_PIPELINED,
struct dd_context;
enum dd_dump_mode {
DD_DUMP_ONLY_HANGS,
DD_DUMP_ALL_CALLS,
DD_DUMP_APITRACE_CALL,
};
@@ -47,8 +50,8 @@ struct dd_screen
struct pipe_screen base;
struct pipe_screen *screen;
unsigned timeout_ms;
enum dd_mode mode;
bool no_flush;
enum dd_dump_mode dump_mode;
bool flush_always;
bool verbose;
unsigned skip_count;
unsigned apitrace_dump_call;
@@ -218,13 +221,19 @@ struct dd_draw_state_copy
};
struct dd_draw_record {
struct dd_draw_record *next;
struct list_head list;
struct dd_context *dctx;
int64_t timestamp;
uint32_t sequence_no;
unsigned draw_call;
struct pipe_fence_handle *prev_bottom_of_pipe;
struct pipe_fence_handle *top_of_pipe;
struct pipe_fence_handle *bottom_of_pipe;
struct dd_call call;
struct dd_draw_state_copy draw_state;
struct util_queue_fence driver_finished;
struct u_log_page *log_page;
};
@@ -252,17 +261,16 @@ struct dd_context
*
* An independent, separate thread loops over the list of records and checks
* their fences. Records with signalled fences are freed. On fence timeout,
* the thread dumps the record of the oldest unsignalled fence.
* the thread dumps the records of in-flight draws.
*/
thrd_t thread;
mtx_t mutex;
int kill_thread;
struct pipe_resource *fence;
struct pipe_transfer *fence_transfer;
uint32_t *mapped_fence;
uint32_t sequence_no;
struct dd_draw_record *records;
int max_log_buffer_size;
cnd_t cond;
struct dd_draw_record *record_pending; /* currently inside the driver */
struct list_head records; /* oldest record first */
unsigned num_records;
bool kill_thread;
bool api_stalled;
};
@@ -271,8 +279,11 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
void
dd_init_draw_functions(struct dd_context *dctx);
void
dd_thread_join(struct dd_context *dctx);
int
dd_thread_pipelined_hang_detect(void *input);
dd_thread_main(void *input);
FILE *
dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number);

View File

@@ -28,6 +28,7 @@
#include "dd_pipe.h"
#include "dd_public.h"
#include "util/u_memory.h"
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
@@ -381,15 +382,55 @@ dd_screen_destroy(struct pipe_screen *_screen)
FREE(dscreen);
}
/**
 * Advance a string cursor past any leading whitespace.
 *
 * \param p  in/out cursor into a NUL-terminated string; on return it points
 *           at the first non-whitespace character, or at the terminating
 *           NUL if only whitespace remained.
 */
static void
skip_space(const char **p)
{
   /* The <ctype.h> classifiers require a value representable as unsigned
    * char (or EOF). Plain char may be signed, so bytes >= 0x80 must be
    * converted first to avoid undefined behavior. */
   while (isspace((unsigned char)**p))
      (*p)++;
}
/**
 * Try to consume a whole keyword at the cursor.
 *
 * The keyword matches only if it is followed by a whitespace character or
 * by the end of the string, so "always" does not match "alwaysx".
 *
 * \param cur   in/out cursor into a NUL-terminated string. On a match it is
 *              advanced past the keyword and past the single whitespace
 *              character that follows it (if any); otherwise it is left
 *              unchanged.
 * \param word  NUL-terminated keyword to look for
 * \return true if the keyword was matched and consumed
 */
static bool
match_word(const char **cur, const char *word)
{
   size_t len = strlen(word);
   if (strncmp(*cur, word, len) != 0)
      return false;

   const char *p = *cur + len;
   if (*p) {
      /* Cast before classifying: passing a negative plain char to
       * isspace() is undefined behavior. */
      if (!isspace((unsigned char)*p))
         return false;
      *cur = p + 1;
   } else {
      *cur = p;
   }

   return true;
}
/**
 * Try to consume an unsigned integer at the cursor.
 *
 * The number is parsed with strtoul() in base 0, so decimal, octal ("0...")
 * and hexadecimal ("0x...") forms are accepted. It must be followed by a
 * whitespace character or by the end of the string.
 *
 * Negative input and values that do not fit in an unsigned int are
 * rejected instead of being silently wrapped or truncated.
 *
 * \param cur    in/out cursor into a NUL-terminated string. On success it
 *               is advanced to the first character after the number;
 *               otherwise it is left unchanged.
 * \param value  receives the parsed number on success; untouched on failure
 * \return true if a valid number was parsed and consumed
 */
static bool
match_uint(const char **cur, unsigned *value)
{
   const char *first = *cur;
   char *end;
   unsigned long parsed;

   /* strtoul() skips leading whitespace and then accepts a '-' sign,
    * negating the result in unsigned arithmetic. Reject that explicitly. */
   while (isspace((unsigned char)*first))
      first++;
   if (*first == '-')
      return false;

   errno = 0;
   parsed = strtoul(*cur, &end, 0);
   if (end == *cur || (*end && !isspace((unsigned char)*end)))
      return false;

   /* ERANGE covers overflow of unsigned long; the UINT_MAX comparison
    * covers platforms where unsigned long is wider than unsigned. */
   if (errno == ERANGE || parsed > UINT_MAX)
      return false;

   *cur = end;
   *value = (unsigned)parsed;
   return true;
}
struct pipe_screen *
ddebug_screen_create(struct pipe_screen *screen)
{
struct dd_screen *dscreen;
const char *option;
bool no_flush;
unsigned timeout = 0;
bool flush = false;
bool verbose = false;
unsigned timeout = 1000;
unsigned apitrace_dump_call = 0;
enum dd_mode mode;
enum dd_dump_mode mode = DD_DUMP_ONLY_HANGS;
option = debug_get_option("GALLIUM_DDEBUG", NULL);
if (!option)
@@ -400,53 +441,70 @@ ddebug_screen_create(struct pipe_screen *screen)
puts("");
puts("Usage:");
puts("");
puts(" GALLIUM_DDEBUG=\"always [noflush] [verbose]\"");
puts(" Flush and dump context and driver information after every draw call into");
puts(" $HOME/"DD_DIR"/.");
puts("");
puts(" GALLIUM_DDEBUG=\"[timeout in ms] [noflush] [verbose]\"");
puts(" Flush and detect a device hang after every draw call based on the given");
puts(" fence timeout and dump context and driver information into");
puts(" $HOME/"DD_DIR"/ when a hang is detected.");
puts("");
puts(" GALLIUM_DDEBUG=\"pipelined [timeout in ms] [verbose]\"");
puts(" Detect a device hang after every draw call based on the given fence");
puts(" timeout without flushes and dump context and driver information into");
puts(" $HOME/"DD_DIR"/ when a hang is detected.");
puts("");
puts(" GALLIUM_DDEBUG=\"apitrace [call#] [verbose]\"");
puts(" Dump apitrace draw call information into $HOME/"DD_DIR"/. Implies 'noflush'.");
puts("");
puts(" If 'noflush' is specified, do not flush on every draw call. In hang");
puts(" detection mode, this only detect hangs in pipe->flush.");
puts(" If 'verbose' is specified, additional information is written to stderr.");
puts("");
puts(" GALLIUM_DDEBUG=\"[<timeout in ms>] [(always|apitrace <call#)] [flush] [verbose]\"");
puts(" GALLIUM_DDEBUG_SKIP=[count]");
puts(" Skip flush and hang detection for the given initial number of draw calls.");
puts("");
puts("Dump context and driver information of draw calls into");
puts("$HOME/"DD_DIR"/. By default, watch for GPU hangs and only dump information");
puts("about draw calls related to the hang.");
puts("");
puts("<timeout in ms>");
puts(" Change the default timeout for GPU hang detection (default=1000ms).");
puts(" Setting this to 0 will disable GPU hang detection entirely.");
puts("");
puts("always");
puts(" Dump information about all draw calls.");
puts("");
puts("apitrace <call#>");
puts(" Dump information about the draw call corresponding to the given");
puts(" apitrace call number and exit.");
puts("");
puts("flush");
puts(" Flush after every draw call.");
puts("");
puts("verbose");
puts(" Write additional information to stderr.");
puts("");
puts("GALLIUM_DDEBUG_SKIP=count");
puts(" Skip dumping on the first count draw calls (only relevant with 'always').");
puts("");
exit(0);
}
no_flush = strstr(option, "noflush") != NULL;
for (;;) {
skip_space(&option);
if (!*option)
break;
if (!strncmp(option, "always", 6)) {
mode = DD_DUMP_ALL_CALLS;
} else if (!strncmp(option, "apitrace", 8)) {
mode = DD_DUMP_APITRACE_CALL;
no_flush = true;
if (match_word(&option, "always")) {
if (mode == DD_DUMP_APITRACE_CALL) {
printf("ddebug: both 'always' and 'apitrace' specified\n");
exit(1);
}
if (sscanf(option+8, "%u", &apitrace_dump_call) != 1)
return screen;
} else if (!strncmp(option, "pipelined", 9)) {
mode = DD_DETECT_HANGS_PIPELINED;
mode = DD_DUMP_ALL_CALLS;
} else if (match_word(&option, "flush")) {
flush = true;
} else if (match_word(&option, "verbose")) {
verbose = true;
} else if (match_word(&option, "apitrace")) {
if (mode != DD_DUMP_ONLY_HANGS) {
printf("ddebug: 'apitrace' can only appear once and not mixed with 'always'\n");
exit(1);
}
if (sscanf(option+10, "%u", &timeout) != 1)
return screen;
} else {
mode = DD_DETECT_HANGS;
if (!match_uint(&option, &apitrace_dump_call)) {
printf("ddebug: expected call number after 'apitrace'\n");
exit(1);
}
if (sscanf(option, "%u", &timeout) != 1)
return screen;
mode = DD_DUMP_APITRACE_CALL;
} else if (match_uint(&option, &timeout)) {
/* no-op */
} else {
printf("ddebug: bad options: %s\n", option);
exit(1);
}
}
dscreen = CALLOC_STRUCT(dd_screen);
@@ -496,27 +554,28 @@ ddebug_screen_create(struct pipe_screen *screen)
dscreen->screen = screen;
dscreen->timeout_ms = timeout;
dscreen->mode = mode;
dscreen->no_flush = no_flush;
dscreen->verbose = strstr(option, "verbose") != NULL;
dscreen->dump_mode = mode;
dscreen->flush_always = flush;
dscreen->verbose = verbose;
dscreen->apitrace_dump_call = apitrace_dump_call;
switch (dscreen->mode) {
switch (dscreen->dump_mode) {
case DD_DUMP_ALL_CALLS:
fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
break;
case DD_DETECT_HANGS:
case DD_DETECT_HANGS_PIPELINED:
fprintf(stderr, "Gallium debugger active. "
"The hang detection timeout is %i ms.\n", timeout);
break;
case DD_DUMP_APITRACE_CALL:
fprintf(stderr, "Gallium debugger active. Going to dump an apitrace call.\n");
break;
default:
assert(0);
fprintf(stderr, "Gallium debugger active.\n");
break;
}
if (dscreen->timeout_ms > 0)
fprintf(stderr, "Hang detection timeout is %ums.\n", dscreen->timeout_ms);
else
fprintf(stderr, "Hang detection is disabled.\n");
dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
if (dscreen->skip_count > 0) {
fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",