radeonsi: cull primitives with async compute for large draw calls

Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák
2018-08-14 02:01:18 -04:00
parent 187f1c999f
commit c9b7a37b8f
18 changed files with 2124 additions and 28 deletions

View File

@@ -337,6 +337,7 @@ struct si_log_chunk_cs {
struct si_saved_cs *cs;
bool dump_bo_list;
unsigned gfx_begin, gfx_end;
unsigned compute_begin, compute_end;
};
static void si_log_chunk_type_cs_destroy(void *data)
@@ -394,6 +395,7 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
struct si_context *ctx = chunk->ctx;
struct si_saved_cs *scs = chunk->cs;
int last_trace_id = -1;
int last_compute_trace_id = -1;
/* We are expecting that the ddebug pipe has already
* waited for the context, so this buffer should be idle.
@@ -403,8 +405,10 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
NULL,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ);
if (map)
if (map) {
last_trace_id = map[0];
last_compute_trace_id = map[1];
}
if (chunk->gfx_end != chunk->gfx_begin) {
if (chunk->gfx_begin == 0) {
@@ -432,6 +436,21 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
}
}
if (chunk->compute_end != chunk->compute_begin) {
assert(ctx->prim_discard_compute_cs);
if (scs->flushed) {
ac_parse_ib(f, scs->compute.ib + chunk->compute_begin,
chunk->compute_end - chunk->compute_begin,
&last_compute_trace_id, map ? 1 : 0, "Compute IB", ctx->chip_class,
NULL, NULL);
} else {
si_parse_current_ib(f, ctx->prim_discard_compute_cs, chunk->compute_begin,
chunk->compute_end, &last_compute_trace_id,
map ? 1 : 0, "Compute IB", ctx->chip_class);
}
}
if (chunk->dump_bo_list) {
fprintf(f, "Flushing. Time: ");
util_dump_ns(f, scs->time_flush);
@@ -452,9 +471,14 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
struct si_saved_cs *scs = ctx->current_saved_cs;
unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw;
unsigned compute_cur = 0;
if (ctx->prim_discard_compute_cs)
compute_cur = ctx->prim_discard_compute_cs->prev_dw + ctx->prim_discard_compute_cs->current.cdw;
if (!dump_bo_list &&
gfx_cur == scs->gfx_last_dw)
gfx_cur == scs->gfx_last_dw &&
compute_cur == scs->compute_last_dw)
return;
struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
@@ -467,6 +491,10 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
chunk->gfx_end = gfx_cur;
scs->gfx_last_dw = gfx_cur;
chunk->compute_begin = scs->compute_last_dw;
chunk->compute_end = compute_cur;
scs->compute_last_dw = compute_cur;
u_log_chunk(log, &si_log_chunk_type_cs, chunk);
}