radeonsi: add cs tracing v3

Same as on r600, trace cs execution by writing the cs offset after each
state; this allows pinpointing a lockup inside the command stream and
narrows down the scope of lockup investigation.

v2: Use WRITE_DATA packet instead of WRITE_MEM
v3: Remove useless nop packet

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
This commit is contained in:
Jerome Glisse
2013-03-25 11:46:38 -04:00
parent 21a2dfa55d
commit 3f7d9710e8
6 changed files with 124 additions and 1 deletions

View File

@@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
/* Save 16 dwords for the fence mechanism. */
num_dw += 16;
#if R600_TRACE_CS
/* Tracing: when a trace buffer exists, also reserve the dwords of one trace marker. */
if (ctx->screen->trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
#endif
/* Flush if there's not enough space. */
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
@@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
#if R600_TRACE_CS
/*
 * Tracing: before submitting, dump every dword currently in the CS to stderr
 * as "[cs number] [dword index] value", then advance the per-screen CS counter.
 */
if (ctx->screen->trace_bo) {
struct r600_screen *rscreen = ctx->screen;
unsigned i;
for (i = 0; i < cs->cdw; i++) {
fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
}
rscreen->cs_count++;
}
#endif
/* Flush the CS. */
ctx->ws->cs_flush(ctx->cs, flags);
#if R600_TRACE_CS
/*
 * Tracing: after the flush, poll the trace buffer up to 10 times, sleeping
 * between polls, and stop as soon as the winsys reports it is no longer busy.
 *
 * NOTE(review): usleep() takes microseconds, so the whole wait is roughly
 * 50us, yet the success message below labels i * 5 as "ms" -- confirm which
 * unit was intended.
 */
if (ctx->screen->trace_bo) {
struct r600_screen *rscreen = ctx->screen;
unsigned i;
for (i = 0; i < 10; i++) {
usleep(5);
if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
break;
}
}
if (i == 10) {
/*
 * Still busy after every poll: report the marker currently stored in the
 * mapped trace buffer (trace_ptr[1] is printed as the cs number,
 * trace_ptr[0] as the dword offset).
 */
fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
} else {
fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
}
}
#endif
ctx->pm4_dirty_cdwords = 0;
ctx->flags = 0;
@@ -665,3 +703,23 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
}
#if R600_TRACE_CS
/**
 * Append a trace marker to the context's command stream.
 *
 * Adds the screen's trace buffer to the CS relocations and writes a
 * WRITE_DATA packet of R600_TRACE_CS_DWORDS dwords addressed at the start of
 * that buffer. The packet payload is two dwords:
 *   - the CS dword index at which the payload begins, and
 *   - the screen's current cs_count.
 * si_context_flush() prints these two values back from trace_ptr[0] and
 * trace_ptr[1] when it reports on a submitted CS.
 *
 * The caller must only call this when rscreen->trace_bo is non-NULL and must
 * have reserved R600_TRACE_CS_DWORDS dwords of CS space beforehand.
 *
 * @param rctx  context whose command stream receives the marker
 */
void r600_trace_emit(struct r600_context *rctx)
{
	struct r600_screen *rscreen = rctx->screen;
	struct radeon_winsys_cs *cs = rctx->cs;
	uint64_t va;
	uint32_t trace_dw;

	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
	r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);

	/* Header: count field 4 => five dwords follow the header. */
	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
			     PKT3_WRITE_DATA_WR_CONFIRM |
			     PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
	/* Destination address, low then high 32 bits. */
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;

	/*
	 * Snapshot the offset before storing it: reading cs->cdw and
	 * post-incrementing it in the same expression is unsequenced and
	 * therefore undefined behaviour, so the recorded value would otherwise
	 * depend on the compiler.
	 */
	trace_dw = cs->cdw;
	cs->buf[cs->cdw++] = trace_dw;
	cs->buf[cs->cdw++] = rscreen->cs_count;
}
#endif

View File

@@ -522,6 +522,14 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
si_resource_reference(&rscreen->fences.bo, NULL);
}
#if R600_TRACE_CS
/* Tracing: unmap the trace buffer and clear the screen's reference to it. */
if (rscreen->trace_bo) {
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
}
#endif
pipe_mutex_destroy(rscreen->fences.mutex);
rscreen->ws->destroy(rscreen->ws);
@@ -724,5 +732,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
LIST_INITHEAD(&rscreen->fences.blocks);
pipe_mutex_init(rscreen->fences.mutex);
#if R600_TRACE_CS
/*
 * Tracing: start the CS counter at zero and, when the DRM minor version is at
 * least 28, create a 4096-byte staging buffer for trace markers and keep a
 * CPU mapping of it in trace_ptr.
 *
 * NOTE(review): trace_bo is not assigned when drm_minor < 28 -- presumably
 * the screen struct is zero-allocated; confirm. The buffer_map() result is
 * also not checked here although trace_ptr is dereferenced in
 * si_context_flush() whenever trace_bo is set.
 */
rscreen->cs_count = 0;
if (rscreen->info.drm_minor >= 28) {
rscreen->trace_bo = (struct si_resource*)pipe_buffer_create(&rscreen->screen,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
4096);
if (rscreen->trace_bo) {
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
#endif
return &rscreen->screen;
}

View File

@@ -47,6 +47,9 @@
#define R600_BIG_ENDIAN 0
#endif
/* Compile-time switch: set to 1 to build the command-stream tracing code. */
#define R600_TRACE_CS 0
/* Number of dwords r600_trace_emit() appends to the CS for one trace marker. */
#define R600_TRACE_CS_DWORDS 6
struct r600_pipe_fences {
struct si_resource *bo;
unsigned *data;
@@ -67,6 +70,11 @@ struct r600_screen {
struct r600_tiling_info tiling_info;
struct util_slab_mempool pool_buffers;
struct r600_pipe_fences fences;
#if R600_TRACE_CS
/* Buffer that trace markers are written to; NULL-checked before any tracing work. */
struct si_resource *trace_bo;
/* CPU mapping of trace_bo, read back in si_context_flush(). */
uint32_t *trace_ptr;
/* Running command-stream number; incremented once per si_context_flush(). */
unsigned cs_count;
#endif
};
struct si_pipe_sampler_view {
@@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct r600_context *r600,
struct pipe_index_buffer *ib,
unsigned count);
#if R600_TRACE_CS
/* Append a trace marker (a WRITE_DATA packet aimed at the screen's trace buffer) to rctx's CS. */
void r600_trace_emit(struct r600_context *rctx);
#endif
/*
* common helpers
*/

View File

@@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
continue;
count += state->ndw;
#if R600_TRACE_CS
/* Tracing: si_pm4_emit() adds a trace marker after each state, so count its dwords too. */
if (rctx->screen->trace_bo) {
count += R600_TRACE_CS_DWORDS;
}
#endif
}
return count;
@@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
}
cs->cdw += state->ndw;
#if R600_TRACE_CS
/* Tracing: emit a marker right after this state's dwords. */
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
#endif
}
void si_pm4_emit_dirty(struct r600_context *rctx)

View File

@@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
si_pm4_emit_dirty(rctx);
rctx->pm4_dirty_cdwords = 0;
#if R600_TRACE_CS
/* Tracing: emit one more marker once the dirty states have been emitted. */
if (rctx->screen->trace_bo) {
r600_trace_emit(rctx);
}
#endif
#if 0
/* Enable stream out if needed. */
if (rctx->streamout_start) {
@@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
#endif
rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
/* Set the depth buffer as dirty. */

View File

@@ -77,6 +77,20 @@
#define PKT3_DRAW_INDEX_IMMD 0x2E
#define PKT3_NUM_INSTANCES 0x2F
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
/*
 * WRITE_DATA packet: opcode, followed by the field encoders and field values
 * that r600_trace_emit() ORs together to build the packet's control dword.
 */
#define PKT3_WRITE_DATA 0x37
#define PKT3_WRITE_DATA_DST_SEL(x) ((x) << 8)
#define PKT3_WRITE_DATA_DST_SEL_REG 0
#define PKT3_WRITE_DATA_DST_SEL_MEM_SYNC 1
#define PKT3_WRITE_DATA_DST_SEL_TC_OR_L2 2
#define PKT3_WRITE_DATA_DST_SEL_GDS 3
#define PKT3_WRITE_DATA_DST_SEL_RESERVED_4 4
#define PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC 5
#define PKT3_WR_ONE_ADDR (1 << 16)
#define PKT3_WRITE_DATA_WR_CONFIRM (1 << 20)
#define PKT3_WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
#define PKT3_WRITE_DATA_ENGINE_SEL_ME 0
#define PKT3_WRITE_DATA_ENGINE_SEL_PFP 1
#define PKT3_WRITE_DATA_ENGINE_SEL_CE 2
#define PKT3_MEM_SEMAPHORE 0x39
#define PKT3_MPEG_INDEX 0x3A
#define PKT3_WAIT_REG_MEM 0x3C