u_trace: helpers for tracing tiling GPUs and re-usable VK cmdbuffers

A re-usable command buffer may be resubmitted any number of times,
but its tracepoints are written only once. u_trace_clone_append
allows copying the tracepoints, and copying their timestamps as well
if the GPU doesn't support writing timestamps to an indirect address.

The case of tiling GPUs is similar: the command stream for the draws
is resubmitted for each tile.
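
For illustration, a driver could wire the new helpers into its submit
path roughly as below (a sketch only: cmdbuf->trace, submit->trace,
submit->cs and driver_copy_ts_buffer are hypothetical driver-side
names):

   /* Tracepoints were recorded into cmdbuf->trace once, at record
    * time. On every (re)submit, clone them into a per-submit u_trace,
    * letting the driver copy the timestamp buffer:
    */
   struct u_trace_iterator begin = u_trace_begin_iterator(&cmdbuf->trace);
   struct u_trace_iterator end = u_trace_end_iterator(&cmdbuf->trace);

   u_trace_clone_append(begin, end, &submit->trace,
                        submit->cs, driver_copy_ts_buffer);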

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10969>
Author: Danylo Piliaiev
Date: 2021-07-16 15:01:20 +03:00
parent 3dd1bb6355
commit 0565c993f9
2 changed files with 149 additions and 0 deletions


@@ -262,6 +262,9 @@ process_chunk(void *job, void *gdata, int thread_index)
   for (unsigned idx = 0; idx < chunk->num_traces; idx++) {
      const struct u_trace_event *evt = &chunk->traces[idx];
      if (!evt->tp)
         continue;

      uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx, chunk->flush_data);
      int32_t delta;
@@ -364,6 +367,104 @@ u_trace_fini(struct u_trace *ut)
   free_chunks(&ut->trace_chunks);
}

bool
u_trace_has_points(struct u_trace *ut)
{
   return !list_is_empty(&ut->trace_chunks);
}

struct u_trace_iterator
u_trace_begin_iterator(struct u_trace *ut)
{
   if (!ut->enabled)
      return (struct u_trace_iterator) {NULL, NULL, 0};

   struct u_trace_chunk *first_chunk =
      list_first_entry(&ut->trace_chunks, struct u_trace_chunk, node);

   return (struct u_trace_iterator) { ut, first_chunk, 0 };
}

struct u_trace_iterator
u_trace_end_iterator(struct u_trace *ut)
{
   if (!ut->enabled)
      return (struct u_trace_iterator) {NULL, NULL, 0};

   struct u_trace_chunk *last_chunk =
      list_last_entry(&ut->trace_chunks, struct u_trace_chunk, node);

   return (struct u_trace_iterator) { ut, last_chunk, last_chunk->num_traces };
}

bool
u_trace_iterator_equal(struct u_trace_iterator a,
                       struct u_trace_iterator b)
{
   return a.ut == b.ut &&
          a.chunk == b.chunk &&
          a.event_idx == b.event_idx;
}

void
u_trace_clone_append(struct u_trace_iterator begin_it,
                     struct u_trace_iterator end_it,
                     struct u_trace *into,
                     void *cmdstream,
                     u_trace_copy_ts_buffer copy_ts_buffer)
{
   struct u_trace_chunk *from_chunk = begin_it.chunk;
   uint32_t from_idx = begin_it.event_idx;

   while (from_chunk != end_it.chunk || from_idx != end_it.event_idx) {
      struct u_trace_chunk *to_chunk = get_chunk(into);

      unsigned to_copy = MIN2(TRACES_PER_CHUNK - to_chunk->num_traces,
                              from_chunk->num_traces - from_idx);
      if (from_chunk == end_it.chunk)
         to_copy = MIN2(to_copy, end_it.event_idx - from_idx);

      copy_ts_buffer(begin_it.ut->utctx, cmdstream,
                     from_chunk->timestamps, from_idx,
                     to_chunk->timestamps, to_chunk->num_traces,
                     to_copy);

      memcpy(&to_chunk->traces[to_chunk->num_traces],
             &from_chunk->traces[from_idx],
             to_copy * sizeof(struct u_trace_event));

      to_chunk->num_traces += to_copy;
      from_idx += to_copy;

      assert(from_idx <= from_chunk->num_traces);
      if (from_idx == from_chunk->num_traces) {
         if (from_chunk == end_it.chunk)
            break;

         from_idx = 0;
         from_chunk = LIST_ENTRY(struct u_trace_chunk, from_chunk->node.next, node);
      }
   }
}
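
For reference, a minimal copy_ts_buffer callback might look like the
sketch below. It assumes the driver stores timestamps as plain
uint64_t values in CPU-mapped buffers (the storage format is
driver-defined, and example_copy_ts_buffer is a hypothetical name); a
driver whose GPU can copy timestamps would instead emit copy commands
into cmdstream:

   static void
   example_copy_ts_buffer(struct u_trace_context *utctx, void *cmdstream,
                          void *ts_from, uint32_t from_offset,
                          void *ts_to, uint32_t to_offset,
                          uint32_t count)
   {
      /* CPU-side copy: only valid once the GPU has actually written
       * the source timestamps.
       */
      uint64_t *from = ts_from;
      uint64_t *to = ts_to;

      memcpy(to + to_offset, from + from_offset, count * sizeof(uint64_t));
   }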
void
u_trace_disable_event_range(struct u_trace_iterator begin_it,
                            struct u_trace_iterator end_it)
{
   struct u_trace_chunk *current_chunk = begin_it.chunk;
   uint32_t start_idx = begin_it.event_idx;

   while (current_chunk != end_it.chunk) {
      memset(&current_chunk->traces[start_idx], 0,
             (current_chunk->num_traces - start_idx) * sizeof(struct u_trace_event));
      start_idx = 0;
      current_chunk = LIST_ENTRY(struct u_trace_chunk, current_chunk->node.next, node);
   }

   memset(&current_chunk->traces[start_idx], 0,
          (end_it.event_idx - start_idx) * sizeof(struct u_trace_event));
}

/**
 * Append a trace event, returning pointer to buffer of tp->payload_sz
 * to be filled in with trace payload. Called by generated tracepoint

@@ -204,6 +204,54 @@ void u_trace_context_process(struct u_trace_context *utctx, bool eof);
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);
void u_trace_fini(struct u_trace *ut);

bool u_trace_has_points(struct u_trace *ut);

struct u_trace_iterator
{
   struct u_trace *ut;
   struct u_trace_chunk *chunk;
   uint32_t event_idx;
};

struct u_trace_iterator
u_trace_begin_iterator(struct u_trace *ut);

struct u_trace_iterator
u_trace_end_iterator(struct u_trace *ut);

bool
u_trace_iterator_equal(struct u_trace_iterator a,
                       struct u_trace_iterator b);

typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
                                       void *cmdstream,
                                       void *ts_from, uint32_t from_offset,
                                       void *ts_to, uint32_t to_offset,
                                       uint32_t count);
/**
 * Clones a range of tracepoints into the target u_trace.
 * The driver provides a callback to copy the timestamps on the GPU
 * from one buffer to another.
 *
 * Note: the payload is shared and remains owned by the original
 * u_trace when tracepoints are copied between different u_trace
 * instances!
 *
 * This allows:
 * - Tracing a re-usable command buffer in Vulkan, by copying its
 *   tracepoints each time it is submitted.
 * - Per-tile tracing on tiling GPUs, by copying the range of
 *   tracepoints that corresponds to a tile.
 */
void u_trace_clone_append(struct u_trace_iterator begin_it,
                          struct u_trace_iterator end_it,
                          struct u_trace *into,
                          void *cmdstream,
                          u_trace_copy_ts_buffer copy_ts_buffer);

void u_trace_disable_event_range(struct u_trace_iterator begin_it,
                                 struct u_trace_iterator end_it);
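
As a sketch of the tiling use case (cmd, num_tiles, tile_cs and
copy_ts_buffer are hypothetical driver-side names; note that
u_trace_end_iterator returns the current end of the trace, so calling
it before and after the draws brackets the range mid-recording):

   /* At record time, bracket the draw pass: */
   struct u_trace_iterator draws_begin = u_trace_end_iterator(&cmd->trace);
   /* ... record draws and their tracepoints ... */
   struct u_trace_iterator draws_end = u_trace_end_iterator(&cmd->trace);

   /* At flush time, clone the bracketed range once per tile so each
    * tile gets its own copy of the tracepoints and timestamps:
    */
   for (uint32_t tile = 0; tile < num_tiles; tile++) {
      u_trace_clone_append(draws_begin, draws_end, &cmd->trace,
                           tile_cs, copy_ts_buffer);
   }

   /* Zero out the original range so it is not processed twice: */
   u_trace_disable_event_range(draws_begin, draws_end);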
/**
* Flush traces to the parent trace-context. At this point, the expectation
* is that all the tracepoints are "executed" by the GPU following any previously