u_trace: extend tracepoint end_of_pipe bit into flags

We ran into an issue with Intel drivers where it became tricky to tell
whether a timestamp must be recorded with a special end-of-pipe
compute instruction or something else.

We initially tried to deal with that internally by checking some state
in the command buffers but turns out it doesn't work.

This change adds a flag field to the tracepoint to have that
information there and the flags are passed to the record_ts vfunc.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29438>
This commit is contained in:
Lionel Landwerlin
2024-05-28 17:33:28 +03:00
committed by Marge Bot
parent 265b2b1255
commit 3984875792
13 changed files with 59 additions and 35 deletions

View File

@@ -1696,7 +1696,7 @@ tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
template <chip CHIP>
static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
unsigned idx, bool end_of_pipe)
unsigned idx, uint32_t)
{
struct tu_bo *bo = (struct tu_bo *) timestamps;
struct tu_cs *ts_cs = (struct tu_cs *) cs;

View File

@@ -500,7 +500,7 @@ fd_get_device_reset_status(struct pipe_context *pctx)
static void
fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
unsigned idx, bool end_of_pipe)
unsigned idx, uint32_t flags)
{
struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
struct fd_ringbuffer *ring = cs;

View File

@@ -92,7 +92,7 @@ iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
iris_utrace_record_ts(struct u_trace *trace, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
uint32_t flags)
{
struct iris_batch *batch = container_of(trace, struct iris_batch, trace);
struct iris_context *ice = batch->ice;
@@ -102,12 +102,14 @@ iris_utrace_record_ts(struct u_trace *trace, void *cs,
iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
const bool is_end_compute =
(cs == NULL && ice->utrace.last_compute_walker != NULL && end_of_pipe);
cs == NULL &&
(flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS);
if (is_end_compute) {
assert(ice->utrace.last_compute_walker != NULL);
batch->screen->vtbl.rewrite_compute_walker_pc(
batch, ice->utrace.last_compute_walker, bo, ts_offset);
ice->utrace.last_compute_walker = NULL;
} else if (end_of_pipe) {
} else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) {
iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
PIPE_CONTROL_WRITE_TIMESTAMP,
bo, ts_offset, 0ull);

View File

@@ -26,8 +26,7 @@ def define_tracepoints(args):
def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
tp_default_enabled=True, end_pipelined=True,
need_cs_param=False):
tp_default_enabled=True, need_cs_param=False):
global si_default_tps
if tp_default_enabled:
si_default_tps.append(name)
@@ -41,7 +40,6 @@ def define_tracepoints(args):
tp_struct=tp_struct,
tp_perfetto='si_ds_end_{0}'.format(name),
tp_print=tp_print,
end_of_pipe=end_pipelined,
need_cs_param=need_cs_param)
# Various draws/dispatch, radeonsi

View File

@@ -13,7 +13,7 @@
static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps,
unsigned idx, bool end_of_pipe)
unsigned idx, uint32_t flags)
{
struct si_context *ctx = container_of(trace, struct si_context, trace);
struct pipe_resource *buffer = timestamps;

View File

@@ -61,6 +61,19 @@ enum intel_ds_stall_flag {
INTEL_DS_CCS_CACHE_FLUSH_BIT = BITFIELD_BIT(16),
};
enum intel_ds_tracepoint_flags {
/**
* Whether the tracepoint's timestamp must be recorded with as an
* end-of-pipe timestamp.
*/
INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE = BITFIELD_BIT(0),
/**
* Whether this tracepoint's timestamp is recorded on the compute pipeline.
*/
INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS = BITFIELD_BIT(1),
};
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */
typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags);

View File

@@ -46,6 +46,7 @@ def define_tracepoints(args):
def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
tp_default_enabled=True, end_pipelined=True,
compute=False,
need_cs_param=False):
global intel_default_tps
if tp_default_enabled:
@@ -54,13 +55,19 @@ def define_tracepoints(args):
toggle_name=name,
tp_perfetto='intel_ds_begin_{0}'.format(name),
need_cs_param=need_cs_param)
tp_flags = []
if end_pipelined:
if compute:
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS')
else:
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE')
Tracepoint('intel_end_{0}'.format(name),
toggle_name=name,
args=tp_args,
tp_struct=tp_struct,
tp_perfetto='intel_ds_end_{0}'.format(name),
tp_print=tp_print,
end_of_pipe=end_pipelined,
tp_flags=tp_flags,
need_cs_param=need_cs_param)
# Frame tracepoints
@@ -175,7 +182,8 @@ def define_tracepoints(args):
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
Arg(type='uint32_t', var='group_y', c_format='%u'),
Arg(type='uint32_t', var='group_z', c_format='%u'),],
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'],
compute=True)
# Used to identify copies generated by utrace
begin_end_tp('trace_copy',
@@ -190,7 +198,8 @@ def define_tracepoints(args):
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
Arg(type='uint32_t', var='group_y', c_format='%u'),
Arg(type='uint32_t', var='group_z', c_format='%u'),],
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'])
tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'],
compute=True)
def flag_bits(args):
bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')]

View File

@@ -396,7 +396,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
uint32_t flags)
{
struct anv_device *device =
container_of(ut->utctx, struct anv_device, ds.trace_context);
@@ -414,15 +414,19 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
/* Is this a end of compute trace point? */
const bool is_end_compute =
cs == NULL &&
(cmd_buffer->last_compute_walker != NULL ||
cmd_buffer->last_indirect_dispatch != NULL) &&
end_of_pipe;
(flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS);
assert(!is_end_compute ||
cmd_buffer->state.last_indirect_dispatch != NULL ||
cmd_buffer->state.last_compute_walker != NULL);
enum anv_timestamp_capture_type capture_type =
is_end_compute ?
(cmd_buffer->state.last_indirect_dispatch != NULL ?
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) :
(flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ?
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
enum anv_timestamp_capture_type capture_type = end_of_pipe ?
(is_end_compute ?
(cmd_buffer->last_indirect_dispatch != NULL ?
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) :
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE) : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
void *addr = capture_type == ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH ?
cmd_buffer->state.last_indirect_dispatch :

View File

@@ -224,7 +224,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
bool end_of_pipe)
uint32_t flags)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(ut, struct anv_cmd_buffer, trace);
@@ -232,8 +232,9 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
struct anv_bo *bo = timestamps;
enum anv_timestamp_capture_type capture_type =
(end_of_pipe) ? ANV_TIMESTAMP_CAPTURE_END_OF_PIPE
: ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
(flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ?
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE :
ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
(struct anv_address) {
.bo = bo,

View File

@@ -853,8 +853,7 @@ u_trace_appendv(struct u_trace *ut,
}
/* record a timestamp for the trace: */
ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx,
tp->end_of_pipe);
ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, tp->flags);
chunk->traces[tp_idx] = (struct u_trace_event) {
.tp = tp,

View File

@@ -105,7 +105,7 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut,
void *cs,
void *timestamps,
unsigned idx,
bool end_of_pipe);
uint32_t flags);
/**
* Driver provided callback to read back a previously recorded timestamp.

View File

@@ -34,7 +34,7 @@ class Tracepoint(object):
"""
def __init__(self, name, args=[], toggle_name=None,
tp_struct=None, tp_print=None, tp_perfetto=None,
tp_markers=None, end_of_pipe=False, need_cs_param=True):
tp_markers=None, tp_flags=[], need_cs_param=True):
"""Parameters:
- name: the tracepoint name, a tracepoint function with the given
@@ -72,7 +72,7 @@ class Tracepoint(object):
self.tp_print = tp_print
self.tp_perfetto = tp_perfetto
self.tp_markers = tp_markers
self.end_of_pipe = end_of_pipe
self.tp_flags = tp_flags
self.toggle_name = toggle_name
self.need_cs_param = need_cs_param
@@ -449,7 +449,7 @@ static void __emit_label_${trace_name}(struct u_trace_context *utctx, void *cs,
static const struct u_tracepoint __tp_${trace_name} = {
ALIGN_POT(sizeof(struct trace_${trace_name}), 8), /* keep size 64b aligned */
"${trace_name}",
${"true" if trace.end_of_pipe else "false"},
${0 if len(trace.tp_flags) == 0 else " | ".join(trace.tp_flags)},
${index},
__print_${trace_name},
__print_json_${trace_name},

View File

@@ -46,11 +46,9 @@ struct u_tracepoint {
unsigned payload_sz;
const char *name;
/**
* Whether this tracepoint's timestamp must be recorded with as an
* end-of-pipe timestamp (for some GPUs the recording timestamp instruction
* might be different for top/end of pipe).
* A bitfield of driver agnostic flags
*/
bool end_of_pipe:1;
uint16_t flags;
/**
* Index of this tracepoint in <basename>_tracepoint_names in the generated
* u_trace perfetto header. By associating these names with iids in setup,