anv/hasvk: add indirect tracepoint arguments

Gives visibility into some indirect dispatch parameters:
  - draw count
  - compute dispatch size

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29944>
This commit is contained in:
Lionel Landwerlin
2024-06-25 18:10:10 +03:00
committed by Lionel Landwerlin
parent 0a17035b5c
commit 78ae7ab856
14 changed files with 170 additions and 26 deletions

View File

@@ -316,7 +316,7 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
event->set_duration(ts_ns - start_ns);
event->set_submission_id(submission_id);
if (payload && payload_as_extra) {
if ((payload || indirect_data) && payload_as_extra) {
payload_as_extra(event, payload, indirect_data);
}
});
@@ -427,6 +427,7 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE)
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(generate_commands, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)

View File

@@ -41,6 +41,7 @@ def define_tracepoints(args):
from u_trace import TracepointArgStruct as ArgStruct
Header('intel_driver_ds.h', scope=HeaderScope.SOURCE)
Header('vulkan/vulkan_core.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)
Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER)
@@ -162,9 +163,11 @@ def define_tracepoints(args):
begin_end_tp('draw_indexed_indirect',
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
begin_end_tp('draw_indirect_count',
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
is_indirect=True),])
begin_end_tp('draw_indexed_indirect_count',
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
is_indirect=True),])
begin_end_tp('draw_mesh',
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
@@ -173,7 +176,8 @@ def define_tracepoints(args):
begin_end_tp('draw_mesh_indirect',
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
begin_end_tp('draw_mesh_indirect_count',
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
is_indirect=True),])
begin_end_tp('compute',
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
@@ -181,6 +185,12 @@ def define_tracepoints(args):
Arg(type='uint32_t', var='group_z', c_format='%u'),],
compute=True)
begin_end_tp('compute_indirect',
tp_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
is_indirect=True, c_format="%ux%ux%u",
fields=['x', 'y', 'z'])],
compute=True)
# Used to identify copies generated by utrace
begin_end_tp('trace_copy',
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),])

View File

@@ -242,6 +242,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
enum anv_timestamp_capture_type type,
void *data);
/* Records into `batch` a copy of `size_B` bytes from `src_addr` to
 * `dst_addr`. Installed as the physical device's cmd_capture_data hook.
 */
void genX(cmd_capture_data)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address dst_addr,
struct anv_address src_addr,
uint32_t size_B);
void
genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
const struct anv_device *device,

View File

@@ -580,6 +580,15 @@ anv_address_physical(struct anv_address addr)
return intel_canonical_address(address);
}
static inline struct u_trace_address
anv_address_utrace(struct anv_address addr)
{
return (struct u_trace_address) {
.bo = addr.bo,
.offset = addr.offset,
};
}
static inline struct anv_address
anv_address_add(struct anv_address addr, uint64_t offset)
{
@@ -1221,6 +1230,9 @@ struct anv_physical_device {
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
enum anv_timestamp_capture_type, void *);
void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
struct anv_address, struct anv_address,
uint32_t);
struct intel_measure_device measure_device;
/* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */

View File

@@ -102,9 +102,6 @@ anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_memcpy_state *memcpy_state = cmdstream;
@@ -124,9 +121,6 @@ anv_device_utrace_emit_cs_copy_buffer(struct u_trace_context *utctx,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_simple_shader *simple_state = cmdstream;
@@ -436,6 +430,42 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
return intel_device_info_timebase_scale(device->info, ts->timestamp);
}
static void
anv_utrace_capture_data(struct u_trace *ut,
void *cs,
void *dst_buffer,
uint64_t dst_offset_B,
void *src_buffer,
uint64_t src_offset_B,
uint32_t size_B)
{
struct anv_device *device =
container_of(ut->utctx, struct anv_device, ds.trace_context);
struct anv_cmd_buffer *cmd_buffer =
container_of(ut, struct anv_cmd_buffer, trace);
/* cmd_buffer is only valid if cs == NULL */
struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
struct anv_address dst_addr = {
.bo = dst_buffer,
.offset = dst_offset_B,
};
struct anv_address src_addr = {
.bo = src_buffer,
.offset = src_offset_B,
};
device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
}
/* u_trace get-data callback: returns a CPU pointer offset_B bytes into the
 * mapping of the anv_bo passed as `buffer`. `utctx` and `size_B` are not
 * consulted.
 */
static const void *
anv_utrace_get_data(struct u_trace_context *utctx,
                    void *buffer, uint64_t offset_B, uint32_t size_B)
{
   struct anv_bo *trace_bo = buffer;
   const void *mapped_base = trace_bo->map;

   return mapped_base + offset_B;
}
void
anv_device_utrace_init(struct anv_device *device)
{
@@ -449,13 +479,13 @@ anv_device_utrace_init(struct anv_device *device)
u_trace_context_init(&device->ds.trace_context,
&device->ds,
device->utrace_timestamp_size,
0,
12,
anv_utrace_create_buffer,
anv_utrace_destroy_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
NULL,
NULL,
anv_utrace_capture_data,
anv_utrace_get_data,
anv_utrace_delete_submit);
for (uint32_t q = 0; q < device->queue_count; q++) {

View File

@@ -5865,6 +5865,17 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
}
}
void genX(cmd_capture_data)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address dst_addr,
struct anv_address src_addr,
uint32_t size_B) {
struct mi_builder b;
mi_builder_init(&b, device->info, batch);
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
mi_memcpy(&b, dst_addr, src_addr, size_B);
}
void genX(batch_emit_secondary_call)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address secondary_addr,

View File

@@ -529,7 +529,7 @@ void genX(CmdDispatchIndirect)(
INTEL_SNAPSHOT_COMPUTE,
"compute indirect",
0);
trace_intel_begin_compute(&cmd_buffer->trace);
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
if (prog_data->uses_num_work_groups) {
cmd_buffer->state.compute.num_workgroups = addr;
@@ -545,7 +545,8 @@ void genX(CmdDispatchIndirect)(
emit_cs_walker(cmd_buffer, pipeline, prog_data, addr, 0, 0, 0);
trace_intel_end_compute(&cmd_buffer->trace, 0, 0, 0);
trace_intel_end_compute_indirect(&cmd_buffer->trace,
anv_address_utrace(addr));
}
struct anv_address

View File

@@ -2021,7 +2021,8 @@ void genX(CmdDrawIndirectCount)(
false /* indexed */);
}
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address));
}
void genX(CmdDrawIndexedIndirectCount)(
@@ -2069,7 +2070,8 @@ void genX(CmdDrawIndexedIndirectCount)(
true /* indexed */);
}
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address));
}
@@ -2345,10 +2347,11 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &count_buffer->address);
mi_builder_set_mocs(&b, mocs);
struct anv_address count_addr =
anv_address_add(count_buffer->address, countBufferOffset);
struct mi_value max =
prepare_for_draw_count_predicate(
cmd_buffer, &b,
anv_address_add(count_buffer->address, countBufferOffset));
cmd_buffer, &b, count_addr);
for (uint32_t i = 0; i < maxDrawCount; i++) {
struct anv_address draw = anv_address_add(buffer->address, offset);
@@ -2362,7 +2365,8 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
offset += stride;
}
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace, maxDrawCount);
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_addr));
}
#endif /* GFX_VERx10 >= 125 */

View File

@@ -844,6 +844,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
#endif
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
pdevice->cmd_capture_data = genX(cmd_capture_data);
pdevice->gpgpu_pipeline_value = GPGPU;

View File

@@ -134,6 +134,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
struct anv_address addr,
enum anv_timestamp_capture_type);
/* Records into `batch` a copy of `size_B` bytes from `src_addr` to
 * `dst_addr`. Installed as the physical device's cmd_capture_data hook.
 */
void genX(cmd_capture_data)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address dst_addr,
struct anv_address src_addr,
uint32_t size_B);
void
genX(rasterization_mode)(VkPolygonMode raster_mode,
VkLineRasterizationModeEXT line_mode,

View File

@@ -493,6 +493,15 @@ anv_address_physical(struct anv_address addr)
}
}
static inline struct u_trace_address
anv_address_utrace(struct anv_address addr)
{
return (struct u_trace_address) {
.bo = addr.bo,
.offset = addr.offset,
};
}
static inline struct anv_address
anv_address_add(struct anv_address addr, uint64_t offset)
{
@@ -909,7 +918,11 @@ struct anv_physical_device {
int64_t master_minor;
struct intel_query_engine_info * engine_info;
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, enum anv_timestamp_capture_type);
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *,
struct anv_address, enum anv_timestamp_capture_type);
void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
struct anv_address, struct anv_address,
uint32_t);
struct intel_measure_device measure_device;
};

View File

@@ -272,6 +272,42 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
return intel_device_info_timebase_scale(device->info, *ts);
}
static void
anv_utrace_capture_data(struct u_trace *ut,
void *cs,
void *dst_buffer,
uint64_t dst_offset_B,
void *src_buffer,
uint64_t src_offset_B,
uint32_t size_B)
{
struct anv_device *device =
container_of(ut->utctx, struct anv_device, ds.trace_context);
struct anv_cmd_buffer *cmd_buffer =
container_of(ut, struct anv_cmd_buffer, trace);
/* cmd_buffer is only valid if cs == NULL */
struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
struct anv_address dst_addr = {
.bo = dst_buffer,
.offset = dst_offset_B,
};
struct anv_address src_addr = {
.bo = src_buffer,
.offset = src_offset_B,
};
device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
}
/* u_trace get-data callback: returns a CPU pointer offset_B bytes into the
 * mapping of the anv_bo passed as `buffer`. `utctx` and `size_B` are not
 * consulted.
 */
static const void *
anv_utrace_get_data(struct u_trace_context *utctx, void *buffer,
                    uint64_t offset_B, uint32_t size_B)
{
   struct anv_bo *trace_bo = buffer;
   const void *mapped_base = trace_bo->map;

   return mapped_base + offset_B;
}
void
anv_device_utrace_init(struct anv_device *device)
{
@@ -287,8 +323,8 @@ anv_device_utrace_init(struct anv_device *device)
anv_utrace_destroy_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
NULL,
NULL,
anv_utrace_capture_data,
anv_utrace_get_data,
anv_utrace_delete_flush_data);
for (uint32_t q = 0; q < device->queue_count; q++) {

View File

@@ -4193,7 +4193,8 @@ void genX(CmdDrawIndirectCount)(
mi_value_unref(&b, max);
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address));
}
void genX(CmdDrawIndexedIndirectCount)(
@@ -4263,8 +4264,8 @@ void genX(CmdDrawIndexedIndirectCount)(
mi_value_unref(&b, max);
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
anv_address_utrace(count_address));
}
void genX(CmdBeginTransformFeedbackEXT)(
@@ -6031,3 +6032,14 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
unreachable("invalid");
}
}
void genX(cmd_capture_data)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address dst_addr,
struct anv_address src_addr,
uint32_t size_B)
{
struct mi_builder b;
mi_builder_init(&b, device->info, batch);
mi_memcpy(&b, dst_addr, src_addr, size_B);
}

View File

@@ -108,6 +108,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
assert(pdevice->info.verx10 == GFX_VERx10);
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
pdevice->cmd_capture_data = genX(cmd_capture_data);
}
VkResult