anv/hasvk: add indirect tracepoint arguments
Gives visibility into some indirect dispatch parameters: the draw count and the compute dispatch size. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29944>
This commit is contained in:

committed by
Lionel Landwerlin

parent
0a17035b5c
commit
78ae7ab856
@@ -316,7 +316,7 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
|
||||
event->set_duration(ts_ns - start_ns);
|
||||
event->set_submission_id(submission_id);
|
||||
|
||||
if (payload && payload_as_extra) {
|
||||
if ((payload || indirect_data) && payload_as_extra) {
|
||||
payload_as_extra(event, payload, indirect_data);
|
||||
}
|
||||
});
|
||||
@@ -427,6 +427,7 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
|
||||
CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
|
||||
CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
|
||||
CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
|
||||
CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE)
|
||||
CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(generate_commands, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
|
||||
CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
|
||||
|
@@ -41,6 +41,7 @@ def define_tracepoints(args):
|
||||
from u_trace import TracepointArgStruct as ArgStruct
|
||||
|
||||
Header('intel_driver_ds.h', scope=HeaderScope.SOURCE)
|
||||
Header('vulkan/vulkan_core.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
|
||||
Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)
|
||||
Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER)
|
||||
|
||||
@@ -162,9 +163,11 @@ def define_tracepoints(args):
|
||||
begin_end_tp('draw_indexed_indirect',
|
||||
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
|
||||
begin_end_tp('draw_indirect_count',
|
||||
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
|
||||
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
|
||||
is_indirect=True),])
|
||||
begin_end_tp('draw_indexed_indirect_count',
|
||||
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
|
||||
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
|
||||
is_indirect=True),])
|
||||
|
||||
begin_end_tp('draw_mesh',
|
||||
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
|
||||
@@ -173,7 +176,8 @@ def define_tracepoints(args):
|
||||
begin_end_tp('draw_mesh_indirect',
|
||||
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
|
||||
begin_end_tp('draw_mesh_indirect_count',
|
||||
tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
|
||||
tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
|
||||
is_indirect=True),])
|
||||
|
||||
begin_end_tp('compute',
|
||||
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
|
||||
@@ -181,6 +185,12 @@ def define_tracepoints(args):
|
||||
Arg(type='uint32_t', var='group_z', c_format='%u'),],
|
||||
compute=True)
|
||||
|
||||
begin_end_tp('compute_indirect',
|
||||
tp_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
|
||||
is_indirect=True, c_format="%ux%ux%u",
|
||||
fields=['x', 'y', 'z'])],
|
||||
compute=True)
|
||||
|
||||
# Used to identify copies generated by utrace
|
||||
begin_end_tp('trace_copy',
|
||||
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),])
|
||||
|
@@ -242,6 +242,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
enum anv_timestamp_capture_type type,
|
||||
void *data);
|
||||
|
||||
void genX(cmd_capture_data)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address dst_addr,
|
||||
struct anv_address src_addr,
|
||||
uint32_t size_B);
|
||||
|
||||
void
|
||||
genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
|
||||
const struct anv_device *device,
|
||||
|
@@ -580,6 +580,15 @@ anv_address_physical(struct anv_address addr)
|
||||
return intel_canonical_address(address);
|
||||
}
|
||||
|
||||
static inline struct u_trace_address
|
||||
anv_address_utrace(struct anv_address addr)
|
||||
{
|
||||
return (struct u_trace_address) {
|
||||
.bo = addr.bo,
|
||||
.offset = addr.offset,
|
||||
};
|
||||
}
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_add(struct anv_address addr, uint64_t offset)
|
||||
{
|
||||
@@ -1221,6 +1230,9 @@ struct anv_physical_device {
|
||||
|
||||
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
|
||||
enum anv_timestamp_capture_type, void *);
|
||||
void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
|
||||
struct anv_address, struct anv_address,
|
||||
uint32_t);
|
||||
struct intel_measure_device measure_device;
|
||||
|
||||
/* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
|
||||
|
@@ -102,9 +102,6 @@ anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
|
||||
void *ts_to, uint64_t to_offset_B,
|
||||
uint64_t size_B)
|
||||
{
|
||||
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
|
||||
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
|
||||
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_memcpy_state *memcpy_state = cmdstream;
|
||||
@@ -124,9 +121,6 @@ anv_device_utrace_emit_cs_copy_buffer(struct u_trace_context *utctx,
|
||||
void *ts_to, uint64_t to_offset_B,
|
||||
uint64_t size_B)
|
||||
{
|
||||
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
|
||||
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
|
||||
|
||||
struct anv_device *device =
|
||||
container_of(utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_simple_shader *simple_state = cmdstream;
|
||||
@@ -436,6 +430,42 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
||||
return intel_device_info_timebase_scale(device->info, ts->timestamp);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_capture_data(struct u_trace *ut,
|
||||
void *cs,
|
||||
void *dst_buffer,
|
||||
uint64_t dst_offset_B,
|
||||
void *src_buffer,
|
||||
uint64_t src_offset_B,
|
||||
uint32_t size_B)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(ut->utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_cmd_buffer *cmd_buffer =
|
||||
container_of(ut, struct anv_cmd_buffer, trace);
|
||||
/* cmd_buffer is only valid if cs == NULL */
|
||||
struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
|
||||
struct anv_address dst_addr = {
|
||||
.bo = dst_buffer,
|
||||
.offset = dst_offset_B,
|
||||
};
|
||||
struct anv_address src_addr = {
|
||||
.bo = src_buffer,
|
||||
.offset = src_offset_B,
|
||||
};
|
||||
|
||||
device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
|
||||
}
|
||||
|
||||
static const void *
|
||||
anv_utrace_get_data(struct u_trace_context *utctx,
|
||||
void *buffer, uint64_t offset_B, uint32_t size_B)
|
||||
{
|
||||
struct anv_bo *bo = buffer;
|
||||
|
||||
return bo->map + offset_B;
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_init(struct anv_device *device)
|
||||
{
|
||||
@@ -449,13 +479,13 @@ anv_device_utrace_init(struct anv_device *device)
|
||||
u_trace_context_init(&device->ds.trace_context,
|
||||
&device->ds,
|
||||
device->utrace_timestamp_size,
|
||||
0,
|
||||
12,
|
||||
anv_utrace_create_buffer,
|
||||
anv_utrace_destroy_buffer,
|
||||
anv_utrace_record_ts,
|
||||
anv_utrace_read_ts,
|
||||
NULL,
|
||||
NULL,
|
||||
anv_utrace_capture_data,
|
||||
anv_utrace_get_data,
|
||||
anv_utrace_delete_submit);
|
||||
|
||||
for (uint32_t q = 0; q < device->queue_count; q++) {
|
||||
|
@@ -5865,6 +5865,17 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
}
|
||||
}
|
||||
|
||||
void genX(cmd_capture_data)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address dst_addr,
|
||||
struct anv_address src_addr,
|
||||
uint32_t size_B) {
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, device->info, batch);
|
||||
mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
|
||||
mi_memcpy(&b, dst_addr, src_addr, size_B);
|
||||
}
|
||||
|
||||
void genX(batch_emit_secondary_call)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address secondary_addr,
|
||||
|
@@ -529,7 +529,7 @@ void genX(CmdDispatchIndirect)(
|
||||
INTEL_SNAPSHOT_COMPUTE,
|
||||
"compute indirect",
|
||||
0);
|
||||
trace_intel_begin_compute(&cmd_buffer->trace);
|
||||
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
|
||||
|
||||
if (prog_data->uses_num_work_groups) {
|
||||
cmd_buffer->state.compute.num_workgroups = addr;
|
||||
@@ -545,7 +545,8 @@ void genX(CmdDispatchIndirect)(
|
||||
|
||||
emit_cs_walker(cmd_buffer, pipeline, prog_data, addr, 0, 0, 0);
|
||||
|
||||
trace_intel_end_compute(&cmd_buffer->trace, 0, 0, 0);
|
||||
trace_intel_end_compute_indirect(&cmd_buffer->trace,
|
||||
anv_address_utrace(addr));
|
||||
}
|
||||
|
||||
struct anv_address
|
||||
|
@@ -2021,7 +2021,8 @@ void genX(CmdDrawIndirectCount)(
|
||||
false /* indexed */);
|
||||
}
|
||||
|
||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
|
||||
anv_address_utrace(count_address));
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirectCount)(
|
||||
@@ -2069,7 +2070,8 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||
true /* indexed */);
|
||||
}
|
||||
|
||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
|
||||
anv_address_utrace(count_address));
|
||||
|
||||
}
|
||||
|
||||
@@ -2345,10 +2347,11 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
|
||||
const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &count_buffer->address);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
|
||||
struct anv_address count_addr =
|
||||
anv_address_add(count_buffer->address, countBufferOffset);
|
||||
struct mi_value max =
|
||||
prepare_for_draw_count_predicate(
|
||||
cmd_buffer, &b,
|
||||
anv_address_add(count_buffer->address, countBufferOffset));
|
||||
cmd_buffer, &b, count_addr);
|
||||
|
||||
for (uint32_t i = 0; i < maxDrawCount; i++) {
|
||||
struct anv_address draw = anv_address_add(buffer->address, offset);
|
||||
@@ -2362,7 +2365,8 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
|
||||
offset += stride;
|
||||
}
|
||||
|
||||
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||
trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace,
|
||||
anv_address_utrace(count_addr));
|
||||
}
|
||||
|
||||
#endif /* GFX_VERx10 >= 125 */
|
||||
|
@@ -844,6 +844,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
|
||||
#endif
|
||||
|
||||
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
|
||||
pdevice->cmd_capture_data = genX(cmd_capture_data);
|
||||
|
||||
pdevice->gpgpu_pipeline_value = GPGPU;
|
||||
|
||||
|
@@ -134,6 +134,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
struct anv_address addr,
|
||||
enum anv_timestamp_capture_type);
|
||||
|
||||
void genX(cmd_capture_data)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address dst_addr,
|
||||
struct anv_address src_addr,
|
||||
uint32_t size_B);
|
||||
|
||||
void
|
||||
genX(rasterization_mode)(VkPolygonMode raster_mode,
|
||||
VkLineRasterizationModeEXT line_mode,
|
||||
|
@@ -493,6 +493,15 @@ anv_address_physical(struct anv_address addr)
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct u_trace_address
|
||||
anv_address_utrace(struct anv_address addr)
|
||||
{
|
||||
return (struct u_trace_address) {
|
||||
.bo = addr.bo,
|
||||
.offset = addr.offset,
|
||||
};
|
||||
}
|
||||
|
||||
static inline struct anv_address
|
||||
anv_address_add(struct anv_address addr, uint64_t offset)
|
||||
{
|
||||
@@ -909,7 +918,11 @@ struct anv_physical_device {
|
||||
int64_t master_minor;
|
||||
struct intel_query_engine_info * engine_info;
|
||||
|
||||
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, enum anv_timestamp_capture_type);
|
||||
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *,
|
||||
struct anv_address, enum anv_timestamp_capture_type);
|
||||
void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
|
||||
struct anv_address, struct anv_address,
|
||||
uint32_t);
|
||||
struct intel_measure_device measure_device;
|
||||
};
|
||||
|
||||
|
@@ -272,6 +272,42 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
||||
return intel_device_info_timebase_scale(device->info, *ts);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_utrace_capture_data(struct u_trace *ut,
|
||||
void *cs,
|
||||
void *dst_buffer,
|
||||
uint64_t dst_offset_B,
|
||||
void *src_buffer,
|
||||
uint64_t src_offset_B,
|
||||
uint32_t size_B)
|
||||
{
|
||||
struct anv_device *device =
|
||||
container_of(ut->utctx, struct anv_device, ds.trace_context);
|
||||
struct anv_cmd_buffer *cmd_buffer =
|
||||
container_of(ut, struct anv_cmd_buffer, trace);
|
||||
/* cmd_buffer is only valid if cs == NULL */
|
||||
struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
|
||||
struct anv_address dst_addr = {
|
||||
.bo = dst_buffer,
|
||||
.offset = dst_offset_B,
|
||||
};
|
||||
struct anv_address src_addr = {
|
||||
.bo = src_buffer,
|
||||
.offset = src_offset_B,
|
||||
};
|
||||
|
||||
device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
|
||||
}
|
||||
|
||||
static const void *
|
||||
anv_utrace_get_data(struct u_trace_context *utctx, void *buffer,
|
||||
uint64_t offset_B, uint32_t size_B)
|
||||
{
|
||||
struct anv_bo *bo = buffer;
|
||||
|
||||
return bo->map + offset_B;
|
||||
}
|
||||
|
||||
void
|
||||
anv_device_utrace_init(struct anv_device *device)
|
||||
{
|
||||
@@ -287,8 +323,8 @@ anv_device_utrace_init(struct anv_device *device)
|
||||
anv_utrace_destroy_buffer,
|
||||
anv_utrace_record_ts,
|
||||
anv_utrace_read_ts,
|
||||
NULL,
|
||||
NULL,
|
||||
anv_utrace_capture_data,
|
||||
anv_utrace_get_data,
|
||||
anv_utrace_delete_flush_data);
|
||||
|
||||
for (uint32_t q = 0; q < device->queue_count; q++) {
|
||||
|
@@ -4193,7 +4193,8 @@ void genX(CmdDrawIndirectCount)(
|
||||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||
trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
|
||||
anv_address_utrace(count_address));
|
||||
}
|
||||
|
||||
void genX(CmdDrawIndexedIndirectCount)(
|
||||
@@ -4263,8 +4264,8 @@ void genX(CmdDrawIndexedIndirectCount)(
|
||||
|
||||
mi_value_unref(&b, max);
|
||||
|
||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
|
||||
|
||||
trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
|
||||
anv_address_utrace(count_address));
|
||||
}
|
||||
|
||||
void genX(CmdBeginTransformFeedbackEXT)(
|
||||
@@ -6031,3 +6032,14 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
||||
unreachable("invalid");
|
||||
}
|
||||
}
|
||||
|
||||
void genX(cmd_capture_data)(struct anv_batch *batch,
|
||||
struct anv_device *device,
|
||||
struct anv_address dst_addr,
|
||||
struct anv_address src_addr,
|
||||
uint32_t size_B)
|
||||
{
|
||||
struct mi_builder b;
|
||||
mi_builder_init(&b, device->info, batch);
|
||||
mi_memcpy(&b, dst_addr, src_addr, size_B);
|
||||
}
|
||||
|
@@ -108,6 +108,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
|
||||
assert(pdevice->info.verx10 == GFX_VERx10);
|
||||
|
||||
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
|
||||
pdevice->cmd_capture_data = genX(cmd_capture_data);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
Reference in New Issue
Block a user