anv: add BVH building tracking through u_trace

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kevin Chuang <kaiwenjon23@gmail.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32483>
This commit is contained in:
Lionel Landwerlin
2024-12-04 16:08:55 +02:00
committed by Marge Bot
parent 719e4dfa66
commit de00fe3f66
5 changed files with 123 additions and 12 deletions

View File

@@ -438,6 +438,13 @@ CREATE_DUAL_EVENT_CALLBACK(query_copy_shader, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
CREATE_DUAL_EVENT_CALLBACK(write_buffer_marker, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
CREATE_DUAL_EVENT_CALLBACK(rays, INTEL_DS_QUEUE_STAGE_RT)
CREATE_DUAL_EVENT_CALLBACK(as_build, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_build_leaves, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_morton_generate, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_morton_sort, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_lbvh_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_ploc_build_internal, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_encode, INTEL_DS_QUEUE_STAGE_AS)
CREATE_DUAL_EVENT_CALLBACK(as_copy, INTEL_DS_QUEUE_STAGE_AS)
void
intel_ds_begin_cmd_buffer_annotation(struct intel_ds_device *device,

View File

@@ -199,6 +199,13 @@ def define_tracepoints(args):
need_cs_param=True)
begin_end_tp('as_build')
begin_end_tp('as_build_leaves', compute=True)
begin_end_tp('as_morton_generate', compute=True)
begin_end_tp('as_morton_sort', compute=True)
begin_end_tp('as_lbvh_build_internal', compute=True)
begin_end_tp('as_ploc_build_internal', compute=True)
begin_end_tp('as_encode', compute=True)
begin_end_tp('as_copy', compute=True)
begin_end_tp('rays',
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),

View File

@@ -4028,6 +4028,9 @@ struct anv_cmd_ray_tracing_state {
struct brw_rt_scratch_layout layout;
} scratch;
uint32_t debug_marker_count;
enum vk_acceleration_structure_build_step debug_markers[5];
struct anv_address build_priv_mem_addr;
size_t build_priv_mem_size;
};

View File

@@ -40,6 +40,77 @@ static uint32_t tlas_id = 0;
static struct bvh_dump_struct *bvhDumpArray = NULL;
static uint32_t bvh_dump_array_size = 0;
/* Open the u_trace tracepoint matching an acceleration structure build step.
 *
 * The step is pushed on the command buffer's debug_markers[] stack so that
 * end_debug_marker(), which receives no step argument, can look up which
 * tracepoint to close. The format string and its variadic arguments are
 * not used.
 */
static void
begin_debug_marker(VkCommandBuffer commandBuffer,
                   enum vk_acceleration_structure_build_step step,
                   const char *format, ...)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;

   /* The marker stack has a fixed depth; nesting deeper is a bug. */
   assert(rt->debug_marker_count < ARRAY_SIZE(rt->debug_markers));

   /* Push the step, then bump the stack depth. */
   rt->debug_markers[rt->debug_marker_count] = step;
   rt->debug_marker_count++;

   switch (step) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_begin_as_build(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_begin_as_build_leaves(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_begin_as_morton_generate(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_begin_as_morton_sort(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_begin_as_lbvh_build_internal(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_begin_as_ploc_build_internal(&cmd_buffer->trace);
      break;

   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_begin_as_encode(&cmd_buffer->trace);
      break;

   default:
      unreachable("Invalid build step");
   }
}
/* Close the tracepoint of the most recently begun build step.
 *
 * Pops the top entry of the command buffer's debug_markers[] stack (pushed
 * by begin_debug_marker()) and emits the trace_intel_end_as_*() event that
 * pairs with it.
 */
static void
end_debug_marker(VkCommandBuffer commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* debug_marker_count is unsigned: an end without a matching begin would
    * wrap it around and index far outside debug_markers[]. Catch that here,
    * mirroring the overflow assert in begin_debug_marker().
    */
   assert(cmd_buffer->state.rt.debug_marker_count > 0);
   cmd_buffer->state.rt.debug_marker_count--;

   /* After the decrement, the count is the index of the popped step. */
   switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) {
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
      trace_intel_end_as_build(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
      trace_intel_end_as_build_leaves(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
      trace_intel_end_as_morton_generate(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
      trace_intel_end_as_morton_sort(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
      trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
      trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
      break;
   case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
      trace_intel_end_as_encode(&cmd_buffer->trace);
      break;
   default:
      unreachable("Invalid build step");
   }
}
/* clear out everything from (header + bvh_offset) to the end */
static void
clear_out_anv_bvh(struct anv_cmd_buffer *cmd_buffer,
@@ -533,6 +604,8 @@ anv_init_header(VkCommandBuffer commandBuffer,
}
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
.begin_debug_marker = begin_debug_marker,
.end_debug_marker = end_debug_marker,
.get_as_size = anv_get_as_size,
.get_encode_key = { anv_get_encode_key, anv_get_header_key },
.encode_bind_pipeline = { anv_encode_bind_pipeline,
@@ -582,6 +655,7 @@ anv_device_init_accel_struct_build_state(struct anv_device *device)
device->accel_struct_build.build_args =
(struct vk_acceleration_structure_build_args) {
.emit_markers = u_trace_enabled(&device->ds.trace_context),
.subgroup_size = device->info->ver >= 20 ? 16 : 8,
.radix_sort = device->accel_struct_build.radix_sort,
/* See struct anv_accel_struct_header from anv_bvh.h
@@ -640,7 +714,6 @@ genX(CmdBuildAccelerationStructuresKHR)(
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
trace_intel_begin_as_build(&cmd_buffer->trace);
struct anv_device *device = cmd_buffer->device;
@@ -662,7 +735,6 @@ genX(CmdBuildAccelerationStructuresKHR)(
&device->accel_struct_build.build_args);
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
trace_intel_end_as_build(&cmd_buffer->trace);
}
void
@@ -686,6 +758,8 @@ genX(CmdCopyAccelerationStructureKHR)(
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
trace_intel_begin_as_copy(&cmd_buffer->trace);
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
@@ -737,6 +811,8 @@ genX(CmdCopyAccelerationStructureKHR)(
copy_dispatch_size));
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
trace_intel_end_as_copy(&cmd_buffer->trace);
}
void
@@ -746,8 +822,10 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
struct anv_device *device = cmd_buffer->device;
trace_intel_begin_as_copy(&cmd_buffer->trace);
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result = get_pipeline_spv(device, "copy", copy_spv,
@@ -804,6 +882,8 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
copy_dispatch_size));
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
trace_intel_end_as_copy(&cmd_buffer->trace);
}
void
@@ -814,6 +894,8 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
trace_intel_begin_as_copy(&cmd_buffer->trace);
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result = get_pipeline_spv(cmd_buffer->device, "copy", copy_spv,
@@ -853,6 +935,8 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
trace_intel_end_as_copy(&cmd_buffer->trace);
}
/* TODO: Host commands */

View File

@@ -598,7 +598,8 @@ void genX(CmdDispatchBase)(
prog_data->local_size[0] * prog_data->local_size[1] *
prog_data->local_size[2]);
trace_intel_begin_compute(&cmd_buffer->trace);
if (cmd_buffer->state.rt.debug_marker_count == 0)
trace_intel_begin_compute(&cmd_buffer->trace);
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
@@ -610,8 +611,10 @@ void genX(CmdDispatchBase)(
groupCountX, groupCountY, groupCountZ,
false);
trace_intel_end_compute(&cmd_buffer->trace,
groupCountX, groupCountY, groupCountZ);
if (cmd_buffer->state.rt.debug_marker_count == 0) {
trace_intel_end_compute(&cmd_buffer->trace,
groupCountX, groupCountY, groupCountZ);
}
}
static void
@@ -654,7 +657,8 @@ emit_unaligned_cs_walker(
prog_data->local_size[0] * prog_data->local_size[1] *
prog_data->local_size[2]);
trace_intel_begin_compute(&cmd_buffer->trace);
if (cmd_buffer->state.rt.debug_marker_count == 0)
trace_intel_begin_compute(&cmd_buffer->trace);
assert(!prog_data->uses_num_work_groups);
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
@@ -667,8 +671,10 @@ emit_unaligned_cs_walker(
dispatch, groupCountX, groupCountY, groupCountZ);
#endif
trace_intel_end_compute(&cmd_buffer->trace,
groupCountX, groupCountY, groupCountZ);
if (cmd_buffer->state.rt.debug_marker_count == 0) {
trace_intel_end_compute(&cmd_buffer->trace,
groupCountX, groupCountY, groupCountZ);
}
}
/*
@@ -758,7 +764,9 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
INTEL_SNAPSHOT_COMPUTE,
"compute indirect",
0);
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
if (cmd_buffer->state.rt.debug_marker_count == 0)
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
@@ -768,8 +776,10 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr, 0,
0, 0, is_unaligned_size_x);
trace_intel_end_compute_indirect(&cmd_buffer->trace,
anv_address_utrace(indirect_addr));
if (cmd_buffer->state.rt.debug_marker_count == 0) {
trace_intel_end_compute_indirect(&cmd_buffer->trace,
anv_address_utrace(indirect_addr));
}
}
void genX(CmdDispatchIndirect)(