vulkan/runtime,radv: Add shared BVH building framework

This is mostly adapted from radv's BVH building. This defines a common
"IR" for BVH trees, two algorithms for constructing it, and a callback
that the driver implements for encoding. The framework takes care of
parallelizing the different passes, so the driver just has to split the
encoding process into "stages" and implement just one part for each
stage.

The runtime changes are:
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
The radv changes are:

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31433>
This commit is contained in:
Connor Abbott
2024-03-05 06:26:46 -05:00
committed by Konstantin
parent f8b584d6a5
commit 8fe3674df8

View File

@@ -73,34 +73,15 @@ main()
break;
}
default: {
default:
/* instances */
ir_leaf_node_size = SIZEOF(vk_ir_instance_node);
output_leaf_node_size = SIZEOF(radv_bvh_instance_node);
vk_ir_instance_node src_node =
DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, gl_GlobalInvocationID.x * ir_leaf_node_size)));
REF(radv_bvh_instance_node) dst_node =
REF(radv_bvh_instance_node)(OFFSET(args.output_bvh, dst_leaf_offset + gl_GlobalInvocationID.x * output_leaf_node_size));
radv_accel_struct_header blas_header =
DEREF(REF(radv_accel_struct_header)(src_node.base_ptr));
DEREF(dst_node).bvh_ptr = addr_to_node(src_node.base_ptr + blas_header.bvh_offset);
DEREF(dst_node).bvh_offset = blas_header.bvh_offset;
mat4 transform = mat4(src_node.otw_matrix);
mat4 inv_transform = transpose(inverse(transpose(transform)));
DEREF(dst_node).wto_matrix = mat3x4(inv_transform);
DEREF(dst_node).otw_matrix = mat3x4(transform);
DEREF(dst_node).custom_instance_and_mask = src_node.custom_instance_and_mask;
DEREF(dst_node).sbt_offset_and_flags = encode_sbt_offset_and_flags(src_node.sbt_offset_and_flags);
DEREF(dst_node).instance_id = src_node.instance_id;
/* Instance nodes have to be emitted inside the loop since encoding them
* loads an address from the IR node which is uninitialized for inactive nodes.
*/
break;
}
}
if (gl_GlobalInvocationID.x >= DEREF(args.header).ir_internal_node_count)
return;
@@ -209,6 +190,28 @@ main()
} else {
uint32_t child_index = offset / ir_leaf_node_size;
dst_offset = dst_leaf_offset + child_index * output_leaf_node_size;
if (type == vk_ir_node_instance) {
vk_ir_instance_node src_node =
DEREF(REF(vk_ir_instance_node)(OFFSET(args.intermediate_bvh, offset)));
REF(radv_bvh_instance_node) dst_node =
REF(radv_bvh_instance_node)(OFFSET(args.output_bvh, dst_offset));
radv_accel_struct_header blas_header =
DEREF(REF(radv_accel_struct_header)(src_node.base_ptr));
DEREF(dst_node).bvh_ptr = addr_to_node(src_node.base_ptr + blas_header.bvh_offset);
DEREF(dst_node).bvh_offset = blas_header.bvh_offset;
mat4 transform = mat4(src_node.otw_matrix);
mat4 inv_transform = transpose(inverse(transpose(transform)));
DEREF(dst_node).wto_matrix = mat3x4(inv_transform);
DEREF(dst_node).otw_matrix = mat3x4(transform);
DEREF(dst_node).custom_instance_and_mask = src_node.custom_instance_and_mask;
DEREF(dst_node).sbt_offset_and_flags = encode_sbt_offset_and_flags(src_node.sbt_offset_and_flags);
DEREF(dst_node).instance_id = src_node.instance_id;
}
}
vk_aabb child_aabb =