radv: Add a field for the offset of the bvh in the blas.

This allows us to put some metadata in front of the bvh.

Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18799>
This commit is contained in:
Bas Nieuwenhuizen
2022-09-17 15:23:35 +02:00
committed by Marge Bot
parent b0a385a6bd
commit f1e1509c92
13 changed files with 62 additions and 30 deletions

View File

@@ -359,7 +359,8 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
}
case radv_bvh_node_instance: {
radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
aabb = calculate_instance_node_bounds(instance.base_ptr, instance.otw_matrix);
aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset,
instance.otw_matrix);
break;
}
case radv_bvh_node_aabb: {

View File

@@ -78,6 +78,7 @@ struct copy_args {
struct convert_internal_args {
VOID_REF intermediate_bvh;
VOID_REF output_bvh;
uint32_t output_bvh_offset;
uint32_t leaf_node_count;
uint32_t internal_node_count;
uint32_t geometry_type;

View File

@@ -64,8 +64,8 @@ struct radv_accel_struct_geometry_info {
};
struct radv_accel_struct_header {
uint32_t bvh_offset;
uint32_t reserved;
uint32_t reserved2;
float aabb[2][3];
/* Everything after this gets updated/copied from the CPU. */
@@ -132,7 +132,7 @@ struct radv_bvh_aabb_node {
};
struct radv_bvh_instance_node {
uint64_t base_ptr;
uint64_t bvh_ptr;
/* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
uint32_t custom_instance_and_mask;
/* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
@@ -141,7 +141,8 @@ struct radv_bvh_instance_node {
mat3x4 wto_matrix;
uint32_t instance_id;
uint32_t reserved[3];
uint32_t bvh_offset;
uint32_t reserved[2];
/* Object to world matrix transposed from the initial transform. */
mat3x4 otw_matrix;

View File

@@ -168,7 +168,8 @@ main()
DEREF(dst_node).children = children;
if (global_id == args.internal_node_count - 1) {
REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh);
REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
DEREF(header).aabb = src.base.aabb;
DEREF(header).bvh_offset = args.output_bvh_offset;
}
}

View File

@@ -78,10 +78,15 @@ main()
DEREF(INDEX(radv_ir_instance_node, args.intermediate_bvh, global_id));
REF(radv_bvh_instance_node) dst =
INDEX(radv_bvh_instance_node, dst_leaves, global_id);
DEREF(dst).base_ptr = src.base_ptr;
uint32_t bvh_offset = 0;
if (src.base_ptr != 0)
bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset;
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
DEREF(dst).sbt_offset_and_flags = src.sbt_offset_and_flags;
DEREF(dst).instance_id = src.instance_id;
DEREF(dst).bvh_offset = bvh_offset;
mat4 transform = mat4(src.otw_matrix);

View File

@@ -92,13 +92,13 @@ main(void)
(offset - node_offset) % SIZEOF(radv_bvh_instance_node) == 0) {
uint64_t idx = (offset - node_offset) / SIZEOF(radv_bvh_instance_node);
uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
if (args.mode == RADV_COPY_MODE_SERIALIZE) {
DEREF(INDEX(uint64_t, instance_base, idx)) =
DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).base_ptr;
DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset;
} else { /* RADV_COPY_MODE_DESERIALIZE */
uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).base_ptr = blas_addr;
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset;
}
}
}

View File

@@ -188,6 +188,8 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));
DEREF(node).base_ptr = instance.accelerationStructureReference;
if (instance.accelerationStructureReference == 0)
return false;
@@ -195,7 +197,6 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
radv_accel_struct_header instance_header =
DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
DEREF(node).base_ptr = instance.accelerationStructureReference;
bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix);

View File

@@ -717,6 +717,7 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
const struct convert_internal_args args = {
.intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.output_bvh = accel_struct->va,
.output_bvh_offset = 0,
.leaf_node_count = bvh_states[i].leaf_node_count,
.internal_node_count = bvh_states[i].internal_node_count,
.geometry_type = geometry_type,

View File

@@ -158,7 +158,7 @@ struct ray_query_intersection_vars {
};
struct ray_query_vars {
rq_variable *accel_struct;
rq_variable *root_bvh_base;
rq_variable *flags;
rq_variable *cull_mask;
rq_variable *origin;
@@ -242,8 +242,8 @@ init_ray_query_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_
{
const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
dst->accel_struct = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
VAR_NAME("_accel_struct"));
dst->root_bvh_base = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
VAR_NAME("_root_bvh_base"));
dst->flags =
rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_flags"));
dst->cull_mask =
@@ -353,7 +353,6 @@ static void
lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
struct ray_query_vars *vars)
{
rq_store_var(b, index, vars->accel_struct, instr->src[1].ssa, 0x1);
rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
rq_store_var(b, index, vars->cull_mask, nir_iand_imm(b, instr->src[3].ssa, 0xff), 0x1);
@@ -372,17 +371,29 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none),
0x1);
nir_ssa_def *accel_struct = rq_load_var(b, index, vars->accel_struct);
nir_ssa_def *accel_struct = instr->src[1].ssa;
nir_push_if(b, nir_ine_imm(b, accel_struct, 0));
{
rq_store_var(b, index, vars->trav.bvh_base, build_addr_to_node(b, accel_struct), 1);
nir_ssa_def *bvh_offset = nir_build_load_global(
b, 1, 32,
nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
bvh_base = build_addr_to_node(b, bvh_base);
rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1);
rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1);
}
nir_push_else(b, NULL);
{
rq_store_var(b, index, vars->root_bvh_base, nir_imm_int64(b, 0), 0x1);
}
nir_pop_if(b, NULL);
rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, true), 0x1);
@@ -623,7 +634,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
};
struct radv_ray_traversal_args args = {
.accel_struct = rq_load_var(b, index, vars->accel_struct),
.root_bvh_base = rq_load_var(b, index, vars->root_bvh_base),
.flags = rq_load_var(b, index, vars->flags),
.cull_mask = rq_load_var(b, index, vars->cull_mask),
.origin = rq_load_var(b, index, vars->origin),

View File

@@ -1395,7 +1395,14 @@ build_traversal_shader(struct radv_device *device,
nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0));
{
nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1);
nir_ssa_def *bvh_offset = nir_build_load_global(
&b, 1, 32,
nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
root_bvh_base = build_addr_to_node(&b, root_bvh_base);
nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);
nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7);
@@ -1435,7 +1442,7 @@ build_traversal_shader(struct radv_device *device,
};
struct radv_ray_traversal_args args = {
.accel_struct = accel_struct,
.root_bvh_base = root_bvh_base,
.flags = nir_load_var(&b, vars.flags),
.cull_mask = nir_load_var(&b, vars.cull_mask),
.origin = nir_load_var(&b, vars.origin),

View File

@@ -465,9 +465,11 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data,
leaf_nodes_size, internal_nodes_size, parent_table_size, is_bottom_level);
} else if (type == radv_bvh_node_instance) {
struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
if (!_mesa_hash_table_u64_search(accel_struct_vas, src->base_ptr)) {
rra_accel_struct_validation_fail(offset, "Invalid instance node pointer 0x%llx",
(unsigned long long)src->base_ptr);
uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va)) {
rra_accel_struct_validation_fail(offset,
"Invalid instance node pointer 0x%llx (offset: 0x%x)",
(unsigned long long)src->bvh_ptr, src->bvh_offset);
result = false;
}
}
@@ -521,6 +523,8 @@ static void
rra_transcode_instance_node(struct rra_transcoding_context *ctx,
const struct radv_bvh_instance_node *src)
{
uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
@@ -528,7 +532,7 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx,
dst->mask = src->custom_instance_and_mask >> 24;
dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff;
dst->instance_flags = src->sbt_offset_and_flags >> 24;
dst->blas_va = (src->base_ptr + sizeof(struct rra_accel_struct_metadata)) >> 3;
dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3;
dst->instance_id = src->instance_id;
dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
@@ -649,7 +653,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;
if (should_validate)
if (!rra_validate_node(accel_struct_vas, data,
if (!rra_validate_node(accel_struct_vas, data + header->bvh_offset,
(struct radv_bvh_box32_node *)(data + src_root_offset),
src_root_offset, src_leaf_nodes_size, src_internal_nodes_size,
node_parent_table_size, !is_tlas)) {
@@ -677,7 +681,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
}
struct rra_transcoding_context ctx = {
.src = data,
.src = data + header->bvh_offset,
.dst = dst_structure_data,
.dst_leaf_offset = RRA_ROOT_NODE_OFFSET + dst_internal_nodes_size,
.dst_internal_offset = RRA_ROOT_NODE_OFFSET,
@@ -687,7 +691,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
.leaf_index = 0,
};
rra_transcode_internal_node(&ctx, (const void *)(data + src_root_offset));
rra_transcode_internal_node(&ctx, (const void *)(data + header->bvh_offset + src_root_offset));
struct rra_accel_struct_chunk_header chunk_header = {
.metadata_offset = 0,

View File

@@ -527,7 +527,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args)
{
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_ine_imm(b, args->accel_struct, 0), 0x1);
nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1);
nir_push_if(b, nir_load_var(b, incomplete));
{
@@ -551,8 +551,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
instance_exit->control = nir_selection_control_dont_flatten;
{
nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, 0), 1);
nir_store_deref(b, args->vars.bvh_base, build_addr_to_node(b, args->accel_struct),
1);
nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
nir_store_deref(b, args->vars.origin, args->origin, 7);
nir_store_deref(b, args->vars.dir, args->dir, 7);
nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);

View File

@@ -134,7 +134,7 @@ struct radv_ray_traversal_vars {
};
struct radv_ray_traversal_args {
nir_ssa_def *accel_struct;
nir_ssa_def *root_bvh_base;
nir_ssa_def *flags;
nir_ssa_def *cull_mask;
nir_ssa_def *origin;