radv: Add a field for the offset of the bvh in the blas.

So that we can put some metadata in front.

Reviewed-By: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18799>
This commit is contained in:
Bas Nieuwenhuizen
2022-09-17 15:23:35 +02:00
committed by Marge Bot
parent b0a385a6bd
commit f1e1509c92
13 changed files with 62 additions and 30 deletions

View File

@@ -359,7 +359,8 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
    }
    case radv_bvh_node_instance: {
       radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
-      aabb = calculate_instance_node_bounds(instance.base_ptr, instance.otw_matrix);
+      aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset,
+                                            instance.otw_matrix);
       break;
    }
    case radv_bvh_node_aabb: {

View File

@@ -78,6 +78,7 @@ struct copy_args {
 struct convert_internal_args {
    VOID_REF intermediate_bvh;
    VOID_REF output_bvh;
+   uint32_t output_bvh_offset;
    uint32_t leaf_node_count;
    uint32_t internal_node_count;
    uint32_t geometry_type;

View File

@@ -64,8 +64,8 @@ struct radv_accel_struct_geometry_info {
 };
 struct radv_accel_struct_header {
+   uint32_t bvh_offset;
    uint32_t reserved;
-   uint32_t reserved2;
    float aabb[2][3];
    /* Everything after this gets updated/copied from the CPU. */
@@ -132,7 +132,7 @@ struct radv_bvh_aabb_node {
 };
 struct radv_bvh_instance_node {
-   uint64_t base_ptr;
+   uint64_t bvh_ptr;
    /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
    uint32_t custom_instance_and_mask;
    /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
@@ -141,7 +141,8 @@ struct radv_bvh_instance_node {
    mat3x4 wto_matrix;
    uint32_t instance_id;
-   uint32_t reserved[3];
+   uint32_t bvh_offset;
+   uint32_t reserved[2];
    /* Object to world matrix transposed from the initial transform. */
    mat3x4 otw_matrix;

View File

@@ -168,7 +168,8 @@ main()
    DEREF(dst_node).children = children;
    if (global_id == args.internal_node_count - 1) {
-      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh);
+      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
       DEREF(header).aabb = src.base.aabb;
+      DEREF(header).bvh_offset = args.output_bvh_offset;
    }
 }

View File

@@ -78,10 +78,15 @@ main()
       DEREF(INDEX(radv_ir_instance_node, args.intermediate_bvh, global_id));
    REF(radv_bvh_instance_node) dst =
       INDEX(radv_bvh_instance_node, dst_leaves, global_id);
-   DEREF(dst).base_ptr = src.base_ptr;
+   uint32_t bvh_offset = 0;
+   if (src.base_ptr != 0)
+      bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
+   DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset;
    DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
    DEREF(dst).sbt_offset_and_flags = src.sbt_offset_and_flags;
    DEREF(dst).instance_id = src.instance_id;
+   DEREF(dst).bvh_offset = bvh_offset;
    mat4 transform = mat4(src.otw_matrix);

View File

@@ -92,13 +92,13 @@ main(void)
          (offset - node_offset) % SIZEOF(radv_bvh_instance_node) == 0) {
         uint64_t idx = (offset - node_offset) / SIZEOF(radv_bvh_instance_node);
+        uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
         if (args.mode == RADV_COPY_MODE_SERIALIZE) {
            DEREF(INDEX(uint64_t, instance_base, idx)) =
-              DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).base_ptr;
+              DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset;
         } else { /* RADV_COPY_MODE_DESERIALIZE */
            uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
-           DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).base_ptr = blas_addr;
+           DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset;
         }
      }
   }

View File

@@ -188,6 +188,8 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
    REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
    AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));
+   DEREF(node).base_ptr = instance.accelerationStructureReference;
    if (instance.accelerationStructureReference == 0)
       return false;
@@ -195,7 +197,6 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
    radv_accel_struct_header instance_header =
       DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
-   DEREF(node).base_ptr = instance.accelerationStructureReference;
    bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix);

View File

@@ -717,6 +717,7 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
    const struct convert_internal_args args = {
       .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
       .output_bvh = accel_struct->va,
+      .output_bvh_offset = 0,
       .leaf_node_count = bvh_states[i].leaf_node_count,
       .internal_node_count = bvh_states[i].internal_node_count,
       .geometry_type = geometry_type,

View File

@@ -158,7 +158,7 @@ struct ray_query_intersection_vars {
 };
 struct ray_query_vars {
-   rq_variable *accel_struct;
+   rq_variable *root_bvh_base;
    rq_variable *flags;
    rq_variable *cull_mask;
    rq_variable *origin;
@@ -242,8 +242,8 @@ init_ray_query_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_
 {
    const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
-   dst->accel_struct = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
-                                          VAR_NAME("_accel_struct"));
+   dst->root_bvh_base = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
+                                           VAR_NAME("_root_bvh_base"));
    dst->flags =
       rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_flags"));
    dst->cull_mask =
@@ -353,7 +353,6 @@ static void
 lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
                     struct ray_query_vars *vars)
 {
-   rq_store_var(b, index, vars->accel_struct, instr->src[1].ssa, 0x1);
    rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
    rq_store_var(b, index, vars->cull_mask, nir_iand_imm(b, instr->src[3].ssa, 0xff), 0x1);
@@ -372,17 +371,29 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
    rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none),
                 0x1);
-   nir_ssa_def *accel_struct = rq_load_var(b, index, vars->accel_struct);
+   nir_ssa_def *accel_struct = instr->src[1].ssa;
    nir_push_if(b, nir_ine_imm(b, accel_struct, 0));
    {
-      rq_store_var(b, index, vars->trav.bvh_base, build_addr_to_node(b, accel_struct), 1);
+      nir_ssa_def *bvh_offset = nir_build_load_global(
+         b, 1, 32,
+         nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+         .access = ACCESS_NON_WRITEABLE);
+      nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
+      bvh_base = build_addr_to_node(b, bvh_base);
+      rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
+      rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1);
       rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
       rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
       rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1);
    }
+   nir_push_else(b, NULL);
+   {
+      rq_store_var(b, index, vars->root_bvh_base, nir_imm_int64(b, 0), 0x1);
+   }
    nir_pop_if(b, NULL);
    rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, true), 0x1);
@@ -623,7 +634,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
    };
    struct radv_ray_traversal_args args = {
-      .accel_struct = rq_load_var(b, index, vars->accel_struct),
+      .root_bvh_base = rq_load_var(b, index, vars->root_bvh_base),
       .flags = rq_load_var(b, index, vars->flags),
       .cull_mask = rq_load_var(b, index, vars->cull_mask),
       .origin = rq_load_var(b, index, vars->origin),

View File

@@ -1395,7 +1395,14 @@ build_traversal_shader(struct radv_device *device,
    nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0));
    {
-      nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1);
+      nir_ssa_def *bvh_offset = nir_build_load_global(
+         &b, 1, 32,
+         nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+         .access = ACCESS_NON_WRITEABLE);
+      nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
+      root_bvh_base = build_addr_to_node(&b, root_bvh_base);
+      nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);
       nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7);
@@ -1435,7 +1442,7 @@ build_traversal_shader(struct radv_device *device,
    };
    struct radv_ray_traversal_args args = {
-      .accel_struct = accel_struct,
+      .root_bvh_base = root_bvh_base,
       .flags = nir_load_var(&b, vars.flags),
       .cull_mask = nir_load_var(&b, vars.cull_mask),
       .origin = nir_load_var(&b, vars.origin),

View File

@@ -465,9 +465,11 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data,
                           leaf_nodes_size, internal_nodes_size, parent_table_size, is_bottom_level);
    } else if (type == radv_bvh_node_instance) {
       struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
-      if (!_mesa_hash_table_u64_search(accel_struct_vas, src->base_ptr)) {
-         rra_accel_struct_validation_fail(offset, "Invalid instance node pointer 0x%llx",
-                                          (unsigned long long)src->base_ptr);
+      uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+      if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va)) {
+         rra_accel_struct_validation_fail(offset,
+                                          "Invalid instance node pointer 0x%llx (offset: 0x%x)",
+                                          (unsigned long long)src->bvh_ptr, src->bvh_offset);
          result = false;
       }
    }
@@ -521,6 +523,8 @@ static void
 rra_transcode_instance_node(struct rra_transcoding_context *ctx,
                             const struct radv_bvh_instance_node *src)
 {
+   uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+
    struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
    ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
@@ -528,7 +532,7 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx,
    dst->mask = src->custom_instance_and_mask >> 24;
    dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff;
    dst->instance_flags = src->sbt_offset_and_flags >> 24;
-   dst->blas_va = (src->base_ptr + sizeof(struct rra_accel_struct_metadata)) >> 3;
+   dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3;
    dst->instance_id = src->instance_id;
    dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
@@ -649,7 +653,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
    uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;
    if (should_validate)
-      if (!rra_validate_node(accel_struct_vas, data,
+      if (!rra_validate_node(accel_struct_vas, data + header->bvh_offset,
                              (struct radv_bvh_box32_node *)(data + src_root_offset),
                              src_root_offset, src_leaf_nodes_size, src_internal_nodes_size,
                              node_parent_table_size, !is_tlas)) {
@@ -677,7 +681,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
    }
    struct rra_transcoding_context ctx = {
-      .src = data,
+      .src = data + header->bvh_offset,
       .dst = dst_structure_data,
      .dst_leaf_offset = RRA_ROOT_NODE_OFFSET + dst_internal_nodes_size,
      .dst_internal_offset = RRA_ROOT_NODE_OFFSET,
@@ -687,7 +691,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
       .leaf_index = 0,
    };
-   rra_transcode_internal_node(&ctx, (const void *)(data + src_root_offset));
+   rra_transcode_internal_node(&ctx, (const void *)(data + header->bvh_offset + src_root_offset));
    struct rra_accel_struct_chunk_header chunk_header = {
       .metadata_offset = 0,

View File

@@ -527,7 +527,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                          const struct radv_ray_traversal_args *args)
 {
    nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
-   nir_store_var(b, incomplete, nir_ine_imm(b, args->accel_struct, 0), 0x1);
+   nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1);
    nir_push_if(b, nir_load_var(b, incomplete));
    {
@@ -551,8 +551,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
          instance_exit->control = nir_selection_control_dont_flatten;
          {
             nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, 0), 1);
-            nir_store_deref(b, args->vars.bvh_base, build_addr_to_node(b, args->accel_struct),
-                            1);
+            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
             nir_store_deref(b, args->vars.origin, args->origin, 7);
             nir_store_deref(b, args->vars.dir, args->dir, 7);
             nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);

View File

@@ -134,7 +134,7 @@ struct radv_ray_traversal_vars {
 };
 struct radv_ray_traversal_args {
-   nir_ssa_def *accel_struct;
+   nir_ssa_def *root_bvh_base;
    nir_ssa_def *flags;
    nir_ssa_def *cull_mask;
    nir_ssa_def *origin;