diff --git a/src/amd/vulkan/bvh/build_helpers.h b/src/amd/vulkan/bvh/build_helpers.h
index 6c226e08e83..9865580f66a 100644
--- a/src/amd/vulkan/bvh/build_helpers.h
+++ b/src/amd/vulkan/bvh/build_helpers.h
@@ -359,7 +359,8 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
    }
    case radv_bvh_node_instance: {
       radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
-      aabb = calculate_instance_node_bounds(instance.base_ptr, instance.otw_matrix);
+      aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset,
+                                            instance.otw_matrix);
       break;
    }
    case radv_bvh_node_aabb: {
diff --git a/src/amd/vulkan/bvh/build_interface.h b/src/amd/vulkan/bvh/build_interface.h
index a868a0a5449..f21c56a7e78 100644
--- a/src/amd/vulkan/bvh/build_interface.h
+++ b/src/amd/vulkan/bvh/build_interface.h
@@ -78,6 +78,7 @@ struct copy_args {
 struct convert_internal_args {
    VOID_REF intermediate_bvh;
    VOID_REF output_bvh;
+   uint32_t output_bvh_offset;
    uint32_t leaf_node_count;
    uint32_t internal_node_count;
    uint32_t geometry_type;
diff --git a/src/amd/vulkan/bvh/bvh.h b/src/amd/vulkan/bvh/bvh.h
index c1359dbff3f..4a753c91ab5 100644
--- a/src/amd/vulkan/bvh/bvh.h
+++ b/src/amd/vulkan/bvh/bvh.h
@@ -64,8 +64,8 @@ struct radv_accel_struct_geometry_info {
 };
 
 struct radv_accel_struct_header {
+   uint32_t bvh_offset;
    uint32_t reserved;
-   uint32_t reserved2;
    float aabb[2][3];
 
    /* Everything after this gets updated/copied from the CPU. */
@@ -132,7 +132,7 @@ struct radv_bvh_aabb_node {
 };
 
 struct radv_bvh_instance_node {
-   uint64_t base_ptr;
+   uint64_t bvh_ptr;
    /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
    uint32_t custom_instance_and_mask;
    /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
@@ -141,7 +141,8 @@ struct radv_bvh_instance_node {
    mat3x4 wto_matrix;
 
    uint32_t instance_id;
-   uint32_t reserved[3];
+   uint32_t bvh_offset;
+   uint32_t reserved[2];
 
    /* Object to world matrix transposed from the initial transform. */
    mat3x4 otw_matrix;
diff --git a/src/amd/vulkan/bvh/converter_internal.comp b/src/amd/vulkan/bvh/converter_internal.comp
index 56ba18b0dff..87655d117d9 100644
--- a/src/amd/vulkan/bvh/converter_internal.comp
+++ b/src/amd/vulkan/bvh/converter_internal.comp
@@ -168,7 +168,8 @@ main()
    DEREF(dst_node).children = children;
 
    if (global_id == args.internal_node_count - 1) {
-      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh);
+      REF(radv_accel_struct_header) header = REF(radv_accel_struct_header)(args.output_bvh - args.output_bvh_offset);
       DEREF(header).aabb = src.base.aabb;
+      DEREF(header).bvh_offset = args.output_bvh_offset;
    }
 }
diff --git a/src/amd/vulkan/bvh/converter_leaf.comp b/src/amd/vulkan/bvh/converter_leaf.comp
index 6f82efaffd4..62df4d3976d 100644
--- a/src/amd/vulkan/bvh/converter_leaf.comp
+++ b/src/amd/vulkan/bvh/converter_leaf.comp
@@ -78,10 +78,15 @@ main()
          DEREF(INDEX(radv_ir_instance_node, args.intermediate_bvh, global_id));
       REF(radv_bvh_instance_node) dst = INDEX(radv_bvh_instance_node, dst_leaves, global_id);
 
-      DEREF(dst).base_ptr = src.base_ptr;
+      uint32_t bvh_offset = 0;
+      if (src.base_ptr != 0)
+         bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
+
+      DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset;
       DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
       DEREF(dst).sbt_offset_and_flags = src.sbt_offset_and_flags;
       DEREF(dst).instance_id = src.instance_id;
+      DEREF(dst).bvh_offset = bvh_offset;
 
       mat4 transform = mat4(src.otw_matrix);
 
diff --git a/src/amd/vulkan/bvh/copy.comp b/src/amd/vulkan/bvh/copy.comp
index 6bd5a735911..2f2fae748e9 100644
--- a/src/amd/vulkan/bvh/copy.comp
+++ b/src/amd/vulkan/bvh/copy.comp
@@ -92,13 +92,13 @@ main(void)
              (offset - node_offset) % SIZEOF(radv_bvh_instance_node) == 0) {
            uint64_t idx = (offset - node_offset) / SIZEOF(radv_bvh_instance_node);
 
+           uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
            if (args.mode == RADV_COPY_MODE_SERIALIZE) {
               DEREF(INDEX(uint64_t, instance_base, idx)) =
-                 DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).base_ptr;
+                 DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset;
            } else { /* RADV_COPY_MODE_DESERIALIZE */
               uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
-
-              DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).base_ptr = blas_addr;
+              DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset;
            }
         }
 
diff --git a/src/amd/vulkan/bvh/leaf.comp b/src/amd/vulkan/bvh/leaf.comp
index 45799eac312..5273210efa4 100644
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@@ -188,6 +188,8 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
    REF(radv_ir_instance_node) node = REF(radv_ir_instance_node)(dst_ptr);
    AccelerationStructureInstance instance = DEREF(REF(AccelerationStructureInstance)(src_ptr));
 
+   DEREF(node).base_ptr = instance.accelerationStructureReference;
+
    if (instance.accelerationStructureReference == 0)
       return false;
 
@@ -195,7 +197,6 @@ build_instance(inout AABB bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint32_t g
 
    radv_accel_struct_header instance_header =
       DEREF(REF(radv_accel_struct_header)(instance.accelerationStructureReference));
-   DEREF(node).base_ptr = instance.accelerationStructureReference;
 
    bounds = calculate_instance_node_bounds(DEREF(node).base_ptr, DEREF(node).otw_matrix);
 
diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c
index 8f85cd4e868..7dbf9f3ef15 100644
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -717,6 +717,7 @@ convert_internal_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount, const
       struct convert_internal_args args = {
          .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
          .output_bvh = accel_struct->va,
+         .output_bvh_offset = 0,
         .leaf_node_count = bvh_states[i].leaf_node_count,
         .internal_node_count = bvh_states[i].internal_node_count,
         .geometry_type = geometry_type,
diff --git a/src/amd/vulkan/radv_nir_lower_ray_queries.c b/src/amd/vulkan/radv_nir_lower_ray_queries.c
index d1d64c52dd0..672766b77fe 100644
--- a/src/amd/vulkan/radv_nir_lower_ray_queries.c
+++ b/src/amd/vulkan/radv_nir_lower_ray_queries.c
@@ -158,7 +158,7 @@ struct ray_query_intersection_vars {
 };
 
 struct ray_query_vars {
-   rq_variable *accel_struct;
+   rq_variable *root_bvh_base;
    rq_variable *flags;
    rq_variable *cull_mask;
    rq_variable *origin;
@@ -242,8 +242,8 @@ init_ray_query_vars(nir_shader *shader, nir_function_impl *impl, unsigned array_
 {
    const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
 
-   dst->accel_struct = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
-                                          VAR_NAME("_accel_struct"));
+   dst->root_bvh_base = rq_variable_create(shader, impl, array_length, glsl_uint64_t_type(),
+                                           VAR_NAME("_root_bvh_base"));
    dst->flags =
       rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_flags"));
    dst->cull_mask =
@@ -353,7 +353,6 @@ static void
 lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
                     struct ray_query_vars *vars)
 {
-   rq_store_var(b, index, vars->accel_struct, instr->src[1].ssa, 0x1);
    rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
    rq_store_var(b, index, vars->cull_mask, nir_iand_imm(b, instr->src[3].ssa, 0xff), 0x1);
 
@@ -372,17 +371,29 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
    rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none),
                 0x1);
 
-   nir_ssa_def *accel_struct = rq_load_var(b, index, vars->accel_struct);
+   nir_ssa_def *accel_struct = instr->src[1].ssa;
 
    nir_push_if(b, nir_ine_imm(b, accel_struct, 0));
    {
-      rq_store_var(b, index, vars->trav.bvh_base, build_addr_to_node(b, accel_struct), 1);
+      nir_ssa_def *bvh_offset = nir_build_load_global(
+         b, 1, 32,
+         nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+         .access = ACCESS_NON_WRITEABLE);
+      nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
+      bvh_base = build_addr_to_node(b, bvh_base);
+
+      rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
+      rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1);
       rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
       rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
 
      rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1);
    }
+   nir_push_else(b, NULL);
+   {
+      rq_store_var(b, index, vars->root_bvh_base, nir_imm_int64(b, 0), 0x1);
+   }
    nir_pop_if(b, NULL);
 
    rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, true), 0x1);
@@ -623,7 +634,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
    };
 
    struct radv_ray_traversal_args args = {
-      .accel_struct = rq_load_var(b, index, vars->accel_struct),
+      .root_bvh_base = rq_load_var(b, index, vars->root_bvh_base),
       .flags = rq_load_var(b, index, vars->flags),
       .cull_mask = rq_load_var(b, index, vars->cull_mask),
       .origin = rq_load_var(b, index, vars->origin),
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index a53d5e693fe..c3be5f81ce6 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -1395,7 +1395,14 @@ build_traversal_shader(struct radv_device *device,
 
    nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0));
    {
-      nir_store_var(&b, trav_vars.bvh_base, build_addr_to_node(&b, accel_struct), 1);
+      nir_ssa_def *bvh_offset = nir_build_load_global(
+         &b, 1, 32,
+         nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
+         .access = ACCESS_NON_WRITEABLE);
+      nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
+      root_bvh_base = build_addr_to_node(&b, root_bvh_base);
+
+      nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);
 
       nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7);
 
@@ -1435,7 +1442,7 @@ build_traversal_shader(struct radv_device *device,
       };
 
       struct radv_ray_traversal_args args = {
-         .accel_struct = accel_struct,
+         .root_bvh_base = root_bvh_base,
          .flags = nir_load_var(&b, vars.flags),
          .cull_mask = nir_load_var(&b, vars.cull_mask),
         .origin = nir_load_var(&b, vars.origin),
diff --git a/src/amd/vulkan/radv_rra.c b/src/amd/vulkan/radv_rra.c
index 709f35bfce0..d113929e20f 100644
--- a/src/amd/vulkan/radv_rra.c
+++ b/src/amd/vulkan/radv_rra.c
@@ -465,9 +465,11 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data,
                                     leaf_nodes_size, internal_nodes_size, parent_table_size, is_bottom_level);
       } else if (type == radv_bvh_node_instance) {
          struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
-         if (!_mesa_hash_table_u64_search(accel_struct_vas, src->base_ptr)) {
-            rra_accel_struct_validation_fail(offset, "Invalid instance node pointer 0x%llx",
-                                             (unsigned long long)src->base_ptr);
+         uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+         if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va)) {
+            rra_accel_struct_validation_fail(offset,
+                                             "Invalid instance node pointer 0x%llx (offset: 0x%x)",
+                                             (unsigned long long)src->bvh_ptr, src->bvh_offset);
             result = false;
          }
       }
@@ -521,6 +523,8 @@ static void
 rra_transcode_instance_node(struct rra_transcoding_context *ctx,
                             const struct radv_bvh_instance_node *src)
 {
+   uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+
    struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
    ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
 
@@ -528,7 +532,7 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx,
    dst->mask = src->custom_instance_and_mask >> 24;
    dst->sbt_offset = src->sbt_offset_and_flags & 0xffffff;
    dst->instance_flags = src->sbt_offset_and_flags >> 24;
-   dst->blas_va = (src->base_ptr + sizeof(struct rra_accel_struct_metadata)) >> 3;
+   dst->blas_va = (blas_va + sizeof(struct rra_accel_struct_metadata)) >> 3;
    dst->instance_id = src->instance_id;
    dst->blas_metadata_size = sizeof(struct rra_accel_struct_metadata);
 
@@ -649,7 +653,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
    uint32_t src_root_offset = (RADV_BVH_ROOT_NODE & ~7) << 3;
 
    if (should_validate)
-      if (!rra_validate_node(accel_struct_vas, data,
+      if (!rra_validate_node(accel_struct_vas, data + header->bvh_offset,
                              (struct radv_bvh_box32_node *)(data + src_root_offset), src_root_offset,
                              src_leaf_nodes_size, src_internal_nodes_size, node_parent_table_size,
                              !is_tlas)) {
@@ -677,7 +681,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
    }
 
    struct rra_transcoding_context ctx = {
-      .src = data,
+      .src = data + header->bvh_offset,
       .dst = dst_structure_data,
       .dst_leaf_offset = RRA_ROOT_NODE_OFFSET + dst_internal_nodes_size,
       .dst_internal_offset = RRA_ROOT_NODE_OFFSET,
@@ -687,7 +691,7 @@ rra_dump_acceleration_structure(struct rra_copied_accel_struct *copied_struct,
       .leaf_index = 0,
    };
 
-   rra_transcode_internal_node(&ctx, (const void *)(data + src_root_offset));
+   rra_transcode_internal_node(&ctx, (const void *)(data + header->bvh_offset + src_root_offset));
 
    struct rra_accel_struct_chunk_header chunk_header = {
       .metadata_offset = 0,
diff --git a/src/amd/vulkan/radv_rt_common.c b/src/amd/vulkan/radv_rt_common.c
index d159491ecc6..7442e49c91d 100644
--- a/src/amd/vulkan/radv_rt_common.c
+++ b/src/amd/vulkan/radv_rt_common.c
@@ -527,7 +527,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                          const struct radv_ray_traversal_args *args)
 {
    nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
-   nir_store_var(b, incomplete, nir_ine_imm(b, args->accel_struct, 0), 0x1);
+   nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1);
 
    nir_push_if(b, nir_load_var(b, incomplete));
    {
@@ -551,8 +551,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
             instance_exit->control = nir_selection_control_dont_flatten;
             {
                nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, 0), 1);
-               nir_store_deref(b, args->vars.bvh_base, build_addr_to_node(b, args->accel_struct),
-                               1);
+               nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
                nir_store_deref(b, args->vars.origin, args->origin, 7);
                nir_store_deref(b, args->vars.dir, args->dir, 7);
                nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
diff --git a/src/amd/vulkan/radv_rt_common.h b/src/amd/vulkan/radv_rt_common.h
index 1e05a07a709..91aa06d9ae5 100644
--- a/src/amd/vulkan/radv_rt_common.h
+++ b/src/amd/vulkan/radv_rt_common.h
@@ -134,7 +134,7 @@ struct radv_ray_traversal_vars {
 };
 
 struct radv_ray_traversal_args {
-   nir_ssa_def *accel_struct;
+   nir_ssa_def *root_bvh_base;
    nir_ssa_def *flags;
    nir_ssa_def *cull_mask;
    nir_ssa_def *origin;
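
Taken together, the hunks above change the addressing convention: traversal starts at the acceleration structure's VA plus the bvh_offset stored in the header, and instance nodes carry both the absolute bvh_ptr and the bvh_offset so that serialization, deserialization, and RRA validation can recover the original BLAS VA. The arithmetic is sketched below in plain C; this is not code from the patch, and the struct layouts are simplified stand-ins rather than the real radv_accel_struct_header and radv_bvh_instance_node definitions.

/* Hedged sketch of the addressing convention introduced by the patch.
 * Simplified stand-in structs; the real radv types carry more fields.
 */
#include <assert.h>
#include <stdint.h>

struct header {         /* stands in for radv_accel_struct_header */
   uint32_t bvh_offset; /* start of the BVH nodes relative to the structure's VA */
};

struct instance_node {  /* stands in for radv_bvh_instance_node */
   uint64_t bvh_ptr;    /* VA of the referenced BLAS's BVH nodes (VA + bvh_offset) */
   uint32_t bvh_offset; /* copy of the BLAS header's bvh_offset */
};

/* Root BVH base used by traversal: structure VA plus the header offset. */
static uint64_t root_bvh_base(uint64_t accel_struct_va, const struct header *h)
{
   return accel_struct_va + h->bvh_offset;
}

/* Serialization and validation recover the original structure VA. */
static uint64_t blas_va(const struct instance_node *n)
{
   return n->bvh_ptr - n->bvh_offset;
}

int main(void)
{
   struct header h = {.bvh_offset = 128};
   uint64_t va = 0x100000;
   struct instance_node n = {.bvh_ptr = va + h.bvh_offset, .bvh_offset = h.bvh_offset};

   assert(root_bvh_base(va, &h) == 0x100080);
   assert(blas_va(&n) == va);
   return 0;
}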