radv: Convert instance bvh address to node in bvh build.

So we don't have to do it in the traversal loop. This should save 2 AND
instructions and a 64-bit shift, so 4/8 cycles per instance node
visit.

Totals from 7 (0.01% of 134913) affected shaders:

CodeSize: 208460 -> 208292 (-0.08%)
Instrs: 38276 -> 38248 (-0.07%)
Latency: 803181 -> 803142 (-0.00%)
InvThroughput: 165384 -> 165376 (-0.00%)
Copies: 4912 -> 4905 (-0.14%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19706>
This commit is contained in:
Bas Nieuwenhuizen
2022-11-13 03:13:55 +01:00
committed by Marge Bot
parent d09ed23b9a
commit 1b5dc33caa
6 changed files with 31 additions and 10 deletions

View File

@@ -266,6 +266,20 @@ pack_node_id(uint32_t offset, uint32_t type)
return (offset >> 3) | type; return (offset >> 3) | type;
} }
/* Turn a node-style value back into a 64-bit address.
 *
 * The low three bits are cleared first. The remaining value is then moved
 * up by 19 and brought back down by 16 with a signed shift, which amounts to
 * a left shift by 3 whose result is sign-extended from bit 47. Bits 45 and
 * above of the input fall off the top and do not influence the result.
 */
uint64_t
node_to_addr(uint64_t node)
{
   /* After this shift, bit 44 of the input sits in the sign position. */
   uint64_t shifted = (node & ~7ul) << 19;

   /* Signed right shift replicates that bit into the upper 16 bits. */
   return int64_t(shifted) >> 16;
}
/* Convert a 64-bit address into the pre-shifted form stored in instance
 * nodes: the address divided by 8, truncated to its low 45 bits.
 */
uint64_t
addr_to_node(uint64_t addr)
{
   /* Keeps bits 0..44 of the shifted value, i.e. address bits 3..47. */
   const uint64_t node_mask = (1ul << 45) - 1;

   uint64_t node = addr >> 3;
   return node & node_mask;
}
uint32_t uint32_t
ir_id_to_offset(uint32_t id) ir_id_to_offset(uint32_t id)
{ {
@@ -350,7 +364,7 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
} }
case radv_bvh_node_instance: { case radv_bvh_node_instance: {
radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node)); radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset, aabb = calculate_instance_node_bounds(node_to_addr(instance.bvh_ptr) - instance.bvh_offset,
instance.otw_matrix); instance.otw_matrix);
break; break;
} }

View File

@@ -183,7 +183,8 @@ struct radv_bvh_aabb_node {
}; };
struct radv_bvh_instance_node { struct radv_bvh_instance_node {
uint64_t bvh_ptr; uint64_t bvh_ptr; /* pre-shifted/masked to serve as node base */
/* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */ /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
uint32_t custom_instance_and_mask; uint32_t custom_instance_and_mask;
/* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */ /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */

View File

@@ -109,7 +109,7 @@ main()
if (src.base_ptr != 0) if (src.base_ptr != 0)
bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset; bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset; DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask; DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags); DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags);
DEREF(dst).instance_id = src.instance_id; DEREF(dst).instance_id = src.instance_id;

View File

@@ -96,10 +96,10 @@ main(void)
uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset; uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
if (args.mode == RADV_COPY_MODE_SERIALIZE) { if (args.mode == RADV_COPY_MODE_SERIALIZE) {
DEREF(INDEX(uint64_t, instance_base, idx)) = DEREF(INDEX(uint64_t, instance_base, idx)) =
DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset; node_to_addr(DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr) - bvh_offset;
} else { /* RADV_COPY_MODE_DESERIALIZE */ } else { /* RADV_COPY_MODE_DESERIALIZE */
uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx)); uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset; DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = addr_to_node(blas_addr + bvh_offset);
} }
} }
} }

View File

@@ -67,6 +67,14 @@ struct rra_file_chunk_description {
static_assert(sizeof(struct rra_file_chunk_description) == 64, static_assert(sizeof(struct rra_file_chunk_description) == 64,
"rra_file_chunk_description does not match RRA spec"); "rra_file_chunk_description does not match RRA spec");
/* Recover a 64-bit address from a node-style value.
 *
 * Clears the low three bits, then shifts left by 19 and right by 16 as a
 * signed quantity. The net effect is a left shift by 3 with the result
 * sign-extended from bit 47; input bits 45 and above are discarded.
 */
static uint64_t
node_to_addr(uint64_t node)
{
   /* Bit 44 of the input ends up in the sign bit here. */
   const uint64_t shifted = (node & ~7ull) << 19;

   /* Signed shift so the sign bit is copied into the upper 16 bits. */
   const int64_t extended = (int64_t)shifted >> 16;

   return extended;
}
static void static void
rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size) rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size)
{ {
@@ -499,7 +507,7 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *
offset); offset);
struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset); struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
uint64_t blas_va = src->bvh_ptr - src->bvh_offset; uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va)) if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va))
rra_validation_fail(&instance_ctx, rra_validation_fail(&instance_ctx,
"Invalid instance node pointer 0x%llx (offset: 0x%x)", "Invalid instance node pointer 0x%llx (offset: 0x%x)",
@@ -560,7 +568,7 @@ static void
rra_transcode_instance_node(struct rra_transcoding_context *ctx, rra_transcode_instance_node(struct rra_transcoding_context *ctx,
const struct radv_bvh_instance_node *src) const struct radv_bvh_instance_node *src)
{ {
uint64_t blas_va = src->bvh_ptr - src->bvh_offset; uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset); struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
ctx->dst_leaf_offset += sizeof(struct rra_instance_node); ctx->dst_leaf_offset += sizeof(struct rra_instance_node);

View File

@@ -666,9 +666,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1); nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
nir_store_deref(b, args->vars.bvh_base, nir_store_deref(b, args->vars.bvh_base,
build_addr_to_node( nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);
b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))),
1);
/* Push the instance root node onto the stack */ /* Push the instance root node onto the stack */
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE),