radv: Convert instance bvh address to node in bvh build.
So we don't have to do it in the traversal loop. This should save 2 AND instructions and a 64-bit shift, so 4/8 cycles per instance node visit.
Totals from 7 (0.01% of 134913) affected shaders:
CodeSize: 208460 -> 208292 (-0.08%)
Instrs: 38276 -> 38248 (-0.07%)
Latency: 803181 -> 803142 (-0.00%)
InvThroughput: 165384 -> 165376 (-0.00%)
Copies: 4912 -> 4905 (-0.14%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19706>
This commit is contained in:

committed by
Marge Bot

parent
d09ed23b9a
commit
1b5dc33caa
@@ -266,6 +266,20 @@ pack_node_id(uint32_t offset, uint32_t type)
|
||||
return (offset >> 3) | type;
|
||||
}
|
||||
|
||||
/* Expand a packed node value into a 64-bit address.
 *
 * The low 3 bits of `node` are discarded, the remaining bits are moved up by
 * 3 positions, and the result is sign-extended from bit 47. Bits of `node`
 * above bit 44 are shifted out and do not affect the result.
 */
uint64_t
node_to_addr(uint64_t node)
{
   /* Clear the low 3 bits and shift left so that bit 44 of the node lands in
    * the sign bit of the 64-bit value. */
   uint64_t shifted = (node & ~7ul) << 19;

   /* The signed right shift replicates the sign bit into the upper 16 bits. */
   return int64_t(shifted) >> 16;
}
|
||||
|
||||
/* Pack a 64-bit address into a node value.
 *
 * The address is shifted right by 3 and only the low 45 bits of the result
 * are kept, so the low 3 bits of `addr` and any bits above bit 47 are
 * discarded.
 */
uint64_t
addr_to_node(uint64_t addr)
{
   /* Mask selecting the 45 bits that remain after the shift. */
   const uint64_t node_mask = (1ul << 45) - 1;

   return (addr >> 3) & node_mask;
}
|
||||
|
||||
uint32_t
|
||||
ir_id_to_offset(uint32_t id)
|
||||
{
|
||||
@@ -350,7 +364,7 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
|
||||
}
|
||||
case radv_bvh_node_instance: {
|
||||
radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
|
||||
aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset,
|
||||
aabb = calculate_instance_node_bounds(node_to_addr(instance.bvh_ptr) - instance.bvh_offset,
|
||||
instance.otw_matrix);
|
||||
break;
|
||||
}
|
||||
|
@@ -183,7 +183,8 @@ struct radv_bvh_aabb_node {
|
||||
};
|
||||
|
||||
struct radv_bvh_instance_node {
|
||||
uint64_t bvh_ptr;
|
||||
uint64_t bvh_ptr; /* pre-shifted/masked to serve as node base */
|
||||
|
||||
/* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
|
||||
uint32_t custom_instance_and_mask;
|
||||
/* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
|
||||
|
@@ -109,7 +109,7 @@ main()
|
||||
if (src.base_ptr != 0)
|
||||
bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
|
||||
|
||||
DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset;
|
||||
DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
|
||||
DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
|
||||
DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags);
|
||||
DEREF(dst).instance_id = src.instance_id;
|
||||
|
@@ -96,10 +96,10 @@ main(void)
|
||||
uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
|
||||
if (args.mode == RADV_COPY_MODE_SERIALIZE) {
|
||||
DEREF(INDEX(uint64_t, instance_base, idx)) =
|
||||
DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset;
|
||||
node_to_addr(DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr) - bvh_offset;
|
||||
} else { /* RADV_COPY_MODE_DESERIALIZE */
|
||||
uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
|
||||
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset;
|
||||
DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = addr_to_node(blas_addr + bvh_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -67,6 +67,14 @@ struct rra_file_chunk_description {
|
||||
static_assert(sizeof(struct rra_file_chunk_description) == 64,
|
||||
"rra_file_chunk_description does not match RRA spec");
|
||||
|
||||
/*
 * Expand a packed node value into a 64-bit address.
 *
 * The low 3 bits of `node` are discarded, the remaining bits are moved up by
 * 3 positions, and the result is sign-extended from bit 47. Bits of `node`
 * above bit 44 are shifted out and do not affect the result.
 */
static uint64_t
node_to_addr(uint64_t node)
{
   /* Clear the low 3 bits and move bit 44 of the node into the sign bit. */
   const uint64_t shifted = (node & ~7ull) << 19;

   /* Relies on the signed right shift being arithmetic, so the sign bit is
    * replicated into the upper 16 bits of the result. */
   return (uint64_t)((int64_t)shifted >> 16);
}
|
||||
|
||||
static void
|
||||
rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size)
|
||||
{
|
||||
@@ -499,7 +507,7 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *
|
||||
offset);
|
||||
|
||||
struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
|
||||
uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
|
||||
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
|
||||
if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va))
|
||||
rra_validation_fail(&instance_ctx,
|
||||
"Invalid instance node pointer 0x%llx (offset: 0x%x)",
|
||||
@@ -560,7 +568,7 @@ static void
|
||||
rra_transcode_instance_node(struct rra_transcoding_context *ctx,
|
||||
const struct radv_bvh_instance_node *src)
|
||||
{
|
||||
uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
|
||||
uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
|
||||
|
||||
struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
|
||||
ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
|
||||
|
@@ -666,9 +666,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
||||
|
||||
nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
|
||||
nir_store_deref(b, args->vars.bvh_base,
|
||||
build_addr_to_node(
|
||||
b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))),
|
||||
1);
|
||||
nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);
|
||||
|
||||
/* Push the instance root node onto the stack */
|
||||
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE),
|
||||
|
Reference in New Issue
Block a user