radv: Convert instance bvh address to node in bvh build.
So we don't have to do it in the traversal loop. Should save 2 instructions
and a 64-bit shift, so 4/8 cycles per instance node visit.

Totals from 7 (0.01% of 134913) affected shaders:

CodeSize: 208460 -> 208292 (-0.08%)
Instrs: 38276 -> 38248 (-0.07%)
Latency: 803181 -> 803142 (-0.00%)
InvThroughput: 165384 -> 165376 (-0.00%)
Copies: 4912 -> 4905 (-0.14%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19706>
committed by Marge Bot
parent d09ed23b9a
commit 1b5dc33caa
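For reference, here is a minimal standalone sketch of the address/node encoding this change relies on. It copies the node_to_addr()/addr_to_node() helpers added in the hunks below (the ull-suffixed C variant added for the RRA dumper); the sample address and the main() harness are hypothetical illustration, not part of the commit. It shows that addr_to_node() keeps address bits [47:3] so the low 3 bits stay free for a node type, and node_to_addr() masks the type off and sign-extends bit 47, so a 64-byte-aligned address survives the round trip.

/* Hedged sketch of the BVH node encoding round trip.
 * Build with: cc -o node_roundtrip node_roundtrip.c && ./node_roundtrip
 */
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Same as the addr_to_node() added in the build helpers: keep address
 * bits [47:3]; the low 3 bits of the result are free for a node type. */
static uint64_t
addr_to_node(uint64_t addr)
{
   return (addr >> 3) & ((1ull << 45) - 1);
}

/* Same as the node_to_addr() added for the RRA dumper: drop the 3 type
 * bits, shift the address back into place, and sign-extend bit 47 so
 * high canonical virtual addresses are restored correctly. */
static uint64_t
node_to_addr(uint64_t node)
{
   node &= ~7ull;
   node <<= 19;
   return (uint64_t)(((int64_t)node) >> 16);
}

int
main(void)
{
   /* Hypothetical 64-byte-aligned BLAS address with bit 47 clear. */
   uint64_t addr = 0x0000123456789a40ull;
   uint64_t node = addr_to_node(addr);

   /* A 3-bit node type OR'd into the low bits (6 is just an example
    * value) is masked off again by node_to_addr(). */
   uint64_t node_with_type = node | 6;

   assert(node_to_addr(node) == addr);
   assert(node_to_addr(node_with_type) == addr);

   printf("addr 0x%016" PRIx64 " -> node 0x%016" PRIx64 "\n", addr, node);
   return 0;
}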
@@ -266,6 +266,20 @@ pack_node_id(uint32_t offset, uint32_t type)
    return (offset >> 3) | type;
 }
 
+uint64_t
+node_to_addr(uint64_t node)
+{
+   node &= ~7ul;
+   node <<= 19;
+   return int64_t(node) >> 16;
+}
+
+uint64_t
+addr_to_node(uint64_t addr)
+{
+   return (addr >> 3) & ((1ul << 45) - 1);
+}
+
 uint32_t
 ir_id_to_offset(uint32_t id)
 {
@@ -350,7 +364,7 @@ calculate_node_bounds(VOID_REF bvh, uint32_t id)
    }
    case radv_bvh_node_instance: {
       radv_bvh_instance_node instance = DEREF(REF(radv_bvh_instance_node)(node));
-      aabb = calculate_instance_node_bounds(instance.bvh_ptr - instance.bvh_offset,
+      aabb = calculate_instance_node_bounds(node_to_addr(instance.bvh_ptr) - instance.bvh_offset,
                                             instance.otw_matrix);
       break;
    }
@@ -183,7 +183,8 @@ struct radv_bvh_aabb_node {
 };
 
 struct radv_bvh_instance_node {
-   uint64_t bvh_ptr;
+   uint64_t bvh_ptr; /* pre-shifted/masked to serve as node base */
+
    /* lower 24 bits are the custom instance index, upper 8 bits are the visibility mask */
    uint32_t custom_instance_and_mask;
    /* lower 24 bits are the sbt offset, upper 8 bits are VkGeometryInstanceFlagsKHR */
@@ -109,7 +109,7 @@ main()
    if (src.base_ptr != 0)
       bvh_offset = DEREF(REF(radv_accel_struct_header)(src.base_ptr)).bvh_offset;
 
-   DEREF(dst).bvh_ptr = src.base_ptr + bvh_offset;
+   DEREF(dst).bvh_ptr = addr_to_node(src.base_ptr + bvh_offset);
    DEREF(dst).custom_instance_and_mask = src.custom_instance_and_mask;
    DEREF(dst).sbt_offset_and_flags = convert_sbt_offset_and_flags(src.sbt_offset_and_flags);
    DEREF(dst).instance_id = src.instance_id;
@@ -96,10 +96,10 @@ main(void)
       uint32_t bvh_offset = DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_offset;
       if (args.mode == RADV_COPY_MODE_SERIALIZE) {
          DEREF(INDEX(uint64_t, instance_base, idx)) =
-            DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr - bvh_offset;
+            node_to_addr(DEREF(REF(radv_bvh_instance_node)(copy_src_addr + offset)).bvh_ptr) - bvh_offset;
       } else { /* RADV_COPY_MODE_DESERIALIZE */
          uint64_t blas_addr = DEREF(INDEX(uint64_t, instance_base, idx));
-         DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = blas_addr + bvh_offset;
+         DEREF(REF(radv_bvh_instance_node)(copy_dst_addr + offset)).bvh_ptr = addr_to_node(blas_addr + bvh_offset);
       }
    }
 }
@@ -67,6 +67,14 @@ struct rra_file_chunk_description {
 static_assert(sizeof(struct rra_file_chunk_description) == 64,
               "rra_file_chunk_description does not match RRA spec");
 
+static uint64_t
+node_to_addr(uint64_t node)
+{
+   node &= ~7ull;
+   node <<= 19;
+   return ((int64_t)node) >> 16;
+}
+
 static void
 rra_dump_header(FILE *output, uint64_t chunk_descriptions_offset, uint64_t chunk_descriptions_size)
 {
@@ -499,7 +507,7 @@ rra_validate_node(struct hash_table_u64 *accel_struct_vas, uint8_t *data, void *
                             offset);
 
       struct radv_bvh_instance_node *src = (struct radv_bvh_instance_node *)(data + offset);
-      uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+      uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
       if (!_mesa_hash_table_u64_search(accel_struct_vas, blas_va))
          rra_validation_fail(&instance_ctx,
                              "Invalid instance node pointer 0x%llx (offset: 0x%x)",
@@ -560,7 +568,7 @@ static void
 rra_transcode_instance_node(struct rra_transcoding_context *ctx,
                             const struct radv_bvh_instance_node *src)
 {
-   uint64_t blas_va = src->bvh_ptr - src->bvh_offset;
+   uint64_t blas_va = node_to_addr(src->bvh_ptr) - src->bvh_offset;
 
    struct rra_instance_node *dst = (struct rra_instance_node *)(ctx->dst + ctx->dst_leaf_offset);
    ctx->dst_leaf_offset += sizeof(struct rra_instance_node);
@@ -666,9 +666,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
 
             nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
             nir_store_deref(b, args->vars.bvh_base,
-                            build_addr_to_node(
-                               b, nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3))),
-                            1);
+                            nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);
 
             /* Push the instance root node onto the stack */
             nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE),