radv/rt: Get rid of accel struct null checks

Quake II RTX ray queries:

Totals from 7 (14.29% of 49) affected shaders:
CodeSize: 167220 -> 165560 (-0.99%)
Instrs: 31674 -> 31454 (-0.69%)
Latency: 385145 -> 596737 (+54.94%)
InvThroughput: 78837 -> 122005 (+54.76%)
Copies: 4740 -> 4667 (-1.54%); split: -1.60%, +0.06%
Branches: 1565 -> 1493 (-4.60%)
PreSGPRs: 488 -> 501 (+2.66%); split: -0.41%, +3.07%
PreVGPRs: 617 -> 620 (+0.49%)

Performance stays the same.

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20539>
Author: Konstantin Seurer
Date: 2023-01-05 17:48:45 +01:00
Committed-by: Marge Bot
Commit: 03105138f1 (parent: 33166ba50b)
3 changed files with 276 additions and 296 deletions
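The shape of the change, as a standalone C sketch (not RADV code; `struct bvh`, `empty_bvh`, and `resolve` are illustrative names): instead of branching on a possibly-null acceleration structure inside every traversal step, null-ness is resolved once up front so the hot loop can assume a usable address. The hunks below only delete the branches; the setup-side half of the trade is assumed here purely for illustration.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct bvh {
   unsigned node_count; /* an empty BVH simply yields no hits */
};

static const struct bvh empty_bvh = {.node_count = 0};

/* Before: every query re-checks the pointer. */
static bool trace_with_checks(const struct bvh *bvh)
{
   if (bvh == NULL) /* the kind of branch this commit removes */
      return false;
   return bvh->node_count > 0;
}

/* After: resolve null-ness once, then trace unconditionally. */
static const struct bvh *resolve(const struct bvh *bvh)
{
   return bvh ? bvh : &empty_bvh;
}

static bool trace(const struct bvh *bvh)
{
   return bvh->node_count > 0; /* no null check in the hot path */
}

int main(void)
{
   const struct bvh *tlas = NULL; /* app passed a null acceleration structure */
   printf("%d %d\n", trace_with_checks(tlas), trace(resolve(tlas)));
   return 0;
}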


@@ -391,34 +391,26 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
   nir_ssa_def *accel_struct = instr->src[1].ssa;

-  nir_push_if(b, nir_ine_imm(b, accel_struct, 0));
-  {
   nir_ssa_def *bvh_offset = nir_build_load_global(
      b, 1, 32,
      nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
      .access = ACCESS_NON_WRITEABLE);
   nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
   bvh_base = build_addr_to_node(b, bvh_base);

   rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
   rq_store_var(b, index, vars->trav.bvh_base, bvh_base, 1);

   if (vars->stack) {
      rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
      rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1);
   } else {
      nir_ssa_def *base_offset =
         nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
      base_offset = nir_iadd_imm(b, base_offset, vars->shared_base);
      rq_store_var(b, index, vars->trav.stack, base_offset, 0x1);
      rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1);
   }
-  }
-  nir_push_else(b, NULL);
-  {
-     rq_store_var(b, index, vars->root_bvh_base, nir_imm_int64(b, 0), 0x1);
-  }
-  nir_pop_if(b, NULL);

   rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
   rq_store_var(b, index, vars->trav.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);

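For context on the stack setup above: each invocation's traversal stack lives interleaved in shared memory, starting at local_invocation_index * sizeof(uint32_t) past shared_base, and the third file below sets stack_stride = rt_wave_size * sizeof(uint32_t), so entry k of every lane is contiguous across the wave. A rough standalone C model of that layout (WAVE_SIZE and ENTRIES are illustrative stand-ins for rt_wave_size and MAX_STACK_ENTRY_COUNT):

#include <stdint.h>
#include <stdio.h>

#define WAVE_SIZE 64 /* illustrative; RADV uses rt_wave_size */
#define ENTRIES   16 /* illustrative; RADV uses MAX_STACK_ENTRY_COUNT */

static uint32_t shared_mem[WAVE_SIZE * ENTRIES];
static const uint32_t stack_stride = WAVE_SIZE * sizeof(uint32_t);

struct lane_stack {
   uint32_t ptr; /* byte offset, like the trav.stack variable */
};

static void stack_init(struct lane_stack *s, uint32_t lane)
{
   s->ptr = lane * sizeof(uint32_t); /* vars->shared_base omitted for brevity */
}

static void stack_push(struct lane_stack *s, uint32_t node)
{
   uint32_t byte = s->ptr % (stack_stride * ENTRIES); /* ring wrap, like nir_umod */
   shared_mem[byte / sizeof(uint32_t)] = node;
   s->ptr += stack_stride;
}

static uint32_t stack_pop(struct lane_stack *s)
{
   s->ptr -= stack_stride;
   uint32_t byte = s->ptr % (stack_stride * ENTRIES);
   return shared_mem[byte / sizeof(uint32_t)];
}

int main(void)
{
   struct lane_stack s;
   stack_init(&s, 3); /* lane 3 of the wave */
   stack_push(&s, 42);
   printf("%u\n", stack_pop(&s)); /* 42 */
   return 0;
}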

@@ -519,230 +519,169 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args)
{
   nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
-  nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1);
+  nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

-  nir_push_if(b, nir_load_var(b, incomplete));
-  {
   nir_ssa_def *desc = create_bvh_descriptor(b);
   nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);

   struct radv_ray_flags ray_flags = {
      .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
      .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
      .terminate_on_first_hit =
         nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
      .no_cull_front = nir_ieq_imm(
         b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
      .no_cull_back = nir_ieq_imm(
         b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
      .no_cull_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
      .no_cull_no_opaque =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
      .no_skip_triangles =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
      .no_skip_aabbs =
         nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
   };

   nir_push_loop(b);
   {
      nir_push_if(
         b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE));
      {
         /* Early exit if we never overflowed the stack, to avoid having to backtrack to
          * the root for no reason. */
         nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack),
                                nir_imm_int(b, args->stack_base + args->stack_stride)));
         {
            nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);

         nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack),
                                                    nir_load_deref(b, args->vars.stack));
         nir_ssa_def *root_instance_exit =
            nir_ieq(b, nir_load_deref(b, args->vars.previous_node),
                    nir_load_deref(b, args->vars.instance_bottom_node));
         nir_if *instance_exit =
            nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
         instance_exit->control = nir_selection_control_dont_flatten;
         {
            nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1);
            nir_store_deref(b, args->vars.previous_node,
                            nir_load_deref(b, args->vars.instance_top_node), 1);
            nir_store_deref(b, args->vars.instance_bottom_node,
                            nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);

            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
            nir_store_deref(b, args->vars.origin, args->origin, 7);
            nir_store_deref(b, args->vars.dir, args->dir, 7);
            nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
         }
         nir_pop_if(b, NULL);

         nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark),
                                nir_load_deref(b, args->vars.stack)));
         {
            nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
            nir_ssa_def *bvh_addr =
               build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);

            nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
            nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, RADV_BVH_INVALID_NODE)));
            {
               nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
               nir_jump(b, nir_jump_break);
            }
            nir_pop_if(b, NULL);
            nir_store_deref(b, args->vars.current_node, parent, 0x1);
         }
         nir_push_else(b, NULL);
         {
            nir_store_deref(
               b, args->vars.stack,
               nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);

            nir_ssa_def *stack_ptr =
               nir_umod(b, nir_load_deref(b, args->vars.stack),
                        nir_imm_int(b, args->stack_stride * args->stack_entries));
            nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
            nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
            nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE),
                            0x1);
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
      }
      nir_pop_if(b, NULL);

      nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node);

      nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node);
      nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
      nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);

      nir_ssa_def *global_bvh_node =
         nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));

      nir_ssa_def *intrinsic_result = NULL;
      if (!radv_emulate_rt(device->physical_device)) {
         intrinsic_result = nir_bvh64_intersect_ray_amd(
            b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax),
            nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
            nir_load_deref(b, args->vars.inv_dir));
      }

      nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7);
      nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_box16)));
      {
         nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_instance)));
         {
            nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb));
            {
               insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
            }
            nir_push_else(b, NULL);
            {
               /* instance */
               nir_ssa_def *instance_node_addr =
                  build_node_to_addr(device, b, global_bvh_node, false);
               nir_ssa_def *instance_data = nir_build_load_global(
                  b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
               nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
               nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24);

               nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, args->cull_mask), 0));
               {
                  nir_jump(b, nir_jump_continue);
               }
               nir_pop_if(b, NULL);

               nir_ssa_def *wto_matrix[3];
               nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);

               nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
               nir_store_deref(b, args->vars.bvh_base,
                               nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);

               /* Push the instance root node onto the stack */
               nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE),
                               0x1);
               nir_store_deref(b, args->vars.instance_bottom_node,
                               nir_imm_int(b, RADV_BVH_ROOT_NODE), 1);
               nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1);

               /* Transform the ray into object space */
               nir_store_deref(b, args->vars.origin,
                               nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7);
               nir_store_deref(b, args->vars.dir,
                               nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
               nir_store_deref(b, args->vars.inv_dir,
                               nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);

               nir_store_deref(b, args->vars.sbt_offset_and_flags,
                               nir_channel(b, instance_data, 3), 1);
               nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
            }
            nir_pop_if(b, NULL);
         }
         nir_push_else(b, NULL);
         {
            nir_ssa_def *result = intrinsic_result;
            if (!result) {
               /* If we didn't run the intrinsic because the hardware didn't support it,
                * emulate ray/box intersection here */
               result = intersect_ray_amd_software_box(
                  device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
                  nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
                  nir_load_deref(b, args->vars.inv_dir));
            }

            /* box */
            nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE));
            {
               nir_ssa_def *new_nodes[4];
               for (unsigned i = 0; i < 4; ++i)
                  new_nodes[i] = nir_channel(b, result, i);

               for (unsigned i = 1; i < 4; ++i)
                  nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE));

               for (unsigned i = 4; i-- > 1;) {
                  nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
                  nir_ssa_def *stack_ptr =
                     nir_umod(b, stack, nir_imm_int(b, args->stack_entries * args->stack_stride));
                  args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
                  nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride),
                                  1);

                  if (i == 1) {
                     nir_ssa_def *new_watermark =
                        nir_iadd_imm(b, nir_load_deref(b, args->vars.stack),
                                     -args->stack_entries * args->stack_stride);
                     new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark),
                                              new_watermark);
                     nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
                  }

                  nir_pop_if(b, NULL);
               }
               nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1);
            }
            nir_push_else(b, NULL);
            {
               nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
               for (unsigned i = 0; i < 3; ++i) {
                  next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)),
                                   nir_channel(b, result, i + 1), next);
               }
               nir_store_deref(b, args->vars.current_node, next, 0x1);
-              nir_jump(b, nir_jump_continue);
            }
            nir_pop_if(b, NULL);
         }
         nir_pop_if(b, NULL);
      }
      nir_push_else(b, NULL);
      {
         nir_ssa_def *result = intrinsic_result;
         if (!result) {
            /* If we didn't run the intrinsic because the hardware didn't support it,
             * emulate ray/tri intersection here */
            result = intersect_ray_amd_software_tri(
               device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
               nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
               nir_load_deref(b, args->vars.inv_dir));
         }
         insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_loop(b, NULL);
-  }
-  nir_pop_if(b, NULL);

   return nir_load_var(b, incomplete);
}
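The nir_bcsel chain in the box case above implements a stackless re-visit: when traversal comes back to a box node, the child after the one it just finished is selected. The same logic in plain C, with INVALID_NODE standing in for RADV_BVH_INVALID_NODE:

#include <stdint.h>
#include <stdio.h>

#define INVALID_NODE 0xffffffffu /* stand-in for RADV_BVH_INVALID_NODE */

/* Mirrors the nir_bcsel chain: scan the four children of a box node and
 * return the sibling after the one traversal just came back from. */
static uint32_t next_child(const uint32_t children[4], uint32_t prev_node)
{
   uint32_t next = INVALID_NODE;
   for (unsigned i = 0; i < 3; ++i) {
      if (prev_node == children[i])
         next = children[i + 1];
   }
   return next;
}

int main(void)
{
   uint32_t children[4] = {8, 16, 24, INVALID_NODE};
   printf("%u\n", next_child(children, 16)); /* 24 */
   printf("%u\n", next_child(children, 24)); /* INVALID_NODE: all children done */
   return 0;
}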

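One more sketch for the wrap-around stack in the traversal loop: because nir_umod folds the stack offset into stack_entries ring slots, a deep push can overwrite the oldest entries. The low watermark tracks the lowest offset that is still valid, and the nir_ige(stack_low_watermark, stack) branch above switches to fetch_parent_node backtracking once popping would read an overwritten slot. In plain C terms (illustrative, offsets in bytes as in the diff):

#include <assert.h>

/* After pushing, anything below stack - stack_entries * stack_stride has
 * been overwritten by the ring wrap; raise the watermark accordingly. */
static int watermark_after_push(int stack, int low_watermark, int stack_entries,
                                int stack_stride)
{
   int lowest_valid = stack - stack_entries * stack_stride;
   return lowest_valid > low_watermark ? lowest_valid : low_watermark;
}

/* Popping is only safe while the watermark is below the stack pointer;
 * otherwise traversal walks up via parent pointers instead. */
static int can_pop(int stack, int low_watermark)
{
   return low_watermark < stack;
}

int main(void)
{
   int stride = 256, entries = 4;  /* illustrative sizes */
   int stack = 5 * stride, wm = 0; /* five pushes into a four-entry ring */
   wm = watermark_after_push(stack, wm, entries, stride);
   assert(wm == stride);  /* the first entry has been overwritten */
   assert(can_pop(stack, wm)); /* the newest four entries still pop fine */
   return 0;
}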

@@ -1333,87 +1333,82 @@ build_traversal_shader(struct radv_device *device,
   nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1);

-  nir_push_if(&b, nir_ine_imm(&b, accel_struct, 0));
-  {
   nir_ssa_def *bvh_offset = nir_build_load_global(
      &b, 1, 32,
      nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
      .access = ACCESS_NON_WRITEABLE);
   nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
   root_bvh_base = build_addr_to_node(&b, root_bvh_base);

   nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);

   nir_ssa_def *vec3ones = nir_channels(&b, nir_imm_vec4(&b, 1.0, 1.0, 1.0, 1.0), 0x7);

   nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7);
   nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7);
   nir_store_var(&b, trav_vars.inv_dir, nir_fdiv(&b, vec3ones, nir_load_var(&b, trav_vars.dir)), 7);
   nir_store_var(&b, trav_vars.sbt_offset_and_flags, nir_imm_int(&b, 0), 1);
   nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1);

   nir_store_var(&b, trav_vars.stack,
                 nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1);
   nir_store_var(&b, trav_vars.stack_low_watermark, nir_load_var(&b, trav_vars.stack), 1);
   nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1);
   nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1);
   nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, RADV_BVH_INVALID_NODE), 0x1);
   nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT),
                 0x1);

   nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1);

   struct radv_ray_traversal_vars trav_vars_args = {
      .tmax = nir_build_deref_var(&b, vars.tmax),
      .origin = nir_build_deref_var(&b, trav_vars.origin),
      .dir = nir_build_deref_var(&b, trav_vars.dir),
      .inv_dir = nir_build_deref_var(&b, trav_vars.inv_dir),
      .bvh_base = nir_build_deref_var(&b, trav_vars.bvh_base),
      .stack = nir_build_deref_var(&b, trav_vars.stack),
      .top_stack = nir_build_deref_var(&b, trav_vars.top_stack),
      .stack_low_watermark = nir_build_deref_var(&b, trav_vars.stack_low_watermark),
      .current_node = nir_build_deref_var(&b, trav_vars.current_node),
      .previous_node = nir_build_deref_var(&b, trav_vars.previous_node),
      .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node),
      .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node),
      .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr),
      .sbt_offset_and_flags = nir_build_deref_var(&b, trav_vars.sbt_offset_and_flags),
   };

   struct traversal_data data = {
      .device = device,
      .createInfo = pCreateInfo,
      .vars = &vars,
      .trav_vars = &trav_vars,
      .barycentrics = barycentrics,
   };

   struct radv_ray_traversal_args args = {
      .root_bvh_base = root_bvh_base,
      .flags = nir_load_var(&b, vars.flags),
      .cull_mask = nir_load_var(&b, vars.cull_mask),
      .origin = nir_load_var(&b, vars.origin),
      .tmin = nir_load_var(&b, vars.tmin),
      .dir = nir_load_var(&b, vars.direction),
      .vars = trav_vars_args,
      .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t),
      .stack_entries = MAX_STACK_ENTRY_COUNT,
      .stack_base = 0,
      .stack_store_cb = store_stack_entry,
      .stack_load_cb = load_stack_entry,
      .aabb_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR)
                    ? NULL
                    : handle_candidate_aabb,
      .triangle_cb = (pCreateInfo->flags & VK_PIPELINE_CREATE_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)
                        ? NULL
                        : handle_candidate_triangle,
      .data = &data,
   };

   radv_build_ray_traversal(device, &b, &args);
-  }
-  nir_pop_if(&b, NULL);

   /* Initialize follow-up shader. */
   nir_push_if(&b, nir_load_var(&b, trav_vars.hit));
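A note on the inv_dir variable set up above: the traversal stores 1.0 / dir alongside the ray so the ray/box slab test in the hot loop needs only multiplies, never divides. A minimal illustration of the kind of test that consumes it (a generic slab test, not RADV's intersect_ray_amd_software_box):

#include <math.h>
#include <stdio.h>

/* Minimal ray/AABB slab test; inv_dir is precomputed as 1.0f / dir. */
static int ray_hits_box(const float bmin[3], const float bmax[3],
                        const float origin[3], const float inv_dir[3], float tmax)
{
   float tnear = 0.0f, tfar = tmax;
   for (int i = 0; i < 3; ++i) {
      float t0 = (bmin[i] - origin[i]) * inv_dir[i]; /* multiply, not divide */
      float t1 = (bmax[i] - origin[i]) * inv_dir[i];
      if (t0 > t1) {
         float tmp = t0;
         t0 = t1;
         t1 = tmp;
      }
      tnear = fmaxf(tnear, t0);
      tfar = fminf(tfar, t1);
   }
   return tnear <= tfar;
}

int main(void)
{
   float bmin[3] = {-1, -1, -1}, bmax[3] = {1, 1, 1};
   float origin[3] = {0, 0, -5}, dir[3] = {0, 0, 1};
   float inv_dir[3] = {1 / dir[0], 1 / dir[1], 1 / dir[2]};
   printf("%d\n", ray_hits_box(bmin, bmax, origin, inv_dir, 100.0f)); /* 1: hit */
   return 0;
}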