radv/rt: Get rid of accel struct null checks
Quake II RTX ray queries:
Totals from 7 (14.29% of 49) affected shaders:
CodeSize: 167220 -> 165560 (-0.99%)
Instrs: 31674 -> 31454 (-0.69%)
Latency: 385145 -> 596737 (+54.94%)
InvThroughput: 78837 -> 122005 (+54.76%)
Copies: 4740 -> 4667 (-1.54%); split: -1.60%, +0.06%
Branches: 1565 -> 1493 (-4.60%)
PreSGPRs: 488 -> 501 (+2.66%); split: -0.41%, +3.07%
PreVGPRs: 617 -> 620 (+0.49%)

Performance stays the same.

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20539>
committed by Marge Bot
parent 33166ba50b
commit 03105138f1
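The change itself is small: radv_build_ray_traversal previously derived its `incomplete` flag from a null check on the root acceleration-structure pointer and wrapped the entire traversal in nir_push_if(b, nir_load_var(b, incomplete)); now the flag starts out true and the wrapper branch is gone, so the bulk of the hunk below is the loop body re-indented one level. A minimal scalar sketch of the control-flow difference, in plain C with made-up names (an illustration, not Mesa code):

/* Sketch only: models the gating change, not the real traversal. */
#include <stdbool.h>
#include <stdint.h>

static bool traverse_old(uint64_t root_bvh_base)
{
   /* Before: "incomplete" started as the null check and gated the walk. */
   bool incomplete = root_bvh_base != 0;
   if (incomplete) {
      /* ... BVH walk; normal termination clears the flag ... */
      incomplete = false;
   }
   return incomplete;
}

static bool traverse_new(uint64_t root_bvh_base)
{
   (void)root_bvh_base; /* assumed valid; null handling must live elsewhere */
   /* After: starts true unconditionally, no wrapper branch. */
   bool incomplete = true;
   /* ... BVH walk; normal termination clears the flag ... */
   incomplete = false;
   return incomplete;
}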
@@ -519,230 +519,169 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                          const struct radv_ray_traversal_args *args)
 {
    nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
-   nir_store_var(b, incomplete, nir_ine_imm(b, args->root_bvh_base, 0), 0x1);
+   nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
 
-   nir_push_if(b, nir_load_var(b, incomplete));
-   {
    nir_ssa_def *desc = create_bvh_descriptor(b);
    nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
 
    struct radv_ray_flags ray_flags = {
       .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
       .force_not_opaque = nir_test_mask(b, args->flags, SpvRayFlagsNoOpaqueKHRMask),
       .terminate_on_first_hit =
          nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask),
       .no_cull_front = nir_ieq_imm(
          b, nir_iand_imm(b, args->flags, SpvRayFlagsCullFrontFacingTrianglesKHRMask), 0),
       .no_cull_back = nir_ieq_imm(
          b, nir_iand_imm(b, args->flags, SpvRayFlagsCullBackFacingTrianglesKHRMask), 0),
       .no_cull_opaque =
          nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullOpaqueKHRMask), 0),
       .no_cull_no_opaque =
          nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsCullNoOpaqueKHRMask), 0),
       .no_skip_triangles =
          nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipTrianglesKHRMask), 0),
       .no_skip_aabbs = nir_ieq_imm(b, nir_iand_imm(b, args->flags, SpvRayFlagsSkipAABBsKHRMask), 0),
    };
    nir_push_loop(b);
    {
       nir_push_if(
          b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), RADV_BVH_INVALID_NODE));
       {
          /* Early exit if we never overflowed the stack, to avoid having to backtrack to
           * the root for no reason. */
          nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack),
                                 nir_imm_int(b, args->stack_base + args->stack_stride)));
          {
             nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
             nir_jump(b, nir_jump_break);
          }
          nir_pop_if(b, NULL);
 
          nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack),
                                                     nir_load_deref(b, args->vars.stack));
          nir_ssa_def *root_instance_exit =
             nir_ieq(b, nir_load_deref(b, args->vars.previous_node),
                     nir_load_deref(b, args->vars.instance_bottom_node));
          nir_if *instance_exit =
             nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
          instance_exit->control = nir_selection_control_dont_flatten;
          {
             nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1);
             nir_store_deref(b, args->vars.previous_node,
                             nir_load_deref(b, args->vars.instance_top_node), 1);
             nir_store_deref(b, args->vars.instance_bottom_node,
                             nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
 
             nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
             nir_store_deref(b, args->vars.origin, args->origin, 7);
             nir_store_deref(b, args->vars.dir, args->dir, 7);
             nir_store_deref(b, args->vars.inv_dir, nir_fdiv(b, vec3ones, args->dir), 7);
          }
          nir_pop_if(b, NULL);
 
          nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark),
                                 nir_load_deref(b, args->vars.stack)));
          {
             nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
             nir_ssa_def *bvh_addr =
                build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
 
             nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
             nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, RADV_BVH_INVALID_NODE)));
             {
                nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
                nir_jump(b, nir_jump_break);
             }
             nir_pop_if(b, NULL);
             nir_store_deref(b, args->vars.current_node, parent, 0x1);
          }
          nir_push_else(b, NULL);
          {
             nir_store_deref(
                b, args->vars.stack,
                nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
 
             nir_ssa_def *stack_ptr =
                nir_umod(b, nir_load_deref(b, args->vars.stack),
                         nir_imm_int(b, args->stack_stride * args->stack_entries));
             nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
             nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
             nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE),
                             0x1);
          }
          nir_pop_if(b, NULL);
       }
       nir_push_else(b, NULL);
       {
          nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
       }
       nir_pop_if(b, NULL);
 
       nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node);
 
       nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node);
       nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
       nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
 
       nir_ssa_def *global_bvh_node =
          nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
 
       nir_ssa_def *intrinsic_result = NULL;
       if (!radv_emulate_rt(device->physical_device)) {
          intrinsic_result = nir_bvh64_intersect_ray_amd(
             b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax),
             nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
             nir_load_deref(b, args->vars.inv_dir));
       }
 
       nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7);
       nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_box16)));
       {
          nir_push_if(b, nir_uge(b, node_type, nir_imm_int(b, radv_bvh_node_instance)));
          {
             nir_push_if(b, nir_ieq_imm(b, node_type, radv_bvh_node_aabb));
             {
                insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
             }
             nir_push_else(b, NULL);
             {
                /* instance */
                nir_ssa_def *instance_node_addr =
                   build_node_to_addr(device, b, global_bvh_node, false);
                nir_ssa_def *instance_data = nir_build_load_global(
                   b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
                nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
                nir_ssa_def *instance_mask = nir_ushr_imm(b, instance_and_mask, 24);
 
                nir_push_if(b, nir_ieq_imm(b, nir_iand(b, instance_mask, args->cull_mask), 0));
                {
                   nir_jump(b, nir_jump_continue);
                }
                nir_pop_if(b, NULL);
 
                nir_ssa_def *wto_matrix[3];
                nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
 
                nir_store_deref(b, args->vars.top_stack, nir_load_deref(b, args->vars.stack), 1);
                nir_store_deref(b, args->vars.bvh_base,
                                nir_pack_64_2x32(b, nir_channels(b, instance_data, 0x3)), 1);
 
                /* Push the instance root node onto the stack */
                nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
                nir_store_deref(b, args->vars.instance_bottom_node,
                                nir_imm_int(b, RADV_BVH_ROOT_NODE), 1);
                nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1);
 
                /* Transform the ray into object space */
                nir_store_deref(b, args->vars.origin,
                                nir_build_vec3_mat_mult(b, args->origin, wto_matrix, true), 7);
                nir_store_deref(b, args->vars.dir,
                                nir_build_vec3_mat_mult(b, args->dir, wto_matrix, false), 7);
                nir_store_deref(b, args->vars.inv_dir,
                                nir_fdiv(b, vec3ones, nir_load_deref(b, args->vars.dir)), 7);
 
                nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3),
                                1);
                nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
             }
             nir_pop_if(b, NULL);
          }
          nir_push_else(b, NULL);
          {
             nir_ssa_def *result = intrinsic_result;
             if (!result) {
                /* If we didn't run the intrinsic cause the hardware didn't support it,
                 * emulate ray/box intersection here */
                result = intersect_ray_amd_software_box(
                   device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
                   nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
                   nir_load_deref(b, args->vars.inv_dir));
             }
 
             /* box */
             nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE));
             {
                nir_ssa_def *new_nodes[4];
                for (unsigned i = 0; i < 4; ++i)
                   new_nodes[i] = nir_channel(b, result, i);
 
                for (unsigned i = 1; i < 4; ++i)
                   nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE));
 
                for (unsigned i = 4; i-- > 1;) {
                   nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
                   nir_ssa_def *stack_ptr =
                      nir_umod(b, stack, nir_imm_int(b, args->stack_entries * args->stack_stride));
                   args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
                   nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride),
                                   1);
 
                   if (i == 1) {
                      nir_ssa_def *new_watermark =
                         nir_iadd_imm(b, nir_load_deref(b, args->vars.stack),
                                      -args->stack_entries * args->stack_stride);
                      new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark),
                                               new_watermark);
                      nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
                   }
 
                   nir_pop_if(b, NULL);
                }
                nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1);
             }
             nir_push_else(b, NULL);
             {
                nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
                for (unsigned i = 0; i < 3; ++i) {
                   next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)),
                                    nir_channel(b, result, i + 1), next);
                }
                nir_store_deref(b, args->vars.current_node, next, 0x1);
             }
             nir_pop_if(b, NULL);
          }
          nir_pop_if(b, NULL);
       }
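A note on the box case above: the traversal stack is a short stack backed by a ring buffer. The logical stack pointer only ever moves by args->stack_stride, but every store and load goes through nir_umod(..., stack_entries * stack_stride), so deep pushes silently overwrite the oldest entries; stack_low_watermark records how far down the stack is still backed, and the nir_ige(stack_low_watermark, stack) branch earlier in this function falls back to fetch_parent_node() backtracking once pops reach overwritten territory. A scalar sketch of that bookkeeping, under our reading of the diff (names and the slot layout are ours, not Mesa's):

#include <stdint.h>

struct short_stack {
   uint32_t stack;         /* logical stack pointer, moves by stride */
   int32_t low_watermark;  /* lowest logical offset still in the ring */
   uint32_t stride;        /* ~ args->stack_stride */
   uint32_t entries;       /* ~ args->stack_entries */
   uint32_t slots[64];     /* ~ whatever stack_store_cb/stack_load_cb target */
};

static void push(struct short_stack *s, uint32_t node)
{
   uint32_t ptr = s->stack % (s->entries * s->stride); /* ~ nir_umod */
   s->slots[ptr / s->stride] = node;                   /* ~ stack_store_cb */
   s->stack += s->stride;

   /* Pushing past the ring capacity overwrites the oldest entry, so
    * remember the lowest offset that is still valid (the diff does this
    * once per group of four child pushes, on the last iteration). */
   int32_t wm = (int32_t)s->stack - (int32_t)(s->entries * s->stride);
   if (wm > s->low_watermark)
      s->low_watermark = wm;
}

static uint32_t pop(struct short_stack *s)
{
   /* Caller must check stack > low_watermark first; otherwise the slot
    * was overwritten and traversal has to backtrack via parent links. */
   s->stack -= s->stride;
   uint32_t ptr = s->stack % (s->entries * s->stride);
   return s->slots[ptr / s->stride];                   /* ~ stack_load_cb */
}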
@@ -751,19 +690,73 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
       nir_push_else(b, NULL);
       {
          nir_ssa_def *result = intrinsic_result;
          if (!result) {
             /* If we didn't run the intrinsic cause the hardware didn't support it,
              * emulate ray/tri intersection here */
             result = intersect_ray_amd_software_tri(
                device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax),
                nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir),
                nir_load_deref(b, args->vars.inv_dir));
          }
          insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
       }
       nir_pop_if(b, NULL);
    }
    nir_pop_loop(b, NULL);
-   }
-   nir_pop_if(b, NULL);
 
    return nir_load_var(b, incomplete);
 }
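One more detail worth calling out from the instance case: on entering a bottom-level BVH, the ray origin and direction are rewritten through the world-to-object matrix, and inv_dir is recomputed from the transformed direction. The boolean passed to nir_build_vec3_mat_mult presumably selects whether the translation column applies (points get it, direction vectors don't) — that reading is our assumption. A scalar sketch with our own names:

/* 3x4 row-major world-to-object matrix; illustrative layout only. */
static void vec3_mat_mult(const float m[3][4], const float v[3],
                          int translation, float out[3])
{
   for (int i = 0; i < 3; ++i) {
      out[i] = m[i][0] * v[0] + m[i][1] * v[1] + m[i][2] * v[2];
      if (translation) /* points take the translation column, vectors don't */
         out[i] += m[i][3];
   }
}

/* ~ the three stores in the instance case */
static void enter_instance(const float wto[3][4], const float origin[3],
                           const float dir[3], float obj_origin[3],
                           float obj_dir[3], float obj_inv_dir[3])
{
   vec3_mat_mult(wto, origin, 1, obj_origin);
   vec3_mat_mult(wto, dir, 0, obj_dir);
   for (int i = 0; i < 3; ++i)
      obj_inv_dir[i] = 1.0f / obj_dir[i]; /* ~ nir_fdiv(vec3ones, dir) */
}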