radv: Add traversal backtracking with a short stack.
So we can now work with arbitrarily deep BVHs.

Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18799>
committed by Marge Bot
parent 251bba2fa0
commit a8abdc0d89
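Editor's note: the scheme this commit builds in NIR is easier to see in scalar form. Below is a standalone C sketch (illustrative only, not RADV code; the toy tree and names are invented) of the same idea: depth-first traversal of a 4-wide tree with a deliberately tiny wraparound stack, falling back to parent pointers exactly when entries that were overwritten would be needed. It compiles with any C99 compiler and prints each leaf once.

#include <stdio.h>

#define STACK_ENTRIES 2 /* deliberately tiny so the demo must backtrack */

struct node {
   struct node *parent;
   struct node *children[4]; /* NULL-terminated prefix; children[0] == NULL means leaf */
   int id;
};

static void
traverse(struct node *root)
{
   struct node *stack[STACK_ENTRIES];
   int sp = 0;      /* virtual stack pointer; only sp % STACK_ENTRIES indexes the array */
   int sp_base = 0; /* low watermark: entries below this index were overwritten */
   struct node *current = root;
   struct node *previous = NULL;

   for (;;) {
      if (!current) {
         if (sp == 0)
            break; /* stack is empty and it never wrapped: traversal is done */

         if (sp_base >= sp) {
            /* Everything still on the short stack was overwritten: backtrack
             * through the parent link of the last visited node instead. */
            if (!previous->parent)
               break; /* backtracked past the root */
            current = previous->parent; /* 'previous' survives to pick the next child */
         } else {
            sp--;
            current = stack[sp % STACK_ENTRIES];
            previous = NULL;
         }
      } else {
         previous = NULL; /* normal descent: no backtracking context */
      }

      struct node *n = current;
      struct node *prev = previous;
      previous = n;
      current = NULL;

      if (!n->children[0]) { /* leaf */
         printf("leaf %d\n", n->id);
         continue;
      }

      if (!prev) {
         /* First visit: push children 3..1 into the wraparound stack,
          * raise the watermark if we wrapped, and descend into child 0. */
         for (int i = 3; i >= 1; --i) {
            if (n->children[i]) {
               stack[sp % STACK_ENTRIES] = n->children[i];
               sp++;
            }
         }
         if (sp - STACK_ENTRIES > sp_base)
            sp_base = sp - STACK_ENTRIES;
         current = n->children[0];
      } else {
         /* Re-entered while backtracking: continue with the child after the
          * one we came from (current stays NULL after the last child). */
         for (int i = 0; i < 3; ++i) {
            if (n->children[i] == prev && n->children[i + 1])
               current = n->children[i + 1];
         }
      }
   }
}

int
main(void)
{
   struct node x1 = {.id = 3}, x2 = {.id = 4}, a2 = {.id = 5}, b = {.id = 6};
   struct node a1 = {.children = {&x1, &x2}, .id = 2};
   struct node a = {.children = {&a1, &a2}, .id = 1};
   struct node root = {.children = {&a, &b}, .id = 0};
   x1.parent = x2.parent = &a1;
   a1.parent = a2.parent = &a;
   a.parent = b.parent = &root;

   traverse(&root);
   return 0;
}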
@@ -31,6 +31,11 @@
 #include "radv_rt_common.h"
 #include "radv_shader.h"
 
+/* Traversal stack size. Traversal supports backtracking so we can go deeper than this size if
+ * needed. However, we keep a large stack size to avoid it being put into registers, which hurts
+ * occupancy. */
+#define MAX_STACK_ENTRY_COUNT 76
+
 typedef struct {
    nir_variable *variable;
    unsigned array_length;
@@ -140,7 +145,11 @@ struct ray_query_traversal_vars {
    rq_variable *bvh_base;
    rq_variable *stack;
    rq_variable *top_stack;
+   rq_variable *stack_base;
    rq_variable *current_node;
+   rq_variable *previous_node;
+   rq_variable *instance_top_node;
+   rq_variable *instance_bottom_node;
 };
 
 struct ray_query_intersection_vars {
@@ -197,9 +206,16 @@ init_ray_query_traversal_vars(nir_shader *shader, nir_function_impl *impl, unsig
       rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_stack"));
    result.top_stack =
       rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_top_stack"));
+   result.stack_base =
+      rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_stack_base"));
    result.current_node =
       rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_current_node"));
 
+   result.previous_node =
+      rq_variable_create(shader, impl, array_length, glsl_uint_type(), VAR_NAME("_previous_node"));
+   result.instance_top_node = rq_variable_create(shader, impl, array_length, glsl_uint_type(),
+                                                 VAR_NAME("_instance_top_node"));
+   result.instance_bottom_node = rq_variable_create(shader, impl, array_length, glsl_uint_type(),
+                                                    VAR_NAME("_instance_bottom_node"));
    return result;
 }
@@ -387,8 +403,13 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
 
    rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
    rq_store_var(b, index, vars->trav.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE), 0x1);
+   rq_store_var(b, index, vars->trav.previous_node, nir_imm_int(b, -1), 0x1);
+   rq_store_var(b, index, vars->trav.instance_top_node, nir_imm_int(b, -1), 0x1);
+   rq_store_var(b, index, vars->trav.instance_bottom_node,
+                nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 0x1);
 
-   rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, 0), 1);
+   rq_store_var(b, index, vars->trav.top_stack, nir_imm_int(b, -1), 1);
+   rq_store_var(b, index, vars->trav.stack_base, nir_imm_int(b, 0), 1);
 }
 nir_push_else(b, NULL);
 {
@@ -621,7 +642,11 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
       .bvh_base = rq_deref_var(b, index, vars->trav.bvh_base),
       .stack = rq_deref_var(b, index, vars->trav.stack),
       .top_stack = rq_deref_var(b, index, vars->trav.top_stack),
+      .stack_base = rq_deref_var(b, index, vars->trav.stack_base),
       .current_node = rq_deref_var(b, index, vars->trav.current_node),
+      .previous_node = rq_deref_var(b, index, vars->trav.previous_node),
+      .instance_top_node = rq_deref_var(b, index, vars->trav.instance_top_node),
+      .instance_bottom_node = rq_deref_var(b, index, vars->trav.instance_bottom_node),
       .instance_id = rq_deref_var(b, index, vars->candidate.instance_id),
       .instance_addr = rq_deref_var(b, index, vars->candidate.instance_addr),
       .custom_instance_and_mask = rq_deref_var(b, index, vars->candidate.custom_instance_and_mask),
@@ -642,6 +667,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
       .dir = rq_load_var(b, index, vars->direction),
       .vars = trav_vars,
       .stack_stride = 1,
+      .stack_entries = MAX_STACK_ENTRY_COUNT,
       .stack_store_cb = store_stack_entry,
       .stack_load_cb = load_stack_entry,
       .aabb_cb = handle_candidate_aabb,
@@ -32,6 +32,10 @@
 #include "nir/nir_builder.h"
 #include "nir/nir_builtin_builder.h"
 
+/* Traversal stack size. This stack is put in LDS and experimentally 16 entries results in best
+ * performance. */
+#define MAX_STACK_ENTRY_COUNT 16
+
 static VkRayTracingPipelineCreateInfoKHR
 radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
 {
@@ -1029,9 +1033,12 @@ struct rt_traversal_vars {
    nir_variable *hit;
    nir_variable *bvh_base;
    nir_variable *stack;
-   nir_variable *lds_stack_base;
    nir_variable *top_stack;
+   nir_variable *stack_base;
    nir_variable *current_node;
+   nir_variable *previous_node;
+   nir_variable *instance_top_node;
+   nir_variable *instance_bottom_node;
 };
 
 static struct rt_traversal_vars
@@ -1057,12 +1064,18 @@ init_traversal_vars(nir_builder *b)
                                       "traversal_bvh_base");
    ret.stack =
       nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_ptr");
-   ret.lds_stack_base = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
-                                            "traversal_lds_stack_base");
    ret.top_stack = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(),
                                        "traversal_top_stack_ptr");
+   ret.stack_base =
+      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "traversal_stack_base");
    ret.current_node =
       nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "current_node;");
+   ret.previous_node =
+      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "previous_node");
+   ret.instance_top_node =
+      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_top_node");
+   ret.instance_bottom_node =
+      nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "instance_bottom_node");
    return ret;
 }
@@ -1313,64 +1326,13 @@ static void
 store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
                   const struct radv_ray_traversal_args *args)
 {
-   index = nir_umod(b, index, nir_imm_int(b, args->stack_stride * MAX_STACK_LDS_ENTRY_COUNT));
    nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
 }
 
 static nir_ssa_def *
 load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args)
 {
-   nir_variable *ret = nir_local_variable_create(b->impl, glsl_uint_type(), "load_stack_result");
-   struct traversal_data *data = args->data;
-   nir_push_if(b, nir_ilt(b, index, nir_load_var(b, data->trav_vars->lds_stack_base)));
-   {
-      nir_ssa_def *scratch_addr =
-         nir_imul_imm(b, nir_udiv_imm(b, index, args->stack_stride), sizeof(uint32_t));
-      nir_store_var(b, ret, nir_load_scratch(b, 1, 32, scratch_addr), 0x1);
-      nir_store_var(b, data->trav_vars->lds_stack_base, index, 0x1);
-   }
-   nir_push_else(b, NULL);
-   {
-      nir_ssa_def *stack_ptr =
-         nir_umod(b, index, nir_imm_int(b, args->stack_stride * MAX_STACK_LDS_ENTRY_COUNT));
-      nir_store_var(b, ret, nir_load_shared(b, 1, 32, stack_ptr, .base = 0, .align_mul = 4), 0x1);
-   }
-   nir_pop_if(b, NULL);
-
-   return nir_load_var(b, ret);
-}
-
-static void
-check_stack_overflow(nir_builder *b, const struct radv_ray_traversal_args *args)
-{
-   struct traversal_data *data = args->data;
-
-   nir_ssa_def *might_overflow =
-      nir_ige(b,
-              nir_isub(b, nir_load_deref(b, args->vars.stack),
-                       nir_load_var(b, data->trav_vars->lds_stack_base)),
-              nir_imm_int(b, args->stack_stride * (MAX_STACK_LDS_ENTRY_COUNT - 2)));
-   nir_push_if(b, might_overflow);
-   {
-      nir_ssa_def *scratch_addr = nir_imul_imm(
-         b, nir_udiv_imm(b, nir_load_var(b, data->trav_vars->lds_stack_base), args->stack_stride),
-         sizeof(uint32_t));
-      for (int i = 0; i < 4; ++i) {
-         nir_ssa_def *lds_stack_ptr =
-            nir_umod(b, nir_load_var(b, data->trav_vars->lds_stack_base),
-                     nir_imm_int(b, args->stack_stride * MAX_STACK_LDS_ENTRY_COUNT));
-
-         nir_ssa_def *node = nir_load_shared(b, 1, 32, lds_stack_ptr, .base = 0, .align_mul = 4);
-         nir_store_scratch(b, node, scratch_addr);
-
-         nir_store_var(
-            b, data->trav_vars->lds_stack_base,
-            nir_iadd_imm(b, nir_load_var(b, data->trav_vars->lds_stack_base), args->stack_stride),
-            1);
-         scratch_addr = nir_iadd_imm(b, scratch_addr, sizeof(uint32_t));
-      }
-   }
-   nir_pop_if(b, NULL);
+   return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
 }
 
 static nir_shader *
@@ -1383,7 +1345,7 @@ build_traversal_shader(struct radv_device *device,
    b.shader->info.workgroup_size[0] = 8;
    b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
    b.shader->info.shared_size =
-      device->physical_device->rt_wave_size * MAX_STACK_LDS_ENTRY_COUNT * sizeof(uint32_t);
+      device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
    struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, dst_vars->stack_sizes);
    map_rt_variables(var_remap, &vars, dst_vars);
@@ -1414,10 +1376,14 @@ build_traversal_shader(struct radv_device *device,
    nir_store_var(&b, trav_vars.instance_addr, nir_imm_int64(&b, 0), 1);
 
    nir_store_var(&b, trav_vars.stack, nir_imul_imm(&b, nir_load_local_invocation_index(&b), sizeof(uint32_t)), 1);
-   nir_store_var(&b, trav_vars.lds_stack_base, nir_load_var(&b, trav_vars.stack), 1);
+   nir_store_var(&b, trav_vars.stack_base, nir_load_var(&b, trav_vars.stack), 1);
    nir_store_var(&b, trav_vars.current_node, nir_imm_int(&b, RADV_BVH_ROOT_NODE), 0x1);
+   nir_store_var(&b, trav_vars.previous_node, nir_imm_int(&b, -1), 0x1);
+   nir_store_var(&b, trav_vars.instance_top_node, nir_imm_int(&b, -1), 0x1);
+   nir_store_var(&b, trav_vars.instance_bottom_node, nir_imm_int(&b, RADV_BVH_NO_INSTANCE_ROOT),
+                 0x1);
 
-   nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, 0), 1);
+   nir_store_var(&b, trav_vars.top_stack, nir_imm_int(&b, -1), 1);
 
    struct radv_ray_traversal_vars trav_vars_args = {
       .tmax = nir_build_deref_var(&b, vars.tmax),
@@ -1427,7 +1393,11 @@ build_traversal_shader(struct radv_device *device,
       .bvh_base = nir_build_deref_var(&b, trav_vars.bvh_base),
       .stack = nir_build_deref_var(&b, trav_vars.stack),
      .top_stack = nir_build_deref_var(&b, trav_vars.top_stack),
+      .stack_base = nir_build_deref_var(&b, trav_vars.stack_base),
       .current_node = nir_build_deref_var(&b, trav_vars.current_node),
+      .previous_node = nir_build_deref_var(&b, trav_vars.previous_node),
+      .instance_top_node = nir_build_deref_var(&b, trav_vars.instance_top_node),
+      .instance_bottom_node = nir_build_deref_var(&b, trav_vars.instance_bottom_node),
       .instance_id = nir_build_deref_var(&b, trav_vars.instance_id),
       .instance_addr = nir_build_deref_var(&b, trav_vars.instance_addr),
       .custom_instance_and_mask = nir_build_deref_var(&b, trav_vars.custom_instance_and_mask),
@@ -1450,11 +1420,11 @@ build_traversal_shader(struct radv_device *device,
       .dir = nir_load_var(&b, vars.direction),
       .vars = trav_vars_args,
       .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t),
+      .stack_entries = MAX_STACK_ENTRY_COUNT,
       .stack_store_cb = store_stack_entry,
       .stack_load_cb = load_stack_entry,
       .aabb_cb = handle_candidate_aabb,
       .triangle_cb = handle_candidate_triangle,
-      .check_stack_overflow_cb = check_stack_overflow,
       .data = &data,
    };
@@ -1643,7 +1613,7 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf
    if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
       nir_store_var(&b, vars.stack_ptr, nir_load_rt_dynamic_callable_stack_base_amd(&b), 0x1);
    else
-      nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, MAX_STACK_SCRATCH_ENTRY_COUNT * 4), 0x1);
+      nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
 
    nir_loop *loop = nir_push_loop(&b);
@@ -1688,8 +1658,9 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf
 
    nir_pop_loop(&b, loop);
 
-   b.shader->scratch_size = MAX2(16, MAX_STACK_SCRATCH_ENTRY_COUNT * 4);
-   if (!radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
+   if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
+      b.shader->scratch_size = 16; /* To enable scratch. */
+   else
       b.shader->scratch_size += compute_rt_stack_size(pCreateInfo, stack_sizes);
 
    /* Deal with all the inline functions. */
@@ -522,6 +522,14 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b,
    nir_pop_if(b, NULL);
 }
 
+static nir_ssa_def *
+fetch_parent_node(nir_builder *b, nir_ssa_def *bvh, nir_ssa_def *node)
+{
+   nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);
+
+   return nir_build_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
+}
+
 nir_ssa_def *
 radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                          const struct radv_ray_traversal_args *args)
@@ -538,19 +546,31 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
    {
       nir_push_if(b, nir_ieq_imm(b, nir_load_deref(b, args->vars.current_node), -1));
       {
-         nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack), nir_imm_int(b, args->stack_stride)));
+         /* Early exit if we never overflowed the stack, to avoid having to backtrack to
+          * the root for no reason. */
+         nir_push_if(b, nir_ilt(b, nir_load_deref(b, args->vars.stack),
+                                nir_imm_int(b, args->stack_stride)));
          {
            nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
            nir_jump(b, nir_jump_break);
         }
         nir_pop_if(b, NULL);
 
+         nir_ssa_def *stack_instance_exit = nir_ige(b, nir_load_deref(b, args->vars.top_stack),
+                                                    nir_load_deref(b, args->vars.stack));
+         nir_ssa_def *root_instance_exit =
+            nir_ieq(b, nir_load_deref(b, args->vars.previous_node),
+                    nir_load_deref(b, args->vars.instance_bottom_node));
          nir_if *instance_exit =
-            nir_push_if(b, nir_uge(b, nir_load_deref(b, args->vars.top_stack),
-                                   nir_load_deref(b, args->vars.stack)));
+            nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
          instance_exit->control = nir_selection_control_dont_flatten;
         {
-            nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, 0), 1);
+            nir_store_deref(b, args->vars.top_stack, nir_imm_int(b, -1), 1);
+            nir_store_deref(b, args->vars.previous_node,
+                            nir_load_deref(b, args->vars.instance_top_node), 1);
+            nir_store_deref(b, args->vars.instance_bottom_node,
+                            nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
 
            nir_store_deref(b, args->vars.bvh_base, args->root_bvh_base, 1);
            nir_store_deref(b, args->vars.origin, args->origin, 7);
            nir_store_deref(b, args->vars.dir, args->dir, 7);
@@ -558,20 +578,47 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
         }
         nir_pop_if(b, NULL);
 
-         nir_store_deref(b, args->vars.stack,
-                         nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
-
-         nir_ssa_def *bvh_node =
-            args->stack_load_cb(b, nir_load_deref(b, args->vars.stack), args);
-         nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
+         nir_push_if(b, nir_ige(b, nir_load_deref(b, args->vars.stack_base),
+                                nir_load_deref(b, args->vars.stack)));
+         {
+            nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
+            nir_ssa_def *bvh_addr =
+               build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base));
+
+            nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
+            nir_push_if(b, nir_ieq(b, parent, nir_imm_int(b, -1)));
+            {
+               nir_store_var(b, incomplete, nir_imm_bool(b, false), 0x1);
+               nir_jump(b, nir_jump_break);
+            }
+            nir_pop_if(b, NULL);
+            nir_store_deref(b, args->vars.current_node, parent, 0x1);
+         }
+         nir_push_else(b, NULL);
+         {
+            nir_store_deref(
+               b, args->vars.stack,
+               nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
+
+            nir_ssa_def *stack_ptr =
+               nir_umod(b, nir_load_deref(b, args->vars.stack),
+                        nir_imm_int(b, args->stack_stride * args->stack_entries));
+            nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
+            nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
+            nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, -1), 0x1);
+         }
+         nir_pop_if(b, NULL);
      }
+      nir_push_else(b, NULL);
+      {
+         nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, -1), 0x1);
+      }
      nir_pop_if(b, NULL);
 
-      if (args->check_stack_overflow_cb)
-         args->check_stack_overflow_cb(b, args);
-
      nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node);
+
+      nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node);
+      nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
      nir_store_deref(b, args->vars.current_node, nir_imm_int(b, -1), 0x1);
 
      nir_ssa_def *global_bvh_node =
@@ -625,6 +672,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                /* Push the instance root node onto the stack */
                nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_ROOT_NODE),
                                0x1);
+               nir_store_deref(b, args->vars.instance_bottom_node,
+                               nir_imm_int(b, RADV_BVH_ROOT_NODE), 1);
+               nir_store_deref(b, args->vars.instance_top_node, bvh_node, 1);
 
                /* Transform the ray into object space */
                nir_store_deref(b, args->vars.origin,
@@ -654,20 +704,46 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
                                nir_load_deref(b, args->vars.inv_dir));
          }
 
-         nir_ssa_def *new_nodes[4];
-         for (unsigned i = 0; i < 4; ++i)
-            new_nodes[i] = nir_channel(b, result, i);
-
-         for (unsigned i = 1; i < 4; ++i)
-            nir_push_if(b, nir_ine_imm(b, new_nodes[i], -1));
-
-         for (unsigned i = 4; i-- > 1;) {
-            nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
-            args->stack_store_cb(b, stack, new_nodes[i], args);
-            nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1);
-            nir_pop_if(b, NULL);
-         }
-         nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1);
+         /* box */
+         nir_push_if(b, nir_ieq_imm(b, prev_node, -1));
+         {
+            nir_ssa_def *new_nodes[4];
+            for (unsigned i = 0; i < 4; ++i)
+               new_nodes[i] = nir_channel(b, result, i);
+
+            for (unsigned i = 1; i < 4; ++i)
+               nir_push_if(b, nir_ine_imm(b, new_nodes[i], -1));
+
+            for (unsigned i = 4; i-- > 1;) {
+               nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
+               nir_ssa_def *stack_ptr = nir_umod(
+                  b, stack, nir_imm_int(b, args->stack_entries * args->stack_stride));
+               args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
+               nir_store_deref(b, args->vars.stack,
+                               nir_iadd_imm(b, stack, args->stack_stride), 1);
+
+               if (i == 1) {
+                  nir_ssa_def *new_base =
+                     nir_iadd_imm(b, nir_load_deref(b, args->vars.stack),
+                                  -args->stack_entries * args->stack_stride);
+                  new_base = nir_imax(b, nir_load_deref(b, args->vars.stack_base), new_base);
+                  nir_store_deref(b, args->vars.stack_base, new_base, 0x1);
+               }
+
+               nir_pop_if(b, NULL);
+            }
+            nir_store_deref(b, args->vars.current_node, new_nodes[0], 0x1);
+         }
+         nir_push_else(b, NULL);
+         {
+            nir_ssa_def *next = nir_imm_int(b, -1);
+            for (unsigned i = 0; i < 3; ++i) {
+               next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)),
+                                nir_channel(b, result, i + 1), next);
+            }
+            nir_store_deref(b, args->vars.current_node, next, 0x1);
+         }
+         nir_pop_if(b, NULL);
       }
       nir_pop_if(b, NULL);
    }
@@ -59,17 +59,6 @@ nir_ssa_def *hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, ni
 
 nir_ssa_def *create_bvh_descriptor(nir_builder *b);
 
-/*
- * A top-level AS can contain 2^24 children and a bottom-level AS can contain 2^24
- * triangles. At a branching factor of 4, that means we may need up to 24 levels of box
- * nodes + 1 triangle node
- * + 1 instance node. Furthermore, when processing a box node, worst case we actually
- * push all 4 children and remove one, so the DFS stack depth is box nodes * 3 + 2.
- */
-#define MAX_STACK_ENTRY_COUNT 76
-#define MAX_STACK_LDS_ENTRY_COUNT 16
-#define MAX_STACK_SCRATCH_ENTRY_COUNT (MAX_STACK_ENTRY_COUNT - MAX_STACK_LDS_ENTRY_COUNT)
-
 struct radv_ray_traversal_args;
 
 struct radv_leaf_intersection {
@@ -101,9 +90,6 @@ typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_s
 typedef nir_ssa_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_ssa_def *index,
                                               const struct radv_ray_traversal_args *args);
 
-typedef void (*radv_rt_check_stack_overflow_cb)(nir_builder *b,
-                                                const struct radv_ray_traversal_args *args);
-
 struct radv_ray_traversal_vars {
    /* For each accepted hit, tmax will be set to the t value. This allows for automatic intersection
     * culling.
@@ -119,13 +105,23 @@ struct radv_ray_traversal_vars {
    nir_deref_instr *bvh_base;
 
    /* stack is the current stack pointer/index. top_stack is the pointer/index that marks the end of
-    * traversal for the current BLAS/TLAS.
+    * traversal for the current BLAS/TLAS. stack_base is the low watermark of the short stack.
     */
    nir_deref_instr *stack;
    nir_deref_instr *top_stack;
+   nir_deref_instr *stack_base;
 
    nir_deref_instr *current_node;
 
+   /* The node visited in the previous iteration. This is used in backtracking to jump to its parent
+    * and then find the child after the previously visited node.
+    */
+   nir_deref_instr *previous_node;
+
+   /* When entering an instance these are the instance node and the root node of the BLAS */
+   nir_deref_instr *instance_top_node;
+   nir_deref_instr *instance_bottom_node;
+
    /* Information about the current instance used for culling. */
    nir_deref_instr *instance_id;
    nir_deref_instr *instance_addr;
@@ -143,12 +139,13 @@ struct radv_ray_traversal_args {
 
    struct radv_ray_traversal_vars vars;
 
-   /* The increment/decrement used for radv_ray_traversal_vars::stack */
+   /* The increment/decrement used for radv_ray_traversal_vars::stack, and how many entries are
+    * available. */
    uint32_t stack_stride;
+   uint32_t stack_entries;
 
    radv_rt_stack_store_cb stack_store_cb;
    radv_rt_stack_load_cb stack_load_cb;
-   radv_rt_check_stack_overflow_cb check_stack_overflow_cb;
 
    radv_aabb_intersection_cb aabb_cb;
    radv_triangle_intersection_cb triangle_cb;
@@ -156,6 +153,10 @@ struct radv_ray_traversal_args {
    void *data;
 };
 
+/* For the initialization of instance_bottom_node. Explicitly different than RADV_BVH_INVALID_NODE
+ * or any real node, to ensure we never exit an instance when we're not in one. */
+#define RADV_BVH_NO_INSTANCE_ROOT 0xfffffffeu
+
 /* Builds the ray traversal loop and returns whether traversal is incomplete, similar to
  * rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified
  * callbacks breaks out of the traversal loop.