intel/rt: Add lowering for ray-walk intrinsics in any-hit shaders
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:

committed by
Marge Bot

parent
c3ddefa000
commit
cb261b03e5
@@ -1119,3 +1119,5 @@ system_value("ray_miss_sbt_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("ray_miss_sbt_stride_intel", 1, bit_sizes=[16])
|
||||
system_value("callable_sbt_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("callable_sbt_stride_intel", 1, bit_sizes=[16])
|
||||
system_value("leaf_opaque_intel", 1, bit_sizes=[1])
|
||||
system_value("leaf_procedural_intel", 1, bit_sizes=[1])
|
||||
|
@@ -292,6 +292,25 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
sysval = globals.resume_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_leaf_procedural_intel:
|
||||
sysval = build_leaf_is_procedural(b, &hit_in);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_leaf_opaque_intel: {
|
||||
if (stage == MESA_SHADER_INTERSECTION) {
|
||||
/* In intersection shaders, the opaque bit is passed to us in
|
||||
* the front_face bit.
|
||||
*/
|
||||
sysval = hit_in.front_face;
|
||||
} else {
|
||||
nir_ssa_def *flags_dw =
|
||||
nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
|
||||
1, 32);
|
||||
sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
@@ -187,6 +187,133 @@ lower_rt_io_and_scratch(nir_shader *nir)
|
||||
nir_address_format_64bit_global);
|
||||
}
|
||||
|
||||
static void
|
||||
build_terminate_ray(nir_builder *b)
|
||||
{
|
||||
nir_ssa_def *skip_closest_hit =
|
||||
nir_i2b(b, nir_iand_imm(b, nir_load_ray_flags(b),
|
||||
BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER));
|
||||
nir_push_if(b, skip_closest_hit);
|
||||
{
|
||||
/* The shader that calls traceRay() is unable to access any ray hit
|
||||
* information except for that which is explicitly written into the ray
|
||||
* payload by shaders invoked during the trace. If there's no closest-
|
||||
* hit shader, then accepting the hit has no observable effect; it's
|
||||
* just extra memory traffic for no reason.
|
||||
*/
|
||||
brw_nir_btd_return(b);
|
||||
nir_jump(b, nir_jump_halt);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* The closest hit shader is in the same shader group as the any-hit
|
||||
* shader that we're currently in. We can get the address for its SBT
|
||||
* handle by looking at the shader record pointer and subtracting the
|
||||
* size of a SBT handle. The BINDLESS_SHADER_RECORD for a closest hit
|
||||
* shader is the first one in the SBT handle.
|
||||
*/
|
||||
nir_ssa_def *closest_hit =
|
||||
nir_iadd_imm(b, nir_load_shader_record_ptr(b),
|
||||
-BRW_RT_SBT_HANDLE_SIZE);
|
||||
|
||||
brw_nir_rt_commit_hit(b);
|
||||
brw_nir_btd_spawn(b, closest_hit);
|
||||
nir_jump(b, nir_jump_halt);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
/** Lowers away ray walk intrinsics
|
||||
*
|
||||
* This lowers terminate_ray, ignore_ray_intersection, and the NIR-specific
|
||||
* accept_ray_intersection intrinsics to the appropriate Intel-specific
|
||||
* intrinsics.
|
||||
*/
|
||||
static bool
|
||||
lower_ray_walk_intrinsics(nir_shader *shader,
|
||||
const struct gen_device_info *devinfo)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_ANY_HIT ||
|
||||
shader->info.stage == MESA_SHADER_INTERSECTION);
|
||||
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
bool progress = false;
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_ignore_ray_intersection: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
/* We put the newly emitted code inside a dummy if because it's
|
||||
* going to contain a jump instruction and we don't want to deal
|
||||
* with that mess here. It'll get dealt with by our control-flow
|
||||
* optimization passes.
|
||||
*/
|
||||
nir_push_if(&b, nir_imm_true(&b));
|
||||
nir_intrinsic_instr *ray_continue =
|
||||
nir_intrinsic_instr_create(b.shader,
|
||||
nir_intrinsic_trace_ray_continue_intel);
|
||||
nir_builder_instr_insert(&b, &ray_continue->instr);
|
||||
nir_jump(&b, nir_jump_halt);
|
||||
nir_pop_if(&b, NULL);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_accept_ray_intersection: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_ssa_def *terminate =
|
||||
nir_i2b(&b, nir_iand_imm(&b, nir_load_ray_flags(&b),
|
||||
BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT));
|
||||
nir_push_if(&b, terminate);
|
||||
{
|
||||
build_terminate_ray(&b);
|
||||
}
|
||||
nir_push_else(&b, NULL);
|
||||
{
|
||||
nir_intrinsic_instr *ray_commit =
|
||||
nir_intrinsic_instr_create(b.shader,
|
||||
nir_intrinsic_trace_ray_commit_intel);
|
||||
nir_builder_instr_insert(&b, &ray_commit->instr);
|
||||
nir_jump(&b, nir_jump_halt);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_terminate_ray: {
|
||||
b.cursor = nir_instr_remove(&intrin->instr);
|
||||
build_terminate_ray(&b);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
} else {
|
||||
nir_metadata_preserve(impl, nir_metadata_all);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
void
|
||||
brw_nir_lower_raygen(nir_shader *nir)
|
||||
{
|
||||
@@ -200,6 +327,7 @@ brw_nir_lower_any_hit(nir_shader *nir, const struct gen_device_info *devinfo)
|
||||
{
|
||||
assert(nir->info.stage == MESA_SHADER_ANY_HIT);
|
||||
NIR_PASS_V(nir, brw_nir_lower_shader_returns);
|
||||
NIR_PASS_V(nir, lower_ray_walk_intrinsics, devinfo);
|
||||
lower_rt_io_and_scratch(nir);
|
||||
}
|
||||
|
||||
|
@@ -341,6 +341,34 @@ brw_nir_rt_load_mem_hit(nir_builder *b,
|
||||
brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_memcpy_global(nir_builder *b,
|
||||
nir_ssa_def *dst_addr, uint32_t dst_align,
|
||||
nir_ssa_def *src_addr, uint32_t src_align,
|
||||
uint32_t size)
|
||||
{
|
||||
/* We're going to copy in 16B chunks */
|
||||
assert(size % 16 == 0);
|
||||
dst_align = MIN2(dst_align, 16);
|
||||
src_align = MIN2(src_align, 16);
|
||||
|
||||
for (unsigned offset = 0; offset < size; offset += 16) {
|
||||
nir_ssa_def *data =
|
||||
nir_load_global(b, nir_iadd_imm(b, src_addr, offset), src_align,
|
||||
4, 32);
|
||||
nir_store_global(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
|
||||
data, 0xf /* write_mask */);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_rt_commit_hit(nir_builder *b)
|
||||
{
|
||||
brw_nir_memcpy_global(b, brw_nir_rt_mem_hit_addr(b, true), 16,
|
||||
brw_nir_rt_mem_hit_addr(b, false), 16,
|
||||
BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
|
||||
struct brw_nir_rt_mem_ray_defs {
|
||||
nir_ssa_def *orig;
|
||||
nir_ssa_def *dir;
|
||||
|
@@ -59,6 +59,23 @@ enum brw_rt_hit_kind {
|
||||
BRW_RT_HIT_KIND_BACK_FACE = 0xff,
|
||||
};
|
||||
|
||||
/** Ray flags
 *
 * One bit per flag. The numeric values must stay identical to those of the
 * SPIR-V RayFlags enum, so every flag is pinned to an explicit bit position.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE                 = 1 << 0,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE             = 1 << 1,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT       = 1 << 2,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER      = 1 << 3,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES   = 1 << 4,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES  = 1 << 5,
   BRW_RT_RAY_FLAG_CULL_OPAQUE                  = 1 << 6,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE              = 1 << 7,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES               = 1 << 8,
   BRW_RT_RAY_FLAG_SKIP_AABBS                   = 1 << 9,
};
|
||||
|
||||
struct brw_rt_scratch_layout {
|
||||
/** Number of stack IDs per DSS */
|
||||
uint32_t stack_ids_per_dss;
|
||||
|
Reference in New Issue
Block a user