From cb261b03e5af7862f1321c778e3ad54b640226bc Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 6 Aug 2020 16:09:55 -0500
Subject: [PATCH] intel/rt: Add lowering for ray-walk intrinsics in any-hit
 shaders

Reviewed-by: Caio Marcelo de Oliveira Filho
Part-of:
---
 src/compiler/nir/nir_intrinsics.py            |   2 +
 .../compiler/brw_nir_lower_rt_intrinsics.c    |  19 +++
 src/intel/compiler/brw_nir_rt.c               | 128 ++++++++++++++++++
 src/intel/compiler/brw_nir_rt_builder.h       |  28 ++++
 src/intel/compiler/brw_rt.h                   |  17 +++
 5 files changed, 194 insertions(+)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 01c37750afb..3906413aafe 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1119,3 +1119,5 @@ system_value("ray_miss_sbt_addr_intel", 1, bit_sizes=[64])
 system_value("ray_miss_sbt_stride_intel", 1, bit_sizes=[16])
 system_value("callable_sbt_addr_intel", 1, bit_sizes=[64])
 system_value("callable_sbt_stride_intel", 1, bit_sizes=[16])
+system_value("leaf_opaque_intel", 1, bit_sizes=[1])
+system_value("leaf_procedural_intel", 1, bit_sizes=[1])
diff --git a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
index a5fed61ef59..11fa8e49c07 100644
--- a/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_rt_intrinsics.c
@@ -292,6 +292,25 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
          sysval = globals.resume_sbt_addr;
          break;
 
+      case nir_intrinsic_load_leaf_procedural_intel:
+         sysval = build_leaf_is_procedural(b, &hit_in);
+         break;
+
+      case nir_intrinsic_load_leaf_opaque_intel: {
+         if (stage == MESA_SHADER_INTERSECTION) {
+            /* In intersection shaders, the opaque bit is passed to us in
+             * the front_face bit.
+             */
+            sysval = hit_in.front_face;
+         } else {
+            nir_ssa_def *flags_dw =
+               nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
+                               1, 32);
+            sysval = nir_i2b(b, nir_iand_imm(b, flags_dw, 1u << 30));
+         }
+         break;
+      }
+
       default:
          continue;
       }
diff --git a/src/intel/compiler/brw_nir_rt.c b/src/intel/compiler/brw_nir_rt.c
index 0c905a29f28..0467edab27a 100644
--- a/src/intel/compiler/brw_nir_rt.c
+++ b/src/intel/compiler/brw_nir_rt.c
@@ -187,6 +187,133 @@ lower_rt_io_and_scratch(nir_shader *nir)
               nir_address_format_64bit_global);
 }
 
+static void
+build_terminate_ray(nir_builder *b)
+{
+   nir_ssa_def *skip_closest_hit =
+      nir_i2b(b, nir_iand_imm(b, nir_load_ray_flags(b),
+                              BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER));
+   nir_push_if(b, skip_closest_hit);
+   {
+      /* The shader that calls traceRay() is unable to access any ray hit
+       * information except for that which is explicitly written into the ray
+       * payload by shaders invoked during the trace. If there's no closest-
+       * hit shader, then accepting the hit has no observable effect; it's
+       * just extra memory traffic for no reason.
+       */
+      brw_nir_btd_return(b);
+      nir_jump(b, nir_jump_halt);
+   }
+   nir_push_else(b, NULL);
+   {
+      /* The closest hit shader is in the same shader group as the any-hit
+       * shader that we're currently in. We can get the address for its SBT
+       * handle by looking at the shader record pointer and subtracting the
+       * size of a SBT handle. The BINDLESS_SHADER_RECORD for a closest hit
+       * shader is the first one in the SBT handle.
+       */
+      nir_ssa_def *closest_hit =
+         nir_iadd_imm(b, nir_load_shader_record_ptr(b),
+                      -BRW_RT_SBT_HANDLE_SIZE);
+
+      brw_nir_rt_commit_hit(b);
+      brw_nir_btd_spawn(b, closest_hit);
+      nir_jump(b, nir_jump_halt);
+   }
+   nir_pop_if(b, NULL);
+}
+
+/** Lowers away ray walk intrinsics
+ *
+ * This lowers terminate_ray, ignore_ray_intersection, and the NIR-specific
+ * accept_ray_intersection intrinsics to the appropriate Intel-specific
+ * intrinsics.
+ */
+static bool
+lower_ray_walk_intrinsics(nir_shader *shader,
+                          const struct gen_device_info *devinfo)
+{
+   assert(shader->info.stage == MESA_SHADER_ANY_HIT ||
+          shader->info.stage == MESA_SHADER_INTERSECTION);
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   bool progress = false;
+   nir_foreach_block_safe(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_ignore_ray_intersection: {
+            b.cursor = nir_instr_remove(&intrin->instr);
+
+            /* We put the newly emitted code inside a dummy if because it's
+             * going to contain a jump instruction and we don't want to deal
+             * with that mess here. It'll get dealt with by our control-flow
+             * optimization passes.
+             */
+            nir_push_if(&b, nir_imm_true(&b));
+            nir_intrinsic_instr *ray_continue =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_trace_ray_continue_intel);
+            nir_builder_instr_insert(&b, &ray_continue->instr);
+            nir_jump(&b, nir_jump_halt);
+            nir_pop_if(&b, NULL);
+            progress = true;
+            break;
+         }
+
+         case nir_intrinsic_accept_ray_intersection: {
+            b.cursor = nir_instr_remove(&intrin->instr);
+
+            nir_ssa_def *terminate =
+               nir_i2b(&b, nir_iand_imm(&b, nir_load_ray_flags(&b),
+                                        BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT));
+            nir_push_if(&b, terminate);
+            {
+               build_terminate_ray(&b);
+            }
+            nir_push_else(&b, NULL);
+            {
+               nir_intrinsic_instr *ray_commit =
+                  nir_intrinsic_instr_create(b.shader,
+                                             nir_intrinsic_trace_ray_commit_intel);
+               nir_builder_instr_insert(&b, &ray_commit->instr);
+               nir_jump(&b, nir_jump_halt);
+            }
+            nir_pop_if(&b, NULL);
+            progress = true;
+            break;
+         }
+
+         case nir_intrinsic_terminate_ray: {
+            b.cursor = nir_instr_remove(&intrin->instr);
+            build_terminate_ray(&b);
+            progress = true;
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_none);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
+   }
+
+   return progress;
+}
+
 void
 brw_nir_lower_raygen(nir_shader *nir)
 {
@@ -200,6 +327,7 @@ brw_nir_lower_any_hit(nir_shader *nir, const struct gen_device_info *devinfo)
 {
    assert(nir->info.stage == MESA_SHADER_ANY_HIT);
    NIR_PASS_V(nir, brw_nir_lower_shader_returns);
+   NIR_PASS_V(nir, lower_ray_walk_intrinsics, devinfo);
    lower_rt_io_and_scratch(nir);
 }
 
diff --git a/src/intel/compiler/brw_nir_rt_builder.h b/src/intel/compiler/brw_nir_rt_builder.h
index 4f15234978f..ec41ab8524e 100644
--- a/src/intel/compiler/brw_nir_rt_builder.h
+++ b/src/intel/compiler/brw_nir_rt_builder.h
@@ -341,6 +341,34 @@ brw_nir_rt_load_mem_hit(nir_builder *b,
       brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
 }
 
+static inline void
+brw_nir_memcpy_global(nir_builder *b,
+                      nir_ssa_def *dst_addr, uint32_t dst_align,
+                      nir_ssa_def *src_addr, uint32_t src_align,
+                      uint32_t size)
+{
+   /* We're going to copy in 16B chunks */
+   assert(size % 16 == 0);
+   dst_align = MIN2(dst_align, 16);
+   src_align = MIN2(src_align, 16);
+
+   for (unsigned offset = 0; offset < size; offset += 16) {
+      nir_ssa_def *data =
+         nir_load_global(b, nir_iadd_imm(b, src_addr, offset), src_align,
+                         4, 32);
+      nir_store_global(b, nir_iadd_imm(b, dst_addr, offset), dst_align,
+                       data, 0xf /* write_mask */);
+   }
+}
+
+static inline void
+brw_nir_rt_commit_hit(nir_builder *b)
+{
+   brw_nir_memcpy_global(b, brw_nir_rt_mem_hit_addr(b, true), 16,
+                         brw_nir_rt_mem_hit_addr(b, false), 16,
+                         BRW_RT_SIZEOF_HIT_INFO);
+}
+
 struct brw_nir_rt_mem_ray_defs {
    nir_ssa_def *orig;
    nir_ssa_def *dir;
diff --git a/src/intel/compiler/brw_rt.h b/src/intel/compiler/brw_rt.h
index 10e54e23ced..330abf4dd16 100644
--- a/src/intel/compiler/brw_rt.h
+++ b/src/intel/compiler/brw_rt.h
@@ -59,6 +59,23 @@ enum brw_rt_hit_kind {
    BRW_RT_HIT_KIND_BACK_FACE = 0xff,
 };
 
+/** Ray flags
+ *
+ * This enum must match the SPIR-V RayFlags enum.
+ */
+enum brw_rt_ray_flags {
+   BRW_RT_RAY_FLAG_FORCE_OPAQUE                = 0x01,
+   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE            = 0x02,
+   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT      = 0x04,
+   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER     = 0x08,
+   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES  = 0x10,
+   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 0x20,
+   BRW_RT_RAY_FLAG_CULL_OPAQUE                 = 0x40,
+   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE             = 0x80,
+   BRW_RT_RAY_FLAG_SKIP_TRIANGLES              = 0x100,
+   BRW_RT_RAY_FLAG_SKIP_AABBS                  = 0x200,
+};
+
 struct brw_rt_scratch_layout {
    /** Number of stack IDs per DSS */
    uint32_t stack_ids_per_dss;
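
The control flow this lowering emits can be summarized outside of NIR. The
standalone C sketch below mirrors the decision tree of build_terminate_ray()
and the accept/ignore lowering in the patch: the two flag values are copied
from the brw_rt_ray_flags enum added above, while the function names and the
printed "actions" are purely illustrative and are not driver code or driver
API.

/* Illustrative sketch only: models which Intel-specific action the any-hit
 * ray-walk lowering emits for each source intrinsic.  The flag values match
 * brw_rt_ray_flags in brw_rt.h; everything else is made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>

enum sketch_ray_flags {
   RAY_FLAG_TERMINATE_ON_FIRST_HIT  = 0x04,
   RAY_FLAG_SKIP_CLOSEST_HIT_SHADER = 0x08,
};

/* Mirrors build_terminate_ray(): either return straight to the traceRay()
 * caller, or commit the hit and spawn the closest-hit shader whose SBT
 * handle sits one BRW_RT_SBT_HANDLE_SIZE before the current shader record.
 */
static void
sketch_terminate_ray(uint32_t ray_flags)
{
   if (ray_flags & RAY_FLAG_SKIP_CLOSEST_HIT_SHADER) {
      puts("btd_return: no closest-hit shader will run, skip the commit");
   } else {
      puts("commit hit, btd_spawn(shader_record_ptr - BRW_RT_SBT_HANDLE_SIZE)");
   }
}

/* Mirrors the accept_ray_intersection lowering. */
static void
sketch_accept_ray_intersection(uint32_t ray_flags)
{
   if (ray_flags & RAY_FLAG_TERMINATE_ON_FIRST_HIT) {
      sketch_terminate_ray(ray_flags);
   } else {
      puts("trace_ray_commit_intel: commit the hit, keep walking the BVH");
   }
}

/* Mirrors the ignore_ray_intersection lowering. */
static void
sketch_ignore_ray_intersection(void)
{
   puts("trace_ray_continue_intel: drop the hit, keep walking the BVH");
}

int
main(void)
{
   sketch_ignore_ray_intersection();
   sketch_accept_ray_intersection(0);
   sketch_accept_ray_intersection(RAY_FLAG_TERMINATE_ON_FIRST_HIT |
                                  RAY_FLAG_SKIP_CLOSEST_HIT_SHADER);
   return 0;
}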