intel/fs: lower ray query intrinsics

v2: Add helper for acceleration->root_node computation (Caio)

v3: Update comment on "done" bit (Caio)
    Remove progress bool value for impl function (Caio)
    Don't use nir_shader_instructions_pass to search the shader (Caio)

v4: Rename variable for if/else block (Caio)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13719>
2021-06-21 13:44:53 +03:00
parent 712d8fb043
commit c78be5da30
9 changed files with 668 additions and 6 deletions
--- a/src/intel/compiler/brw_rt.h
+++ b/src/intel/compiler/brw_rt.h
@@ -31,6 +31,9 @@ extern "C" {
 /** Vulkan defines shaderGroupHandleSize = 32 */
 #define BRW_RT_SBT_HANDLE_SIZE 32

+/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
+#define BRW_RT_DISPATCH_GLOBALS_SIZE 80
+
 /** Offset after the RT dispatch globals at which "push" constants live */
 #define BRW_RT_PUSH_CONST_OFFSET 128

@@ -177,6 +180,10 @@ struct brw_rt_raygen_trampoline_params {
    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))

+#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY /* 2 hit infos + a ray and a trav stack per BVH level */ \
+   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
+    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)
+
 #define BRW_RT_SIZEOF_HW_STACK \
   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
    BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
@@ -228,6 +235,39 @@ brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
   layout->total_size = size;
 }

+static inline uint32_t
+brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
+{
+   /* One BRW_RT_SIZEOF_RAY_QUERY per SIMD lane. max_scratch_ids includes all
+    * the threads, so it bounds the maximum slice/subslice/EU ID.
+    */
+   const uint32_t lanes_per_thread = 16; /* Cannot run in SIMD32 with ray queries */
+   const uint32_t thread_ids = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
+   return thread_ids * lanes_per_thread * BRW_RT_SIZEOF_RAY_QUERY;
+}
+
+static inline uint32_t
+brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
+{
+   /* One BRW_RT_SIZEOF_SHADOW_RAY_QUERY per SIMD lane. max_scratch_ids includes
+    * all the threads, so it bounds the maximum slice/subslice/EU ID.
+    */
+   const uint32_t simd_lanes = 16; /* Cannot run in SIMD32 with ray queries */
+   const uint32_t id_count = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
+   return id_count * simd_lanes * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
+}
+
+static inline uint32_t
+brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
+                                      uint32_t ray_queries)
+{
+   /* A single query can write directly in the HW buffer, so shadow stacks
+    * are only counted when there is more than one query. */
+   const uint32_t shadowed_queries = ray_queries > 1 ? ray_queries : 0;
+   const uint32_t ctrl_level_size = ray_queries * 4; /* Ctrl + Level data */
+   return shadowed_queries * brw_rt_ray_queries_shadow_stack_size(devinfo) + ctrl_level_size;
+}
+
 #ifdef __cplusplus
 }
 #endif