diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index b6d2c760b08..3459161e91f 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -102,6 +102,7 @@ COMPILER_FILES = \
 	compiler/brw_reg.h \
 	compiler/brw_reg_type.c \
 	compiler/brw_reg_type.h \
+	compiler/brw_rt.h \
 	compiler/brw_schedule_instructions.cpp \
 	compiler/brw_shader.cpp \
 	compiler/brw_shader.h \
diff --git a/src/intel/compiler/brw_rt.h b/src/intel/compiler/brw_rt.h
new file mode 100644
index 00000000000..00b1aa517fc
--- /dev/null
+++ b/src/intel/compiler/brw_rt.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_RT_H
+#define BRW_RT_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Vulkan defines shaderGroupHandleSize = 32 */
+#define BRW_RT_SBT_HANDLE_SIZE 32
+
+/* Vulkan always uses exactly two levels of BVH: world and object. At the API
+ * level, these are referred to as top and bottom.
+ */
+enum brw_rt_bvh_level {
+   BRW_RT_BVH_LEVEL_WORLD = 0,
+   BRW_RT_BVH_LEVEL_OBJECT = 1,
+};
+#define BRW_RT_MAX_BVH_LEVELS 2
+
+/** Byte layout of the RT scratch memory area
+ *
+ * Filled in by brw_rt_compute_scratch_layout().
+ */
+struct brw_rt_scratch_layout {
+   /** Number of stack IDs per DSS */
+   uint32_t stack_ids_per_dss;
+
+   /** Start offset (in bytes) of the hardware MemRay stack */
+   uint32_t ray_stack_start;
+
+   /** Stride (in bytes) of the hardware MemRay stack */
+   uint32_t ray_stack_stride;
+
+   /** Start offset (in bytes) of the SW stacks */
+   uint64_t sw_stack_start;
+
+   /** Size (in bytes) of the SW stack for a single shader invocation */
+   uint32_t sw_stack_size;
+
+   /** Total size (in bytes) of the RT scratch memory area */
+   uint64_t total_size;
+};
+
+/** Size of the "hot zone" in bytes
+ *
+ * The hot zone is a SW-defined data structure which is a single uvec4
+ * containing two pieces of information:
+ *
+ *  - hotzone.x: Stack offset (in bytes)
+ *
+ *    This is the offset (in bytes) into the per-thread scratch space at which
+ *    the current shader's stack starts. This is incremented by the calling
+ *    shader prior to any shader call type instructions and gets decremented
+ *    by the resume shader as part of completing the return operation.
+ *
+ *
+ *  - hotzone.yzw: The launch ID associated with the current thread
+ *
+ *    Inside a bindless shader, the only information we have is the DSS ID
+ *    from the hardware EU and a per-DSS stack ID. In particular, the three-
+ *    dimensional launch ID is lost the moment we leave the raygen trampoline.
+ */
+#define BRW_RT_SIZEOF_HOTZONE 16
+
+/* From the BSpec "Address Computation for Memory Based Data Structures:
+ * Ray and TraversalStack (Async Ray Tracing)":
+ *
+ *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
+ */
+#define BRW_RT_SIZEOF_RAY 64
+#define BRW_RT_SIZEOF_HIT_INFO 32
+#define BRW_RT_SIZEOF_TRAV_STACK 32
+
+/* From the BSpec:
+ *
+ *    syncStackSize = (maxBVHLevels % 2 == 1) ?
+ *       (sizeof(HitInfo) * 2 +
+ *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
+ *       (sizeof(HitInfo) * 2 +
+ *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
+ *
+ * The select is just to align to 64B.
+ */
+#define BRW_RT_SIZEOF_RAY_QUERY \
+   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
+    (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
+    (BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))
+
+#define BRW_RT_SIZEOF_HW_STACK \
+   (BRW_RT_SIZEOF_HIT_INFO * 2 + \
+    BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
+    BRW_RT_SIZEOF_TRAV_STACK * BRW_RT_MAX_BVH_LEVELS)
+
+/* This is a mesa-defined region for hit attribute data */
+#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
+#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK
+
+#define BRW_RT_ASYNC_STACK_STRIDE \
+   ALIGN(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + \
+         BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
+
+/** Compute the layout of the RT scratch memory area
+ *
+ * The area holds, in order, an array of hot zones, the HW ray stacks, and
+ * the SW stacks, each with one entry per stack ID. All offsets and sizes
+ * are accumulated in 64 bits so the total may exceed 4GiB without wrapping.
+ *
+ * \param[out] layout             Layout to fill in
+ * \param[in]  devinfo            Device whose dual-subslice count is used
+ * \param[in]  stack_ids_per_dss  Number of stack IDs per DSS
+ * \param[in]  sw_stack_size      SW stack size (in bytes) for a single shader
+ *                                invocation; stored aligned to 64B
+ */
+static inline void
+brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
+                              const struct gen_device_info *devinfo,
+                              uint32_t stack_ids_per_dss,
+                              uint32_t sw_stack_size)
+{
+   layout->stack_ids_per_dss = stack_ids_per_dss;
+
+   const uint32_t dss_count = gen_device_info_num_dual_subslices(devinfo);
+   const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
+
+   uint64_t size = 0;
+
+   /* The first thing in our scratch area is an array of "hot zones" which
+    * store the stack offset as well as the launch IDs for each active
+    * invocation.
+    *
+    * The multiplications below are done in 64 bits: both operands are
+    * 32-bit, so without the cast the product would wrap before being
+    * added to size.
+    */
+   size += (uint64_t)BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
+
+   /* Next, we place the HW ray stacks */
+   assert(size % 64 == 0); /* Cache-line aligned */
+   assert(size < UINT32_MAX);
+   layout->ray_stack_start = size;
+   layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
+   size += (uint64_t)num_stack_ids * layout->ray_stack_stride;
+
+   /* Finally, we place the SW stacks for the individual ray-tracing shader
+    * invocations. We align these to 64B to ensure that we don't have any
+    * shared cache lines which could hurt performance.
+    */
+   assert(size % 64 == 0);
+   layout->sw_stack_start = size;
+   layout->sw_stack_size = ALIGN(sw_stack_size, 64);
+   size += (uint64_t)num_stack_ids * layout->sw_stack_size;
+
+   layout->total_size = size;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BRW_RT_H */
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index 8a7279b181f..d554ea06bf8 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -94,6 +94,7 @@ libintel_compiler_files = files(
   'brw_reg.h',
   'brw_reg_type.c',
   'brw_reg_type.h',
+  'brw_rt.h',
   'brw_schedule_instructions.cpp',
   'brw_shader.cpp',
   'brw_shader.h',
diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h
index b7b7df58135..ee27c5f8b43 100644
--- a/src/intel/dev/gen_device_info.h
+++ b/src/intel/dev/gen_device_info.h
@@ -294,6 +294,16 @@ gen_device_info_eu_available(const struct gen_device_info *devinfo,
    return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
 }
 
+/** Placeholder for a query of the device's dual-subslice count
+ *
+ * Not implemented yet: the body is only unreachable("TODO"), so this must not
+ * be called until a real implementation lands.
+ */
+static inline unsigned
+gen_device_info_num_dual_subslices(const struct gen_device_info *devinfo)
+{
+   unreachable("TODO");
+}
+
 int gen_device_name_to_pci_device_id(const char *name);
 const char *gen_get_device_name(int devid);
 