
Each callable ray-tracing shader stage has to perform a return operation at the end. In the case of raygen shaders, it retires the bindless thread, because the raygen shader is always the root of the call tree. In the case of any-hit shaders, the default action is to accept the hit. For callable, miss, and closest-hit shaders, it is an actual return operation. The assumption is that the calling shader has placed a BINDLESS_SHADER_RECORD address for the return in the first QWord of the callee's scratch space. The return operation simply loads this value and calls the btd_spawn intrinsic to jump to it; see brw_nir_btd_return() below.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
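To make the flow concrete, here is a minimal sketch (not part of this change) of how a lowering pass could emit the per-stage end-of-shader operation using the helpers defined in the header below; the emit_rt_shader_return wrapper is hypothetical:

static void
emit_rt_shader_return(nir_builder *b, gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_RAYGEN:
      /* Raygen is the root of the call tree; retire the bindless thread. */
      brw_nir_btd_retire(b);
      break;
   case MESA_SHADER_ANY_HIT:
      /* The default action for an any-hit shader is to accept the hit. */
      nir_accept_ray_intersection(b);
      break;
   case MESA_SHADER_CALLABLE:
   case MESA_SHADER_MISS:
   case MESA_SHADER_CLOSEST_HIT:
      /* Load the return BSR address from scratch and jump to it. */
      brw_nir_btd_return(b);
      break;
   default:
      unreachable("Not a callable ray-tracing shader stage");
   }
}
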
/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_NIR_RT_BUILDER_H
#define BRW_NIR_RT_BUILDER_H

#include "brw_rt.h"
#include "nir_builder.h"

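/* Helper to emit the load_global_const_block_intel intrinsic: a block load
 * of num_components 32-bit values from a 64-bit global address.
 */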
static nir_ssa_def *
nir_load_global_const_block_intel(nir_builder *b, nir_ssa_def *addr,
                                  unsigned num_components)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_load_global_const_block_intel);
   load->src[0] = nir_src_for_ssa(addr);
   load->num_components = num_components;
   nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

/* We have our own load/store scratch helpers because they emit a global
 * memory read or write based on the scratch_base_ptr system value rather
 * than a load/store_scratch intrinsic.
 */
static inline nir_ssa_def *
brw_nir_rt_load_scratch(nir_builder *b, uint32_t offset, unsigned align,
                        unsigned num_components, unsigned bit_size)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 1, 64), offset);
   return nir_load_global(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                          num_components, bit_size);
}

static inline void
brw_nir_rt_store_scratch(nir_builder *b, uint32_t offset, unsigned align,
                         nir_ssa_def *value, nir_component_mask_t write_mask)
{
   nir_ssa_def *addr =
      nir_iadd_imm(b, nir_load_scratch_base_ptr(b, 1, 1, 64), offset);
   nir_store_global(b, addr, MIN2(align, BRW_BTD_STACK_ALIGN),
                    value, write_mask);
}

static inline void
nir_accept_ray_intersection(nir_builder *b)
{
   nir_intrinsic_instr *accept =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_accept_ray_intersection);
   nir_builder_instr_insert(b, &accept->instr);
}

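/* Emits btd_spawn_intel to jump to the given BINDLESS_SHADER_RECORD
 * address, passing the BTD global argument pointer through to the spawned
 * shader.
 */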
static inline void
brw_nir_btd_spawn(nir_builder *b, nir_ssa_def *record_addr)
{
   nir_intrinsic_instr *spawn =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_btd_spawn_intel);
   spawn->src[0] = nir_src_for_ssa(nir_load_btd_global_arg_addr_intel(b));
   spawn->src[1] = nir_src_for_ssa(record_addr);
   nir_builder_instr_insert(b, &spawn->instr);
}

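/* Emits btd_retire_intel to retire the bindless thread.  This is the
 * end-of-shader operation for raygen shaders, which are always the root of
 * the call tree.
 */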
static inline void
brw_nir_btd_retire(nir_builder *b)
{
   nir_intrinsic_instr *retire =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_btd_retire_intel);
   nir_builder_instr_insert(b, &retire->instr);
}

/** This is a pseudo-op which does a bindless return
 *
 * It loads the return address from the stack and calls btd_spawn to spawn the
 * resume shader.
 */
static inline void
brw_nir_btd_return(struct nir_builder *b)
{
   assert(b->shader->scratch_size == BRW_BTD_STACK_CALLEE_DATA_SIZE);
   nir_ssa_def *resume_addr =
      brw_nir_rt_load_scratch(b, BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET,
                              8 /* align */, 1, 64);
   brw_nir_btd_spawn(b, resume_addr);
}

static inline void
assert_def_size(nir_ssa_def *def, unsigned num_components, unsigned bit_size)
{
   assert(def->num_components == num_components);
   assert(def->bit_size == bit_size);
}

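/* Total number of HW ray-tracing stacks on the device: the per-DSS stack
 * count times the number of dual-subslices.
 */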
static inline nir_ssa_def *
brw_nir_num_rt_stacks(nir_builder *b,
                      const struct gen_device_info *devinfo)
{
   return nir_imul_imm(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                       gen_device_info_num_dual_subslices(devinfo));
}

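/* Global stack ID of the current thread:
 * DSSID * numDSSRTStacks + stackID, per the BSpec formula quoted in
 * brw_nir_rt_ray_addr() below.
 */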
static inline nir_ssa_def *
brw_nir_rt_stack_id(nir_builder *b)
{
   return nir_iadd(b, nir_umul_32x16(b, nir_load_ray_num_dss_rt_stacks_intel(b),
                                     nir_load_btd_dss_id_intel(b)),
                   nir_load_btd_stack_id_intel(b));
}

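/* Address of the SW hotzone for this stack.  Hotzones live immediately
 * below rtMemBasePtr, so the 32-bit offset computed here is negative and
 * must be sign-extended (nir_i2i64) before it is added to the 64-bit base
 * address.
 */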
static inline nir_ssa_def *
brw_nir_rt_sw_hotzone_addr(nir_builder *b,
                           const struct gen_device_info *devinfo)
{
   nir_ssa_def *offset32 =
      nir_imul_imm(b, brw_nir_rt_stack_id(b), BRW_RT_SIZEOF_HOTZONE);

   offset32 = nir_iadd(b, offset32, nir_ineg(b,
      nir_imul_imm(b, brw_nir_num_rt_stacks(b, devinfo),
                   BRW_RT_SIZEOF_HOTZONE)));

   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                   nir_i2i64(b, offset32));
}

static inline nir_ssa_def *
brw_nir_rt_ray_addr(nir_builder *b)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    stackBase = RTDispatchGlobals.rtMemBasePtr
    *              + (DSSID * RTDispatchGlobals.numDSSRTStacks + stackID)
    *              * RTDispatchGlobals.stackSizePerRay // 64B aligned
    *
    * We assume that we can calculate a 32-bit offset first and then add it
    * to the 64-bit base address at the end.
    */
   nir_ssa_def *offset32 =
      nir_imul(b, brw_nir_rt_stack_id(b),
               nir_load_ray_hw_stack_size_intel(b));
   return nir_iadd(b, nir_load_ray_base_mem_addr_intel(b),
                   nir_u2u64(b, offset32));
}

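/* Address of a HitInfo structure in the HW stack.  The committed hit is
 * the first of the two HitInfo records at stackBase; the potential hit
 * immediately follows it.
 */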
static inline nir_ssa_def *
brw_nir_rt_mem_hit_addr(nir_builder *b, bool committed)
{
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b),
                       committed ? 0 : BRW_RT_SIZEOF_HIT_INFO);
}

static inline nir_ssa_def *
brw_nir_rt_hit_attrib_data_addr(nir_builder *b)
{
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b),
                       BRW_RT_OFFSETOF_HIT_ATTRIB_DATA);
}

static inline nir_ssa_def *
brw_nir_rt_mem_ray_addr(nir_builder *b,
                        enum brw_rt_bvh_level bvh_level)
{
   /* From the BSpec "Address Computation for Memory Based Data Structures:
    * Ray and TraversalStack (Async Ray Tracing)":
    *
    *    rayBase = stackBase + sizeof(HitInfo) * 2 // 64B aligned
    *    rayPtr  = rayBase + bvhLevel * sizeof(Ray); // 64B aligned
    *
    * In Vulkan, we always have exactly two levels of BVH: World and Object.
    */
   uint32_t offset = BRW_RT_SIZEOF_HIT_INFO * 2 +
                     bvh_level * BRW_RT_SIZEOF_RAY;
   return nir_iadd_imm(b, brw_nir_rt_ray_addr(b), offset);
}

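/* Address of this stack's SW stack, which lives above all of the HW
 * stacks: base + num_stacks * hw_stack_size + stack_id * sw_stack_size.
 */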
static inline nir_ssa_def *
brw_nir_rt_sw_stack_addr(nir_builder *b,
                         const struct gen_device_info *devinfo)
{
   nir_ssa_def *addr = nir_load_ray_base_mem_addr_intel(b);

   nir_ssa_def *offset32 = nir_imul(b, brw_nir_num_rt_stacks(b, devinfo),
                                    nir_load_ray_hw_stack_size_intel(b));
   addr = nir_iadd(b, addr, nir_u2u64(b, offset32));

   return nir_iadd(b, addr,
                   nir_imul(b, nir_u2u64(b, brw_nir_rt_stack_id(b)),
                            nir_u2u64(b, nir_load_ray_sw_stack_size_intel(b))));
}

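/* Returns 16-bit word 2 (bits 47:32) of a 64-bit value, named by analogy
 * with the nir_unpack_64_2x32_split_* helpers.
 */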
static inline nir_ssa_def *
nir_unpack_64_4x16_split_z(nir_builder *b, nir_ssa_def *val)
{
   return nir_unpack_32_2x16_split_x(b, nir_unpack_64_2x32_split_y(b, val));
}

struct brw_nir_rt_globals_defs {
   nir_ssa_def *base_mem_addr;
   nir_ssa_def *call_stack_handler_addr;
   nir_ssa_def *hw_stack_size;
   nir_ssa_def *num_dss_rt_stacks;
   nir_ssa_def *hit_sbt_addr;
   nir_ssa_def *hit_sbt_stride;
   nir_ssa_def *miss_sbt_addr;
   nir_ssa_def *miss_sbt_stride;
   nir_ssa_def *sw_stack_size;
   nir_ssa_def *launch_size;
   nir_ssa_def *call_sbt_addr;
   nir_ssa_def *call_sbt_stride;
   nir_ssa_def *resume_sbt_addr;
};

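/* Loads and unpacks the RTDispatchGlobals structure, read as two 64B
 * blocks starting at the BTD global argument pointer.  The SBT base
 * addresses are stored as 48-bit pointers packed next to their 16-bit
 * strides, hence the extract/unpack dance below.
 */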
static inline void
brw_nir_rt_load_globals(nir_builder *b,
                        struct brw_nir_rt_globals_defs *defs)
{
   nir_ssa_def *addr = nir_load_btd_global_arg_addr_intel(b);

   nir_ssa_def *data;
   data = nir_load_global_const_block_intel(b, addr, 16);
   defs->base_mem_addr = nir_pack_64_2x32(b, nir_channels(b, data, 0x3));

   defs->call_stack_handler_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));

   defs->hw_stack_size = nir_channel(b, data, 4);
   defs->num_dss_rt_stacks = nir_iand_imm(b, nir_channel(b, data, 5), 0xffff);
   defs->hit_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 8),
                             nir_extract_i16(b, nir_channel(b, data, 9),
                                             nir_imm_int(b, 0)));
   defs->hit_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 9));
   defs->miss_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 10),
                             nir_extract_i16(b, nir_channel(b, data, 11),
                                             nir_imm_int(b, 0)));
   defs->miss_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 11));
   defs->sw_stack_size = nir_channel(b, data, 12);
   defs->launch_size = nir_channels(b, data, 0x7u << 13);

   data = nir_load_global_const_block_intel(b, nir_iadd_imm(b, addr, 64), 8);
   defs->call_sbt_addr =
      nir_pack_64_2x32_split(b, nir_channel(b, data, 0),
                             nir_extract_i16(b, nir_channel(b, data, 1),
                                             nir_imm_int(b, 0)));
   defs->call_sbt_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data, 1));

   defs->resume_sbt_addr =
      nir_pack_64_2x32(b, nir_channels(b, data, 0x3 << 2));
}

static inline nir_ssa_def *
brw_nir_rt_unpack_leaf_ptr(nir_builder *b, nir_ssa_def *vec2)
{
   /* Hit record leaf pointers are 42-bit and assumed to be in 64B chunks.
    * This leaves 22 bits at the top for other stuff.
    */
   nir_ssa_def *ptr64 = nir_imul_imm(b, nir_pack_64_2x32(b, vec2), 64);

   /* The top 16 bits (remember, we shifted by 6 already) contain garbage
    * that we need to get rid of.
    */
   nir_ssa_def *ptr_lo = nir_unpack_64_2x32_split_x(b, ptr64);
   nir_ssa_def *ptr_hi = nir_unpack_64_2x32_split_y(b, ptr64);
   ptr_hi = nir_extract_i16(b, ptr_hi, nir_imm_int(b, 0));
   return nir_pack_64_2x32_split(b, ptr_lo, ptr_hi);
}

struct brw_nir_rt_mem_hit_defs {
   nir_ssa_def *t;
   nir_ssa_def *tri_bary; /**< Only valid for triangle geometry */
   nir_ssa_def *aabb_hit_kind; /**< Only valid for AABB geometry */
   nir_ssa_def *leaf_type;
   nir_ssa_def *prim_leaf_index;
   nir_ssa_def *front_face;
   nir_ssa_def *prim_leaf_ptr;
   nir_ssa_def *inst_leaf_ptr;
};

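/* Decodes the committed or potential HW MemHit structure, including the
 * bitfield packed into the fourth dword (leaf_type, prim_leaf_index,
 * front_face) and the two packed leaf pointers.
 */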
static inline void
brw_nir_rt_load_mem_hit(nir_builder *b,
                        struct brw_nir_rt_mem_hit_defs *defs,
                        bool committed)
{
   nir_ssa_def *hit_addr = brw_nir_rt_mem_hit_addr(b, committed);

   nir_ssa_def *data = nir_load_global(b, hit_addr, 16, 4, 32);
   defs->t = nir_channel(b, data, 0);
   defs->aabb_hit_kind = nir_channel(b, data, 1);
   defs->tri_bary = nir_channels(b, data, 0x6);
   nir_ssa_def *bitfield = nir_channel(b, data, 3);
   defs->leaf_type =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 17), nir_imm_int(b, 3));
   defs->prim_leaf_index =
      nir_ubitfield_extract(b, bitfield, nir_imm_int(b, 20), nir_imm_int(b, 4));
   defs->front_face = nir_i2b(b, nir_iand_imm(b, bitfield, 1 << 27));

   data = nir_load_global(b, nir_iadd_imm(b, hit_addr, 16), 16, 4, 32);
   defs->prim_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 0));
   defs->inst_leaf_ptr =
      brw_nir_rt_unpack_leaf_ptr(b, nir_channels(b, data, 0x3 << 2));
}

struct brw_nir_rt_mem_ray_defs {
   nir_ssa_def *orig;
   nir_ssa_def *dir;
   nir_ssa_def *t_near;
   nir_ssa_def *t_far;
   nir_ssa_def *root_node_ptr;
   nir_ssa_def *ray_flags;
   nir_ssa_def *hit_group_sr_base_ptr;
   nir_ssa_def *hit_group_sr_stride;
   nir_ssa_def *miss_sr_ptr;
   nir_ssa_def *shader_index_multiplier;
   nir_ssa_def *inst_leaf_ptr;
   nir_ssa_def *ray_mask;
};

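/* Writes out the HW MemRay structure as four 16B vec4 stores.  The 64-bit
 * pointers only occupy 48 bits in memory, so the top 16-bit word of each
 * pointer is packed together with an adjacent 16-bit field (ray flags,
 * SBT strides, ray mask, etc.).
 */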
static inline void
brw_nir_rt_store_mem_ray(nir_builder *b,
                         const struct brw_nir_rt_mem_ray_defs *defs,
                         enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr = brw_nir_rt_mem_ray_addr(b, bvh_level);

   assert_def_size(defs->orig, 3, 32);
   assert_def_size(defs->dir, 3, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 0), 16,
                    nir_vec4(b, nir_channel(b, defs->orig, 0),
                                nir_channel(b, defs->orig, 1),
                                nir_channel(b, defs->orig, 2),
                                nir_channel(b, defs->dir, 0)),
                    ~0 /* write mask */);

   assert_def_size(defs->t_near, 1, 32);
   assert_def_size(defs->t_far, 1, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 16), 16,
                    nir_vec4(b, nir_channel(b, defs->dir, 1),
                                nir_channel(b, defs->dir, 2),
                                defs->t_near,
                                defs->t_far),
                    ~0 /* write mask */);

   assert_def_size(defs->root_node_ptr, 1, 64);
   assert_def_size(defs->ray_flags, 1, 16);
   assert_def_size(defs->hit_group_sr_base_ptr, 1, 64);
   assert_def_size(defs->hit_group_sr_stride, 1, 16);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 32), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->root_node_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->root_node_ptr),
                     defs->ray_flags),
                  nir_unpack_64_2x32_split_x(b, defs->hit_group_sr_base_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->hit_group_sr_base_ptr),
                     defs->hit_group_sr_stride)),
      ~0 /* write mask */);

   /* leaf_ptr is optional */
   nir_ssa_def *inst_leaf_ptr;
   if (defs->inst_leaf_ptr) {
      inst_leaf_ptr = defs->inst_leaf_ptr;
   } else {
      inst_leaf_ptr = nir_imm_int64(b, 0);
   }

   assert_def_size(defs->miss_sr_ptr, 1, 64);
   assert_def_size(defs->shader_index_multiplier, 1, 32);
   assert_def_size(inst_leaf_ptr, 1, 64);
   assert_def_size(defs->ray_mask, 1, 32);
   nir_store_global(b, nir_iadd_imm(b, ray_addr, 48), 16,
      nir_vec4(b, nir_unpack_64_2x32_split_x(b, defs->miss_sr_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, defs->miss_sr_ptr),
                     nir_unpack_32_2x16_split_x(b,
                        nir_ishl(b, defs->shader_index_multiplier,
                                    nir_imm_int(b, 8)))),
                  nir_unpack_64_2x32_split_x(b, inst_leaf_ptr),
                  nir_pack_32_2x16_split(b,
                     nir_unpack_64_4x16_split_z(b, inst_leaf_ptr),
                     nir_unpack_32_2x16_split_x(b, defs->ray_mask))),
      ~0 /* write mask */);
}

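/* The inverse of brw_nir_rt_store_mem_ray(): reads the MemRay structure
 * back as four vec4 loads and unpacks the 48-bit pointers and 16-bit
 * fields.
 */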
static inline void
brw_nir_rt_load_mem_ray(nir_builder *b,
                        struct brw_nir_rt_mem_ray_defs *defs,
                        enum brw_rt_bvh_level bvh_level)
{
   nir_ssa_def *ray_addr = brw_nir_rt_mem_ray_addr(b, bvh_level);

   nir_ssa_def *data[4] = {
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 0), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 16), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 32), 16, 4, 32),
      nir_load_global(b, nir_iadd_imm(b, ray_addr, 48), 16, 4, 32),
   };

   defs->orig = nir_channels(b, data[0], 0x7);
   defs->dir = nir_vec3(b, nir_channel(b, data[0], 3),
                        nir_channel(b, data[1], 0),
                        nir_channel(b, data[1], 1));
   defs->t_near = nir_channel(b, data[1], 2);
   defs->t_far = nir_channel(b, data[1], 3);
   defs->root_node_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 0),
                             nir_extract_i16(b, nir_channel(b, data[2], 1),
                                             nir_imm_int(b, 0)));
   defs->ray_flags =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 1));
   defs->hit_group_sr_base_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[2], 2),
                             nir_extract_i16(b, nir_channel(b, data[2], 3),
                                             nir_imm_int(b, 0)));
   defs->hit_group_sr_stride =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[2], 3));
   defs->miss_sr_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 0),
                             nir_extract_i16(b, nir_channel(b, data[3], 1),
                                             nir_imm_int(b, 0)));
   defs->shader_index_multiplier =
      nir_ushr(b, nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 1)),
               nir_imm_int(b, 8));
   defs->inst_leaf_ptr =
      nir_pack_64_2x32_split(b, nir_channel(b, data[3], 2),
                             nir_extract_i16(b, nir_channel(b, data[3], 3),
                                             nir_imm_int(b, 0)));
   defs->ray_mask =
      nir_unpack_32_2x16_split_y(b, nir_channel(b, data[3], 3));
}

struct brw_nir_rt_bvh_instance_leaf_defs {
   nir_ssa_def *world_to_object[4];
   nir_ssa_def *instance_id;
   nir_ssa_def *instance_index;
   nir_ssa_def *object_to_world[4];
};

static inline void
brw_nir_rt_load_bvh_instance_leaf(nir_builder *b,
                                  struct brw_nir_rt_bvh_instance_leaf_defs *defs,
                                  nir_ssa_def *leaf_addr)
{
   /* We don't care about the first 16B of the leaf for now.  One day, we may
    * add code to decode it but none of that data is directly required for
    * implementing any ray-tracing built-ins.
    */

   defs->world_to_object[0] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 16), 4, 3, 32);
   defs->world_to_object[1] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 28), 4, 3, 32);
   defs->world_to_object[2] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 40), 4, 3, 32);
   /* The last column of the matrices is swapped between the two, probably
    * because it makes things easier or faster for the hardware somehow.
    */
   defs->object_to_world[3] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 52), 4, 3, 32);

   nir_ssa_def *data =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 64), 4, 4, 32);
   defs->instance_id = nir_channel(b, data, 2);
   defs->instance_index = nir_channel(b, data, 3);

   defs->object_to_world[0] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 80), 4, 3, 32);
   defs->object_to_world[1] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 92), 4, 3, 32);
   defs->object_to_world[2] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 104), 4, 3, 32);
   defs->world_to_object[3] =
      nir_load_global(b, nir_iadd_imm(b, leaf_addr, 116), 4, 3, 32);
}

#endif /* BRW_NIR_RT_BUILDER_H */