intel/rt: Add a pass to lower shader call instructions
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:

committed by
Marge Bot

parent
ca88cd8e5a
commit
fad81a3968
@@ -1093,8 +1093,13 @@ system_value("btd_dss_id_intel", 1)
|
||||
system_value("btd_stack_id_intel", 1)
|
||||
system_value("btd_global_arg_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("btd_local_arg_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("btd_resume_sbt_addr_intel", 1, bit_sizes=[64])
|
||||
# src[] = { global_arg_addr, btd_record }
|
||||
intrinsic("btd_spawn_intel", src_comp=[1, 1])
|
||||
# RANGE=stack_size
|
||||
intrinsic("btd_stack_push_intel", indices=[RANGE])
|
||||
# BASE=call_idx RANGE=stack_size
|
||||
intrinsic("btd_resume_intel", indices=[BASE, RANGE])
|
||||
# src[] = { }
|
||||
intrinsic("btd_retire_intel")
|
||||
|
||||
@@ -1103,3 +1108,5 @@ system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("ray_hw_stack_size_intel", 1)
|
||||
system_value("ray_sw_stack_size_intel", 1)
|
||||
system_value("ray_num_dss_rt_stacks_intel", 1)
|
||||
system_value("callable_sbt_addr_intel", 1, bit_sizes=[64])
|
||||
system_value("callable_sbt_stride_intel", 1, bit_sizes=[16])
|
||||
|
@@ -44,6 +44,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
nir_ssa_def *stack_base_offset = nir_channel(b, hotzone, 0);
|
||||
nir_ssa_def *stack_base_addr =
|
||||
nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
|
||||
ASSERTED bool seen_scratch_base_ptr_load = false;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
@@ -58,9 +59,37 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_scratch_base_ptr:
|
||||
assert(nir_intrinsic_base(intrin) == 1);
|
||||
seen_scratch_base_ptr_load = true;
|
||||
sysval = stack_base_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_btd_stack_push_intel: {
|
||||
int32_t stack_size = nir_intrinsic_range(intrin);
|
||||
if (stack_size > 0) {
|
||||
nir_ssa_def *child_stack_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_btd_resume_intel:
|
||||
/* This is the first "interesting" instruction */
|
||||
assert(block == nir_start_block(impl));
|
||||
assert(!seen_scratch_base_ptr_load);
|
||||
|
||||
int32_t stack_size = nir_intrinsic_range(intrin);
|
||||
if (stack_size > 0) {
|
||||
stack_base_offset =
|
||||
nir_iadd_imm(b, stack_base_offset, -stack_size);
|
||||
nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
|
||||
stack_base_addr = nir_iadd(b, thread_stack_base_addr,
|
||||
nir_u2u64(b, stack_base_offset));
|
||||
}
|
||||
nir_instr_remove(instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_base_mem_addr_intel:
|
||||
sysval = globals.base_mem_addr;
|
||||
break;
|
||||
@@ -77,6 +106,19 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
sysval = globals.num_dss_rt_stacks;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_addr_intel:
|
||||
sysval = globals.call_sbt_addr;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_callable_sbt_stride_intel:
|
||||
sysval = globals.call_sbt_stride;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_btd_resume_sbt_addr_intel:
|
||||
/* The call stack handler is just the first in our resume SBT */
|
||||
sysval = globals.resume_sbt_addr;
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -41,16 +41,21 @@ void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
|
||||
const nir_shader *any_hit,
|
||||
const struct gen_device_info *devinfo);
|
||||
|
||||
/* We reserve the first 16B of the stack for callee data pointers */
|
||||
/* We reserve the first 8B of the stack for callee data pointers */
|
||||
#define BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET 0
|
||||
#define BRW_BTD_STACK_CALL_DATA_PTR_OFFSET 8
|
||||
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 16
|
||||
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 8
|
||||
|
||||
/* We require the stack to be 8B aligned at the start of a shader */
|
||||
#define BRW_BTD_STACK_ALIGN 8
|
||||
|
||||
void brw_nir_lower_shader_returns(nir_shader *shader);
|
||||
|
||||
/* Entry point of the pass that lowers shader call instructions.
 *
 * NOTE(review): the implementation is not visible from this declaration, so
 * the following is inferred from the parameter names only and must be
 * confirmed against the definition:
 *  - presumably first_resume_sbt_idx is the resume-SBT index assigned to the
 *    first resume shader that gets generated;
 *  - presumably *resume_shaders_out / *num_resume_shaders_out receive the
 *    array of generated resume shaders and its length, allocated out of
 *    mem_ctx;
 *  - presumably the bool result reports whether anything was lowered.
 */
bool brw_nir_lower_shader_calls(nir_shader *shader,
|
||||
uint32_t first_resume_sbt_idx,
|
||||
nir_shader ***resume_shaders_out,
|
||||
uint32_t *num_resume_shaders_out,
|
||||
void *mem_ctx);
|
||||
|
||||
void brw_nir_lower_rt_intrinsics(nir_shader *shader,
|
||||
const struct gen_device_info *devinfo);
|
||||
|
||||
|
@@ -94,6 +94,17 @@ brw_nir_btd_retire(nir_builder *b)
|
||||
nir_builder_instr_insert(b, &retire->instr);
|
||||
}
|
||||
|
||||
static inline void
|
||||
brw_nir_btd_resume(nir_builder *b, uint32_t call_idx, unsigned stack_size)
|
||||
{
|
||||
nir_intrinsic_instr *resume =
|
||||
nir_intrinsic_instr_create(b->shader,
|
||||
nir_intrinsic_btd_resume_intel);
|
||||
nir_intrinsic_set_base(resume, call_idx);
|
||||
nir_intrinsic_set_range(resume, stack_size);
|
||||
nir_builder_instr_insert(b, &resume->instr);
|
||||
}
|
||||
|
||||
/** This is a pseudo-op which does a bindless return
|
||||
*
|
||||
* It loads the return address from the stack and calls btd_spawn to spawn the
|
||||
|
@@ -31,6 +31,9 @@ extern "C" {
|
||||
/** Vulkan defines shaderGroupHandleSize = 32 */
|
||||
#define BRW_RT_SBT_HANDLE_SIZE 32
|
||||
|
||||
/** Stride of the resume SBT */
|
||||
#define BRW_BTD_RESUME_SBT_STRIDE 8
|
||||
|
||||
/* Vulkan always uses exactly two levels of BVH: world and object. At the API
|
||||
* level, these are referred to as top and bottom.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user