intel/rt: Add a pass to lower shader call instructions

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7356>
This commit is contained in:
Jason Ekstrand
2020-08-06 14:25:52 -05:00
committed by Marge Bot
parent ca88cd8e5a
commit fad81a3968
6 changed files with 1156 additions and 3 deletions

View File

@@ -1093,8 +1093,13 @@ system_value("btd_dss_id_intel", 1)
system_value("btd_stack_id_intel", 1)
system_value("btd_global_arg_addr_intel", 1, bit_sizes=[64])
system_value("btd_local_arg_addr_intel", 1, bit_sizes=[64])
system_value("btd_resume_sbt_addr_intel", 1, bit_sizes=[64])
# src[] = { global_arg_addr, btd_record }
intrinsic("btd_spawn_intel", src_comp=[1, 1])
# RANGE=stack_size
intrinsic("btd_stack_push_intel", indices=[RANGE])
# BASE=call_idx RANGE=stack_size
intrinsic("btd_resume_intel", indices=[BASE, RANGE])
# src[] = { }
intrinsic("btd_retire_intel")
@@ -1103,3 +1108,5 @@ system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64])
system_value("ray_hw_stack_size_intel", 1)
system_value("ray_sw_stack_size_intel", 1)
system_value("ray_num_dss_rt_stacks_intel", 1)
system_value("callable_sbt_addr_intel", 1, bit_sizes=[64])
system_value("callable_sbt_stride_intel", 1, bit_sizes=[16])

View File

@@ -44,6 +44,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
nir_ssa_def *stack_base_offset = nir_channel(b, hotzone, 0);
nir_ssa_def *stack_base_addr =
nir_iadd(b, thread_stack_base_addr, nir_u2u64(b, stack_base_offset));
ASSERTED bool seen_scratch_base_ptr_load = false;
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
@@ -58,9 +59,37 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
switch (intrin->intrinsic) {
case nir_intrinsic_load_scratch_base_ptr:
assert(nir_intrinsic_base(intrin) == 1);
seen_scratch_base_ptr_load = true;
sysval = stack_base_addr;
break;
case nir_intrinsic_btd_stack_push_intel: {
int32_t stack_size = nir_intrinsic_range(intrin);
if (stack_size > 0) {
nir_ssa_def *child_stack_offset =
nir_iadd_imm(b, stack_base_offset, stack_size);
nir_store_global(b, hotzone_addr, 16, child_stack_offset, 0x1);
}
nir_instr_remove(instr);
break;
}
case nir_intrinsic_btd_resume_intel:
/* This is the first "interesting" instruction */
assert(block == nir_start_block(impl));
assert(!seen_scratch_base_ptr_load);
int32_t stack_size = nir_intrinsic_range(intrin);
if (stack_size > 0) {
stack_base_offset =
nir_iadd_imm(b, stack_base_offset, -stack_size);
nir_store_global(b, hotzone_addr, 16, stack_base_offset, 0x1);
stack_base_addr = nir_iadd(b, thread_stack_base_addr,
nir_u2u64(b, stack_base_offset));
}
nir_instr_remove(instr);
break;
case nir_intrinsic_load_ray_base_mem_addr_intel:
sysval = globals.base_mem_addr;
break;
@@ -77,6 +106,19 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
sysval = globals.num_dss_rt_stacks;
break;
case nir_intrinsic_load_callable_sbt_addr_intel:
sysval = globals.call_sbt_addr;
break;
case nir_intrinsic_load_callable_sbt_stride_intel:
sysval = globals.call_sbt_stride;
break;
case nir_intrinsic_load_btd_resume_sbt_addr_intel:
/* The call stack handler is just the first in our resume SBT */
sysval = globals.resume_sbt_addr;
break;
default:
continue;
}

File diff suppressed because it is too large Load Diff

View File

@@ -41,16 +41,21 @@ void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
const nir_shader *any_hit,
const struct gen_device_info *devinfo);
/* We reserve the first 16B of the stack for callee data pointers */
/* We reserve the first 8B of the stack for callee data pointers */
#define BRW_BTD_STACK_RESUME_BSR_ADDR_OFFSET 0
#define BRW_BTD_STACK_CALL_DATA_PTR_OFFSET 8
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 16
#define BRW_BTD_STACK_CALLEE_DATA_SIZE 8
/* We require the stack to be 8B aligned at the start of a shader */
#define BRW_BTD_STACK_ALIGN 8
void brw_nir_lower_shader_returns(nir_shader *shader);
bool brw_nir_lower_shader_calls(nir_shader *shader,
uint32_t first_resume_sbt_idx,
nir_shader ***resume_shaders_out,
uint32_t *num_resume_shaders_out,
void *mem_ctx);
void brw_nir_lower_rt_intrinsics(nir_shader *shader,
const struct gen_device_info *devinfo);

View File

@@ -94,6 +94,17 @@ brw_nir_btd_retire(nir_builder *b)
nir_builder_instr_insert(b, &retire->instr);
}
/** Builds a btd_resume_intel intrinsic and inserts it through the builder
 *
 * The intrinsic carries no sources; its two constant indices are filled in
 * from the arguments:
 *
 *  - BASE  <- call_idx
 *  - RANGE <- stack_size
 */
static inline void
brw_nir_btd_resume(nir_builder *b, uint32_t call_idx, unsigned stack_size)
{
   nir_intrinsic_instr *intrin =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_btd_resume_intel);

   /* The two index setters are independent of one another. */
   nir_intrinsic_set_range(intrin, stack_size);
   nir_intrinsic_set_base(intrin, call_idx);

   nir_builder_instr_insert(b, &intrin->instr);
}
/** This is a pseudo-op which does a bindless return
*
* It loads the return address from the stack and calls btd_spawn to spawn the

View File

@@ -31,6 +31,9 @@ extern "C" {
/** Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32
/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8
/* Vulkan always uses exactly two levels of BVH: world and object. At the API
* level, these are referred to as top and bottom.
*/