diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index f50c35087b8..ae9be303780 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -152,6 +152,7 @@ struct ac_shader_args { struct ac_arg sbt_descriptors; struct ac_arg ray_launch_size_addr; struct ac_arg force_vrs_rates; + struct ac_arg rt_dynamic_callable_stack_base; }; void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 9b564d68c95..3c156e603c5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9167,6 +9167,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } case nir_intrinsic_bvh64_intersect_ray_amd: visit_bvh64_intersect_ray_amd(ctx, instr); break; + case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: + bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), + get_arg(ctx, ctx->args->ac.rt_dynamic_callable_stack_base)); + break; case nir_intrinsic_overwrite_vs_arguments_amd: { ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa); ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 362aceff453..573a3e7431b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -8559,6 +8559,14 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCom base_reg + size_loc->sgpr_idx * 4, launch_size_va, true); } + struct radv_userdata_info *base_loc = radv_lookup_user_sgpr( + &pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE); + if (base_loc->sgpr_idx != -1) { + struct radv_shader_info *cs_info = &pipeline->base.shaders[MESA_SHADER_COMPUTE]->info; + radeon_set_sh_reg(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + base_loc->sgpr_idx * 4, + pipeline->base.scratch_bytes_per_wave / cs_info->wave_size); + } + radv_dispatch(cmd_buffer, &info, pipeline, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); } diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 7c5443ce9e2..a128bd92cfb 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -1769,7 +1769,10 @@ create_rt_shader(struct radv_device *device, const VkRayTracingPipelineCreateInf struct rt_variables vars = create_rt_variables(b.shader, pCreateInfo, stack_sizes); load_sbt_entry(&b, &vars, nir_imm_int(&b, 0), SBT_RAYGEN, 0); - nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); + if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) + nir_store_var(&b, vars.stack_ptr, nir_load_rt_dynamic_callable_stack_base_amd(&b), 0x1); + else + nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); nir_store_var(&b, vars.main_loop_case_visited, nir_imm_bool(&b, true), 1); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b889bc44234..d81aafcee54 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -153,6 +153,7 @@ enum radv_ud_index { AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, AC_UD_CS_SBT_DESCRIPTORS, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, + AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE, AC_UD_CS_TASK_RING_OFFSETS, AC_UD_CS_TASK_DRAW_ID, AC_UD_CS_TASK_IB, @@ -345,6 +346,7 @@ struct radv_shader_info { bool uses_sbt; bool uses_ray_launch_size; + bool uses_dynamic_rt_callable_stack; } cs; struct { uint64_t tes_inputs_read; diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 058fbbd48c8..84c316f3ece 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -189,6 +189,8 @@ allocate_user_sgprs(enum amd_gfx_level gfx_level, const struct radv_shader_info user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2; if (info->cs.uses_ray_launch_size) user_sgpr_count += 2; + if (info->cs.uses_dynamic_rt_callable_stack) + user_sgpr_count += 1; if (info->vs.needs_draw_id) user_sgpr_count += 1; if (stage == MESA_SHADER_TASK) @@ -605,6 +607,11 @@ radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipelin ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr); } + if (info->cs.uses_dynamic_rt_callable_stack) { + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, + &args->ac.rt_dynamic_callable_stack_base); + } + if (info->vs.needs_draw_id) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); } @@ -872,6 +879,9 @@ radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipelin if (args->ac.ray_launch_size_addr.used) { set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx); } + if (args->ac.rt_dynamic_callable_stack_base.used) { + set_loc_shader(args, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE, &user_sgpr_idx, 1); + } if (args->ac.draw_id.used) { set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1); } diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 42260d90b3a..336ca1cbe2b 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -210,6 +210,9 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, case nir_intrinsic_load_force_vrs_rates_amd: info->force_vrs_per_vertex = true; break; + case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: + info->cs.uses_dynamic_rt_callable_stack = true; + break; default: break; } diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 867cbb5ff82..3c34f5ce23f 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -173,6 +173,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_tess_level_outer_default: case nir_intrinsic_load_scalar_arg_amd: case nir_intrinsic_load_smem_amd: + case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: case nir_intrinsic_load_global_const_block_intel: case nir_intrinsic_load_reloc_const_intel: case nir_intrinsic_load_global_block_intel: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 71ae27bb4c7..081db81aa07 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1398,6 +1398,9 @@ system_value("intersection_opaque_amd", 1, bit_sizes=[1]) # Used for indirect ray tracing. system_value("ray_launch_size_addr_amd", 1, bit_sizes=[64]) +# Scratch base of callable stack for ray tracing. +system_value("rt_dynamic_callable_stack_base_amd", 1) + # Load forced VRS rates. intrinsic("load_force_vrs_rates_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])