diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index 794b17c9b4f..62ac708c3d1 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -181,7 +181,7 @@ struct ac_shader_args { struct { struct ac_arg uniform_shader_addr; struct ac_arg sbt_descriptors; - struct ac_arg launch_size; + struct ac_arg launch_sizes[3]; struct ac_arg launch_size_addr; struct ac_arg launch_ids[3]; struct ac_arg dynamic_callable_stack_base; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index dd05836a7b6..1ea38cf9467 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8299,12 +8299,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) emit_split_vector(ctx, dst, 3); break; } - case nir_intrinsic_load_ray_launch_size: { - Temp dst = get_ssa_temp(ctx, &instr->def); - bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->rt.launch_size))); - emit_split_vector(ctx, dst, 3); - break; - } case nir_intrinsic_load_local_invocation_id: { Temp dst = get_ssa_temp(ctx, &instr->def); if (ctx->options->gfx_level >= GFX11) { @@ -12347,9 +12341,9 @@ select_rt_prolog(Program* program, ac_shader_config* config, * Shader Record Ptr: v[6-7] */ PhysReg out_uniform_shader_addr = get_arg_reg(out_args, out_args->rt.uniform_shader_addr); - PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size); - PhysReg out_launch_size_y = out_launch_size_x.advance(4); - PhysReg out_launch_size_z = out_launch_size_y.advance(4); + PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_sizes[0]); + PhysReg out_launch_size_y = get_arg_reg(out_args, out_args->rt.launch_sizes[1]); + PhysReg out_launch_size_z = get_arg_reg(out_args, out_args->rt.launch_sizes[2]); PhysReg out_launch_ids[3]; for (unsigned i = 0; i < 3; i++) out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_ids[i]); diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 753ce0ffde9..44b12ebc597 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -442,7 +442,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_intrinsic_load_push_constant: case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_num_workgroups: - case nir_intrinsic_load_ray_launch_size: case nir_intrinsic_load_sbt_base_amd: case nir_intrinsic_load_subgroup_id: case nir_intrinsic_load_num_subgroups: diff --git a/src/amd/vulkan/nir/radv_nir_rt_shader.c b/src/amd/vulkan/nir/radv_nir_rt_shader.c index 05c77af1126..b9b2ee3b9a0 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_shader.c @@ -189,6 +189,7 @@ struct rt_variables { nir_variable *ahit_isec_count; + nir_variable *launch_sizes[3]; nir_variable *launch_ids[3]; /* global address of the SBT entry used for the shader */ @@ -235,6 +236,10 @@ create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipe vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr"); vars.shader_record_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr"); + vars.launch_sizes[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_x"); + vars.launch_sizes[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_y"); + vars.launch_sizes[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_z"); + vars.launch_ids[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_x"); vars.launch_ids[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_y"); vars.launch_ids[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_z"); @@ -279,6 +284,9 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const s _mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr); _mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr); + for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_sizes); i++) + _mesa_hash_table_insert(var_remap, src->launch_sizes[i], dst->launch_sizes[i]); + for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_ids); i++) _mesa_hash_table_insert(var_remap, src->launch_ids[i], dst->launch_ids[i]); @@ -377,6 +385,7 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en struct radv_rt_shader_info { bool uses_launch_id; + bool uses_launch_size; }; struct radv_lower_rt_instruction_data { @@ -485,6 +494,17 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) ret = nir_load_var(b, vars->shader_record_ptr); break; } + case nir_intrinsic_load_ray_launch_size: { + if (data->out_info) + data->out_info->uses_launch_size = true; + + if (!data->late_lowering) + return false; + + ret = nir_vec3(b, nir_load_var(b, vars->launch_sizes[0]), nir_load_var(b, vars->launch_sizes[1]), + nir_load_var(b, vars->launch_sizes[2])); + break; + }; case nir_intrinsic_load_ray_launch_id: { if (data->out_info) data->out_info->uses_launch_id = true; @@ -1955,7 +1975,13 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH nir_def *descriptor_sets = ac_nir_load_arg(&b, &args->ac, args->descriptor_sets[0]); nir_def *push_constants = ac_nir_load_arg(&b, &args->ac, args->ac.push_constants); nir_def *sbt_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_descriptors); - nir_def *launch_size = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_size); + + nir_def *launch_sizes[3]; + for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) { + launch_sizes[i] = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_sizes[i]); + nir_store_var(&b, vars.launch_sizes[i], launch_sizes[i], 1); + } + nir_def *scratch_offset = NULL; if (args->ac.scratch_offset.used) scratch_offset = ac_nir_load_arg(&b, &args->ac, args->ac.scratch_offset); @@ -2032,7 +2058,14 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH ac_nir_store_arg(&b, &args->ac, args->ac.push_constants, push_constants); ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_descriptors, sbt_descriptors); ac_nir_store_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr, traversal_addr); - ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_size, launch_size); + + for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) { + if (rt_info.uses_launch_size) + ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_sizes[i], launch_sizes[i]); + else + radv_store_arg(&b, args, traversal_info, args->ac.rt.launch_sizes[i], launch_sizes[i]); + } + if (scratch_offset) ac_nir_store_arg(&b, &args->ac, args->ac.scratch_offset, scratch_offset); if (ring_offsets) diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 684ee395689..07f9932073d 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -317,7 +317,10 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr); - ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.rt.launch_size); + + for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++) + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.rt.launch_sizes[i]); + if (gfx_level < GFX9) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);