radv/rt: Track ray_launch_size reads

Totals from 33 (8.71% of 379) affected shaders:
Instrs: 1434025 -> 1433988 (-0.00%); split: -0.01%, +0.00%
CodeSize: 7578824 -> 7578472 (-0.00%); split: -0.01%, +0.00%
Latency: 9241632 -> 9241639 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 3407014 -> 3407049 (+0.00%); split: -0.00%, +0.00%
VClause: 40399 -> 40391 (-0.02%)
SClause: 37755 -> 37760 (+0.01%); split: -0.04%, +0.05%
Copies: 169588 -> 169567 (-0.01%); split: -0.04%, +0.02%
PreSGPRs: 4323 -> 4319 (-0.09%)
VALU: 940500 -> 940484 (-0.00%); split: -0.00%, +0.00%
SALU: 220508 -> 220509 (+0.00%); split: -0.03%, +0.03%

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28619>
This commit is contained in:
Konstantin Seurer
2024-03-05 09:48:42 +01:00
committed by Marge Bot
parent 7ba8fccad3
commit 432f3eb9ca
5 changed files with 43 additions and 14 deletions

View File

@@ -181,7 +181,7 @@ struct ac_shader_args {
struct {
struct ac_arg uniform_shader_addr;
struct ac_arg sbt_descriptors;
struct ac_arg launch_size;
struct ac_arg launch_sizes[3];
struct ac_arg launch_size_addr;
struct ac_arg launch_ids[3];
struct ac_arg dynamic_callable_stack_base;

View File

@@ -8299,12 +8299,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
emit_split_vector(ctx, dst, 3);
break;
}
case nir_intrinsic_load_ray_launch_size: {
Temp dst = get_ssa_temp(ctx, &instr->def);
bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->rt.launch_size)));
emit_split_vector(ctx, dst, 3);
break;
}
case nir_intrinsic_load_local_invocation_id: {
Temp dst = get_ssa_temp(ctx, &instr->def);
if (ctx->options->gfx_level >= GFX11) {
@@ -12347,9 +12341,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Shader Record Ptr: v[6-7]
*/
PhysReg out_uniform_shader_addr = get_arg_reg(out_args, out_args->rt.uniform_shader_addr);
PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size);
PhysReg out_launch_size_y = out_launch_size_x.advance(4);
PhysReg out_launch_size_z = out_launch_size_y.advance(4);
PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_sizes[0]);
PhysReg out_launch_size_y = get_arg_reg(out_args, out_args->rt.launch_sizes[1]);
PhysReg out_launch_size_z = get_arg_reg(out_args, out_args->rt.launch_sizes[2]);
PhysReg out_launch_ids[3];
for (unsigned i = 0; i < 3; i++)
out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_ids[i]);

View File

@@ -442,7 +442,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_load_push_constant:
case nir_intrinsic_load_workgroup_id:
case nir_intrinsic_load_num_workgroups:
case nir_intrinsic_load_ray_launch_size:
case nir_intrinsic_load_sbt_base_amd:
case nir_intrinsic_load_subgroup_id:
case nir_intrinsic_load_num_subgroups:

View File

@@ -189,6 +189,7 @@ struct rt_variables {
nir_variable *ahit_isec_count;
nir_variable *launch_sizes[3];
nir_variable *launch_ids[3];
/* global address of the SBT entry used for the shader */
@@ -235,6 +236,10 @@ create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipe
vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
vars.shader_record_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
vars.launch_sizes[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_x");
vars.launch_sizes[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_y");
vars.launch_sizes[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_z");
vars.launch_ids[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_x");
vars.launch_ids[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_y");
vars.launch_ids[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_z");
@@ -279,6 +284,9 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const s
_mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr);
_mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr);
for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_sizes); i++)
_mesa_hash_table_insert(var_remap, src->launch_sizes[i], dst->launch_sizes[i]);
for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_ids); i++)
_mesa_hash_table_insert(var_remap, src->launch_ids[i], dst->launch_ids[i]);
@@ -377,6 +385,7 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en
/* Per-shader flags recording which ray-launch intrinsics were encountered
 * while lowering RT instructions.
 * NOTE(review): presumably this is the type behind `out_info` in
 * radv_lower_rt_instruction and `rt_info` in radv_nir_lower_rt_abi -- confirm.
 */
struct radv_rt_shader_info {
/* Set to true when a load_ray_launch_id intrinsic is seen during lowering. */
bool uses_launch_id;
/* Set to true when a load_ray_launch_size intrinsic is seen during lowering;
 * later consulted to choose how the launch_sizes arguments are stored.
 */
bool uses_launch_size;
};
struct radv_lower_rt_instruction_data {
@@ -485,6 +494,17 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
ret = nir_load_var(b, vars->shader_record_ptr);
break;
}
case nir_intrinsic_load_ray_launch_size: {
if (data->out_info)
data->out_info->uses_launch_size = true;
if (!data->late_lowering)
return false;
ret = nir_vec3(b, nir_load_var(b, vars->launch_sizes[0]), nir_load_var(b, vars->launch_sizes[1]),
nir_load_var(b, vars->launch_sizes[2]));
break;
};
case nir_intrinsic_load_ray_launch_id: {
if (data->out_info)
data->out_info->uses_launch_id = true;
@@ -1955,7 +1975,13 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
nir_def *descriptor_sets = ac_nir_load_arg(&b, &args->ac, args->descriptor_sets[0]);
nir_def *push_constants = ac_nir_load_arg(&b, &args->ac, args->ac.push_constants);
nir_def *sbt_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_descriptors);
nir_def *launch_size = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_size);
nir_def *launch_sizes[3];
for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) {
launch_sizes[i] = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_sizes[i]);
nir_store_var(&b, vars.launch_sizes[i], launch_sizes[i], 1);
}
nir_def *scratch_offset = NULL;
if (args->ac.scratch_offset.used)
scratch_offset = ac_nir_load_arg(&b, &args->ac, args->ac.scratch_offset);
@@ -2032,7 +2058,14 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
ac_nir_store_arg(&b, &args->ac, args->ac.push_constants, push_constants);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_descriptors, sbt_descriptors);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr, traversal_addr);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_size, launch_size);
for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) {
if (rt_info.uses_launch_size)
ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_sizes[i], launch_sizes[i]);
else
radv_store_arg(&b, args, traversal_info, args->ac.rt.launch_sizes[i], launch_sizes[i]);
}
if (scratch_offset)
ac_nir_store_arg(&b, &args->ac, args->ac.scratch_offset, scratch_offset);
if (ring_offsets)

View File

@@ -317,7 +317,10 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.rt.launch_size);
for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.rt.launch_sizes[i]);
if (gfx_level < GFX9) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);