radv/rt: Track ray_launch_size reads
Totals from 33 (8.71% of 379) affected shaders:
Instrs: 1434025 -> 1433988 (-0.00%); split: -0.01%, +0.00%
CodeSize: 7578824 -> 7578472 (-0.00%); split: -0.01%, +0.00%
Latency: 9241632 -> 9241639 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 3407014 -> 3407049 (+0.00%); split: -0.00%, +0.00%
VClause: 40399 -> 40391 (-0.02%)
SClause: 37755 -> 37760 (+0.01%); split: -0.04%, +0.05%
Copies: 169588 -> 169567 (-0.01%); split: -0.04%, +0.02%
PreSGPRs: 4323 -> 4319 (-0.09%)
VALU: 940500 -> 940484 (-0.00%); split: -0.00%, +0.00%
SALU: 220508 -> 220509 (+0.00%); split: -0.03%, +0.03%

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28619>
This commit is contained in:

committed by
Marge Bot

parent
7ba8fccad3
commit
432f3eb9ca
@@ -181,7 +181,7 @@ struct ac_shader_args {
|
||||
struct {
|
||||
struct ac_arg uniform_shader_addr;
|
||||
struct ac_arg sbt_descriptors;
|
||||
struct ac_arg launch_size;
|
||||
struct ac_arg launch_sizes[3];
|
||||
struct ac_arg launch_size_addr;
|
||||
struct ac_arg launch_ids[3];
|
||||
struct ac_arg dynamic_callable_stack_base;
|
||||
|
@@ -8299,12 +8299,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
emit_split_vector(ctx, dst, 3);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_launch_size: {
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->rt.launch_size)));
|
||||
emit_split_vector(ctx, dst, 3);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_local_invocation_id: {
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
if (ctx->options->gfx_level >= GFX11) {
|
||||
@@ -12347,9 +12341,9 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
||||
* Shader Record Ptr: v[6-7]
|
||||
*/
|
||||
PhysReg out_uniform_shader_addr = get_arg_reg(out_args, out_args->rt.uniform_shader_addr);
|
||||
PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_size);
|
||||
PhysReg out_launch_size_y = out_launch_size_x.advance(4);
|
||||
PhysReg out_launch_size_z = out_launch_size_y.advance(4);
|
||||
PhysReg out_launch_size_x = get_arg_reg(out_args, out_args->rt.launch_sizes[0]);
|
||||
PhysReg out_launch_size_y = get_arg_reg(out_args, out_args->rt.launch_sizes[1]);
|
||||
PhysReg out_launch_size_z = get_arg_reg(out_args, out_args->rt.launch_sizes[2]);
|
||||
PhysReg out_launch_ids[3];
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
out_launch_ids[i] = get_arg_reg(out_args, out_args->rt.launch_ids[i]);
|
||||
|
@@ -442,7 +442,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
case nir_intrinsic_load_push_constant:
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
case nir_intrinsic_load_ray_launch_size:
|
||||
case nir_intrinsic_load_sbt_base_amd:
|
||||
case nir_intrinsic_load_subgroup_id:
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
|
@@ -189,6 +189,7 @@ struct rt_variables {
|
||||
|
||||
nir_variable *ahit_isec_count;
|
||||
|
||||
nir_variable *launch_sizes[3];
|
||||
nir_variable *launch_ids[3];
|
||||
|
||||
/* global address of the SBT entry used for the shader */
|
||||
@@ -235,6 +236,10 @@ create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipe
|
||||
vars.stack_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "stack_ptr");
|
||||
vars.shader_record_ptr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_record_ptr");
|
||||
|
||||
vars.launch_sizes[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_x");
|
||||
vars.launch_sizes[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_y");
|
||||
vars.launch_sizes[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_size_z");
|
||||
|
||||
vars.launch_ids[0] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_x");
|
||||
vars.launch_ids[1] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_y");
|
||||
vars.launch_ids[2] = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "launch_id_z");
|
||||
@@ -279,6 +284,9 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const s
|
||||
_mesa_hash_table_insert(var_remap, src->stack_ptr, dst->stack_ptr);
|
||||
_mesa_hash_table_insert(var_remap, src->shader_record_ptr, dst->shader_record_ptr);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_sizes); i++)
|
||||
_mesa_hash_table_insert(var_remap, src->launch_sizes[i], dst->launch_sizes[i]);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(src->launch_ids); i++)
|
||||
_mesa_hash_table_insert(var_remap, src->launch_ids[i], dst->launch_ids[i]);
|
||||
|
||||
@@ -377,6 +385,7 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, en
|
||||
|
||||
struct radv_rt_shader_info {
|
||||
bool uses_launch_id;
|
||||
bool uses_launch_size;
|
||||
};
|
||||
|
||||
struct radv_lower_rt_instruction_data {
|
||||
@@ -485,6 +494,17 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
||||
ret = nir_load_var(b, vars->shader_record_ptr);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_launch_size: {
|
||||
if (data->out_info)
|
||||
data->out_info->uses_launch_size = true;
|
||||
|
||||
if (!data->late_lowering)
|
||||
return false;
|
||||
|
||||
ret = nir_vec3(b, nir_load_var(b, vars->launch_sizes[0]), nir_load_var(b, vars->launch_sizes[1]),
|
||||
nir_load_var(b, vars->launch_sizes[2]));
|
||||
break;
|
||||
};
|
||||
case nir_intrinsic_load_ray_launch_id: {
|
||||
if (data->out_info)
|
||||
data->out_info->uses_launch_id = true;
|
||||
@@ -1955,7 +1975,13 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
|
||||
nir_def *descriptor_sets = ac_nir_load_arg(&b, &args->ac, args->descriptor_sets[0]);
|
||||
nir_def *push_constants = ac_nir_load_arg(&b, &args->ac, args->ac.push_constants);
|
||||
nir_def *sbt_descriptors = ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_descriptors);
|
||||
nir_def *launch_size = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_size);
|
||||
|
||||
nir_def *launch_sizes[3];
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) {
|
||||
launch_sizes[i] = ac_nir_load_arg(&b, &args->ac, args->ac.rt.launch_sizes[i]);
|
||||
nir_store_var(&b, vars.launch_sizes[i], launch_sizes[i], 1);
|
||||
}
|
||||
|
||||
nir_def *scratch_offset = NULL;
|
||||
if (args->ac.scratch_offset.used)
|
||||
scratch_offset = ac_nir_load_arg(&b, &args->ac, args->ac.scratch_offset);
|
||||
@@ -2032,7 +2058,14 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.push_constants, push_constants);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.sbt_descriptors, sbt_descriptors);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr, traversal_addr);
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_size, launch_size);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(launch_sizes); i++) {
|
||||
if (rt_info.uses_launch_size)
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.rt.launch_sizes[i], launch_sizes[i]);
|
||||
else
|
||||
radv_store_arg(&b, args, traversal_info, args->ac.rt.launch_sizes[i], launch_sizes[i]);
|
||||
}
|
||||
|
||||
if (scratch_offset)
|
||||
ac_nir_store_arg(&b, &args->ac, args->ac.scratch_offset, scratch_offset);
|
||||
if (ring_offsets)
|
||||
|
@@ -317,7 +317,10 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.rt.sbt_descriptors);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.traversal_shader_addr);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.rt.launch_size);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.rt.launch_sizes[i]);
|
||||
|
||||
if (gfx_level < GFX9) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets);
|
||||
|
Reference in New Issue
Block a user