diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index e74caeda68d..f50c35087b8 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -150,7 +150,7 @@ struct ac_shader_args { uint64_t inline_push_const_mask; struct ac_arg view_index; struct ac_arg sbt_descriptors; - struct ac_arg ray_launch_size; + struct ac_arg ray_launch_size_addr; struct ac_arg force_vrs_rates; }; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 822cdddb325..7ca337d1a72 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8170,10 +8170,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) emit_split_vector(ctx, dst, 3); break; } - case nir_intrinsic_load_ray_launch_size: { + case nir_intrinsic_load_ray_launch_size_addr_amd: { Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); - bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.ray_launch_size))); - emit_split_vector(ctx, dst, 3); + Temp addr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.ray_launch_size_addr)); + bld.copy(Definition(dst), Operand(addr)); break; } case nir_intrinsic_load_local_invocation_id: { diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index fb3e51cbdf1..a0ff8164b47 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -600,7 +600,7 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_intrinsic_load_push_constant: case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_num_workgroups: - case nir_intrinsic_load_ray_launch_size: + case nir_intrinsic_load_ray_launch_size_addr_amd: case nir_intrinsic_load_subgroup_id: case nir_intrinsic_load_num_subgroups: case nir_intrinsic_load_first_vertex: diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 117bcb9de28..ee3ad463c6c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -7595,35 +7595,53 @@ radv_rt_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_ } static bool -radv_rt_bind_tables(struct radv_cmd_buffer *cmd_buffer, - const VkStridedDeviceAddressRegionKHR *tables) +radv_rt_set_args(struct radv_cmd_buffer *cmd_buffer, + const VkStridedDeviceAddressRegionKHR *tables, uint64_t launch_size_va, + struct radv_dispatch_info *info) { struct radv_pipeline *pipeline = cmd_buffer->state.rt_pipeline; - uint32_t base_reg; + uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_COMPUTE]; void *ptr; - uint32_t *desc_ptr; + uint32_t *write_ptr; uint32_t offset; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, 64, &offset, &ptr)) + info->unaligned = true; + + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, 64 + (launch_size_va ? 0 : 12), &offset, &ptr)) return false; - desc_ptr = ptr; - for (unsigned i = 0; i < 4; ++i, desc_ptr += 4) { - desc_ptr[0] = tables[i].deviceAddress; - desc_ptr[1] = tables[i].deviceAddress >> 32; - desc_ptr[2] = tables[i].stride; - desc_ptr[3] = 0; + write_ptr = ptr; + for (unsigned i = 0; i < 4; ++i, write_ptr += 4) { + write_ptr[0] = tables[i].deviceAddress; + write_ptr[1] = tables[i].deviceAddress >> 32; + write_ptr[2] = tables[i].stride; + write_ptr[3] = 0; + } + + if (!launch_size_va) { + write_ptr[0] = info->blocks[0]; + write_ptr[1] = info->blocks[1]; + write_ptr[2] = info->blocks[2]; + } else { + info->va = launch_size_va; } uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; - struct radv_userdata_info *loc = - radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_SBT_DESCRIPTORS); - if (loc->sgpr_idx == -1) - return true; - base_reg = pipeline->user_data_0[MESA_SHADER_COMPUTE]; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, - false); + struct radv_userdata_info *desc_loc = + radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_SBT_DESCRIPTORS); + if (desc_loc->sgpr_idx != -1) { + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, + base_reg + desc_loc->sgpr_idx * 4, va, false); + } + + struct radv_userdata_info *size_loc = + radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); + if (size_loc->sgpr_idx != -1) { + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, + base_reg + size_loc->sgpr_idx * 4, launch_size_va ? launch_size_va : (va + 64), false); + } + return true; } @@ -7641,7 +7659,6 @@ radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, info.blocks[0] = width; info.blocks[1] = height; info.blocks[2] = depth; - info.unaligned = 1; const VkStridedDeviceAddressRegionKHR tables[] = { *pRaygenShaderBindingTable, @@ -7650,21 +7667,8 @@ radv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer, *pCallableShaderBindingTable, }; - if (!radv_rt_bind_tables(cmd_buffer, tables)) { + if (!radv_rt_set_args(cmd_buffer, tables, 0, &info)) return; - } - - struct radv_userdata_info *loc = radv_lookup_user_sgpr( - cmd_buffer->state.rt_pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE); - - if (loc->sgpr_idx != -1) { - assert(loc->num_sgprs == 3); - - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); - radeon_emit(cmd_buffer->cs, width); - radeon_emit(cmd_buffer->cs, height); - radeon_emit(cmd_buffer->cs, depth); - } radv_rt_dispatch(cmd_buffer, &info); } diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 02d27ef8942..048f7b94d0e 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -490,6 +490,24 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret); break; } + case nir_intrinsic_load_ray_launch_size: { + b_shader.cursor = nir_instr_remove(instr); + nir_ssa_def *launch_size_addr = + nir_load_ray_launch_size_addr_amd(&b_shader); + + nir_ssa_def * xy = nir_build_load_smem_amd( + &b_shader, 2, launch_size_addr, nir_imm_int(&b_shader, 0)); + nir_ssa_def * z = nir_build_load_smem_amd( + &b_shader, 1, launch_size_addr, nir_imm_int(&b_shader, 8)); + + nir_ssa_def *xyz[3] = { + nir_channel(&b_shader, xy, 0), + nir_channel(&b_shader, xy, 1), + z, + }; + nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(&b_shader, xyz, 3)); + break; + } case nir_intrinsic_load_ray_t_min: { b_shader.cursor = nir_instr_remove(instr); nir_ssa_def *ret = nir_load_var(&b_shader, vars->tmin); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 373e3410e78..80364b35a5b 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -155,7 +155,7 @@ enum radv_ud_index { AC_UD_PS_MAX_UD, AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, AC_UD_CS_SBT_DESCRIPTORS, - AC_UD_CS_RAY_LAUNCH_SIZE, + AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, AC_UD_CS_TASK_RING_OFFSETS, AC_UD_CS_TASK_DRAW_ID, AC_UD_CS_TASK_IB, diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 0326ec45509..2a91ab71f25 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -175,7 +175,7 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i if (info->cs.uses_grid_size) user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2; if (info->cs.uses_ray_launch_size) - user_sgpr_count += 3; + user_sgpr_count++; if (info->vs.needs_draw_id) user_sgpr_count += 1; if (info->cs.uses_task_rings) @@ -570,7 +570,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ } if (info->cs.uses_ray_launch_size) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.ray_launch_size_addr); } if (info->vs.needs_draw_id) { @@ -808,8 +808,8 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_ set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, args->load_grid_size_from_user_sgpr ? 3 : 2); } - if (args->ac.ray_launch_size.used) { - set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3); + if (args->ac.ray_launch_size_addr.used) { + set_loc_shader_ptr(args, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, &user_sgpr_idx); } if (args->ac.draw_id.used) { set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index adc30456f3a..0331230eea6 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -593,7 +593,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n case MESA_SHADER_TASK: for (int i = 0; i < 3; ++i) info->cs.block_size[i] = nir->info.workgroup_size[i]; - info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE); + info->cs.uses_ray_launch_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_RAY_LAUNCH_SIZE_ADDR_AMD); /* Task shaders always need these for the I/O lowering even if * the API shader doesn't actually use them.