diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 0df59d8e87c..8f9f771dee8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2880,7 +2880,6 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad uint64_t prolog_va = radv_buffer_get_va(prolog->bo) + prolog->alloc->offset; assert(cmd_buffer->state.emitted_pipeline == cmd_buffer->state.pipeline); - assert(vs_shader->info.num_input_sgprs <= prolog->num_preserved_sgprs); uint32_t rsrc1 = vs_shader->config.rsrc1; if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(vs_shader->config.rsrc1)) diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index ab2e1540b5a..dfba72b8d47 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -354,7 +354,7 @@ radv_create_shaders_from_pipeline_cache( memcpy(binary, p, entry->binary_sizes[i]); p += entry->binary_sizes[i]; - entry->shaders[i] = radv_shader_create(device, binary, false, true); + entry->shaders[i] = radv_shader_create(device, binary, false, true, NULL); free(binary); } else if (entry->binary_sizes[i]) { p += entry->binary_sizes[i]; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index e55dd7693c3..9626c310591 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1325,21 +1325,22 @@ radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage static void radv_postprocess_config(const struct radv_device *device, const struct ac_shader_config *config_in, const struct radv_shader_info *info, gl_shader_stage stage, + const struct radv_shader_args *args, struct ac_shader_config *config_out) { const struct radv_physical_device *pdevice = device->physical_device; bool scratch_enabled = config_in->scratch_bytes_per_wave > 0; bool trap_enabled = !!device->trap_handler_shader; unsigned vgpr_comp_cnt = 0; - unsigned num_input_vgprs = info->num_input_vgprs; + unsigned num_input_vgprs = args->ac.num_vgprs_used; if (stage == MESA_SHADER_FRAGMENT) { num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL); } unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs); - /* +3 for scratch wave offset and VCC */ - unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3); + /* +2 for the ring offsets, +3 for scratch wave offset and VCC */ + unsigned num_sgprs = MAX2(config_in->num_sgprs, args->ac.num_sgprs_used + 2 + 3); unsigned num_shared_vgprs = config_in->num_shared_vgprs; /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */ assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) || @@ -1352,7 +1353,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader config_out->num_sgprs = num_sgprs; config_out->num_shared_vgprs = num_shared_vgprs; - config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) | + config_out->rsrc2 = S_00B12C_USER_SGPR(args->num_user_sgprs) | S_00B12C_SCRATCH_EN(scratch_enabled) | S_00B12C_TRAP_PRESENT(trap_enabled); if (trap_enabled) { @@ -1373,10 +1374,10 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config_out->float_mode); if (pdevice->rad_info.chip_class >= GFX10) { - config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5); + config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5); } else { config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8); - config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5); + config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(args->num_user_sgprs >> 5); } bool wgp_mode = radv_should_use_wgp_mode(device, stage, info); @@ -1568,7 +1569,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader struct radv_shader * radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary, - bool keep_shader_info, bool from_cache) + bool keep_shader_info, bool from_cache, const struct radv_shader_args *args) { struct ac_shader_config config = {0}; struct ac_rtld_binary rtld_binary = {0}; @@ -1647,7 +1648,8 @@ radv_shader_create(struct radv_device *device, const struct radv_shader_binary * /* Copy the shader binary configuration from the cache. */ memcpy(&shader->config, &binary->config, sizeof(shader->config)); } else { - radv_postprocess_config(device, &config, &binary->info, binary->stage, &shader->config); + assert(args); + radv_postprocess_config(device, &config, &binary->info, binary->stage, args, &shader->config); } void *dest_ptr = radv_alloc_shader_memory(device, shader); @@ -1810,7 +1812,7 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module, binary->info = *info; - struct radv_shader *shader = radv_shader_create(device, binary, keep_shader_info, false); + struct radv_shader *shader = radv_shader_create(device, binary, keep_shader_info, false, &args); if (!shader) { free(binary); return NULL; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index c196c67bd7e..fa1eba5be2d 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -237,9 +237,6 @@ struct radv_shader_info { uint8_t wave_size; uint8_t ballot_bit_size; struct radv_userdata_locations user_sgprs_locs; - unsigned num_user_sgprs; - unsigned num_input_sgprs; - unsigned num_input_vgprs; bool is_ngg; bool is_ngg_passthrough; bool has_ngg_culling; @@ -505,9 +502,12 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline, VkPipelineCreationFeedbackEXT *pipeline_feedback, VkPipelineCreationFeedbackEXT **stage_feedbacks); +struct radv_shader_args; + struct radv_shader *radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary, - bool keep_shader_info, bool from_cache); + bool keep_shader_info, bool from_cache, + const struct radv_shader_args *args); struct radv_shader *radv_shader_compile( struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders, int shader_count, struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key, diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 18efc9f11de..219334c1aa8 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -743,11 +743,6 @@ radv_declare_shader_args(const struct radv_nir_compiler_options *options, unreachable("Shader stage not implemented"); } - info->num_input_vgprs = 0; - info->num_input_sgprs = 2; - info->num_input_sgprs += args->ac.num_sgprs_used; - info->num_input_vgprs = args->ac.num_vgprs_used; - uint8_t user_sgpr_idx = 0; set_loc_shader_ptr(info, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx); @@ -811,5 +806,5 @@ radv_declare_shader_args(const struct radv_nir_compiler_options *options, unreachable("Shader stage not implemented"); } - info->num_user_sgprs = user_sgpr_idx; + args->num_user_sgprs = user_sgpr_idx; } diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index 6aa98a61d8a..d1aa47430d0 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -46,6 +46,8 @@ struct radv_shader_args { struct ac_arg prolog_inputs; struct ac_arg vs_inputs[MAX_VERTEX_ATTRIBS]; + unsigned num_user_sgprs; + bool is_gs_copy_shader; bool is_trap_handler_shader; };