radv: configure the number of SGPRs/VGPRs directly from the arguments

Read the SGPR/VGPR counts directly from radv_shader_args instead of first copying them into radv_shader_info.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13542>
This commit is contained in:
Samuel Pitoiset
2021-10-27 10:20:24 +02:00
committed by Marge Bot
parent 990a8ee5eb
commit 3bbc226d7a
6 changed files with 19 additions and 21 deletions

View File

@@ -2880,7 +2880,6 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
uint64_t prolog_va = radv_buffer_get_va(prolog->bo) + prolog->alloc->offset;
assert(cmd_buffer->state.emitted_pipeline == cmd_buffer->state.pipeline);
assert(vs_shader->info.num_input_sgprs <= prolog->num_preserved_sgprs);
uint32_t rsrc1 = vs_shader->config.rsrc1;
if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(vs_shader->config.rsrc1))

View File

@@ -354,7 +354,7 @@ radv_create_shaders_from_pipeline_cache(
memcpy(binary, p, entry->binary_sizes[i]);
p += entry->binary_sizes[i];
entry->shaders[i] = radv_shader_create(device, binary, false, true);
entry->shaders[i] = radv_shader_create(device, binary, false, true, NULL);
free(binary);
} else if (entry->binary_sizes[i]) {
p += entry->binary_sizes[i];

View File

@@ -1325,21 +1325,22 @@ radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage
static void
radv_postprocess_config(const struct radv_device *device, const struct ac_shader_config *config_in,
const struct radv_shader_info *info, gl_shader_stage stage,
const struct radv_shader_args *args,
struct ac_shader_config *config_out)
{
const struct radv_physical_device *pdevice = device->physical_device;
bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
bool trap_enabled = !!device->trap_handler_shader;
unsigned vgpr_comp_cnt = 0;
unsigned num_input_vgprs = info->num_input_vgprs;
unsigned num_input_vgprs = args->ac.num_vgprs_used;
if (stage == MESA_SHADER_FRAGMENT) {
num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL);
}
unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
/* +3 for scratch wave offset and VCC */
unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
/* +2 for the ring offsets, +3 for scratch wave offset and VCC */
unsigned num_sgprs = MAX2(config_in->num_sgprs, args->ac.num_sgprs_used + 2 + 3);
unsigned num_shared_vgprs = config_in->num_shared_vgprs;
/* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) ||
@@ -1352,7 +1353,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
config_out->num_sgprs = num_sgprs;
config_out->num_shared_vgprs = num_shared_vgprs;
config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
config_out->rsrc2 = S_00B12C_USER_SGPR(args->num_user_sgprs) |
S_00B12C_SCRATCH_EN(scratch_enabled) | S_00B12C_TRAP_PRESENT(trap_enabled);
if (trap_enabled) {
@@ -1373,10 +1374,10 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config_out->float_mode);
if (pdevice->rad_info.chip_class >= GFX10) {
config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5);
} else {
config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(args->num_user_sgprs >> 5);
}
bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
@@ -1568,7 +1569,7 @@ radv_postprocess_config(const struct radv_device *device, const struct ac_shader
struct radv_shader *
radv_shader_create(struct radv_device *device, const struct radv_shader_binary *binary,
bool keep_shader_info, bool from_cache)
bool keep_shader_info, bool from_cache, const struct radv_shader_args *args)
{
struct ac_shader_config config = {0};
struct ac_rtld_binary rtld_binary = {0};
@@ -1647,7 +1648,8 @@ radv_shader_create(struct radv_device *device, const struct radv_shader_binary *
/* Copy the shader binary configuration from the cache. */
memcpy(&shader->config, &binary->config, sizeof(shader->config));
} else {
radv_postprocess_config(device, &config, &binary->info, binary->stage, &shader->config);
assert(args);
radv_postprocess_config(device, &config, &binary->info, binary->stage, args, &shader->config);
}
void *dest_ptr = radv_alloc_shader_memory(device, shader);
@@ -1810,7 +1812,7 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
binary->info = *info;
struct radv_shader *shader = radv_shader_create(device, binary, keep_shader_info, false);
struct radv_shader *shader = radv_shader_create(device, binary, keep_shader_info, false, &args);
if (!shader) {
free(binary);
return NULL;

View File

@@ -237,9 +237,6 @@ struct radv_shader_info {
uint8_t wave_size;
uint8_t ballot_bit_size;
struct radv_userdata_locations user_sgprs_locs;
unsigned num_user_sgprs;
unsigned num_input_sgprs;
unsigned num_input_vgprs;
bool is_ngg;
bool is_ngg_passthrough;
bool has_ngg_culling;
@@ -505,9 +502,12 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
VkPipelineCreationFeedbackEXT *pipeline_feedback,
VkPipelineCreationFeedbackEXT **stage_feedbacks);
struct radv_shader_args;
struct radv_shader *radv_shader_create(struct radv_device *device,
const struct radv_shader_binary *binary,
bool keep_shader_info, bool from_cache);
bool keep_shader_info, bool from_cache,
const struct radv_shader_args *args);
struct radv_shader *radv_shader_compile(
struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders,
int shader_count, struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key,

View File

@@ -743,11 +743,6 @@ radv_declare_shader_args(const struct radv_nir_compiler_options *options,
unreachable("Shader stage not implemented");
}
info->num_input_vgprs = 0;
info->num_input_sgprs = 2;
info->num_input_sgprs += args->ac.num_sgprs_used;
info->num_input_vgprs = args->ac.num_vgprs_used;
uint8_t user_sgpr_idx = 0;
set_loc_shader_ptr(info, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
@@ -811,5 +806,5 @@ radv_declare_shader_args(const struct radv_nir_compiler_options *options,
unreachable("Shader stage not implemented");
}
info->num_user_sgprs = user_sgpr_idx;
args->num_user_sgprs = user_sgpr_idx;
}

View File

@@ -46,6 +46,8 @@ struct radv_shader_args {
struct ac_arg prolog_inputs;
struct ac_arg vs_inputs[MAX_VERTEX_ATTRIBS];
unsigned num_user_sgprs;
bool is_gs_copy_shader;
bool is_trap_handler_shader;
};