ac: fix min/max_good_cu_per_sa on gfx10.3 with disabled SEs

Fixes: 9538b9a68e - radeonsi: add support for Sienna Cichlid

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7542>
This commit is contained in:
Marek Olšák
2020-11-10 21:30:52 -05:00
committed by Marge Bot
parent b635dff256
commit f2977a162a
3 changed files with 15 additions and 4 deletions

View File

@@ -542,6 +542,14 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
if (info->family == CHIP_KAVERI) if (info->family == CHIP_KAVERI)
info->num_render_backends = 2; info->num_render_backends = 2;
/* Guess the number of enabled SEs because the kernel doesn't tell us. */
if (info->chip_class >= GFX10_3 && info->max_se > 1) {
unsigned num_rbs_per_se = info->num_render_backends / info->max_se;
info->num_se = util_bitcount(amdinfo->enabled_rb_pipes_mask) / num_rbs_per_se;
} else {
info->num_se = info->max_se;
}
info->clock_crystal_freq = amdinfo->gpu_counter_freq; info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) { if (!info->clock_crystal_freq) {
fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n"); fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
@@ -672,10 +680,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
*/ */
unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1; unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
info->max_good_cu_per_sa = info->max_good_cu_per_sa =
DIV_ROUND_UP(info->num_good_compute_units, (info->max_se * info->max_sh_per_se * cu_group)) * DIV_ROUND_UP(info->num_good_compute_units, (info->num_se * info->max_sh_per_se * cu_group)) *
cu_group; cu_group;
info->min_good_cu_per_sa = info->min_good_cu_per_sa =
(info->num_good_compute_units / (info->max_se * info->max_sh_per_se * cu_group)) * cu_group; (info->num_good_compute_units / (info->num_se * info->max_sh_per_se * cu_group)) * cu_group;
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode)); memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode));
info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask; info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
@@ -964,6 +972,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa); fprintf(f, " max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
fprintf(f, " min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa); fprintf(f, " min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
fprintf(f, " max_se = %i\n", info->max_se); fprintf(f, " max_se = %i\n", info->max_se);
fprintf(f, " num_se = %i\n", info->num_se);
fprintf(f, " max_sh_per_se = %i\n", info->max_sh_per_se); fprintf(f, " max_sh_per_se = %i\n", info->max_sh_per_se);
fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd); fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);
@@ -1257,7 +1266,7 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves
unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
if (info->chip_class >= GFX7) { if (info->chip_class >= GFX7) {
unsigned num_cu_per_se = info->num_good_compute_units / info->max_se; unsigned num_cu_per_se = info->num_good_compute_units / info->num_se;
/* Force even distribution on all SIMDs in CU if the workgroup /* Force even distribution on all SIMDs in CU if the workgroup
* size is 64. This has shown some good improvements if # of CUs * size is 64. This has shown some good improvements if # of CUs

View File

@@ -166,7 +166,8 @@ struct radeon_info {
uint32_t num_good_compute_units; uint32_t num_good_compute_units;
uint32_t max_good_cu_per_sa; uint32_t max_good_cu_per_sa;
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */ uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
uint32_t max_se; /* shader engines */ uint32_t max_se; /* number of shader engines incl. disabled ones */
uint32_t num_se; /* number of enabled shader engines */
uint32_t max_sh_per_se; /* shader arrays per shader engine */ uint32_t max_sh_per_se; /* shader arrays per shader engine */
uint32_t max_wave64_per_simd; uint32_t max_wave64_per_simd;
uint32_t num_physical_sgprs_per_simd; uint32_t num_physical_sgprs_per_simd;

View File

@@ -107,6 +107,7 @@ static void radv_null_winsys_query_info(struct radeon_winsys *rws,
info->pci_id = gpu_info[info->family].pci_id; info->pci_id = gpu_info[info->family].pci_id;
info->has_syncobj_wait_for_submit = true; info->has_syncobj_wait_for_submit = true;
info->max_se = 4; info->max_se = 4;
info->num_se = 4;
if (info->chip_class >= GFX10_3) if (info->chip_class >= GFX10_3)
info->max_wave64_per_simd = 16; info->max_wave64_per_simd = 16;
else if (info->chip_class >= GFX10) else if (info->chip_class >= GFX10)