ac: add radeon_info::has_scratch_base_registers

Fixes: 3b0bfd254f - radeonsi/gfx11: make flat_scratch changes for compute
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30071>
2024-07-12 17:45:59 -04:00
parent bc4382348d
commit a5b4ae67ae
5 changed files with 8 additions and 7 deletions
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1597,6 +1597,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
   const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */
   info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se,
                                  max_waves_per_tg);
+   info->has_scratch_base_registers = info->gfx_level >= GFX11 ||
+                                      (!info->has_graphics && info->family >= CHIP_GFX940);
   info->max_gflops = (info->gfx_level >= GFX11 ? 256 : 128) * info->num_cu * info->max_gpu_freq_mhz / 1000;
   info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000);
   info->has_pcie_bandwidth_info = info->drm_minor >= 51;
@@ -2035,6 +2037,7 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
   fprintf(f, "    max_vgpr_alloc = %i\n", info->max_vgpr_alloc);
   fprintf(f, "    wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity);
   fprintf(f, "    max_scratch_waves = %i\n", info->max_scratch_waves);
+   fprintf(f, "    has_scratch_base_registers = %i\n", info->has_scratch_base_registers);
   fprintf(f, "Ring info:\n");
   fprintf(f, "    attribute_ring_size_per_se = %u KB\n",
           DIV_ROUND_UP(info->attribute_ring_size_per_se, 1024));
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -270,6 +270,7 @@ struct radeon_info {
   uint32_t max_vgpr_alloc;
   uint32_t wave64_vgpr_alloc_granularity;
   uint32_t max_scratch_waves;
+   bool has_scratch_base_registers;

   /* Pos, prim, and attribute rings. */
   uint32_t attribute_ring_size_per_se;   /* GFX11+ */
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -406,7 +406,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
   }

   /* Set the scratch address in the shader binary. */
-   if (sctx->gfx_level < GFX11 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) {
+   if (!sctx->screen->info.has_scratch_base_registers) {
      uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;

      if (shader->scratch_va != scratch_va) {
@@ -552,9 +552,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
      radeon_opt_set_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
                            SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);

-      if (config->scratch_bytes_per_wave &&
-          (sctx->gfx_level >= GFX11 ||
-           (sctx->family >= CHIP_GFX940 && !sctx->screen->info.has_graphics))) {
+      if (config->scratch_bytes_per_wave && sctx->screen->info.has_scratch_base_registers) {
         radeon_opt_set_sh_reg2(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
                                SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
                                sctx->compute_scratch_buffer->gpu_address >> 8,
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3014,8 +3014,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
   }

   /* Add/remove the scratch offset to/from input SGPRs. */
-   if (sel->screen->info.gfx_level < GFX11 &&
-       (sel->screen->info.family < CHIP_GFX940 || sel->screen->info.has_graphics) &&
+   if (!sel->screen->info.has_scratch_base_registers &&
       !si_is_merged_shader(shader)) {
      if (sel->info.base.use_aco_amd) {
         /* When aco scratch_offset arg is added explicitly at the beginning.
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -4379,7 +4379,7 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
            return false;
      }

-      if (sctx->gfx_level < GFX11 && !si_update_scratch_relocs(sctx))
+      if (!sctx->screen->info.has_scratch_base_registers && !si_update_scratch_relocs(sctx))
         return false;
   }