ir3: Take LB restriction on constlen into account on a7xx

On a7xx, the max constlen for compute is increased to 512 vec4s or 8KB, however the size of the LB was not increased beyond 40KB. A quick calculation shows that 8KB of consts multiplied by 2 banks plus the API maximum of 32KB shared memory would exceed 40KB. This means that we can't always use a constlen of 512, and sometimes have to fall back to 256 when a lot of shared memory is in use. In the future, we can use similar calculations to figure out how much "extra" shared memory is available for the backend to spill to, but we currently don't support spilling to shared memory. Fixes: 5879eaac18 ("ir3: Increase compute const size on a7xx") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34746> (cherry picked from commit ea9d694a7b363d66dd9e57bc0f55c5fd903632b2)
2025-04-28 19:34:05 -04:00
parent 385a56642b
commit 746f2986ec
2 changed files with 37 additions and 2 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -54,7 +54,7 @@
        "description": "ir3: Take LB restriction on constlen into account on a7xx",
        "nominated": true,
        "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "5879eaac185ed1c167fd01aff9b91c7cbe43ab0a",
        "notes": null
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -1051,6 +1051,41 @@ ir3_const_state_mut(const struct ir3_shader_variant *v)
   return v->const_state;
 }

+static inline unsigned
+ir3_max_const_compute(const struct ir3_shader_variant *v,
+                      const struct ir3_compiler *compiler)
+{
+   unsigned lm_size = v->local_size_variable ? compiler->local_mem_size :
+      v->cs.req_local_mem; /* in bytes, like compute_lb_size below */
+
+   /* Compute the max constlen (in vec4s) that still leaves room for lm_size.
+    *
+    * The LB is divided between consts and local memory. LB is split into
+    * wave_granularity banks so that different ALUs can access it at the same
+    * time; consts are duplicated into each bank so that they always take
+    * constant time to access, while LM is spread across the banks.
+    *
+    * Only the LB splits defined by the CONSTANTRAMMODE register field are
+    * allowed, and using a wrong one can hang when multiple compute shaders are
+    * in flight, so limit constlen to a split that leaves enough space for LM.
+    */
+   unsigned lb_const_size = /* NOTE(review): assumes lm_size <= compute_lb_size */
+      ((compiler->compute_lb_size - lm_size) / compiler->wave_granularity) /
+      16 /* bytes per vec4 */;
+   if (lb_const_size < compiler->max_const_compute) {
+      const uint32_t lb_const_sizes[] = { 128, 192, 256, 512 };
+
+      assert(lb_const_size >= lb_const_sizes[0]); /* smallest entry must fit */
+      for (unsigned i = 0; i < ARRAY_SIZE(lb_const_sizes) - 1; i++) {
+         if (lb_const_size < lb_const_sizes[i + 1])
+            return lb_const_sizes[i]; /* largest entry that still fits */
+      }
+      return lb_const_sizes[ARRAY_SIZE(lb_const_sizes) - 1]; /* >= last entry */
+   } else {
+      return compiler->max_const_compute; /* LM does not limit constlen */
+   }
+}
+
 static inline unsigned
 _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)
 {
@@ -1078,7 +1113,7 @@ _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)

   if ((v->type == MESA_SHADER_COMPUTE) ||
       (v->type == MESA_SHADER_KERNEL)) {
-      return compiler->max_const_compute - shared_consts_size;
+      return ir3_max_const_compute(v, compiler) - shared_consts_size;
   } else if (safe_constlen) {
      return compiler->max_const_safe - safe_shared_consts_size;
   } else if (v->type == MESA_SHADER_FRAGMENT) {