ir3: Move fixup_regfootprint() to ir3_collect_info()

This fixes the case where fixup_regfootprint() adds to the reg footprint, but the addition isn't accounted for when ir3_collect_info() determines whether we should double threadsize. This would produce a hang on a650 and above where we have a reg footprint of 33 and doubled threadsize.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18840>
2022-09-26 18:27:53 +02:00
parent 7d1b8c8ab2
commit c58d633dd2
2 changed files with 53 additions and 61 deletions
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -388,6 +388,59 @@ ir3_collect_info(struct ir3_shader_variant *v)
      }
   }

+   /* for vertex shader, the inputs are loaded into registers before the shader
+    * is executed, so max_regs from the shader instructions might not properly
+    * reflect the # of registers actually used, especially in the case of
+    * passthrough varyings.
+    *
+    * Likewise, for fragment shader, we can have some regs which are passed
+    * input values but never touched by the resulting shader (i.e. as a result
+    * of dead code elimination or simply because we don't know how to turn
+    * the reg off).
+    */
+   for (unsigned i = 0; i < v->inputs_count; i++) {
+      /* skip frag inputs fetch via bary.f since their reg's are
+       * not written by gpu before shader starts (and in fact the
+       * regid's might not even be valid)
+       */
+      if (v->inputs[i].bary)
+         continue;
+
+      /* ignore high regs that are global to all threads in a warp
+       * (they exist by default) (a5xx+)
+       */
+      if (v->inputs[i].regid >= regid(48, 0))
+         continue;
+
+      if (v->inputs[i].compmask) {
+         unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
+         int32_t regid = v->inputs[i].regid + n;
+         if (v->inputs[i].half) {
+            if (!v->mergedregs) {
+               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+            } else {
+               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+            }
+         } else {
+            v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < v->num_sampler_prefetch; i++) {
+      unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
+      int32_t regid = v->sampler_prefetch[i].dst + n;
+      if (v->sampler_prefetch[i].half_precision) {
+         if (!v->mergedregs) {
+            v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+         } else {
+            v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+         }
+      } else {
+         v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+      }
+   }
+
   /* TODO: for a5xx and below, is there a separate regfile for
    * half-registers?
    */
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -48,65 +48,6 @@ ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
   return glsl_count_attribute_slots(type, false);
 }

-/* for vertex shader, the inputs are loaded into registers before the shader
- * is executed, so max_regs from the shader instructions might not properly
- * reflect the # of registers actually used, especially in case passthrough
- * varyings.
- *
- * Likewise, for fragment shader, we can have some regs which are passed
- * input values but never touched by the resulting shader (ie. as result
- * of dead code elimination or simply because we don't know how to turn
- * the reg off.
- */
-static void
-fixup_regfootprint(struct ir3_shader_variant *v)
-{
-   unsigned i;
-
-   for (i = 0; i < v->inputs_count; i++) {
-      /* skip frag inputs fetch via bary.f since their reg's are
-       * not written by gpu before shader starts (and in fact the
-       * regid's might not even be valid)
-       */
-      if (v->inputs[i].bary)
-         continue;
-
-      /* ignore high regs that are global to all threads in a warp
-       * (they exist by default) (a5xx+)
-       */
-      if (v->inputs[i].regid >= regid(48, 0))
-         continue;
-
-      if (v->inputs[i].compmask) {
-         unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
-         int32_t regid = v->inputs[i].regid + n;
-         if (v->inputs[i].half) {
-            if (!v->mergedregs) {
-               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
-            } else {
-               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
-            }
-         } else {
-            v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
-         }
-      }
-   }
-
-   for (i = 0; i < v->num_sampler_prefetch; i++) {
-      unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
-      int32_t regid = v->sampler_prefetch[i].dst + n;
-      if (v->sampler_prefetch[i].half_precision) {
-         if (!v->mergedregs) {
-            v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
-         } else {
-            v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
-         }
-      } else {
-         v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
-      }
-   }
-}
-
 /* wrapper for ir3_assemble() which does some info fixup based on
 * shader state.  Non-static since used by ir3_cmdline too.
 */
@@ -170,8 +111,6 @@ ir3_shader_assemble(struct ir3_shader_variant *v)
                        ((v->type == MESA_SHADER_COMPUTE) ||
                         (v->type == MESA_SHADER_KERNEL));

-   fixup_regfootprint(v);
-
   return bin;
 }