nir/algebraic: Move all the individual transforms to a common table.

Cuts 28% of the remaining relocations in libvulkan_intel.so, shrinks binary size by 290 KB.

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13987>
2021-12-01 15:45:54 -08:00
parent a29b54f014
commit de33205f88
3 changed files with 33 additions and 34 deletions
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -935,8 +935,10 @@ class TreeAutomaton(object):
      # Bijection from state to index. q in the original algorithm is
      # len(self.states)
      self.states = self.IndexMap()
-      # List of pattern matches for each state index.
-      self.state_patterns = []
+      # Flat list of every state's pattern matches, each run terminated by None
+      self.state_patterns = [None]
+      # Offset in the ->transforms table for each state index
+      self.state_pattern_offsets = []
      # Map from state index to filtered state index for each opcode.
      self.filter = defaultdict(list)
      # Bijections from filtered state to filtered state index for each
@@ -966,15 +968,21 @@ class TreeAutomaton(object):
      def process_new_states():
         while self.worklist_index < len(self.states):
            state = self.states[self.worklist_index]
-
            # Calculate pattern matches for this state. Each pattern is
            # assigned to a unique item, so we don't have to worry about
            # deduplicating them here. However, we do have to sort them so
            # that they're visited at runtime in the order they're specified
            # in the source.
            patterns = list(sorted(p for item in state for p in item.patterns))
-            assert len(self.state_patterns) == self.worklist_index
-            self.state_patterns.append(patterns)
+
+            if patterns:
+                # Add our patterns to the global table.
+                self.state_pattern_offsets.append(len(self.state_patterns))
+                self.state_patterns.extend(patterns)
+                self.state_patterns.append(None)
+            else:
+                # Point to the initial sentinel in the global table.
+                self.state_pattern_offsets.append(0)

            # calculate filter table for this state, and update filtered
            # worklists.
@@ -1072,15 +1080,16 @@ static const nir_search_variable_cond ${pass_name}_variable_cond[] = {
 };
 % endif

-% for state_id, state_xforms in enumerate(automaton.state_patterns):
-% if state_xforms: # avoid emitting a 0-length array for MSVC
-static const struct transform ${pass_name}_state${state_id}_xforms[] = {
-% for i in state_xforms:
-  { ${xforms[i].search.array_index}, ${xforms[i].replace.array_index}, ${xforms[i].condition_index} },
-% endfor
-};
+static const struct transform ${pass_name}_transforms[] = {
+% for i in automaton.state_patterns:
+% if i is not None:
+   { ${xforms[i].search.array_index}, ${xforms[i].replace.array_index}, ${xforms[i].condition_index} },
+% else:
+   { ~0, ~0, ~0 }, /* Sentinel */
+
 % endif
 % endfor
+};

 static const struct per_op_table ${pass_name}_pass_op_table[nir_num_search_ops] = {
 % for op in automaton.opcodes:
@@ -1110,29 +1119,16 @@ static const struct per_op_table ${pass_name}_pass_op_table[nir_num_search_ops]
 % endfor
 };

-const struct transform *${pass_name}_transforms[] = {
-% for i in range(len(automaton.state_patterns)):
-   % if automaton.state_patterns[i]:
-   ${pass_name}_state${i}_xforms,
-   % else:
-   NULL,
-   % endif
-% endfor
-};
-
-const uint16_t ${pass_name}_transform_counts[] = {
-% for i in range(len(automaton.state_patterns)):
-   % if automaton.state_patterns[i]:
-   (uint16_t)ARRAY_SIZE(${pass_name}_state${i}_xforms),
-   % else:
-   0,
-   % endif
+/* Mapping from state index to offset in transforms (0 being no transforms) */
+static const uint16_t ${pass_name}_transform_offsets[] = {
+% for offset in automaton.state_pattern_offsets:
+   ${offset},
 % endfor
 };

 static const nir_algebraic_table ${pass_name}_table = {
   .transforms = ${pass_name}_transforms,
-   .transform_counts = ${pass_name}_transform_counts,
+   .transform_offsets = ${pass_name}_transform_offsets,
   .pass_op_table = ${pass_name}_pass_op_table,
   .values = ${pass_name}_values,
   .expression_cond = ${ pass_name + "_expression_cond" if expression_cond else "NULL" },
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -884,8 +884,9 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr,

   int xform_idx = *util_dynarray_element(states, uint16_t,
                                          alu->dest.dest.ssa.index);
-   for (uint16_t i = 0; i < table->transform_counts[xform_idx]; i++) {
-      const struct transform *xform = &table->transforms[xform_idx][i];
+   for (const struct transform *xform = &table->transforms[table->transform_offsets[xform_idx]];
+        xform->condition_offset != ~0;
+        xform++) {
      if (condition_flags[xform->condition_offset] &&
          !(table->values[xform->search].expression.inexact && ignore_inexact) &&
          nir_replace_instr(build, alu, range_ht, states, table,
--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@@ -197,8 +197,10 @@ typedef bool (*nir_search_variable_cond)(struct hash_table *range_ht,

 /* Generated data table for an algebraic optimization pass. */
 typedef struct {
-   const struct transform **transforms;
-   const uint16_t *transform_counts;
+   /** Array of all transforms in the pass. */
+   const struct transform *transforms;
+   /** Mapping from automaton state index to location in *transforms. */
+   const uint16_t *transform_offsets;
   const struct per_op_table *pass_op_table;
   const nir_search_value_union *values;