From de33205f88eebc43676c9a7f603b70e8bc212e95 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Wed, 1 Dec 2021 15:45:54 -0800 Subject: [PATCH] nir/algebraic: Move all the individual transforms to a common table. Cuts 28% of the remaining relocations in libvulkan_intel.so, shrinks binary size by 290kb. Reviewed-by: Adam Jackson Part-of: --- src/compiler/nir/nir_algebraic.py | 56 ++++++++++++++----------------- src/compiler/nir/nir_search.c | 5 +-- src/compiler/nir/nir_search.h | 6 ++-- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index 0f1f3e0612a..bfe00d6ed85 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -935,8 +935,10 @@ class TreeAutomaton(object): # Bijection from state to index. q in the original algorithm is # len(self.states) self.states = self.IndexMap() - # List of pattern matches for each state index. - self.state_patterns = [] + # Lists of pattern matches separated by None + self.state_patterns = [None] + # Offset in the ->transforms table for each state index + self.state_pattern_offsets = [] # Map from state index to filtered state index for each opcode. self.filter = defaultdict(list) # Bijections from filtered state to filtered state index for each @@ -966,15 +968,21 @@ class TreeAutomaton(object): def process_new_states(): while self.worklist_index < len(self.states): state = self.states[self.worklist_index] - # Calculate pattern matches for this state. Each pattern is # assigned to a unique item, so we don't have to worry about # deduplicating them here. However, we do have to sort them so # that they're visited at runtime in the order they're specified # in the source. patterns = list(sorted(p for item in state for p in item.patterns)) - assert len(self.state_patterns) == self.worklist_index - self.state_patterns.append(patterns) + + if patterns: + # Add our patterns to the global table. + self.state_pattern_offsets.append(len(self.state_patterns)) + self.state_patterns.extend(patterns) + self.state_patterns.append(None) + else: + # Point to the initial sentinel in the global table. + self.state_pattern_offsets.append(0) # calculate filter table for this state, and update filtered # worklists. @@ -1072,15 +1080,16 @@ static const nir_search_variable_cond ${pass_name}_variable_cond[] = { }; % endif -% for state_id, state_xforms in enumerate(automaton.state_patterns): -% if state_xforms: # avoid emitting a 0-length array for MSVC -static const struct transform ${pass_name}_state${state_id}_xforms[] = { -% for i in state_xforms: - { ${xforms[i].search.array_index}, ${xforms[i].replace.array_index}, ${xforms[i].condition_index} }, -% endfor -}; +static const struct transform ${pass_name}_transforms[] = { +% for i in automaton.state_patterns: +% if i is not None: + { ${xforms[i].search.array_index}, ${xforms[i].replace.array_index}, ${xforms[i].condition_index} }, +% else: + { ~0, ~0, ~0 }, /* Sentinel */ + % endif % endfor +}; static const struct per_op_table ${pass_name}_pass_op_table[nir_num_search_ops] = { % for op in automaton.opcodes: @@ -1110,29 +1119,16 @@ static const struct per_op_table ${pass_name}_pass_op_table[nir_num_search_ops] % endfor }; -const struct transform *${pass_name}_transforms[] = { -% for i in range(len(automaton.state_patterns)): - % if automaton.state_patterns[i]: - ${pass_name}_state${i}_xforms, - % else: - NULL, - % endif -% endfor -}; - -const uint16_t ${pass_name}_transform_counts[] = { -% for i in range(len(automaton.state_patterns)): - % if automaton.state_patterns[i]: - (uint16_t)ARRAY_SIZE(${pass_name}_state${i}_xforms), - % else: - 0, - % endif +/* Mapping from state index to offset in transforms (0 being no transforms) */ +static const uint16_t ${pass_name}_transform_offsets[] = { +% for offset in automaton.state_pattern_offsets: + ${offset}, % endfor }; static const nir_algebraic_table ${pass_name}_table = { .transforms = ${pass_name}_transforms, - .transform_counts = ${pass_name}_transform_counts, + .transform_offsets = ${pass_name}_transform_offsets, .pass_op_table = ${pass_name}_pass_op_table, .values = ${pass_name}_values, .expression_cond = ${ pass_name + "_expression_cond" if expression_cond else "NULL" }, diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 9e13452008d..3246d13ec8a 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -884,8 +884,9 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr, int xform_idx = *util_dynarray_element(states, uint16_t, alu->dest.dest.ssa.index); - for (uint16_t i = 0; i < table->transform_counts[xform_idx]; i++) { - const struct transform *xform = &table->transforms[xform_idx][i]; + for (const struct transform *xform = &table->transforms[table->transform_offsets[xform_idx]]; + xform->condition_offset != ~0; + xform++) { if (condition_flags[xform->condition_offset] && !(table->values[xform->search].expression.inexact && ignore_inexact) && nir_replace_instr(build, alu, range_ht, states, table, diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index 67fe7d0a516..2a7e9a2f0fe 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -197,8 +197,10 @@ typedef bool (*nir_search_variable_cond)(struct hash_table *range_ht, /* Generated data table for an algebraic optimization pass. */ typedef struct { - const struct transform **transforms; - const uint16_t *transform_counts; + /** Array of all transforms in the pass. */ + const struct transform *transforms; + /** Mapping from automaton state index to location in *transforms. */ + const uint16_t *transform_offsets; const struct per_op_table *pass_op_table; const nir_search_value_union *values;