nir/load_store_vectorize: improve vectorization with identical operations

There may be several identical candidates to vectorize an entry with, but
only one of them may actually be vectorizable because of interfering writes.

An example of this is a pattern found in some CTS tests:
a = load(0)
b = load(4)
store(0, a)
store(4, b)
a = load(0)
b = load(4)
store(0, a)
store(4, b)
...

Previously, vectorize_entries() might have attempted to vectorize the first
load(0) with only one of the two load(4) candidates, without trying the other
load(4) when that attempt failed. This changes vectorize_entries() to
continue with the remaining candidates even if the first try_vectorize()
fails.

fossil-db (Navi):
Totals from 117 (0.09% of 137413) affected shaders:
SGPRs: 7040 -> 7088 (+0.68%)
CodeSize: 276504 -> 276308 (-0.07%); split: -0.08%, +0.01%
Instrs: 51152 -> 51111 (-0.08%); split: -0.09%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5415>
This commit is contained in:
Rhys Perry
2020-06-10 11:47:55 +01:00
committed by Marge Bot
parent d9c4ec9154
commit ee073cb543

View File

@@ -1144,23 +1144,33 @@ vectorize_entries(struct vectorize_ctx *ctx, nir_function_impl *impl, struct has
util_dynarray_num_elements(arr, struct entry *),
sizeof(struct entry *), &sort_entries);
unsigned i = 0;
for (; i < util_dynarray_num_elements(arr, struct entry*) - 1; i++) {
struct entry *low = *util_dynarray_element(arr, struct entry *, i);
struct entry *high = *util_dynarray_element(arr, struct entry *, i + 1);
unsigned num_entries = util_dynarray_num_elements(arr, struct entry *);
uint64_t diff = high->offset_signed - low->offset_signed;
if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
for (unsigned first_idx = 0; first_idx < num_entries; first_idx++) {
struct entry *low = *util_dynarray_element(arr, struct entry *, first_idx);
if (!low)
continue;
struct entry *first = low->index < high->index ? low : high;
struct entry *second = low->index < high->index ? high : low;
for (unsigned second_idx = first_idx + 1; second_idx < num_entries; second_idx++) {
struct entry *high = *util_dynarray_element(arr, struct entry *, second_idx);
if (!high)
continue;
if (try_vectorize(impl, ctx, low, high, first, second)) {
*util_dynarray_element(arr, struct entry *, i) = NULL;
*util_dynarray_element(arr, struct entry *, i + 1) = low->is_store ? second : first;
progress = true;
uint64_t diff = high->offset_signed - low->offset_signed;
if (diff > get_bit_size(low) / 8u * low->intrin->num_components)
break;
struct entry *first = low->index < high->index ? low : high;
struct entry *second = low->index < high->index ? high : low;
if (try_vectorize(impl, ctx, low, high, first, second)) {
low = low->is_store ? second : first;
*util_dynarray_element(arr, struct entry *, second_idx) = NULL;
progress = true;
}
}
*util_dynarray_element(arr, struct entry *, first_idx) = low;
}
util_dynarray_foreach(arr, struct entry *, elem) {