From dea00bcc8fa16c103f3c8bc9598c480073dabaf6 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 4 Nov 2022 23:27:28 -0400 Subject: [PATCH] agx: Add CSE optimization pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ported from the Bifrost compiler, in turn based on the ir3 one. This cleans up a lot of junk we emit during NIR->AGX and will help with some SSA RA troubles. total instructions in shared programs: 34803 -> 34381 (-1.21%) instructions in affected programs: 18652 -> 18230 (-2.26%) helped: 198 HURT: 0 helped stats (abs) min: 1.0 max: 28.0 x̄: 2.13 x̃: 1 helped stats (rel) min: 0.31% max: 12.50% x̄: 3.94% x̃: 2.78% 95% mean confidence interval for instructions value: -2.45 -1.81 95% mean confidence interval for instructions %-change: -4.40% -3.48% Instructions are helped. total bytes in shared programs: 238094 -> 234824 (-1.37%) bytes in affected programs: 126472 -> 123202 (-2.59%) helped: 200 HURT: 0 helped stats (abs) min: 6.0 max: 168.0 x̄: 16.35 x̃: 8 helped stats (rel) min: 0.37% max: 17.65% x̄: 4.25% x̃: 3.38% 95% mean confidence interval for bytes value: -18.49 -14.21 95% mean confidence interval for bytes %-change: -4.67% -3.84% Bytes are helped. total halfregs in shared programs: 10078 -> 10107 (0.29%) halfregs in affected programs: 565 -> 594 (5.13%) helped: 22 HURT: 22 helped stats (abs) min: 1.0 max: 4.0 x̄: 1.23 x̃: 1 helped stats (rel) min: 5.71% max: 25.00% x̄: 23.38% x̃: 25.00% HURT stats (abs) min: 2.0 max: 4.0 x̄: 2.55 x̃: 2 HURT stats (rel) min: 4.44% max: 30.77% x̄: 15.61% x̃: 12.73% 95% mean confidence interval for halfregs value: 0.03 1.28 95% mean confidence interval for halfregs %-change: -10.17% 2.40% Inconclusive result (%-change mean confidence interval includes 0). 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 1 + src/asahi/compiler/agx_compiler.h | 3 + src/asahi/compiler/agx_opt_cse.c | 156 ++++++++++++++++++++++++++++++ src/asahi/compiler/meson.build | 1 + 4 files changed, 161 insertions(+) create mode 100644 src/asahi/compiler/agx_opt_cse.c diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index e4df016a258..31e59ae355e 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1816,6 +1816,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl, if (likely(!(agx_debug & AGX_DBG_NOOPT))) { agx_optimizer(ctx); + agx_opt_cse(ctx); agx_dce(ctx); agx_validate(ctx, "Optimization"); diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index b0135ac0cbd..517673c182c 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -82,6 +82,7 @@ agx_size_align_16(enum agx_size size) unreachable("Invalid size"); } +/* Keep synced with hash_index */ typedef struct { /* Sufficient for as many SSA values as we need. 
Immediates and uniforms fit in 16-bits */ unsigned value : 22; @@ -269,6 +270,7 @@ enum agx_lod_mode { /* Forward declare for branch target */ struct agx_block; +/* Keep synced with hash_instr */ typedef struct { /* Must be first */ struct list_head link; @@ -768,6 +770,7 @@ void agx_print_block(agx_block *block, FILE *fp); void agx_print_shader(agx_context *ctx, FILE *fp); void agx_optimizer(agx_context *ctx); void agx_lower_pseudo(agx_context *ctx); +void agx_opt_cse(agx_context *ctx); void agx_dce(agx_context *ctx); void agx_ra(agx_context *ctx); void agx_lower_64bit_postra(agx_context *ctx); diff --git a/src/asahi/compiler/agx_opt_cse.c b/src/asahi/compiler/agx_opt_cse.c new file mode 100644 index 00000000000..07911258a7a --- /dev/null +++ b/src/asahi/compiler/agx_opt_cse.c @@ -0,0 +1,156 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * Copyright 2021 Collabora, Ltd. + * Copyright 2014 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "compiler.h" +#include "agx_builder.h" + +#define XXH_INLINE_ALL +#include "xxhash.h" + +/* + * This pass handles CSE'ing repeated expressions created in the process of + * translating from NIR. Also, currently this is intra-block only; to make it + * work over multiple blocks we'd need to bring forward dominance calculation. + */ + +static inline uint32_t +HASH(uint32_t hash, unsigned data) +{ + return XXH32(&data, sizeof(data), hash); +} + +static uint32_t +hash_index(uint32_t hash, agx_index index) +{ + assert(!index.kill && "CSE is run early"); + assert(!index.cache && "CSE is run early"); + assert(!index.discard && "CSE is run early"); + + hash = HASH(hash, index.value); + hash = HASH(hash, index.abs); + hash = HASH(hash, index.neg); + hash = HASH(hash, index.size); + hash = HASH(hash, index.type); + return hash; +} + +/* Hash an ALU instruction. 
*/ +static uint32_t +hash_instr(const void *data) +{ + const agx_instr *I = data; + uint32_t hash = 0; + + hash = HASH(hash, I->op); + hash = HASH(hash, I->nr_dests); + hash = HASH(hash, I->nr_srcs); + + /* Explicitly skip destinations, except for size and type */ + agx_foreach_dest(I, d) { + hash = HASH(hash, I->dest[d].type); + hash = HASH(hash, I->dest[d].size); + } + + agx_foreach_src(I, s) { + hash = hash_index(hash, I->src[s]); + } + + /* Explicitly skip last, scoreboard, nest */ + + hash = HASH(hash, I->imm); + hash = HASH(hash, I->perspective); + hash = HASH(hash, I->invert_cond); + hash = HASH(hash, I->dim); + hash = HASH(hash, I->offset); + hash = HASH(hash, I->shadow); + hash = HASH(hash, I->shift); + hash = HASH(hash, I->saturate); + hash = HASH(hash, I->mask); + + return hash; +} + +static bool +instrs_equal(const void *_i1, const void *_i2) +{ + const agx_instr *i1 = _i1, *i2 = _i2; + + if (i1->op != i2->op) return false; + if (i1->nr_srcs != i2->nr_srcs) return false; + if (i1->nr_dests != i2->nr_dests) return false; + + /* Explicitly skip everything but size and type */ + agx_foreach_dest(i1, d) { + if (i1->dest[d].type != i2->dest[d].type) return false; + if (i1->dest[d].size != i2->dest[d].size) return false; + } + + agx_foreach_src(i1, s) { + agx_index s1 = i1->src[s], s2 = i2->src[s]; + + if (memcmp(&s1, &s2, sizeof(s1)) != 0) + return false; + } + + if (i1->imm != i2->imm) return false; + if (i1->perspective != i2->perspective) return false; + if (i1->invert_cond != i2->invert_cond) return false; + if (i1->dim != i2->dim) return false; + if (i1->offset != i2->offset) return false; + if (i1->shadow != i2->shadow) return false; + if (i1->shift != i2->shift) return false; + if (i1->saturate != i2->saturate) return false; + if (i1->mask != i2->mask) return false; + + return true; +} + +/* Determines what instructions the above routines have to handle */ +static bool +instr_can_cse(const agx_instr *I) +{ + return agx_opcodes_info[I->op].can_eliminate 
&& + agx_opcodes_info[I->op].can_reorder; +} + +void +agx_opt_cse(agx_context *ctx) +{ + struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal); + + agx_foreach_block(ctx, block) { + agx_index *replacement = calloc(sizeof(agx_index), ctx->alloc); + _mesa_set_clear(instr_set, NULL); + + agx_foreach_instr_in_block(block, instr) { + /* Rewrite as we go so we converge locally in 1 iteration */ + agx_foreach_ssa_src(instr, s) { + agx_index repl = replacement[instr->src[s].value]; + if (!agx_is_null(repl)) + agx_replace_src(instr, s, repl); + } + + if (!instr_can_cse(instr)) + continue; + + bool found; + struct set_entry *entry = + _mesa_set_search_or_add(instr_set, instr, &found); + if (found) { + const agx_instr *match = entry->key; + + agx_foreach_dest(instr, d) { + replacement[instr->dest[d].value] = match->dest[d]; + } + } + } + + free(replacement); + } + + _mesa_set_destroy(instr_set, NULL); +} diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build index c5c7598e796..4ab662147d9 100644 --- a/src/asahi/compiler/meson.build +++ b/src/asahi/compiler/meson.build @@ -32,6 +32,7 @@ libasahi_agx_files = files( 'agx_lower_pseudo.c', 'agx_pack.c', 'agx_print.c', + 'agx_opt_cse.c', 'agx_optimizer.c', 'agx_register_allocate.c', 'agx_uniforms.c',