agx: Add CSE optimization pass
Ported from the Bifrost compiler, in turn based on the ir3 one. This cleans up a lot of junk we emit during NIR->AGX and will help with some SSA RA troubles. total instructions in shared programs: 34803 -> 34381 (-1.21%) instructions in affected programs: 18652 -> 18230 (-2.26%) helped: 198 HURT: 0 helped stats (abs) min: 1.0 max: 28.0 x̄: 2.13 x̃: 1 helped stats (rel) min: 0.31% max: 12.50% x̄: 3.94% x̃: 2.78% 95% mean confidence interval for instructions value: -2.45 -1.81 95% mean confidence interval for instructions %-change: -4.40% -3.48% Instructions are helped. total bytes in shared programs: 238094 -> 234824 (-1.37%) bytes in affected programs: 126472 -> 123202 (-2.59%) helped: 200 HURT: 0 helped stats (abs) min: 6.0 max: 168.0 x̄: 16.35 x̃: 8 helped stats (rel) min: 0.37% max: 17.65% x̄: 4.25% x̃: 3.38% 95% mean confidence interval for bytes value: -18.49 -14.21 95% mean confidence interval for bytes %-change: -4.67% -3.84% Bytes are helped. total halfregs in shared programs: 10078 -> 10107 (0.29%) halfregs in affected programs: 565 -> 594 (5.13%) helped: 22 HURT: 22 helped stats (abs) min: 1.0 max: 4.0 x̄: 1.23 x̃: 1 helped stats (rel) min: 5.71% max: 25.00% x̄: 23.38% x̃: 25.00% HURT stats (abs) min: 2.0 max: 4.0 x̄: 2.55 x̃: 2 HURT stats (rel) min: 4.44% max: 30.77% x̄: 15.61% x̃: 12.73% 95% mean confidence interval for halfregs value: 0.03 1.28 95% mean confidence interval for halfregs %-change: -10.17% 2.40% Inconclusive result (%-change mean confidence interval includes 0). Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:

committed by
Marge Bot

parent
4387d0886d
commit
dea00bcc8f
@@ -1816,6 +1816,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
|
||||
|
||||
if (likely(!(agx_debug & AGX_DBG_NOOPT))) {
|
||||
agx_optimizer(ctx);
|
||||
agx_opt_cse(ctx);
|
||||
agx_dce(ctx);
|
||||
agx_validate(ctx, "Optimization");
|
||||
|
||||
|
@@ -82,6 +82,7 @@ agx_size_align_16(enum agx_size size)
|
||||
unreachable("Invalid size");
|
||||
}
|
||||
|
||||
/* Keep synced with hash_index */
|
||||
typedef struct {
|
||||
/* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */
|
||||
unsigned value : 22;
|
||||
@@ -269,6 +270,7 @@ enum agx_lod_mode {
|
||||
/* Forward declare for branch target */
|
||||
struct agx_block;
|
||||
|
||||
/* Keep synced with hash_instr */
|
||||
typedef struct {
|
||||
/* Must be first */
|
||||
struct list_head link;
|
||||
@@ -768,6 +770,7 @@ void agx_print_block(agx_block *block, FILE *fp);
|
||||
void agx_print_shader(agx_context *ctx, FILE *fp);
|
||||
void agx_optimizer(agx_context *ctx);
|
||||
void agx_lower_pseudo(agx_context *ctx);
|
||||
void agx_opt_cse(agx_context *ctx);
|
||||
void agx_dce(agx_context *ctx);
|
||||
void agx_ra(agx_context *ctx);
|
||||
void agx_lower_64bit_postra(agx_context *ctx);
|
||||
|
156
src/asahi/compiler/agx_opt_cse.c
Normal file
156
src/asahi/compiler/agx_opt_cse.c
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Copyright 2022 Alyssa Rosenzweig
|
||||
* Copyright 2021 Collabora, Ltd.
|
||||
* Copyright 2014 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
#include "agx_builder.h"
|
||||
|
||||
#define XXH_INLINE_ALL
|
||||
#include "xxhash.h"
|
||||
|
||||
/*
|
||||
* This pass handles CSE'ing repeated expressions created in the process of
|
||||
* translating from NIR. Also, currently this is intra-block only; to make it
|
||||
* work over multiple blocks we'd need to bring forward dominance calculation.
|
||||
*/
|
||||
|
||||
/*
 * Fold one word of data into a running hash. The hash accumulated so far is
 * passed to XXH32 as the seed, so successive calls chain together.
 */
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
   const uint32_t seed = hash;

   return XXH32(&data, sizeof data, seed);
}
|
||||
|
||||
static uint32_t
|
||||
hash_index(uint32_t hash, agx_index index)
|
||||
{
|
||||
assert(!index.kill && "CSE is run early");
|
||||
assert(!index.cache && "CSE is run early");
|
||||
assert(!index.discard && "CSE is run early");
|
||||
|
||||
hash = HASH(hash, index.value);
|
||||
hash = HASH(hash, index.abs);
|
||||
hash = HASH(hash, index.neg);
|
||||
hash = HASH(hash, index.size);
|
||||
hash = HASH(hash, index.type);
|
||||
return hash;
|
||||
}
|
||||
|
||||
/* Hash an ALU instruction. */
|
||||
static uint32_t
|
||||
hash_instr(const void *data)
|
||||
{
|
||||
const agx_instr *I = data;
|
||||
uint32_t hash = 0;
|
||||
|
||||
hash = HASH(hash, I->op);
|
||||
hash = HASH(hash, I->nr_dests);
|
||||
hash = HASH(hash, I->nr_srcs);
|
||||
|
||||
/* Explcitly skip destinations, except for size and type */
|
||||
agx_foreach_dest(I, d) {
|
||||
hash = HASH(hash, I->dest[d].type);
|
||||
hash = HASH(hash, I->dest[d].size);
|
||||
}
|
||||
|
||||
agx_foreach_src(I, s) {
|
||||
hash = hash_index(hash, I->src[s]);
|
||||
}
|
||||
|
||||
/* Explicitly skip last, scoreboard, nest */
|
||||
|
||||
hash = HASH(hash, I->imm);
|
||||
hash = HASH(hash, I->perspective);
|
||||
hash = HASH(hash, I->invert_cond);
|
||||
hash = HASH(hash, I->dim);
|
||||
hash = HASH(hash, I->offset);
|
||||
hash = HASH(hash, I->shadow);
|
||||
hash = HASH(hash, I->shift);
|
||||
hash = HASH(hash, I->saturate);
|
||||
hash = HASH(hash, I->mask);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static bool
|
||||
instrs_equal(const void *_i1, const void *_i2)
|
||||
{
|
||||
const agx_instr *i1 = _i1, *i2 = _i2;
|
||||
|
||||
if (i1->op != i2->op) return false;
|
||||
if (i1->nr_srcs != i2->nr_srcs) return false;
|
||||
if (i1->nr_dests != i2->nr_dests) return false;
|
||||
|
||||
/* Explicitly skip everything but size and type */
|
||||
agx_foreach_dest(i1, d) {
|
||||
if (i1->dest[d].type != i2->dest[d].type) return false;
|
||||
if (i1->dest[d].size != i2->dest[d].size) return false;
|
||||
}
|
||||
|
||||
agx_foreach_src(i1, s) {
|
||||
agx_index s1 = i1->src[s], s2 = i2->src[s];
|
||||
|
||||
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (i1->imm != i2->imm) return false;
|
||||
if (i1->perspective != i2->perspective) return false;
|
||||
if (i1->invert_cond != i2->invert_cond) return false;
|
||||
if (i1->dim != i2->dim) return false;
|
||||
if (i1->offset != i2->offset) return false;
|
||||
if (i1->shadow != i2->shadow) return false;
|
||||
if (i1->shift != i2->shift) return false;
|
||||
if (i1->saturate != i2->saturate) return false;
|
||||
if (i1->mask != i2->mask) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Determines what instructions the above routines have to handle */
|
||||
static bool
|
||||
instr_can_cse(const agx_instr *I)
|
||||
{
|
||||
return agx_opcodes_info[I->op].can_eliminate &&
|
||||
agx_opcodes_info[I->op].can_reorder;
|
||||
}
|
||||
|
||||
void
|
||||
agx_opt_cse(agx_context *ctx)
|
||||
{
|
||||
struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);
|
||||
|
||||
agx_foreach_block(ctx, block) {
|
||||
agx_index *replacement = calloc(sizeof(agx_index), ctx->alloc);
|
||||
_mesa_set_clear(instr_set, NULL);
|
||||
|
||||
agx_foreach_instr_in_block(block, instr) {
|
||||
/* Rewrite as we go so we converge locally in 1 iteration */
|
||||
agx_foreach_ssa_src(instr, s) {
|
||||
agx_index repl = replacement[instr->src[s].value];
|
||||
if (!agx_is_null(repl))
|
||||
agx_replace_src(instr, s, repl);
|
||||
}
|
||||
|
||||
if (!instr_can_cse(instr))
|
||||
continue;
|
||||
|
||||
bool found;
|
||||
struct set_entry *entry =
|
||||
_mesa_set_search_or_add(instr_set, instr, &found);
|
||||
if (found) {
|
||||
const agx_instr *match = entry->key;
|
||||
|
||||
agx_foreach_dest(instr, d) {
|
||||
replacement[instr->dest[d].value] = match->dest[d];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(replacement);
|
||||
}
|
||||
|
||||
_mesa_set_destroy(instr_set, NULL);
|
||||
}
|
@@ -32,6 +32,7 @@ libasahi_agx_files = files(
|
||||
'agx_lower_pseudo.c',
|
||||
'agx_pack.c',
|
||||
'agx_print.c',
|
||||
'agx_opt_cse.c',
|
||||
'agx_optimizer.c',
|
||||
'agx_register_allocate.c',
|
||||
'agx_uniforms.c',
|
||||
|
Reference in New Issue
Block a user