agx: Add CSE optimization pass

Ported from the Bifrost compiler, in turn based on the ir3 one. This
cleans up a lot of junk we emit during NIR->AGX and will help with some
SSA RA troubles.

total instructions in shared programs: 34803 -> 34381 (-1.21%)
instructions in affected programs: 18652 -> 18230 (-2.26%)
helped: 198
HURT: 0
helped stats (abs) min: 1.0 max: 28.0 x̄: 2.13 x̃: 1
helped stats (rel) min: 0.31% max: 12.50% x̄: 3.94% x̃: 2.78%
95% mean confidence interval for instructions value: -2.45 -1.81
95% mean confidence interval for instructions %-change: -4.40% -3.48%
Instructions are helped.

total bytes in shared programs: 238094 -> 234824 (-1.37%)
bytes in affected programs: 126472 -> 123202 (-2.59%)
helped: 200
HURT: 0
helped stats (abs) min: 6.0 max: 168.0 x̄: 16.35 x̃: 8
helped stats (rel) min: 0.37% max: 17.65% x̄: 4.25% x̃: 3.38%
95% mean confidence interval for bytes value: -18.49 -14.21
95% mean confidence interval for bytes %-change: -4.67% -3.84%
Bytes are helped.

total halfregs in shared programs: 10078 -> 10107 (0.29%)
halfregs in affected programs: 565 -> 594 (5.13%)
helped: 22
HURT: 22
helped stats (abs) min: 1.0 max: 4.0 x̄: 1.23 x̃: 1
helped stats (rel) min: 5.71% max: 25.00% x̄: 23.38% x̃: 25.00%
HURT stats (abs)   min: 2.0 max: 4.0 x̄: 2.55 x̃: 2
HURT stats (rel)   min: 4.44% max: 30.77% x̄: 15.61% x̃: 12.73%
95% mean confidence interval for halfregs value: 0.03 1.28
95% mean confidence interval for halfregs %-change: -10.17% 2.40%
Inconclusive result (%-change mean confidence interval includes 0).

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19590>
This commit is contained in:
Alyssa Rosenzweig
2022-11-04 23:27:28 -04:00
committed by Marge Bot
parent 4387d0886d
commit dea00bcc8f
4 changed files with 161 additions and 0 deletions

View File

@@ -1816,6 +1816,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
if (likely(!(agx_debug & AGX_DBG_NOOPT))) {
agx_optimizer(ctx);
agx_opt_cse(ctx);
agx_dce(ctx);
agx_validate(ctx, "Optimization");

View File

@@ -82,6 +82,7 @@ agx_size_align_16(enum agx_size size)
unreachable("Invalid size");
}
/* Keep synced with hash_index */
typedef struct {
/* Sufficient for as many SSA values as we need. Immediates and uniforms fit in 16-bits */
unsigned value : 22;
@@ -269,6 +270,7 @@ enum agx_lod_mode {
/* Forward declare for branch target */
struct agx_block;
/* Keep synced with hash_instr */
typedef struct {
/* Must be first */
struct list_head link;
@@ -768,6 +770,7 @@ void agx_print_block(agx_block *block, FILE *fp);
void agx_print_shader(agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_dce(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);

View File

@@ -0,0 +1,156 @@
/*
* Copyright 2022 Alyssa Rosenzweig
* Copyright 2021 Collabora, Ltd.
* Copyright 2014 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler.h"
#include "agx_builder.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
/*
 * This pass handles CSE'ing repeated expressions created in the process of
 * translating from NIR. Currently this is intra-block only; to make it work
 * across multiple blocks, we'd need to bring forward dominance calculation.
 */
/*
 * Mix one unsigned word into a running hash. The previous hash value is handed
 * to XXH32 as its third argument, so successive calls chain together.
 */
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
   const void *bytes = &data;

   return XXH32(bytes, sizeof(data), hash);
}
/*
 * Mix an operand into a running hash and return the updated hash.
 *
 * Only value, abs, neg, size and type are hashed, in that order. The kill,
 * cache and discard flags are asserted to be clear instead of being hashed
 * (per the assert message, because CSE runs early).
 */
static uint32_t
hash_index(uint32_t hash, agx_index index)
{
   assert(!index.kill && "CSE is run early");
   assert(!index.cache && "CSE is run early");
   assert(!index.discard && "CSE is run early");

   /* Hashed fields, listed in the order they are mixed in */
   const unsigned fields[] = {
      index.value, index.abs, index.neg, index.size, index.type,
   };

   for (unsigned f = 0; f < sizeof(fields) / sizeof(fields[0]); ++f)
      hash = HASH(hash, fields[f]);

   return hash;
}
/* Hash an ALU instruction. */
static uint32_t
hash_instr(const void *data)
{
const agx_instr *I = data;
uint32_t hash = 0;
hash = HASH(hash, I->op);
hash = HASH(hash, I->nr_dests);
hash = HASH(hash, I->nr_srcs);
/* Explcitly skip destinations, except for size and type */
agx_foreach_dest(I, d) {
hash = HASH(hash, I->dest[d].type);
hash = HASH(hash, I->dest[d].size);
}
agx_foreach_src(I, s) {
hash = hash_index(hash, I->src[s]);
}
/* Explicitly skip last, scoreboard, nest */
hash = HASH(hash, I->imm);
hash = HASH(hash, I->perspective);
hash = HASH(hash, I->invert_cond);
hash = HASH(hash, I->dim);
hash = HASH(hash, I->offset);
hash = HASH(hash, I->shadow);
hash = HASH(hash, I->shift);
hash = HASH(hash, I->saturate);
hash = HASH(hash, I->mask);
return hash;
}
/*
 * Equality callback for the CSE set: returns true when two instructions match
 * on everything this pass compares. It checks the same fields that hash_instr
 * hashes.
 */
static bool
instrs_equal(const void *_i1, const void *_i2)
{
   const agx_instr *a = _i1;
   const agx_instr *b = _i2;

   if (a->op != b->op || a->nr_srcs != b->nr_srcs ||
       a->nr_dests != b->nr_dests)
      return false;

   /* For destinations, only size and type are compared */
   agx_foreach_dest(a, d) {
      if (a->dest[d].type != b->dest[d].type ||
          a->dest[d].size != b->dest[d].size)
         return false;
   }

   /* Sources must match bytewise; compare local copies of each operand */
   agx_foreach_src(a, s) {
      agx_index lhs = a->src[s];
      agx_index rhs = b->src[s];

      if (memcmp(&lhs, &rhs, sizeof(lhs)) != 0)
         return false;
   }

   return a->imm == b->imm &&
          a->perspective == b->perspective &&
          a->invert_cond == b->invert_cond &&
          a->dim == b->dim &&
          a->offset == b->offset &&
          a->shadow == b->shadow &&
          a->shift == b->shift &&
          a->saturate == b->saturate &&
          a->mask == b->mask;
}
/*
 * Decide which instructions take part in CSE, and therefore which ones the
 * hashing and comparison routines have to handle. An opcode qualifies only
 * if its opcode-info entry has both can_eliminate and can_reorder set.
 */
static bool
instr_can_cse(const agx_instr *I)
{
   if (!agx_opcodes_info[I->op].can_eliminate)
      return false;

   return agx_opcodes_info[I->op].can_reorder;
}
/*
 * Block-local common subexpression elimination.
 *
 * Each block is walked in order, with a set of the CSE-able instructions seen
 * so far in that block. When an instruction matches an earlier one, its
 * destinations are recorded as replaced by the earlier instruction's
 * destinations, and later sources in the same block are rewritten through
 * that mapping. The duplicate instruction itself is not removed here; once
 * its results are unused it is left for a later dead-code pass.
 *
 * This pass is only an optimization. If an allocation fails, the remaining
 * blocks are left unoptimized rather than dereferencing a NULL pointer.
 */
void
agx_opt_cse(agx_context *ctx)
{
   struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);

   if (instr_set == NULL)
      return;

   agx_foreach_block(ctx, block) {
      /* Map from SSA value to replacement, zero-initialized; entries for
       * which agx_is_null() holds mean "no replacement". Sized by ctx->alloc
       * and indexed by index value.
       */
      agx_index *replacement = calloc(ctx->alloc, sizeof(*replacement));

      /* Stopping between blocks is safe: every rewrite done so far is
       * complete on its own.
       */
      if (replacement == NULL)
         break;

      /* Matches are only valid within a single block */
      _mesa_set_clear(instr_set, NULL);

      agx_foreach_instr_in_block(block, instr) {
         /* Rewrite as we go so we converge locally in 1 iteration */
         agx_foreach_ssa_src(instr, s) {
            agx_index repl = replacement[instr->src[s].value];

            if (!agx_is_null(repl))
               agx_replace_src(instr, s, repl);
         }

         if (!instr_can_cse(instr))
            continue;

         bool found = false;
         struct set_entry *entry =
            _mesa_set_search_or_add(instr_set, instr, &found);

         if (found) {
            /* An equivalent instruction came earlier in this block: point
             * each of our destinations at its corresponding destination.
             */
            const agx_instr *match = entry->key;

            agx_foreach_dest(instr, d) {
               replacement[instr->dest[d].value] = match->dest[d];
            }
         }
      }

      free(replacement);
   }

   _mesa_set_destroy(instr_set, NULL);
}

View File

@@ -32,6 +32,7 @@ libasahi_agx_files = files(
'agx_lower_pseudo.c',
'agx_pack.c',
'agx_print.c',
'agx_opt_cse.c',
'agx_optimizer.c',
'agx_register_allocate.c',
'agx_uniforms.c',