From 28801b4849371e633eb66dd2c802d44dbb9e3304 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 17 Apr 2021 10:29:27 -0400 Subject: [PATCH] agx: Add forward optimizing pass for fmov Explain the ideas behind our SSA-based optimizer (inspired by ACO's, thank you to Daniel Schuermann for discussing this with me in the context of Bifrost), and implement the subset needed to propagate abs/neg through. Signed-off-by: Alyssa Rosenzweig Acked-by: Jason Ekstrand Acked-by: Bas Nieuwenhuizen Part-of: --- src/asahi/compiler/agx_compile.c | 5 ++ src/asahi/compiler/agx_compiler.h | 1 + src/asahi/compiler/agx_optimizer.c | 132 +++++++++++++++++++++++++++++ src/asahi/compiler/meson.build | 1 + 4 files changed, 139 insertions(+) create mode 100644 src/asahi/compiler/agx_optimizer.c diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 7def357336c..0298abecc20 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -710,6 +710,11 @@ agx_compile_shader_nir(nir_shader *nir, agx_foreach_block(ctx, block) block->name = block_source_count++; + if (agx_debug & AGX_DBG_SHADERS && !skip_internal) + agx_print_shader(ctx, stdout); + + agx_optimizer(ctx); + if (agx_debug & AGX_DBG_SHADERS && !skip_internal) agx_print_shader(ctx, stdout); diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index a454b78e74b..722f8e78cb4 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -547,6 +547,7 @@ agx_builder_insert(agx_cursor *cursor, agx_instr *I) void agx_print_instr(agx_instr *I, FILE *fp); void agx_print_block(agx_block *block, FILE *fp); void agx_print_shader(agx_context *ctx, FILE *fp); +void agx_optimizer(agx_context *ctx); void agx_ra(agx_context *ctx); void agx_pack(agx_context *ctx, struct util_dynarray *emission); diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c new file mode 100644 index 00000000000..7eb2b920e99 
--- /dev/null +++ b/src/asahi/compiler/agx_optimizer.c @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2021 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "agx_compiler.h" + +/* AGX peephole optimizer responsible for instruction combining. It operates in + * a forward direction and a backward direction, in each case traversing in + * source order. SSA means the forward pass satisfies the invariant: + * + * Every def is visited before any of its uses. + * + * Dually, the backward pass satisfies the invariant: + * + * Every use of a def is visited before the def. + * + * This means the forward pass can propagate modifiers forward, whereas the + * backwards pass propagates modifiers backward. 
Consider an example: + * + * 1 = fabs 0 + * 2 = fround 1 + * 3 = fsat 2 + * + * The forwards pass would propagate the fabs to the fround (since we can + * look up the fabs from the fround source and do the replacement). By contrast + * the backwards pass would propagate the fsat back to the fround (since when + * we see the fround we know it has only a single user, fsat). Propagatable + * instructions have natural directions (like pushforwards and pullbacks). + * + * We are careful to update the tracked state whenever we modify an instruction + * to ensure the passes are linear-time and converge in a single iteration. + * + * Size conversions are worth special discussion. Consider the snippet: + * + * 2 = fadd 0, 1 + * 3 = f2f16 2 + * 4 = fround 3 + * + * A priori, we can move the f2f16 in either direction. But it's not equal -- + * if we move it up to the fadd, we get FP16 for two instructions, whereas if + * we push it into the fround, we effectively get FP32 for two instructions. So + * f2f16 is backwards. Likewise, consider + * + * 2 = fadd 0, 1 + * 3 = f2f32 2 + * 4 = fround 3 + * + * This time if we move f2f32 up to the fadd, we get FP32 for two, but if we + * move it down to the fround, we get FP16 for two. So f2f32 is forwards. 
+ */
+
+/* Returns true if def is what this pass treats as a floating-point move: an
+ * FADD whose second source is equivalent to agx_negzero() (presumably the
+ * -0.0 immediate, so the add leaves the first source's value unchanged).
+ * The saturate flag is not inspected here; callers check it separately.
+ */
+
+static bool
+agx_is_fmov(agx_instr *def)
+{
+   return (def->op == AGX_OPCODE_FADD)
+      && agx_is_equiv(def->src[1], agx_negzero());
+}
+
+/* Compose floating-point modifiers with floating-point sources.
+ *
+ * `to` is the source as written at the use, so its abs/neg are the outer
+ * modifiers. `from` is the source of the fmov being folded, so its abs/neg
+ * are the inner modifiers. Returns `from` with abs/neg replaced by the
+ * combined modifiers; every other field of `from` is returned unchanged and
+ * no other field of `to` is consulted.
+ */
+
+static agx_index
+agx_compose_float_src(agx_index to, agx_index from)
+{
+   /* An outer abs discards whatever sign the inner source produced */
+   if (to.abs) {
+      from.neg = false;
+      from.abs = true;
+   }
+
+   /* Negations cancel in pairs, -(-x) == x, so combine with xor rather than
+    * or. After an outer abs the inner neg was cleared above, so this reduces
+    * to taking the outer neg.
+    */
+   from.neg ^= to.neg;
+
+   return from;
+}
+
+/* For each of the first `srcs` sources of `ins` that reads the result of an
+ * unsaturated fmov, rewrite that source to read the fmov's own source
+ * instead, folding the abs/neg modifiers together.
+ *
+ * `defs` is indexed by SSA value and holds the instruction recorded as
+ * defining it, or NULL if no definition has been recorded.
+ */
+
+static void
+agx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)
+{
+   for (unsigned s = 0; s < srcs; ++s) {
+      agx_index src = ins->src[s];
+      if (src.type != AGX_INDEX_NORMAL) continue;
+
+      /* The table starts zeroed, so a value whose def has not been recorded
+       * is NULL. There is nothing to look through in that case.
+       */
+      agx_instr *def = defs[src.value];
+      if (def == NULL) continue;
+
+      if (!agx_is_fmov(def)) continue;
+
+      /* Only abs/neg are composed into the source, so an fmov that also
+       * saturates has to stay.
+       */
+      if (def->saturate) continue;
+
+      ins->src[s] = agx_compose_float_src(src, def->src[0]);
+   }
+}
+
+/* Forward pass: walk every instruction in the shader, record which
+ * instruction writes each destination, and fold fmovs into the sources of
+ * instructions flagged is_float in the opcode table.
+ *
+ * The lookup table has ctx->alloc entries (presumably the number of SSA
+ * values allocated so far) and lives only for the duration of the pass.
+ */
+
+static void
+agx_optimizer_forward(agx_context *ctx)
+{
+   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));
+
+   /* This pass only rewrites sources as an optimization, so if the table
+    * cannot be allocated, leave the shader untouched instead of crashing.
+    */
+   if (defs == NULL)
+      return;
+
+   agx_foreach_instr_global(ctx, I) {
+      struct agx_opcode_info info = agx_opcodes_info[I->op];
+
+      /* Record defs before looking at sources so later uses can find them */
+      for (unsigned d = 0; d < info.nr_dests; ++d) {
+         assert(I->dest[d].type == AGX_INDEX_NORMAL);
+         defs[I->dest[d].value] = I;
+      }
+
+      /* Propagate fmov down */
+      if (info.is_float)
+         agx_optimizer_fmov(defs, I, info.nr_srcs);
+   }
+
+   free(defs);
+}
+
+/* Public entry point of the optimizer. Currently runs only the forward pass. */
+
+void
+agx_optimizer(agx_context *ctx)
+{
+   agx_optimizer_forward(ctx);
+}
diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build
index 0bd65486d06..760f273962f 100644
--- a/src/asahi/compiler/meson.build
+++ b/src/asahi/compiler/meson.build
@@ -23,6 +23,7 @@ libasahi_agx_files = files(
   'agx_compile.c',
   'agx_pack.c',
   'agx_print.c',
+  'agx_optimizer.c',
   'agx_register_allocate.c',
 )