agx: Add forward optimizing pass for fmov

Explain the ideas behind our SSA-based optimizer (inspired by ACO's,
thank you to Daniel Schuermann for discussing this with me in the
context of Bifrost), and implement the subset needed to propagate
abs/neg through.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10582>
Authored by Alyssa Rosenzweig on 2021-04-17 10:29:27 -04:00
Committed by Alyssa Rosenzweig
parent e50bae00f4, commit 28801b4849
4 changed files with 139 additions and 0 deletions

agx_compile.c

@@ -710,6 +710,11 @@ agx_compile_shader_nir(nir_shader *nir,
   agx_foreach_block(ctx, block)
      block->name = block_source_count++;

   if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
      agx_print_shader(ctx, stdout);

   agx_optimizer(ctx);

   if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
      agx_print_shader(ctx, stdout);

agx_compiler.h

@@ -547,6 +547,7 @@ agx_builder_insert(agx_cursor *cursor, agx_instr *I)
void agx_print_instr(agx_instr *I, FILE *fp);
void agx_print_block(agx_block *block, FILE *fp);
void agx_print_shader(agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_pack(agx_context *ctx, struct util_dynarray *emission);

agx_optimizer.c (new file)

@@ -0,0 +1,132 @@
/*
* Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#include "agx_compiler.h"

/* AGX peephole optimizer responsible for instruction combining. It operates in
* a forward direction and a backward direction, in each case traversing in
* source order. SSA means the forward pass satisfies the invariant:
*
* Every def is visited before any of its uses.
*
* Dually, the backward pass satisfies the invariant:
*
* Every use of a def is visited before the def.
*
* This means the forward pass can propagate modifiers forward, whereas the
* backwards pass propagates modifiers backward. Consider an example:
*
* 1 = fabs 0
* 2 = fround 1
* 3 = fsat 2
*
* The forwards pass would propagate the fabs to the fround (since we can
* look up the fabs from the fround source and do the replacement). By contrast,
* the backwards pass would propagate the fsat back to the fround (since when
* we see the fround we know it has only a single user, fsat). Propagatable
* instructions have natural directions (like pushforwards and pullbacks).
*
* We are careful to update the tracked state whenever we modify an instruction
* to ensure the passes are linear-time and converge in a single iteration.
*
* Size conversions are worth special discussion. Consider the snippet:
*
* 2 = fadd 0, 1
* 3 = f2f16 2
* 4 = fround 3
*
* A priori, we can move the f2f16 in either direction. But it's not equal --
* if we move it up to the fadd, we get FP16 for two instructions, whereas if
* we push it into the fround, we effectively get FP32 for two instructions. So
* f2f16 is backwards. Likewise, consider
*
* 2 = fadd 0, 1
* 3 = f2f32 2
* 4 = fround 3
*
* This time if we move f2f32 up to the fadd, we get FP32 for two, but if we
* move it down to the fround, we get FP16 for two. So f2f32 is forwards.
*/

/* An fmov is encoded as an fadd whose second source is negative zero */
static bool
agx_is_fmov(agx_instr *def)
{
   return (def->op == AGX_OPCODE_FADD)
      && agx_is_equiv(def->src[1], agx_negzero());
}

/* Compose floating-point modifiers with floating-point sources */
static agx_index
agx_compose_float_src(agx_index to, agx_index from)
{
   if (to.abs)
      from.neg = false;

   from.abs |= to.abs;
   from.neg |= to.neg;

   return from;
}

static void
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)
{
   for (unsigned s = 0; s < srcs; ++s) {
      agx_index src = ins->src[s];
      if (src.type != AGX_INDEX_NORMAL) continue;

      agx_instr *def = defs[src.value];
      if (!agx_is_fmov(def)) continue;
      if (def->saturate) continue;

      ins->src[s] = agx_compose_float_src(src, def->src[0]);
   }
}

static void
agx_optimizer_forward(agx_context *ctx)
{
   agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));

   agx_foreach_instr_global(ctx, I) {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      /* Record the instruction defining each SSA value */
      for (unsigned d = 0; d < info.nr_dests; ++d) {
         assert(I->dest[d].type == AGX_INDEX_NORMAL);
         defs[I->dest[d].value] = I;
      }

      /* Propagate fmov down */
      if (info.is_float)
         agx_optimizer_fmov(defs, I, info.nr_srcs);
   }

   free(defs);
}

void
agx_optimizer(agx_context *ctx)
{
   agx_optimizer_forward(ctx);
}

meson.build

@@ -23,6 +23,7 @@ libasahi_agx_files = files(
  'agx_compile.c',
  'agx_pack.c',
  'agx_print.c',
  'agx_optimizer.c',
  'agx_register_allocate.c',
)