agx: Add forward optimizing pass for fmov
Explain the ideas behind our SSA-based optimizer (inspired by ACO's, thank you to Daniel Schuermann for discussing this with me in the context of Bifrost), and implement the subset needed to propagate abs/neg through. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10582>
This commit is contained in:

committed by
Alyssa Rosenzweig

parent
e50bae00f4
commit
28801b4849
@@ -710,6 +710,11 @@ agx_compile_shader_nir(nir_shader *nir,
|
||||
agx_foreach_block(ctx, block)
|
||||
block->name = block_source_count++;
|
||||
|
||||
if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
|
||||
agx_print_shader(ctx, stdout);
|
||||
|
||||
agx_optimizer(ctx);
|
||||
|
||||
if (agx_debug & AGX_DBG_SHADERS && !skip_internal)
|
||||
agx_print_shader(ctx, stdout);
|
||||
|
||||
|
@@ -547,6 +547,7 @@ agx_builder_insert(agx_cursor *cursor, agx_instr *I)
|
||||
void agx_print_instr(agx_instr *I, FILE *fp);
|
||||
void agx_print_block(agx_block *block, FILE *fp);
|
||||
void agx_print_shader(agx_context *ctx, FILE *fp);
|
||||
void agx_optimizer(agx_context *ctx);
|
||||
void agx_ra(agx_context *ctx);
|
||||
void agx_pack(agx_context *ctx, struct util_dynarray *emission);
|
||||
|
||||
|
132
src/asahi/compiler/agx_optimizer.c
Normal file
132
src/asahi/compiler/agx_optimizer.c
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "agx_compiler.h"
|
||||
|
||||
/* AGX peephole optimizer responsible for instruction combining. It operates in
|
||||
* a forward direction and a backward direction, in each case traversing in
|
||||
* source order. SSA means the forward pass satisfies the invariant:
|
||||
*
|
||||
* Every def is visited before any of its uses.
|
||||
*
|
||||
* Dually, the backward pass satisfies the invariant:
|
||||
*
|
||||
* Every use of a def is visited before the def.
|
||||
*
|
||||
* This means the forward pass can propagate modifiers forward, whereas the
|
||||
* backwards pass propagates modifiers backward. Consider an example:
|
||||
*
|
||||
* 1 = fabs 0
|
||||
* 2 = fround 1
|
||||
* 3 = fsat 2
|
||||
*
|
||||
* The forwards pass would propagate the fabs to the fround (since we can
|
||||
* lookup the fabs from the fround source and do the replacement). By contrast
|
||||
* the backwards pass would propagate the fsat back to the fround (since when
|
||||
* we see the fround we know it has only a single user, fsat). Propagatable
|
||||
* instructions have natural directions (like pushforwards and pullbacks).
|
||||
*
|
||||
* We are careful to update the tracked state whenever we modify an instruction
|
||||
* to ensure the passes are linear-time and converge in a single iteration.
|
||||
*
|
||||
* Size conversions are worth special discussion. Consider the snippet:
|
||||
*
|
||||
* 2 = fadd 0, 1
|
||||
* 3 = f2f16 2
|
||||
* 4 = fround 3
|
||||
*
|
||||
* A priori, we can move the f2f16 in either direction. But it's not equal --
|
||||
* if we move it up to the fadd, we get FP16 for two instructions, whereas if
|
||||
* we push it into the fround, we effectively get FP32 for two instructions. So
|
||||
* f2f16 is backwards. Likewise, consider
|
||||
*
|
||||
* 2 = fadd 0, 1
|
||||
* 3 = f2f32 2
|
||||
* 4 = fround 3
|
||||
*
|
||||
* This time if we move f2f32 up to the fadd, we get FP32 for two, but if we
|
||||
* move it down to the fround, we get FP16 for two. So f2f32 is forwards.
|
||||
*/
|
||||
|
||||
static bool
|
||||
agx_is_fmov(agx_instr *def)
|
||||
{
|
||||
return (def->op == AGX_OPCODE_FADD)
|
||||
&& agx_is_equiv(def->src[1], agx_negzero());
|
||||
}
|
||||
|
||||
/* Compose floating-point modifiers with floating-point sources */
|
||||
|
||||
static agx_index
|
||||
agx_compose_float_src(agx_index to, agx_index from)
|
||||
{
|
||||
if (to.abs)
|
||||
from.neg = false;
|
||||
|
||||
from.abs |= to.abs;
|
||||
from.neg |= to.neg;
|
||||
|
||||
return from;
|
||||
}
|
||||
|
||||
static void
|
||||
agx_optimizer_fmov(agx_instr **defs, agx_instr *ins, unsigned srcs)
|
||||
{
|
||||
for (unsigned s = 0; s < srcs; ++s) {
|
||||
agx_index src = ins->src[s];
|
||||
if (src.type != AGX_INDEX_NORMAL) continue;
|
||||
|
||||
agx_instr *def = defs[src.value];
|
||||
if (!agx_is_fmov(def)) continue;
|
||||
if (def->saturate) continue;
|
||||
|
||||
ins->src[s] = agx_compose_float_src(src, def->src[0]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
agx_optimizer_forward(agx_context *ctx)
|
||||
{
|
||||
agx_instr **defs = calloc(ctx->alloc, sizeof(*defs));
|
||||
|
||||
agx_foreach_instr_global(ctx, I) {
|
||||
struct agx_opcode_info info = agx_opcodes_info[I->op];
|
||||
|
||||
for (unsigned d = 0; d < info.nr_dests; ++d) {
|
||||
assert(I->dest[d].type == AGX_INDEX_NORMAL);
|
||||
defs[I->dest[d].value] = I;
|
||||
}
|
||||
|
||||
/* Propagate fmov down */
|
||||
if (info.is_float)
|
||||
agx_optimizer_fmov(defs, I, info.nr_srcs);
|
||||
}
|
||||
|
||||
free(defs);
|
||||
}
|
||||
|
||||
void
|
||||
agx_optimizer(agx_context *ctx)
|
||||
{
|
||||
agx_optimizer_forward(ctx);
|
||||
}
|
@@ -23,6 +23,7 @@ libasahi_agx_files = files(
|
||||
'agx_compile.c',
|
||||
'agx_pack.c',
|
||||
'agx_print.c',
|
||||
'agx_optimizer.c',
|
||||
'agx_register_allocate.c',
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user