From 7edd42cbc09d3030fed8fc073e3138084a774bec Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Sat, 4 Feb 2023 12:27:48 -0500
Subject: [PATCH] agx: Lower uniform sources with a dedicated pass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the decision of "can I copyprop this uniform?" from copyprop to a
standalone lowering pass. This is more straightforward and will enable the
next patch.

This has the side effect of sinking load_preamble instructions, for a nice
reduction in register pressure. Instruction count increase is from
rematerializing some moves, which should be more than balanced out by the
reduced register pressure.

total instructions in shared programs: 1523285 -> 1523317 (<.01%)
instructions in affected programs: 1148 -> 1180 (2.79%)
helped: 0
HURT: 13
HURT stats (abs) min: 1.0 max: 4.0 x̄: 2.46 x̃: 2
HURT stats (rel) min: 0.69% max: 7.69% x̄: 3.65% x̃: 2.61%
95% mean confidence interval for instructions value: 1.78 3.14
95% mean confidence interval for instructions %-change: 2.16% 5.15%
Instructions are HURT.

total bytes in shared programs: 10444532 -> 10444724 (<.01%)
bytes in affected programs: 7386 -> 7578 (2.60%)
helped: 0
HURT: 13
HURT stats (abs) min: 6.0 max: 24.0 x̄: 14.77 x̃: 12
HURT stats (rel) min: 0.63% max: 7.14% x̄: 3.40% x̃: 2.48%
95% mean confidence interval for bytes value: 10.68 18.85
95% mean confidence interval for bytes %-change: 2.02% 4.78%
Bytes are HURT.

total halfregs in shared programs: 419444 -> 416434 (-0.72%)
halfregs in affected programs: 27080 -> 24070 (-11.12%)
helped: 634
HURT: 0
helped stats (abs) min: 1.0 max: 30.0 x̄: 4.75 x̃: 2
helped stats (rel) min: 2.90% max: 54.55% x̄: 13.13% x̃: 8.51%
95% mean confidence interval for halfregs value: -5.08 -4.41
95% mean confidence interval for halfregs %-change: -14.03% -12.23%
Halfregs are helped.
Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_compile.c         | 10 ++++
 src/asahi/compiler/agx_compiler.h        |  1 +
 .../compiler/agx_lower_uniform_sources.c | 55 +++++++++++++++++++
 src/asahi/compiler/agx_optimizer.c       | 18 ------
 src/asahi/compiler/meson.build           |  1 +
 5 files changed, 67 insertions(+), 18 deletions(-)
 create mode 100644 src/asahi/compiler/agx_lower_uniform_sources.c

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 64a539e367c..f118c4f0042 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1899,12 +1899,22 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
       agx_dce(ctx);
       agx_optimizer(ctx);
       agx_opt_cse(ctx);
+
+      /* For correctness, lower uniform sources after copyprop (for correctness,
+       * as copyprop creates uniform sources). To keep register pressure in
+       * check, lower after CSE, since moves are cheaper than registers.
+       */
+      agx_lower_uniform_sources(ctx);
+
       /* Dead code eliminate after instruction combining to get the benefit */
       agx_dce(ctx);
       agx_validate(ctx, "Optimization");
 
       if (agx_should_dump(nir, AGX_DBG_SHADERS))
          agx_print_shader(ctx, stdout);
+   } else {
+      /* We need to lower regardless */
+      agx_lower_uniform_sources(ctx);
    }
 
    agx_ra(ctx);
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index dbebfe3ea5d..85b598c891e 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -784,6 +784,7 @@ void agx_print_block(agx_block *block, FILE *fp);
 void agx_print_shader(agx_context *ctx, FILE *fp);
 void agx_optimizer(agx_context *ctx);
 void agx_lower_pseudo(agx_context *ctx);
+void agx_lower_uniform_sources(agx_context *ctx);
 void agx_opt_cse(agx_context *ctx);
 void agx_dce(agx_context *ctx);
 void agx_ra(agx_context *ctx);
diff --git a/src/asahi/compiler/agx_lower_uniform_sources.c b/src/asahi/compiler/agx_lower_uniform_sources.c
new file mode 100644
index 00000000000..5558151c224
--- /dev/null
+++ b/src/asahi/compiler/agx_lower_uniform_sources.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2023 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+#include "agx_builder.h"
+#include "agx_compiler.h"
+
+/*
+ * Not all instructions can take uniforms. Memory instructions can take
+ * uniforms, but only for their base (first) source and only in the
+ * low-half of the uniform file.
+ *
+ * This pass lowers invalid uniforms.
+ */
+static bool
+should_lower(enum agx_opcode op, agx_index uniform, unsigned src_index)
+{
+   if (uniform.type != AGX_INDEX_UNIFORM)
+      return false;
+
+   /* Some instructions only seem able to access uniforms in the low half */
+   bool high = uniform.value >= 256;
+
+   switch (op) {
+   case AGX_OPCODE_TEXTURE_LOAD:
+   case AGX_OPCODE_TEXTURE_SAMPLE:
+      return src_index != 1;
+   case AGX_OPCODE_DEVICE_LOAD:
+      return src_index != 0 || high;
+   case AGX_OPCODE_DEVICE_STORE:
+      return src_index != 1 || high;
+   case AGX_OPCODE_ZS_EMIT:
+   case AGX_OPCODE_ST_TILE:
+   case AGX_OPCODE_LD_TILE:
+   case AGX_OPCODE_BLOCK_IMAGE_STORE:
+   case AGX_OPCODE_UNIFORM_STORE:
+   case AGX_OPCODE_ST_VARY:
+      return true;
+   default:
+      return false;
+   }
+}
+
+void
+agx_lower_uniform_sources(agx_context *ctx)
+{
+   agx_foreach_instr_global_safe(ctx, I) {
+      agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
+
+      agx_foreach_src(I, s) {
+         if (should_lower(I->op, I->src[s], s))
+            I->src[s] = agx_mov(&b, I->src[s]);
+      }
+   }
+}
diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index 693334b93de..c9d7248b4e6 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -187,24 +187,6 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
       if (def->src[0].type == AGX_INDEX_IMMEDIATE)
          continue;
 
-      /* Not all instructions can take uniforms. Memory instructions can take
-       * uniforms, but only for their base (first) source and only in the
-       * low-half of the uniform file.
-       */
-      if (def->src[0].type == AGX_INDEX_UNIFORM &&
-          (((I->op == AGX_OPCODE_TEXTURE_LOAD ||
-             I->op == AGX_OPCODE_TEXTURE_SAMPLE) &&
-            s != 1) ||
-           (I->op == AGX_OPCODE_DEVICE_LOAD &&
-            (s != 0 || def->src[0].value >= 256)) ||
-           (I->op == AGX_OPCODE_DEVICE_STORE &&
-            (s != 1 || def->src[0].value >= 256)) ||
-           I->op == AGX_OPCODE_ZS_EMIT || I->op == AGX_OPCODE_ST_TILE ||
-           I->op == AGX_OPCODE_LD_TILE ||
-           I->op == AGX_OPCODE_BLOCK_IMAGE_STORE ||
-           I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY))
-         continue;
-
       /* ALU instructions cannot take 64-bit */
       if (def->src[0].size == AGX_SIZE_64 &&
           !(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0) &&
diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build
index 2ab3e37f086..ec5a7aa19c7 100644
--- a/src/asahi/compiler/meson.build
+++ b/src/asahi/compiler/meson.build
@@ -33,6 +33,7 @@ libasahi_agx_files = files(
   'agx_lower_64bit.c',
   'agx_lower_parallel_copy.c',
   'agx_lower_pseudo.c',
+  'agx_lower_uniform_sources.c',
   'agx_pack.c',
   'agx_print.c',
   'agx_opt_cse.c',