nir: Un-inline nir_builder_alu_instr_finish_and_insert()

This function is big and I don't think it will won't get meaningfully constant-propagated during inlining without LTO. Move it to a .c file so we just have one copy, saving 2.8MB from libnir.a on an amd64 release build. text data bss total filename before: 18953406 7768312 687260 27408978 build-release/driver-symlinks/iris_dri.so 9734366 5542453 481692 15758511 build-release/lib/libvulkan_intel.so 28687772 13310765 1168952 43167489 (TOTALS) after: 15478350 7767864 687260 23933474 build-release/driver-symlinks/iris_dri.so 6810366 5541685 481692 12833743 build-release/lib/libvulkan_intel.so 22288716 13309549 1168952 36767217 (TOTALS) No statistically significant performance difference on iris shader-db, n=8. Reviewed-by: Matt Turner <mattst88@gmail.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13889>
2021-11-19 16:24:00 -08:00
parent 3b5b4b5d45
commit d9bfcf5f5b
3 changed files with 98 additions and 61 deletions
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -95,6 +95,7 @@ nir_intrinsics_c = custom_target(
 files_libnir = files(
  'nir.c',
  'nir.h',
  'nir_builder.c',
  'nir_builder.h',
  'nir_builtin_builder.c',
  'nir_builtin_builder.h',
--- a/src/compiler/nir/nir_builder.c
+++ b/src/compiler/nir/nir_builder.c
@@ -0,0 +1,87 @@
 /*
 * Copyright © 2014-2015 Broadcom
 * Copyright © 2021 Google
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
 #include "nir_builder.h"
 nir_ssa_def *
 nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
 {
   const nir_op_info *op_info = &nir_op_infos[instr->op];
   instr->exact = build->exact;
   /* Guess the number of components the destination temporary should have
    * based on our input sizes, if it's not fixed for the op.
    */
   unsigned num_components = op_info->output_size;
   if (num_components == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         if (op_info->input_sizes[i] == 0)
            num_components = MAX2(num_components,
                                  instr->src[i].src.ssa->num_components);
      }
   }
   assert(num_components != 0);
   /* Figure out the bitwidth based on the source bitwidth if the instruction
    * is variable-width.
    */
   unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
   if (bit_size == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
         if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
            if (bit_size)
               assert(src_bit_size == bit_size);
            else
               bit_size = src_bit_size;
         } else {
            assert(src_bit_size ==
               nir_alu_type_get_type_size(op_info->input_types[i]));
         }
      }
   }
   /* When in doubt, assume 32. */
   if (bit_size == 0)
      bit_size = 32;
   /* Make sure we don't swizzle from outside of our source vector (like if a
    * scalar value was passed into a multiply with a vector).
    */
   for (unsigned i = 0; i < op_info->num_inputs; i++) {
      for (unsigned j = instr->src[i].src.ssa->num_components;
           j < NIR_MAX_VEC_COMPONENTS; j++) {
         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
      }
   }
   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
                     bit_size, NULL);
   instr->dest.write_mask = (1 << num_components) - 1;
   nir_builder_instr_insert(build, &instr->instr);
   return &instr->dest.dest.ssa;
 }
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -28,6 +28,10 @@
 #include "util/bitscan.h"
 #include "util/half_float.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 struct exec_list;
 typedef struct nir_builder {
@@ -438,67 +442,8 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
   return nir_build_imm(build, 4, 32, v);
 }
-static inline nir_ssa_def *
+nir_ssa_def *
-nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
+nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr);
 {
   const nir_op_info *op_info = &nir_op_infos[instr->op];
   instr->exact = build->exact;
   /* Guess the number of components the destination temporary should have
    * based on our input sizes, if it's not fixed for the op.
    */
   unsigned num_components = op_info->output_size;
   if (num_components == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         if (op_info->input_sizes[i] == 0)
            num_components = MAX2(num_components,
                                  instr->src[i].src.ssa->num_components);
      }
   }
   assert(num_components != 0);
   /* Figure out the bitwidth based on the source bitwidth if the instruction
    * is variable-width.
    */
   unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
   if (bit_size == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
         if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
            if (bit_size)
               assert(src_bit_size == bit_size);
            else
               bit_size = src_bit_size;
         } else {
            assert(src_bit_size ==
               nir_alu_type_get_type_size(op_info->input_types[i]));
         }
      }
   }
   /* When in doubt, assume 32. */
   if (bit_size == 0)
      bit_size = 32;
   /* Make sure we don't swizzle from outside of our source vector (like if a
    * scalar value was passed into a multiply with a vector).
    */
   for (unsigned i = 0; i < op_info->num_inputs; i++) {
      for (unsigned j = instr->src[i].src.ssa->num_components;
           j < NIR_MAX_VEC_COMPONENTS; j++) {
         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
      }
   }
   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
                     bit_size, NULL);
   instr->dest.write_mask = (1 << num_components) - 1;
   nir_builder_instr_insert(build, &instr->instr);
   return &instr->dest.dest.ssa;
 }
 static inline nir_ssa_def *
 nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -1881,4 +1826,8 @@ nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
         (nir_alu_type) (nir_type_int | bit_size));
 }
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
 #endif /* NIR_BUILDER_H */