nir: Un-inline nir_builder_alu_instr_finish_and_insert()

This function is big and I don't think it will get meaningfully
constant-propagated during inlining without LTO.  Move it to a .c file so
we just have one copy, saving 2.8MB from libnir.a on an amd64 release
build.

      text       data        bss      total filename
before:
  18953406    7768312     687260   27408978 build-release/driver-symlinks/iris_dri.so
   9734366    5542453     481692   15758511 build-release/lib/libvulkan_intel.so
  28687772   13310765    1168952   43167489 (TOTALS)

after:
  15478350    7767864     687260   23933474 build-release/driver-symlinks/iris_dri.so
   6810366    5541685     481692   12833743 build-release/lib/libvulkan_intel.so
  22288716   13309549    1168952   36767217 (TOTALS)

No statistically significant performance difference on iris shader-db, n=8.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13889>
This commit is contained in:
Emma Anholt
2021-11-19 16:24:00 -08:00
committed by Marge Bot
parent 3b5b4b5d45
commit d9bfcf5f5b
3 changed files with 98 additions and 61 deletions

View File

@@ -95,6 +95,7 @@ nir_intrinsics_c = custom_target(
files_libnir = files( files_libnir = files(
'nir.c', 'nir.c',
'nir.h', 'nir.h',
'nir_builder.c',
'nir_builder.h', 'nir_builder.h',
'nir_builtin_builder.c', 'nir_builtin_builder.c',
'nir_builtin_builder.h', 'nir_builtin_builder.h',

View File

@@ -0,0 +1,87 @@
/*
* Copyright © 2014-2015 Broadcom
* Copyright © 2021 Google
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "nir_builder.h"
nir_ssa_def *
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
{
   /* Completes a partially built ALU instruction and inserts it through the
    * builder:
    *
    *  - copies the builder's "exact" flag onto the instruction,
    *  - derives the destination component count and bit size from the op
    *    description and, where the op leaves them unsized, from the sources,
    *  - clamps the unused swizzle slots of every source,
    *  - initializes the SSA destination and its write mask.
    *
    * Returns a pointer to the instruction's destination SSA def.
    */
   const nir_op_info *info = &nir_op_infos[instr->op];

   instr->exact = build->exact;

   /* Destination width: use the op's fixed output size when it has one;
    * otherwise take the widest source among the inputs whose size the op
    * also leaves unspecified.
    */
   unsigned dest_comps = info->output_size;
   if (dest_comps == 0) {
      for (unsigned s = 0; s < info->num_inputs; s++) {
         if (info->input_sizes[s] != 0)
            continue;

         const unsigned src_comps = instr->src[s].src.ssa->num_components;
         if (src_comps > dest_comps)
            dest_comps = src_comps;
      }
   }
   assert(dest_comps != 0);

   /* Destination bit size: for variable-width ops, adopt the bit size of the
    * first unsized input and check that every other unsized input agrees.
    * Inputs with a fixed type size are only checked against that size.
    */
   unsigned dest_bits = nir_alu_type_get_type_size(info->output_type);
   if (dest_bits == 0) {
      for (unsigned s = 0; s < info->num_inputs; s++) {
         const unsigned src_bits = instr->src[s].src.ssa->bit_size;
         const unsigned fixed_bits =
            nir_alu_type_get_type_size(info->input_types[s]);

         if (fixed_bits != 0) {
            assert(src_bits == fixed_bits);
            continue;
         }

         if (dest_bits == 0)
            dest_bits = src_bits;
         else
            assert(src_bits == dest_bits);
      }
   }

   /* Nothing pinned the bit size down: fall back to 32. */
   if (dest_bits == 0)
      dest_bits = 32;

   /* Point every swizzle slot beyond a source's real components at that
    * source's last component, so we never read outside the source vector
    * (e.g. a scalar handed to a multiply with a vector).
    */
   for (unsigned s = 0; s < info->num_inputs; s++) {
      const unsigned src_comps = instr->src[s].src.ssa->num_components;

      for (unsigned c = src_comps; c < NIR_MAX_VEC_COMPONENTS; c++)
         instr->src[s].swizzle[c] = src_comps - 1;
   }

   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, dest_comps,
                     dest_bits, NULL);

   /* Enable the low dest_comps channels of the destination. */
   instr->dest.write_mask = (1 << dest_comps) - 1;

   nir_builder_instr_insert(build, &instr->instr);

   return &instr->dest.dest.ssa;
}

View File

@@ -28,6 +28,10 @@
#include "util/bitscan.h" #include "util/bitscan.h"
#include "util/half_float.h" #include "util/half_float.h"
#ifdef __cplusplus
extern "C" {
#endif
struct exec_list; struct exec_list;
typedef struct nir_builder { typedef struct nir_builder {
@@ -438,67 +442,8 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
return nir_build_imm(build, 4, 32, v); return nir_build_imm(build, 4, 32, v);
} }
static inline nir_ssa_def * nir_ssa_def *
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr) nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr);
{
const nir_op_info *op_info = &nir_op_infos[instr->op];
instr->exact = build->exact;
/* Guess the number of components the destination temporary should have
* based on our input sizes, if it's not fixed for the op.
*/
unsigned num_components = op_info->output_size;
if (num_components == 0) {
for (unsigned i = 0; i < op_info->num_inputs; i++) {
if (op_info->input_sizes[i] == 0)
num_components = MAX2(num_components,
instr->src[i].src.ssa->num_components);
}
}
assert(num_components != 0);
/* Figure out the bitwidth based on the source bitwidth if the instruction
* is variable-width.
*/
unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
if (bit_size == 0) {
for (unsigned i = 0; i < op_info->num_inputs; i++) {
unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
if (bit_size)
assert(src_bit_size == bit_size);
else
bit_size = src_bit_size;
} else {
assert(src_bit_size ==
nir_alu_type_get_type_size(op_info->input_types[i]));
}
}
}
/* When in doubt, assume 32. */
if (bit_size == 0)
bit_size = 32;
/* Make sure we don't swizzle from outside of our source vector (like if a
* scalar value was passed into a multiply with a vector).
*/
for (unsigned i = 0; i < op_info->num_inputs; i++) {
for (unsigned j = instr->src[i].src.ssa->num_components;
j < NIR_MAX_VEC_COMPONENTS; j++) {
instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
}
}
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
bit_size, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
nir_builder_instr_insert(build, &instr->instr);
return &instr->dest.dest.ssa;
}
static inline nir_ssa_def * static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -1881,4 +1826,8 @@ nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
(nir_alu_type) (nir_type_int | bit_size)); (nir_alu_type) (nir_type_int | bit_size));
} }
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* NIR_BUILDER_H */ #endif /* NIR_BUILDER_H */