nir: Un-inline nir_builder_alu_instr_finish_and_insert()
This function is big, and I don't think it will get meaningfully constant-propagated during inlining without LTO. Move it to a .c file so we just have one copy, saving 2.8MB from libnir.a on an amd64 release build.

             text      data      bss     total  filename
before:
         18953406   7768312   687260  27408978  build-release/driver-symlinks/iris_dri.so
          9734366   5542453   481692  15758511  build-release/lib/libvulkan_intel.so
         28687772  13310765  1168952  43167489  (TOTALS)
after:
         15478350   7767864   687260  23933474  build-release/driver-symlinks/iris_dri.so
          6810366   5541685   481692  12833743  build-release/lib/libvulkan_intel.so
         22288716  13309549  1168952  36767217  (TOTALS)

No statistically significant performance difference on iris shader-db, n=8.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13889>
This commit is contained in:
@@ -95,6 +95,7 @@ nir_intrinsics_c = custom_target(
|
|||||||
files_libnir = files(
|
files_libnir = files(
|
||||||
'nir.c',
|
'nir.c',
|
||||||
'nir.h',
|
'nir.h',
|
||||||
|
'nir_builder.c',
|
||||||
'nir_builder.h',
|
'nir_builder.h',
|
||||||
'nir_builtin_builder.c',
|
'nir_builtin_builder.c',
|
||||||
'nir_builtin_builder.h',
|
'nir_builtin_builder.h',
|
||||||
|
87
src/compiler/nir/nir_builder.c
Normal file
87
src/compiler/nir/nir_builder.c
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2014-2015 Broadcom
|
||||||
|
* Copyright © 2021 Google
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "nir_builder.h"
|
||||||
|
|
||||||
|
nir_ssa_def *
|
||||||
|
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
|
||||||
|
{
|
||||||
|
const nir_op_info *op_info = &nir_op_infos[instr->op];
|
||||||
|
|
||||||
|
instr->exact = build->exact;
|
||||||
|
|
||||||
|
/* Guess the number of components the destination temporary should have
|
||||||
|
* based on our input sizes, if it's not fixed for the op.
|
||||||
|
*/
|
||||||
|
unsigned num_components = op_info->output_size;
|
||||||
|
if (num_components == 0) {
|
||||||
|
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
||||||
|
if (op_info->input_sizes[i] == 0)
|
||||||
|
num_components = MAX2(num_components,
|
||||||
|
instr->src[i].src.ssa->num_components);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(num_components != 0);
|
||||||
|
|
||||||
|
/* Figure out the bitwidth based on the source bitwidth if the instruction
|
||||||
|
* is variable-width.
|
||||||
|
*/
|
||||||
|
unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
|
||||||
|
if (bit_size == 0) {
|
||||||
|
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
||||||
|
unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
|
||||||
|
if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
|
||||||
|
if (bit_size)
|
||||||
|
assert(src_bit_size == bit_size);
|
||||||
|
else
|
||||||
|
bit_size = src_bit_size;
|
||||||
|
} else {
|
||||||
|
assert(src_bit_size ==
|
||||||
|
nir_alu_type_get_type_size(op_info->input_types[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* When in doubt, assume 32. */
|
||||||
|
if (bit_size == 0)
|
||||||
|
bit_size = 32;
|
||||||
|
|
||||||
|
/* Make sure we don't swizzle from outside of our source vector (like if a
|
||||||
|
* scalar value was passed into a multiply with a vector).
|
||||||
|
*/
|
||||||
|
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
||||||
|
for (unsigned j = instr->src[i].src.ssa->num_components;
|
||||||
|
j < NIR_MAX_VEC_COMPONENTS; j++) {
|
||||||
|
instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
|
||||||
|
bit_size, NULL);
|
||||||
|
instr->dest.write_mask = (1 << num_components) - 1;
|
||||||
|
|
||||||
|
nir_builder_instr_insert(build, &instr->instr);
|
||||||
|
|
||||||
|
return &instr->dest.dest.ssa;
|
||||||
|
}
|
@@ -28,6 +28,10 @@
|
|||||||
#include "util/bitscan.h"
|
#include "util/bitscan.h"
|
||||||
#include "util/half_float.h"
|
#include "util/half_float.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
struct exec_list;
|
struct exec_list;
|
||||||
|
|
||||||
typedef struct nir_builder {
|
typedef struct nir_builder {
|
||||||
@@ -438,67 +442,8 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
|
|||||||
return nir_build_imm(build, 4, 32, v);
|
return nir_build_imm(build, 4, 32, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline nir_ssa_def *
|
nir_ssa_def *
|
||||||
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
|
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr);
|
||||||
{
|
|
||||||
const nir_op_info *op_info = &nir_op_infos[instr->op];
|
|
||||||
|
|
||||||
instr->exact = build->exact;
|
|
||||||
|
|
||||||
/* Guess the number of components the destination temporary should have
|
|
||||||
* based on our input sizes, if it's not fixed for the op.
|
|
||||||
*/
|
|
||||||
unsigned num_components = op_info->output_size;
|
|
||||||
if (num_components == 0) {
|
|
||||||
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
|
||||||
if (op_info->input_sizes[i] == 0)
|
|
||||||
num_components = MAX2(num_components,
|
|
||||||
instr->src[i].src.ssa->num_components);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert(num_components != 0);
|
|
||||||
|
|
||||||
/* Figure out the bitwidth based on the source bitwidth if the instruction
|
|
||||||
* is variable-width.
|
|
||||||
*/
|
|
||||||
unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
|
|
||||||
if (bit_size == 0) {
|
|
||||||
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
|
||||||
unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
|
|
||||||
if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
|
|
||||||
if (bit_size)
|
|
||||||
assert(src_bit_size == bit_size);
|
|
||||||
else
|
|
||||||
bit_size = src_bit_size;
|
|
||||||
} else {
|
|
||||||
assert(src_bit_size ==
|
|
||||||
nir_alu_type_get_type_size(op_info->input_types[i]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* When in doubt, assume 32. */
|
|
||||||
if (bit_size == 0)
|
|
||||||
bit_size = 32;
|
|
||||||
|
|
||||||
/* Make sure we don't swizzle from outside of our source vector (like if a
|
|
||||||
* scalar value was passed into a multiply with a vector).
|
|
||||||
*/
|
|
||||||
for (unsigned i = 0; i < op_info->num_inputs; i++) {
|
|
||||||
for (unsigned j = instr->src[i].src.ssa->num_components;
|
|
||||||
j < NIR_MAX_VEC_COMPONENTS; j++) {
|
|
||||||
instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
|
|
||||||
bit_size, NULL);
|
|
||||||
instr->dest.write_mask = (1 << num_components) - 1;
|
|
||||||
|
|
||||||
nir_builder_instr_insert(build, &instr->instr);
|
|
||||||
|
|
||||||
return &instr->dest.dest.ssa;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline nir_ssa_def *
|
static inline nir_ssa_def *
|
||||||
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
|
||||||
@@ -1881,4 +1826,8 @@ nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
|
|||||||
(nir_alu_type) (nir_type_int | bit_size));
|
(nir_alu_type) (nir_type_int | bit_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* NIR_BUILDER_H */
|
#endif /* NIR_BUILDER_H */
|
||||||
|
Reference in New Issue
Block a user