
Since 13b859de
do_copy_propagation_elements() has a flaw where
the time it takes to complete grows exponentially as the number
of nested loops increases. It can also hurt rather than help versus
just letting NIR optimise the code. So if the NIR linker is enabled we
let it handle it instead.
shader-db results Iris (BDW):
total instructions in shared programs: 11177181 -> 11199739 (0.20%)
instructions in affected programs: 119424 -> 141982 (18.89%)
helped: 109
HURT: 65
total cycles in shared programs: 368946819 -> 372277173 (0.90%)
cycles in affected programs: 116539428 -> 119869782 (2.86%)
total spills in shared programs: 3983 -> 8785 (120.56%)
spills in affected programs: 2072 -> 6874 (231.76%)
helped: 0
HURT: 6
total fills in shared programs: 2016 -> 6068 (200.99%)
fills in affected programs: 230 -> 4282 (1761.74%)
helped: 0
HURT: 6
LOST: 85
GAINED: 77
freedreno results:
total instructions in shared programs: 11011122 -> 11011620 (<.01%)
instructions in affected programs: 939829 -> 940327 (0.05%)
total full in shared programs: 762725 -> 762674 (<.01%)
full in affected programs: 1096 -> 1045 (-4.65%)
total constlen in shared programs: 1772092 -> 1771596 (-0.03%)
constlen in affected programs: 2780 -> 2284 (-17.84%)
total stp in shared programs: 4040 -> 4058 (0.45%)
stp in affected programs: 3656 -> 3674 (0.49%)
total ldp in shared programs: 2160 -> 2178 (0.83%)
ldp in affected programs: 1748 -> 1766 (1.03%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_high_off/13.shader_test CL: 1231 -> 1234 (0.24%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_normal_off/13.shader_test CL: 1231 -> 1234 (0.24%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_high_off/15.shader_test CL: 453 -> 456 (0.66%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_normal_off/15.shader_test CL: 453 -> 456 (0.66%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_high_off/17.shader_test CL: 144 -> 147 (2.08%)
stp HURT: shaders/robclark-shaders/gfxbench5/gl_5_normal_off/17.shader_test CL: 144 -> 147 (2.08%)
However, those stp counts are misleading -- gfxbench gl-5-normal actually
gets its scratch (ldp/stp) stored as 16 bits instead of 32 thanks to
better NIR copy prop, and the result is 2.64398% +/- 0.0991923% perf
improvement!
i915 results:
total instructions in shared programs: 510528 -> 510489 (<.01%)
instructions in affected programs: 3303 -> 3264 (-1.18%)
total tex_indirect in shared programs: 16708 -> 16717 (0.05%)
tex_indirect in affected programs: 134 -> 143 (6.72%)
total temps in shared programs: 30181 -> 30169 (-0.04%)
temps in affected programs: 1268 -> 1256 (-0.95%)
LOST: 0
GAINED: 1
i915 highlights:
instructions HURT: shaders/closed/steam/legend-of-grimrock/47.shader_test FS: 141 -> 144 (2.13%)
instructions HURT: shaders/closed/steam/steamworld-dig/22.shader_test FS: 84 -> 108 (28.57%)
temps HURT: shaders/closed/steam/left-4-dead-2/medium/3682.shader_test FS: 7 -> 13 (85.71%)
r300 results:
total instructions in shared programs: 1340439 -> 1340845 (0.03%)
instructions in affected programs: 32354 -> 32760 (1.25%)
total temps in shared programs: 179394 -> 179329 (-0.04%)
temps in affected programs: 1505 -> 1440 (-4.32%)
total consts in shared programs: 1177742 -> 1177885 (0.01%)
consts in affected programs: 1107 -> 1250 (12.92%)
total lits in shared programs: 24992 -> 25019 (0.11%)
lits in affected programs: 138 -> 165 (19.57%)
instructions HURT: shaders/closed/steam/legend-of-grimrock/26.shader_test FS: 47 -> 52 (10.64%)
instructions HURT: shaders/closed/steam/sanctum-2/6072.shader_test FS: 43 -> 48 (11.63%)
instructions HURT: shaders/closed/steam/champions-of-regnum/2378.shader_test VS: 35 -> 40 (14.29%)
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13288>
310 lines
9.2 KiB
Meson
310 lines
9.2 KiB
Meson
# Copyright © 2017 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Static library 'glsl_util' built from glsl_util_files (that list is not
# defined in this part of the file). Symbols are hidden by default, and with
# build_by_default disabled it is only built when another target needs it.
libglsl_util = static_library(
  'glsl_util',
  glsl_util_files,
  include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
  dependencies : dep_valgrind,
  gnu_symbol_visibility : 'hidden',
  build_by_default : false,
)

# Process the glcpp subdirectory before the targets below.
# NOTE(review): libglcpp and libglcpp_standalone, which are linked further
# down, are not defined in this file and presumably come from there - confirm.
subdir('glcpp')

# Select the parser-generator command line used by the glsl_parser target.
# Both variants write the parser source to the first output (-o), apply the
# '_mesa_glsl_' symbol prefix (-p) and emit the header to the second output;
# only the header option differs: '--defines=FILE' when yacc_is_bison is
# true, '-H FILE' otherwise.
bison_command = []
if yacc_is_bison
  bison_command = [
    prog_bison, '-o', '@OUTPUT0@', '-p', '_mesa_glsl_', '--defines=@OUTPUT1@',
    '@INPUT@',
  ]
else
  bison_command = [
    prog_bison, '-o', '@OUTPUT0@', '-p', '_mesa_glsl_', '-H', '@OUTPUT1@',
    '@INPUT@',
  ]
endif

# Generate glsl_parser.cpp (@OUTPUT0@) and glsl_parser.h (@OUTPUT1@) from
# glsl_parser.yy using the previously selected bison_command.
glsl_parser = custom_target(
  'glsl_parser',
  input : 'glsl_parser.yy',
  output : ['glsl_parser.cpp', 'glsl_parser.h'],
  command : bison_command
)

# Generate glsl_lexer.cpp from glsl_lexer.ll by running prog_flex_cpp with
# '-o <output> <input>'.
glsl_lexer_cpp = custom_target(
  'glsl_lexer_cpp',
  input : 'glsl_lexer.ll',
  output : 'glsl_lexer.cpp',
  command : [prog_flex_cpp, '-o', '@OUTPUT@', '@INPUT@'],
)

# Generate ir_expression_operation_constant.h by running
# ir_expression_operation.py in 'constant' mode; 'capture : true' stores the
# script's stdout as the output file.
ir_expression_operation_constant_h = custom_target(
  'ir_expression_operation_constant.h',
  input : 'ir_expression_operation.py',
  output : 'ir_expression_operation_constant.h',
  command : [prog_python, '@INPUT@', 'constant'],
  capture : true,
)

# Generate ir_expression_operation_strings.h by running
# ir_expression_operation.py in 'strings' mode; 'capture : true' stores the
# script's stdout as the output file.
ir_expression_operation_strings_h = custom_target(
  'ir_expression_operation_strings.h',
  input : 'ir_expression_operation.py',
  output : 'ir_expression_operation_strings.h',
  command : [prog_python, '@INPUT@', 'strings'],
  capture : true,
)

# Generate float64_glsl.h from float64.glsl. '@INPUT@' expands to both inputs,
# so the command run is: prog_python <files_xxd> float64.glsl <output>
# -n float64_source.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
float64_glsl_h = custom_target(
  'float64_glsl.h',
  input : [files_xxd, 'float64.glsl'],
  output : 'float64_glsl.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'float64_source'],
)

# Generate cross_platform_settings_piece_all.h from
# CrossPlatformSettings_piece_all.glsl. '@INPUT@' expands to both inputs, so
# the command run is: prog_python <files_xxd> <glsl file> <output>
# -n cross_platform_settings_piece_all_header.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
cross_platform_settings_piece_all_h = custom_target(
  'cross_platform_settings_piece_all.h',
  input : [files_xxd, 'CrossPlatformSettings_piece_all.glsl'],
  output : 'cross_platform_settings_piece_all.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'cross_platform_settings_piece_all_header'],
)

# Generate bc1_glsl.h from bc1.glsl. '@INPUT@' expands to both inputs, so the
# command run is: prog_python <files_xxd> bc1.glsl <output> -n bc1_source.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
bc1_glsl_h = custom_target(
  'bc1_glsl.h',
  input : [files_xxd, 'bc1.glsl'],
  output : 'bc1_glsl.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'bc1_source'],
)

# Generate bc4_glsl.h from bc4.glsl. '@INPUT@' expands to both inputs, so the
# command run is: prog_python <files_xxd> bc4.glsl <output> -n bc4_source.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
bc4_glsl_h = custom_target(
  'bc4_glsl.h',
  input : [files_xxd, 'bc4.glsl'],
  output : 'bc4_glsl.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'bc4_source'],
)

# Generate etc2_rgba_stitch_glsl.h from etc2_rgba_stitch.glsl. '@INPUT@'
# expands to both inputs, so the command run is:
# prog_python <files_xxd> etc2_rgba_stitch.glsl <output>
# -n etc2_rgba_stitch_source.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
etc2_rgba_stitch_glsl_h = custom_target(
  'etc2_rgba_stitch_glsl.h',
  input : [files_xxd, 'etc2_rgba_stitch.glsl'],
  output : 'etc2_rgba_stitch_glsl.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'etc2_rgba_stitch_source'],
)

# Generate astc_glsl.h from astc_decoder.glsl (note the input and output base
# names differ). '@INPUT@' expands to both inputs, so the command run is:
# prog_python <files_xxd> astc_decoder.glsl <output> -n astc_source.
# NOTE(review): '-n' presumably names the generated C symbol - confirm against
# the script behind files_xxd.
astc_glsl_h = custom_target(
  'astc_glsl.h',
  input : [files_xxd, 'astc_decoder.glsl'],
  output : 'astc_glsl.h',
  command : [prog_python, '@INPUT@', '@OUTPUT@', '-n', 'astc_source'],
)

# Hand-written sources and headers compiled into the 'glsl' static library.
# Generated sources (parser, lexer, embedded .glsl headers) are not listed
# here; they are added separately where libglsl is declared.
files_libglsl = files(
  'ast.h',
  'ast_array_index.cpp',
  'ast_expr.cpp',
  'ast_function.cpp',
  'ast_to_hir.cpp',
  'ast_type.cpp',
  'builtin_functions.cpp',
  'builtin_functions.h',
  'builtin_int64.h',
  'builtin_types.cpp',
  'builtin_variables.cpp',
  'generate_ir.cpp',
  'gl_nir_lower_atomics.c',
  'gl_nir_lower_images.c',
  'gl_nir_lower_buffers.c',
  'gl_nir_lower_packed_varyings.c',
  'gl_nir_lower_samplers.c',
  'gl_nir_lower_samplers_as_deref.c',
  'gl_nir_lower_xfb_varying.c',
  'gl_nir_link_atomics.c',
  'gl_nir_link_uniform_blocks.c',
  'gl_nir_link_uniform_initializers.c',
  'gl_nir_link_uniforms.c',
  'gl_nir_link_varyings.c',
  'gl_nir_link_varyings.h',
  'gl_nir_link_xfb.c',
  'gl_nir_linker.c',
  'gl_nir_linker.h',
  'gl_nir_opt_dead_builtin_varyings.c',
  'gl_nir.h',
  'glsl_parser_extras.cpp',
  'glsl_parser_extras.h',
  'glsl_symbol_table.cpp',
  'glsl_symbol_table.h',
  'glsl_to_nir.cpp',
  'glsl_to_nir.h',
  'hir_field_selection.cpp',
  'ir_array_refcount.cpp',
  'ir_array_refcount.h',
  'ir_basic_block.cpp',
  'ir_basic_block.h',
  'ir_builder.cpp',
  'ir_builder.h',
  'ir_clone.cpp',
  'ir_constant_expression.cpp',
  'ir.cpp',
  'ir.h',
  'ir_equals.cpp',
  'ir_expression_flattening.cpp',
  'ir_expression_flattening.h',
  'ir_function_can_inline.cpp',
  'ir_function_detect_recursion.cpp',
  'ir_function_inlining.h',
  'ir_function.cpp',
  'ir_hierarchical_visitor.cpp',
  'ir_hierarchical_visitor.h',
  'ir_hv_accept.cpp',
  'ir_optimization.h',
  'ir_print_visitor.cpp',
  'ir_print_visitor.h',
  'ir_reader.cpp',
  'ir_reader.h',
  'ir_rvalue_visitor.cpp',
  'ir_rvalue_visitor.h',
  'ir_uniform.h',
  'ir_validate.cpp',
  'ir_variable_refcount.cpp',
  'ir_variable_refcount.h',
  'ir_visitor.h',
  'linker.cpp',
  'linker.h',
  'linker_util.h',
  'linker_util.cpp',
  'link_functions.cpp',
  'link_interface_blocks.cpp',
  'link_uniforms.cpp',
  'link_uniform_block_active_visitor.cpp',
  'link_uniform_block_active_visitor.h',
  'link_uniform_blocks.cpp',
  'link_varyings.cpp',
  'link_varyings.h',
  'list.h',
  'lower_blend_equation_advanced.cpp',
  'lower_builtins.cpp',
  'lower_discard.cpp',
  'lower_discard_flow.cpp',
  'lower_distance.cpp',
  'lower_instructions.cpp',
  'lower_int64.cpp',
  'lower_jumps.cpp',
  'lower_mat_op_to_vec.cpp',
  'lower_named_interface_blocks.cpp',
  'lower_packing_builtins.cpp',
  'lower_precision.cpp',
  'lower_subroutine.cpp',
  'lower_tess_level.cpp',
  'lower_vec_index_to_cond_assign.cpp',
  'lower_vector_derefs.cpp',
  'opt_algebraic.cpp',
  'opt_constant_folding.cpp',
  'opt_constant_propagation.cpp',
  'opt_constant_variable.cpp',
  'opt_dead_builtin_variables.cpp',
  'opt_dead_code.cpp',
  'opt_dead_code_local.cpp',
  'opt_dead_functions.cpp',
  'opt_flatten_nested_if_blocks.cpp',
  'opt_flip_matrices.cpp',
  'opt_function_inlining.cpp',
  'opt_if_simplification.cpp',
  'opt_minmax.cpp',
  'opt_rebalance_tree.cpp',
  'opt_tree_grafting.cpp',
  'program.h',
  'propagate_invariance.cpp',
  's_expression.cpp',
  's_expression.h',
  'string_to_uint_map.cpp',
  'string_to_uint_map.h',
  'serialize.cpp',
  'serialize.h',
  'shader_cache.cpp',
  'shader_cache.h',
)

# Sources and headers that go into the 'glsl_standalone' static library only.
files_libglsl_standalone = files(
  'ir_builder_print_visitor.cpp',
  'ir_builder_print_visitor.h',
  'opt_add_neg_to_sub.h',
  'standalone_scaffolding.cpp',
  'standalone_scaffolding.h',
  'standalone.cpp',
  'standalone.h',
)

# Static library 'glsl': the hand-written sources in files_libglsl plus the
# generated parser, lexer, ir_expression_operation headers and embedded .glsl
# headers. Links against libglcpp and depends on idep_nir. Not built by
# default.
# NOTE(review): ir_expression_operation_h is not defined in this file; it is
# presumably provided by a parent build file - confirm.
libglsl = static_library(
  'glsl',
  [files_libglsl, glsl_parser, glsl_lexer_cpp, ir_expression_operation_h,
   ir_expression_operation_strings_h, ir_expression_operation_constant_h,
   float64_glsl_h, cross_platform_settings_piece_all_h, bc1_glsl_h, bc4_glsl_h,
   etc2_rgba_stitch_glsl_h, astc_glsl_h],
  c_args : [c_msvc_compat_args, no_override_init_args],
  cpp_args : [cpp_msvc_compat_args],
  gnu_symbol_visibility : 'hidden',
  link_with : libglcpp,
  include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_compiler],
  dependencies : idep_nir,
  build_by_default : false,
)

# Static library 'glsl_standalone': the files_libglsl_standalone sources (plus
# the generated ir_expression_operation header) linked against libglsl,
# libglsl_util and libglcpp_standalone. Not built by default.
libglsl_standalone = static_library(
  'glsl_standalone',
  [files_libglsl_standalone, ir_expression_operation_h],
  c_args : [c_msvc_compat_args, no_override_init_args],
  cpp_args : [cpp_msvc_compat_args],
  gnu_symbol_visibility : 'hidden',
  include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
  link_with : [libglsl, libglsl_util, libglcpp_standalone],
  dependencies : [idep_mesautil, idep_getopt],
  build_by_default : false,
)

# 'glsl_compiler' executable: main.cpp linked against libglsl_standalone.
# It is built by default and installed only when the with_tools list contains
# 'glsl'.
glsl_compiler = executable(
  'glsl_compiler',
  'main.cpp',
  c_args : [c_msvc_compat_args, no_override_init_args],
  cpp_args : [cpp_msvc_compat_args],
  gnu_symbol_visibility : 'hidden',
  dependencies : [dep_clock, dep_thread, idep_getopt, idep_mesautil],
  include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
  link_with : [libglsl_standalone],
  build_by_default : with_tools.contains('glsl'),
  install : with_tools.contains('glsl'),
)

# 'glsl_test' executable: test.cpp and test_optpass.cpp (plus the generated
# ir_expression_operation header) linked against libglsl, libglsl_standalone
# and libglsl_util. It is built by default and installed only when the
# with_tools list contains 'glsl'.
glsl_test = executable(
  'glsl_test',
  ['test.cpp', 'test_optpass.cpp', 'test_optpass.h',
   ir_expression_operation_h],
  c_args : [c_msvc_compat_args, no_override_init_args],
  cpp_args : [cpp_msvc_compat_args],
  gnu_symbol_visibility : 'hidden',
  include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux],
  dependencies : [dep_clock, dep_thread, idep_getopt, idep_mesautil],
  link_with : [libglsl, libglsl_standalone, libglsl_util],
  build_by_default : with_tools.contains('glsl'),
  install : with_tools.contains('glsl'),
)

# Descend into the 'tests' subdirectory only when both with_any_opengl and
# with_tests are true.
if with_any_opengl and with_tests
  subdir('tests')
endif