Files
third_party_mesa3d/src/intel/compiler/meson.build
Kenneth Graunke c19e5a0a75 intel/brw: Replace predicated break optimization with a simple peephole
We can achieve most of what brw_fs_opt_predicated_break() does with
simple peepholes at NIR -> BRW conversion time.

For predicated break and continue, we can simply look at an IF ... ENDIF
sequence after emitting it.  If there's a single instruction between the
two, and it's a BREAK or CONTINUE, then we can move the predicate from
the IF onto the jump, and delete the IF/ENDIF.  Because we haven't built
the CFG at this stage, we only need to remove them from the linked list
of instructions, which is trivial to do.

For the predicated while optimization, we can rely on the fact that we
already did the predicated break optimization, and simply look for a
predicated BREAK just before the WHILE.  If so, we move the predicate
onto the WHILE, invert it, and remove the BREAK.

There are a few cases where this approach does a worse job than the old
one: nir_convert_from_ssa may introduce load_reg and store_reg in blocks
containing break, and nir_trivialize_registers may decide it needs to
insert movs into those blocks.  So, at NIR -> BRW time, we'll actually
emit some MOVs there, which might have been possible to copy propagate
out after later optimizations.

However, the fossil-db results show that it's still pretty competitive.
For instructions, 1017 shaders were helped (average -1.87 instructions),
while only 62 were hurt (average +2.19 instructions).  In affected
shaders, it was -0.08% for instructions.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30498>
2024-08-05 19:17:55 -07:00

317 lines
8.3 KiB
Meson

# Copyright © 2017 Intel Corporation
# SPDX-License-Identifier: MIT
intel_nir_files = files(
'intel_nir.h',
'intel_nir.c',
'intel_nir_blockify_uniform_loads.c',
'intel_nir_clamp_image_1d_2d_array_sizes.c',
'intel_nir_clamp_per_vertex_loads.c',
'intel_nir_lower_conversions.c',
'intel_nir_lower_non_uniform_barycentric_at_sample.c',
'intel_nir_lower_non_uniform_resource_intel.c',
'intel_nir_lower_printf.c',
'intel_nir_lower_shading_rate_output.c',
'intel_nir_lower_sparse.c',
'intel_nir_lower_texture.c',
'intel_nir_opt_peephole_ffma.c',
'intel_nir_opt_peephole_imul32x16.c',
'intel_nir_tcs_workarounds.c',
)
libintel_compiler_brw_files = files(
'brw_cfg.cpp',
'brw_cfg.h',
'brw_compile_bs.cpp',
'brw_compile_cs.cpp',
'brw_compile_fs.cpp',
'brw_compile_gs.cpp',
'brw_compile_mesh.cpp',
'brw_compile_tcs.cpp',
'brw_compile_tes.cpp',
'brw_compile_vs.cpp',
'brw_compiler.c',
'brw_compiler.h',
'brw_debug_recompile.c',
'brw_def_analysis.cpp',
'brw_disasm.c',
'brw_disasm_info.cpp',
'brw_disasm_info.h',
'brw_eu.c',
'brw_eu_compact.c',
'brw_eu_defines.h',
'brw_eu_emit.c',
'brw_eu.h',
'brw_eu_validate.c',
'brw_fs_bank_conflicts.cpp',
'brw_fs_builder.h',
'brw_fs_cmod_propagation.cpp',
'brw_fs_combine_constants.cpp',
'brw_fs_copy_propagation.cpp',
'brw_fs.cpp',
'brw_fs_cse.cpp',
'brw_fs_dead_code_eliminate.cpp',
'brw_fs_generator.cpp',
'brw_fs.h',
'brw_fs_live_variables.cpp',
'brw_fs_live_variables.h',
'brw_fs_lower.cpp',
'brw_fs_lower_dpas.cpp',
'brw_fs_lower_integer_multiplication.cpp',
'brw_fs_lower_pack.cpp',
'brw_fs_lower_regioning.cpp',
'brw_fs_lower_simd_width.cpp',
'brw_fs_nir.cpp',
'brw_fs_opt.cpp',
'brw_fs_opt_algebraic.cpp',
'brw_fs_opt_virtual_grfs.cpp',
'brw_fs_reg_allocate.cpp',
'brw_fs_register_coalesce.cpp',
'brw_fs_saturate_propagation.cpp',
'brw_fs_scoreboard.cpp',
'brw_fs_thread_payload.cpp',
'brw_fs_validate.cpp',
'brw_fs_visitor.cpp',
'brw_fs_workaround.cpp',
'brw_inst.h',
'brw_ir.h',
'brw_ir_allocator.h',
'brw_ir_analysis.h',
'brw_ir_fs.h',
'brw_ir_performance.h',
'brw_ir_performance.cpp',
'brw_isa_info.h',
'brw_lower_logical_sends.cpp',
'brw_nir.h',
'brw_nir.c',
'brw_nir_analyze_ubo_ranges.c',
'brw_nir_lower_cooperative_matrix.c',
'brw_nir_lower_cs_intrinsics.c',
'brw_nir_lower_alpha_to_coverage.c',
'brw_nir_lower_intersection_shader.c',
'brw_nir_lower_ray_queries.c',
'brw_nir_lower_rt_intrinsics.c',
'brw_nir_lower_shader_calls.c',
'brw_nir_lower_storage_image.c',
'brw_nir_rt.h',
'brw_nir_rt.c',
'brw_nir_rt_builder.h',
'brw_packed_float.c',
'brw_print.cpp',
'brw_prim.h',
'brw_private.h',
'brw_reg.h',
'brw_reg_type.c',
'brw_reg_type.h',
'brw_rt.h',
'brw_schedule_instructions.cpp',
'brw_shader.cpp',
'brw_simd_selection.cpp',
'brw_vue_map.c',
)
brw_device_sha1_gen_src = custom_target('brw_device_sha1_gen.c',
input : ['brw_device_sha1_gen_c.py', '../dev/intel_device_info.py'],
output : ['brw_device_sha1_gen.c'],
command : [prog_python, '@INPUT0@', '--outdir', meson.current_build_dir()])
brw_nir_lower_fsign = custom_target(
'brw_nir_lower_fsign.c',
input : 'brw_nir_lower_fsign.py',
output : 'brw_nir_lower_fsign.c',
command : [
prog_python, '@INPUT@', '-p', dir_compiler_nir,
],
depend_files : nir_algebraic_depends,
capture : true,
)
brw_nir_trig = custom_target(
'brw_nir_trig_workarounds.c',
input : 'brw_nir_trig_workarounds.py',
output : 'brw_nir_trig_workarounds.c',
command : [
prog_python, '@INPUT@', '-p', dir_compiler_nir,
],
depend_files : nir_algebraic_depends,
capture : true,
)
libintel_compiler_brw = static_library(
'intel_compiler',
[libintel_compiler_brw_files, intel_nir_files, brw_nir_lower_fsign, brw_nir_trig, ir_expression_operation_h, [brw_device_sha1_gen_src]],
include_directories : [inc_include, inc_src, inc_intel],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
dependencies : [idep_nir_headers, idep_mesautil, idep_intel_dev],
build_by_default : false,
)
idep_intel_compiler_brw = declare_dependency(
link_with : [libintel_compiler_brw],
dependencies : [
idep_nir,
idep_mesautil,
],
)
# For now this tool is only going to be used by Anv
if get_option('intel-clc') == 'system'
prog_intel_clc = find_program('intel_clc', native : true)
dep_prog_intel_clc = []
elif with_intel_clc
prog_intel_clc = executable(
'intel_clc',
[
'intel_clc.c',
'brw_kernel.c',
# Use just the nir_options part of ELK instead of fully linking.
'elk/elk_nir_options.h',
'elk/elk_nir_options.c',
],
link_with : [libisl],
include_directories : [inc_include, inc_src, inc_intel],
c_args : [pre_args, no_override_init_args],
link_args : [ld_args_build_id],
dependencies : [idep_nir, idep_vtn, idep_mesaclc, idep_mesautil, idep_intel_dev,
idep_intel_compiler_brw],
# If we can run host binaries directly, just build intel_clc for the host.
# Most commonly this happens when doing a cross compile from an x86_64 build
# machine to an x86 host
native : not meson.can_run_host_binaries(),
install : get_option('install-intel-clc'),
)
dep_prog_intel_clc = [prog_intel_clc]
endif
if with_tests
test(
'intel_compiler_brw_tests',
executable(
'intel_compiler_brw_tests',
files(
'test_eu_compact.cpp',
'test_eu_validate.cpp',
'test_fs_cmod_propagation.cpp',
'test_fs_combine_constants.cpp',
'test_fs_copy_propagation.cpp',
'test_fs_cse.cpp',
'test_fs_saturate_propagation.cpp',
'test_fs_scoreboard.cpp',
'test_simd_selection.cpp',
'test_vf_float_conversions.cpp',
),
ir_expression_operation_h,
include_directories : [inc_include, inc_src, inc_intel],
link_with : libisl,
dependencies : [idep_gtest, idep_nir, idep_mesautil, idep_intel_dev,
idep_intel_compiler_brw],
),
suite : ['intel'],
protocol : 'gtest',
)
endif
if with_intel_tools
bison_command = []
if yacc_is_bison
bison_command = [
prog_bison, '@INPUT@', '--defines=@OUTPUT1@',
'--output=@OUTPUT0@'
]
else
bison_command = [
prog_bison, '-H', '@OUTPUT1@',
'-o', '@OUTPUT0@', '@INPUT@'
]
endif
brw_gram_tab = custom_target(
'brw_gram.tab.[ch]',
input : 'brw_gram.y',
output : ['brw_gram.tab.c', 'brw_gram.tab.h'],
command : bison_command
)
brw_lex_yy_c = custom_target(
'brw_lex.yy.c',
input : 'brw_lex.l',
output : 'brw_lex.yy.c',
command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@']
)
brw_asm_deps = [
dep_thread,
idep_intel_compiler_brw,
idep_intel_dev,
idep_mesautil,
]
brw_asm = static_library(
'brw_asm',
['brw_asm.c', brw_gram_tab[0], brw_gram_tab[1], brw_lex_yy_c],
dependencies : brw_asm_deps,
include_directories : [inc_include, inc_src, inc_intel],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
build_by_default : false,
)
idep_brw_asm = declare_dependency(
link_with : brw_asm,
dependencies : brw_asm_deps,
)
brw_asm_tool = executable(
'brw_asm',
['brw_asm_tool.c'],
dependencies : idep_brw_asm,
include_directories : [inc_include, inc_src, inc_intel],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
install : true
)
asm_testcases = [
['skl', 'gfx9'],
['icl', 'gfx11'],
['tgl', 'gfx12'],
['dg2', 'gfx12.5'],
]
test_runner = find_program('tests/run-test.py')
foreach testcase : asm_testcases
_gen_name = testcase[0]
_gen_num = testcase[1]
_gen_folder = join_paths(meson.current_source_dir(), 'tests',
_gen_num.replace('gfx', 'gen'))
test(
'brw_asm_' + _gen_num, test_runner,
args : [
'--brw_asm', brw_asm_tool,
'--gen_name', _gen_name,
'--gen_folder', _gen_folder,
],
suite : 'intel',
)
endforeach
brw_disasm_tool = executable(
'brw_disasm',
files('brw_disasm_tool.c'),
dependencies : [idep_mesautil, dep_thread, idep_intel_dev,
idep_intel_compiler_brw],
include_directories : [inc_include, inc_src, inc_intel],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
install : true
)
endif
subdir('elk')