From d3a684803d0a0c6d23bde784759bc74c5f8a008a Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 8 Jul 2024 12:58:23 +0200 Subject: [PATCH] v3d: don't lower fsat on V3D 7.x This requires our NIR compiler options to differ between V3D versions, so we can no longer use a static const global. total instructions in shared programs: 11241106 -> 11047872 (-1.72%) instructions in affected programs: 4634458 -> 4441224 (-4.17%) helped: 25119 HURT: 1717 Instructions are helped. total threads in shared programs: 425238 -> 425036 (-0.05%) threads in affected programs: 878 -> 676 (-23.01%) helped: 79 HURT: 180 Inconclusive result (%-change mean confidence interval includes 0). total loops in shared programs: 1968 -> 1933 (-1.78%) loops in affected programs: 35 -> 0 helped: 35 HURT: 0 Loops are helped. total uniforms in shared programs: 3845314 -> 3845219 (<.01%) uniforms in affected programs: 213615 -> 213520 (-0.04%) helped: 1338 HURT: 1059 Inconclusive result (value mean confidence interval includes 0). total max-temps in shared programs: 2224313 -> 2221507 (-0.13%) max-temps in affected programs: 236054 -> 233248 (-1.19%) helped: 4863 HURT: 3357 Max-temps are helped. total spills in shared programs: 4264 -> 4294 (0.70%) spills in affected programs: 274 -> 304 (10.95%) helped: 8 HURT: 16 total fills in shared programs: 6638 -> 6497 (-2.12%) fills in affected programs: 2240 -> 2099 (-6.29%) helped: 55 HURT: 17 total sfu-stalls in shared programs: 14942 -> 14353 (-3.94%) sfu-stalls in affected programs: 4863 -> 4274 (-12.11%) helped: 1287 HURT: 1165 Sfu-stalls are helped. total inst-and-stalls in shared programs: 11256048 -> 11062225 (-1.72%) inst-and-stalls in affected programs: 4635701 -> 4441878 (-4.18%) helped: 25074 HURT: 1728 Inst-and-stalls are helped. total nops in shared programs: 270482 -> 270621 (0.05%) nops in affected programs: 27579 -> 27718 (0.50%) helped: 1583 HURT: 1967 Inconclusive result (value mean confidence interval includes 0). 
Reviewed-by: Juan A. Suarez Part-of: --- src/gallium/drivers/v3d/v3d_screen.c | 153 ++++++++++++++------------- 1 file changed, 81 insertions(+), 72 deletions(-) diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 056c53a2187..c300bf41d2b 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -673,81 +673,90 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen, return true; } -static const nir_shader_compiler_options v3d_nir_options = { - .compact_arrays = true, - .lower_uadd_sat = true, - .lower_usub_sat = true, - .lower_iadd_sat = true, - .lower_all_io_to_temps = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_insert_byte = true, - .lower_insert_word = true, - .lower_bitfield_insert = true, - .lower_bitfield_extract = true, - .lower_bitfield_reverse = true, - .lower_bit_count = true, - .lower_cs_local_id_to_index = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_pack_unorm_2x16 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_snorm_4x8 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_snorm_4x8 = true, - .lower_pack_half_2x16 = true, - .lower_unpack_half_2x16 = true, - .lower_pack_32_2x16 = true, - .lower_pack_32_2x16_split = true, - .lower_unpack_32_2x16_split = true, - .lower_fdiv = true, - .lower_find_lsb = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, - .lower_flrp32 = true, - .lower_fpow = true, - .lower_fsat = true, - .lower_fsqrt = true, - .lower_ifind_msb = true, - .lower_isign = true, - .lower_ldexp = true, - .lower_hadd = true, - .lower_fisnormal = true, - .lower_mul_high = true, - .lower_wpos_pntc = true, - .lower_to_scalar = true, - .lower_int64_options = - nir_lower_bcsel64 | - nir_lower_conv64 | - nir_lower_iadd64 | - nir_lower_icmp64 | - nir_lower_imul_2x32_64 | - nir_lower_imul64 | - nir_lower_ineg64 | - nir_lower_logic64 | - 
nir_lower_shift64 | - nir_lower_ufind_msb64, - .lower_fquantize2f16 = true, - .has_fsub = true, - .has_isub = true, - .divergence_analysis_options = - nir_divergence_multiple_workgroup_per_compute_subgroup, - /* This will enable loop unrolling in the state tracker so we won't - * be able to selectively disable it in backend if it leads to - * lower thread counts or TMU spills. Choose a conservative maximum to - * limit register pressure impact. - */ - .max_unroll_iterations = 16, - .force_indirect_unrolling_sampler = true, -}; - static const void * v3d_screen_get_compiler_options(struct pipe_screen *pscreen, - enum pipe_shader_ir ir, enum pipe_shader_type shader) + enum pipe_shader_ir ir, + enum pipe_shader_type shader) /* Returns a pointer to the NIR compiler options; the ir and shader arguments are not consulted, only the device info of pscreen is. */ { - return &v3d_nir_options; + struct v3d_screen *screen = v3d_screen(pscreen); + const struct v3d_device_info *devinfo = &screen->devinfo; + + static bool initialized = false; /* Guards the one-time setup of options.lower_fsat performed at the end of this function. */ + static nir_shader_compiler_options options = { /* Function-local static: a single instance is shared by every call and its address is what gets returned. */ + .compact_arrays = true, + .lower_uadd_sat = true, + .lower_usub_sat = true, + .lower_iadd_sat = true, + .lower_all_io_to_temps = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, + .lower_bitfield_insert = true, + .lower_bitfield_extract = true, + .lower_bitfield_reverse = true, + .lower_bit_count = true, + .lower_cs_local_id_to_index = true, + .lower_ffract = true, + .lower_fmod = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_snorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_pack_snorm_4x8 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_snorm_4x8 = true, + .lower_pack_half_2x16 = true, + .lower_unpack_half_2x16 = true, + .lower_pack_32_2x16 = true, + .lower_pack_32_2x16_split = true, + .lower_unpack_32_2x16_split = true, + .lower_fdiv = true, + .lower_find_lsb = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, + .lower_flrp32 = true, + .lower_fpow = true, + .lower_fsqrt = true, + .lower_ifind_msb 
= true, + .lower_isign = true, + .lower_ldexp = true, + .lower_hadd = true, + .lower_fisnormal = true, + .lower_mul_high = true, + .lower_wpos_pntc = true, + .lower_to_scalar = true, + .lower_int64_options = + nir_lower_bcsel64 | + nir_lower_conv64 | + nir_lower_iadd64 | + nir_lower_icmp64 | + nir_lower_imul_2x32_64 | + nir_lower_imul64 | + nir_lower_ineg64 | + nir_lower_logic64 | + nir_lower_shift64 | + nir_lower_ufind_msb64, + .lower_fquantize2f16 = true, + .has_fsub = true, + .has_isub = true, + .divergence_analysis_options = + nir_divergence_multiple_workgroup_per_compute_subgroup, + /* This will enable loop unrolling in the state tracker so we won't + * be able to selectively disable it in backend if it leads to + * lower thread counts or TMU spills. Choose a conservative maximum to + * limit register pressure impact. + */ + .max_unroll_iterations = 16, + .force_indirect_unrolling_sampler = true, + }; + + if (!initialized) { /* lower_fsat is left out of the initializer above and set here: true only when devinfo->ver < 71. The value chosen on the first call is kept for every later call, whichever screen makes it. NOTE(review): the flag is not synchronized and the options are shared across screens; confirm callers cannot race here or mix device versions in one process. */ + options.lower_fsat = devinfo->ver < 71; + initialized = true; + } + + return &options; } static const uint64_t v3d_available_modifiers[] = {