v3d: don't lower fsat on V3D 7.x

This requires that our nir compiler options are different between V3D versions
so we can't use a static global any more.

total instructions in shared programs: 11241106 -> 11047872 (-1.72%)
instructions in affected programs: 4634458 -> 4441224 (-4.17%)
helped: 25119
HURT: 1717
Instructions are helped.

total threads in shared programs: 425238 -> 425036 (-0.05%)
threads in affected programs: 878 -> 676 (-23.01%)
helped: 79
HURT: 180
Inconclusive result (%-change mean confidence interval includes 0).

total loops in shared programs: 1968 -> 1933 (-1.78%)
loops in affected programs: 35 -> 0
helped: 35
HURT: 0
Loops are helped.

total uniforms in shared programs: 3845314 -> 3845219 (<.01%)
uniforms in affected programs: 213615 -> 213520 (-0.04%)
helped: 1338
HURT: 1059
Inconclusive result (value mean confidence interval includes 0).

total max-temps in shared programs: 2224313 -> 2221507 (-0.13%)
max-temps in affected programs: 236054 -> 233248 (-1.19%)
helped: 4863
HURT: 3357
Max-temps are helped.

total spills in shared programs: 4264 -> 4294 (0.70%)
spills in affected programs: 274 -> 304 (10.95%)
helped: 8
HURT: 16

total fills in shared programs: 6638 -> 6497 (-2.12%)
fills in affected programs: 2240 -> 2099 (-6.29%)
helped: 55
HURT: 17

total sfu-stalls in shared programs: 14942 -> 14353 (-3.94%)
sfu-stalls in affected programs: 4863 -> 4274 (-12.11%)
helped: 1287
HURT: 1165
Sfu-stalls are helped.

total inst-and-stalls in shared programs: 11256048 -> 11062225 (-1.72%)
inst-and-stalls in affected programs: 4635701 -> 4441878 (-4.18%)
helped: 25074
HURT: 1728
Inst-and-stalls are helped.

total nops in shared programs: 270482 -> 270621 (0.05%)
nops in affected programs: 27579 -> 27718 (0.50%)
helped: 1583
HURT: 1967
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30086>
This commit is contained in:
Iago Toral Quiroga
2024-07-08 12:58:23 +02:00
parent 33187012ab
commit d3a684803d

View File

@@ -673,81 +673,90 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen,
return true;
}
/* NIR shader compiler options table for v3d, grouped by kind of flag. */
static const nir_shader_compiler_options v3d_nir_options = {
        /* Floating-point lowering flags. */
        .lower_fdiv = true,
        .lower_ffma16 = true,
        .lower_ffma32 = true,
        .lower_ffma64 = true,
        .lower_ffract = true,
        .lower_fisnormal = true,
        .lower_flrp32 = true,
        .lower_fmod = true,
        .lower_fpow = true,
        .lower_fquantize2f16 = true,
        .lower_fsat = true,
        .lower_fsqrt = true,
        .lower_ldexp = true,

        /* Integer arithmetic and bit-scan lowering flags. */
        .lower_bit_count = true,
        .lower_find_lsb = true,
        .lower_hadd = true,
        .lower_iadd_sat = true,
        .lower_ifind_msb = true,
        .lower_isign = true,
        .lower_mul_high = true,
        .lower_uadd_sat = true,
        .lower_usub_sat = true,

        /* Byte/word and bitfield manipulation lowering flags. */
        .lower_bitfield_extract = true,
        .lower_bitfield_insert = true,
        .lower_bitfield_reverse = true,
        .lower_extract_byte = true,
        .lower_extract_word = true,
        .lower_insert_byte = true,
        .lower_insert_word = true,

        /* Pack/unpack lowering flags. */
        .lower_pack_32_2x16 = true,
        .lower_pack_32_2x16_split = true,
        .lower_pack_half_2x16 = true,
        .lower_pack_snorm_2x16 = true,
        .lower_pack_snorm_4x8 = true,
        .lower_pack_unorm_2x16 = true,
        .lower_pack_unorm_4x8 = true,
        .lower_unpack_32_2x16_split = true,
        .lower_unpack_half_2x16 = true,
        .lower_unpack_snorm_4x8 = true,
        .lower_unpack_unorm_4x8 = true,

        /* 64-bit integer lowering mask. */
        .lower_int64_options = nir_lower_bcsel64 | nir_lower_conv64 |
                               nir_lower_iadd64 | nir_lower_icmp64 |
                               nir_lower_imul_2x32_64 | nir_lower_imul64 |
                               nir_lower_ineg64 | nir_lower_logic64 |
                               nir_lower_shift64 | nir_lower_ufind_msb64,

        /* has_* flags. */
        .has_fsub = true,
        .has_isub = true,

        /* I/O, system-value and vector-shape flags. */
        .compact_arrays = true,
        .lower_all_io_to_temps = true,
        .lower_cs_local_id_to_index = true,
        .lower_to_scalar = true,
        .lower_wpos_pntc = true,

        .divergence_analysis_options =
                nir_divergence_multiple_workgroup_per_compute_subgroup,

        /* Setting an unroll limit here turns on loop unrolling in the state
         * tracker, which means the backend can no longer selectively disable
         * it when it leads to lower thread counts or TMU spills. Keep the
         * maximum conservative to limit the register pressure impact.
         */
        .max_unroll_iterations = 16,
        .force_indirect_unrolling_sampler = true,
};
/* Returns a pointer to a nir_shader_compiler_options table for the given
 * screen. The ir and shader arguments are not read anywhere in the visible
 * body.
 *
 * NOTE(review): this span interleaves two versions of the function. The
 * one-line parameter list together with the early
 * "return &v3d_nir_options;" is one version; the two-line parameter list
 * together with everything after that return is the other. Both cannot be
 * kept at once -- confirm against the upstream tree which one is current.
 */
static const void *
v3d_screen_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir, enum pipe_shader_type shader)
enum pipe_shader_ir ir,
enum pipe_shader_type shader)
{
return &v3d_nir_options;
struct v3d_screen *screen = v3d_screen(pscreen);
const struct v3d_device_info *devinfo = &screen->devinfo;
/* Both the flag and the table below are function-local statics, so a
 * single copy is shared by every call regardless of which pscreen is
 * passed in.
 */
static bool initialized = false;
static nir_shader_compiler_options options = {
.compact_arrays = true,
.lower_uadd_sat = true,
.lower_usub_sat = true,
.lower_iadd_sat = true,
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert = true,
.lower_bitfield_extract = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
.lower_cs_local_id_to_index = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_half_2x16 = true,
.lower_unpack_half_2x16 = true,
.lower_pack_32_2x16 = true,
.lower_pack_32_2x16_split = true,
.lower_unpack_32_2x16_split = true,
.lower_fdiv = true,
.lower_find_lsb = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fpow = true,
/* lower_fsat is not set in this initializer; it is filled in at run time
 * further down, from the device version.
 */
.lower_fsqrt = true,
.lower_ifind_msb = true,
.lower_isign = true,
.lower_ldexp = true,
.lower_hadd = true,
.lower_fisnormal = true,
.lower_mul_high = true,
.lower_wpos_pntc = true,
.lower_to_scalar = true,
.lower_int64_options =
nir_lower_bcsel64 |
nir_lower_conv64 |
nir_lower_iadd64 |
nir_lower_icmp64 |
nir_lower_imul_2x32_64 |
nir_lower_imul64 |
nir_lower_ineg64 |
nir_lower_logic64 |
nir_lower_shift64 |
nir_lower_ufind_msb64,
.lower_fquantize2f16 = true,
.has_fsub = true,
.has_isub = true,
.divergence_analysis_options =
nir_divergence_multiple_workgroup_per_compute_subgroup,
/* This will enable loop unrolling in the state tracker so we won't
* be able to selectively disable it in backend if it leads to
* lower thread counts or TMU spills. Choose a conservative maximum to
* limit register pressure impact.
*/
.max_unroll_iterations = 16,
.force_indirect_unrolling_sampler = true,
};
/* lower_fsat is computed only on the first call that reaches this point,
 * from that call's devinfo; every later call returns the same table
 * unchanged, even if its screen reports a different version. ver < 71
 * presumably selects hardware older than V3D 7.1 -- TODO confirm the
 * encoding of devinfo->ver.
 *
 * NOTE(review): no locking is visible around the initialized flag; confirm
 * that callers cannot race on the first call.
 */
if (!initialized) {
options.lower_fsat = devinfo->ver < 71;
initialized = true;
}
return &options;
}
static const uint64_t v3d_available_modifiers[] = {