v3d: don't lower fsat on V3D 7.x
This requires that our nir compiler options are different between
V3D versions so we can't use a static global any more.

total instructions in shared programs: 11241106 -> 11047872 (-1.72%)
instructions in affected programs: 4634458 -> 4441224 (-4.17%)
helped: 25119
HURT: 1717
Instructions are helped.

total threads in shared programs: 425238 -> 425036 (-0.05%)
threads in affected programs: 878 -> 676 (-23.01%)
helped: 79
HURT: 180
Inconclusive result (%-change mean confidence interval includes 0).

total loops in shared programs: 1968 -> 1933 (-1.78%)
loops in affected programs: 35 -> 0
helped: 35
HURT: 0
Loops are helped.

total uniforms in shared programs: 3845314 -> 3845219 (<.01%)
uniforms in affected programs: 213615 -> 213520 (-0.04%)
helped: 1338
HURT: 1059
Inconclusive result (value mean confidence interval includes 0).

total max-temps in shared programs: 2224313 -> 2221507 (-0.13%)
max-temps in affected programs: 236054 -> 233248 (-1.19%)
helped: 4863
HURT: 3357
Max-temps are helped.

total spills in shared programs: 4264 -> 4294 (0.70%)
spills in affected programs: 274 -> 304 (10.95%)
helped: 8
HURT: 16

total fills in shared programs: 6638 -> 6497 (-2.12%)
fills in affected programs: 2240 -> 2099 (-6.29%)
helped: 55
HURT: 17

total sfu-stalls in shared programs: 14942 -> 14353 (-3.94%)
sfu-stalls in affected programs: 4863 -> 4274 (-12.11%)
helped: 1287
HURT: 1165
Sfu-stalls are helped.

total inst-and-stalls in shared programs: 11256048 -> 11062225 (-1.72%)
inst-and-stalls in affected programs: 4635701 -> 4441878 (-4.18%)
helped: 25074
HURT: 1728
Inst-and-stalls are helped.

total nops in shared programs: 270482 -> 270621 (0.05%)
nops in affected programs: 27579 -> 27718 (0.50%)
helped: 1583
HURT: 1967
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Juan A. Suarez <jasuarez@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30086>
This commit is contained in:
@@ -673,81 +673,90 @@ v3d_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
return true;
|
||||
}
|
||||
|
||||
static const nir_shader_compiler_options v3d_nir_options = {
|
||||
.compact_arrays = true,
|
||||
.lower_uadd_sat = true,
|
||||
.lower_usub_sat = true,
|
||||
.lower_iadd_sat = true,
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
.lower_cs_local_id_to_index = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_pack_32_2x16 = true,
|
||||
.lower_pack_32_2x16_split = true,
|
||||
.lower_unpack_32_2x16_split = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_find_lsb = true,
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
.lower_ffma64 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_fpow = true,
|
||||
.lower_fsat = true,
|
||||
.lower_fsqrt = true,
|
||||
.lower_ifind_msb = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_hadd = true,
|
||||
.lower_fisnormal = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_to_scalar = true,
|
||||
.lower_int64_options =
|
||||
nir_lower_bcsel64 |
|
||||
nir_lower_conv64 |
|
||||
nir_lower_iadd64 |
|
||||
nir_lower_icmp64 |
|
||||
nir_lower_imul_2x32_64 |
|
||||
nir_lower_imul64 |
|
||||
nir_lower_ineg64 |
|
||||
nir_lower_logic64 |
|
||||
nir_lower_shift64 |
|
||||
nir_lower_ufind_msb64,
|
||||
.lower_fquantize2f16 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.divergence_analysis_options =
|
||||
nir_divergence_multiple_workgroup_per_compute_subgroup,
|
||||
/* This will enable loop unrolling in the state tracker so we won't
|
||||
* be able to selectively disable it in backend if it leads to
|
||||
* lower thread counts or TMU spills. Choose a conservative maximum to
|
||||
* limit register pressure impact.
|
||||
*/
|
||||
.max_unroll_iterations = 16,
|
||||
.force_indirect_unrolling_sampler = true,
|
||||
};
|
||||
|
||||
static const void *
|
||||
v3d_screen_get_compiler_options(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir, enum pipe_shader_type shader)
|
||||
enum pipe_shader_ir ir,
|
||||
enum pipe_shader_type shader)
|
||||
{
|
||||
return &v3d_nir_options;
|
||||
struct v3d_screen *screen = v3d_screen(pscreen);
|
||||
const struct v3d_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
static bool initialized = false;
|
||||
static nir_shader_compiler_options options = {
|
||||
.compact_arrays = true,
|
||||
.lower_uadd_sat = true,
|
||||
.lower_usub_sat = true,
|
||||
.lower_iadd_sat = true,
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert = true,
|
||||
.lower_bitfield_extract = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
.lower_bit_count = true,
|
||||
.lower_cs_local_id_to_index = true,
|
||||
.lower_ffract = true,
|
||||
.lower_fmod = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_pack_32_2x16 = true,
|
||||
.lower_pack_32_2x16_split = true,
|
||||
.lower_unpack_32_2x16_split = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_find_lsb = true,
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
.lower_ffma64 = true,
|
||||
.lower_flrp32 = true,
|
||||
.lower_fpow = true,
|
||||
.lower_fsqrt = true,
|
||||
.lower_ifind_msb = true,
|
||||
.lower_isign = true,
|
||||
.lower_ldexp = true,
|
||||
.lower_hadd = true,
|
||||
.lower_fisnormal = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_to_scalar = true,
|
||||
.lower_int64_options =
|
||||
nir_lower_bcsel64 |
|
||||
nir_lower_conv64 |
|
||||
nir_lower_iadd64 |
|
||||
nir_lower_icmp64 |
|
||||
nir_lower_imul_2x32_64 |
|
||||
nir_lower_imul64 |
|
||||
nir_lower_ineg64 |
|
||||
nir_lower_logic64 |
|
||||
nir_lower_shift64 |
|
||||
nir_lower_ufind_msb64,
|
||||
.lower_fquantize2f16 = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.divergence_analysis_options =
|
||||
nir_divergence_multiple_workgroup_per_compute_subgroup,
|
||||
/* This will enable loop unrolling in the state tracker so we won't
|
||||
* be able to selectively disable it in backend if it leads to
|
||||
* lower thread counts or TMU spills. Choose a conservative maximum to
|
||||
* limit register pressure impact.
|
||||
*/
|
||||
.max_unroll_iterations = 16,
|
||||
.force_indirect_unrolling_sampler = true,
|
||||
};
|
||||
|
||||
if (!initialized) {
|
||||
options.lower_fsat = devinfo->ver < 71;
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
return &options;
|
||||
}
|
||||
|
||||
static const uint64_t v3d_available_modifiers[] = {
|
||||
|
Reference in New Issue
Block a user