diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 3df8ccd3e62..cebefc8debf 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -70,9 +70,6 @@ static const struct nir_shader_compiler_options nir_options_llvm = { .lower_unpack_unorm_4x8 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fpow = true, .lower_mul_2x32_64 = true, .lower_rotate = true, @@ -115,9 +112,6 @@ static const struct nir_shader_compiler_options nir_options_aco = { .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fpow = true, .lower_mul_2x32_64 = true, .lower_rotate = true, diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 25b3230a2e7..1a847cf97ff 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2874,9 +2874,6 @@ const nir_shader_compiler_options v3d_nir_options = { .lower_unpack_half_2x16 = true, .lower_fdiv = true, .lower_find_lsb = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsat = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 1741f4f9bbf..3cc2750d6e9 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3055,12 +3055,9 @@ typedef enum { typedef struct nir_shader_compiler_options { bool lower_fdiv; - bool lower_ffma16; - bool lower_ffma32; - bool lower_ffma64; - bool fuse_ffma16; - bool fuse_ffma32; - bool fuse_ffma64; + bool has_ffma16; + bool has_ffma32; + bool has_ffma64; bool lower_flrp16; bool lower_flrp32; /** Lowers flrp when it does not support doubles */ diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c index d9c45877dd0..de8b8fdc3c5 100644 --- a/src/compiler/nir/nir_lower_flrp.c +++ b/src/compiler/nir/nir_lower_flrp.c @@ -370,11 +370,11 @@ convert_flrp_instruction(nir_builder *bld, unsigned bit_size = nir_dest_bit_size(alu->dest.dest); if (bit_size == 16) - have_ffma = !bld->shader->options->lower_ffma16; + have_ffma = bld->shader->options->has_ffma16; else if (bit_size == 32) - have_ffma = !bld->shader->options->lower_ffma32; + have_ffma = bld->shader->options->has_ffma32; else if (bit_size == 64) - have_ffma = !bld->shader->options->lower_ffma64; + have_ffma = bld->shader->options->has_ffma64; else unreachable("invalid bit_size"); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index be836f954ed..4e6eaae6fd2 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -193,13 +193,13 @@ optimizations.extend([ (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'), (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), - (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), - (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), - (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), + (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma16'), + (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma32'), + (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma64'), # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late). - (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'), - (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'), - (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'), + (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma16'), + (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma32'), + (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma64'), (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'), ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))), @@ -2032,9 +2032,9 @@ late_optimizations = [ (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'), - (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'), - (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'), - (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'), + (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma16'), + (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma32'), + (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma64'), # These are duplicated from the main optimizations table. The late # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index f33f048a505..0d3e02e3bc3 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -47,9 +47,9 @@ static const nir_shader_compiler_options options = { .lower_usub_borrow = true, .lower_mul_high = true, .lower_mul_2x32_64 = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .vertex_id_zero_based = true, .lower_extract_byte = true, .lower_extract_word = true, @@ -99,9 +99,9 @@ static const nir_shader_compiler_options options_a6xx = { .lower_usub_borrow = true, .lower_mul_high = true, .lower_mul_2x32_64 = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .vertex_id_zero_based = false, .lower_extract_byte = true, .lower_extract_word = true, diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index 3cc7dcec148..d7ae2455a9d 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -1004,9 +1004,9 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu, .lower_fpow = true, .lower_sub = true, .lower_ftrunc = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .lower_bitops = true, .lower_all_io_to_temps = true, .vertex_id_zero_based = true, diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 6cf95d5f4e3..ac95abf19dc 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -35,9 +35,9 @@ static const nir_shader_compiler_options options = { .lower_fmod = true, .lower_fdiv = true, .lower_fceil = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, /* .fdot_replicates = true, it is replicated, but it makes things worse */ .lower_all_io_to_temps = true, .vertex_id_zero_based = true, /* its not implemented anyway */ diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 30a3f527181..0b5e9b6b3bc 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -42,9 +42,6 @@ #include "ir/lima_ir.h" static const nir_shader_compiler_options vs_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fpow = true, .lower_ffract = true, .lower_fdiv = true, @@ -62,9 +59,6 @@ static const nir_shader_compiler_options vs_nir_options = { }; static const nir_shader_compiler_options fs_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fpow = true, .lower_fdiv = true, .lower_fmod = true, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 218d80e5c48..6ab9dd939e7 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -549,9 +549,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, .lower_sub = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fmod = true, .lower_hadd = true, .lower_add_sat = true, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 64453edf19b..d06818f4488 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3206,12 +3206,6 @@ nvir_nir_shader_compiler_options(int chipset) { nir_shader_compiler_options op = {}; op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); - op.lower_ffma16 = false; - op.lower_ffma32 = false; - op.lower_ffma64 = false; - op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */ - op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */ - op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */ op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET); op.lower_flrp32 = true; op.lower_flrp64 = true; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 29201eee649..e2007f24cd5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -923,9 +923,6 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space) } static const nir_shader_compiler_options nir_options = { - .fuse_ffma16 = false, /* nir doesn't track mad vs fma */ - .fuse_ffma32 = false, /* nir doesn't track mad vs fma */ - .fuse_ffma64 = false, /* nir doesn't track mad vs fma */ .lower_flrp32 = true, .lower_flrp64 = true, .lower_fpow = false, diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index eab7ce91dd7..73912b268a4 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1179,9 +1179,9 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, } const struct nir_shader_compiler_options r600_nir_fs_options = { - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .lower_scmp = true, .lower_flrp32 = true, .lower_flrp64 = true, @@ -1205,9 +1205,9 @@ const struct nir_shader_compiler_options r600_nir_fs_options = { }; const struct nir_shader_compiler_options r600_nir_options = { - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .lower_scmp = true, .lower_flrp32 = true, .lower_flrp64 = true, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 4c684744c4d..7a3c6eb3bd8 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -953,12 +953,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen) * gfx9 and newer prefer FMA for F16 because of the packed instruction. * gfx10 and older prefer MAD for F32 because of the legacy instruction. */ - .lower_ffma16 = sscreen->info.chip_class < GFX9, - .lower_ffma32 = sscreen->info.chip_class < GFX10_3, - .lower_ffma64 = false, - .fuse_ffma16 = sscreen->info.chip_class >= GFX9, - .fuse_ffma32 = sscreen->info.chip_class >= GFX10_3, - .fuse_ffma64 = true, + .has_ffma16 = sscreen->info.chip_class >= GFX9, + .has_ffma32 = sscreen->info.chip_class >= GFX10_3, + .has_ffma64 = true, .lower_fmod = true, .lower_pack_snorm_4x8 = true, .lower_pack_unorm_4x8 = true, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 8bffb1d3840..a650fe76dab 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2179,9 +2179,6 @@ static const nir_shader_compiler_options nir_options = { .lower_extract_byte = true, .lower_extract_word = true, .lower_fdiv = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_flrp32 = true, .lower_fmod = true, .lower_fpow = true, diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 8f0c16cb13b..a15f7a902b9 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -126,9 +126,6 @@ lower_discard_if(nir_shader *shader) static const struct nir_shader_compiler_options nir_options = { .lower_all_io_to_temps = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_fdph = true, .lower_flrp32 = true, .lower_fpow = true, diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 3d0fcbe8d6f..e000c32c5d9 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -183,9 +183,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) /* Prior to Gen6, there are no three source operations, and Gen11 loses * LRP. */ - nir_options->lower_ffma16 = devinfo->gen < 6; - nir_options->lower_ffma32 = devinfo->gen < 6; - nir_options->lower_ffma64 = devinfo->gen < 6; + nir_options->has_ffma16 = devinfo->gen >= 6; + nir_options->has_ffma32 = devinfo->gen >= 6; + nir_options->has_ffma64 = devinfo->gen >= 6; nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11; nir_options->lower_fpow = devinfo->gen >= 12; diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h index 15b90788133..a46beeddb6a 100644 --- a/src/panfrost/bifrost/bifrost_compile.h +++ b/src/panfrost/bifrost/bifrost_compile.h @@ -69,9 +69,9 @@ static const nir_shader_compiler_options bifrost_nir_options = { .lower_bitfield_extract_to_shifts = true, .vectorize_io = true, - .fuse_ffma16 = true, - .fuse_ffma32 = true, - .fuse_ffma64 = true, + .has_ffma16 = true, + .has_ffma32 = true, + .has_ffma64 = true, .use_interpolated_input_intrinsics = true }; diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index fabed8bb5b2..1da9cffae74 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -36,9 +36,6 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b * solution. */ static const nir_shader_compiler_options midgard_nir_options = { - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, .lower_scmp = true, .lower_flrp16 = true, .lower_flrp32 = true,