Revert "nir: replace lower_ffma and fuse_ffma with has_ffma"
This reverts commit 939ddf3f67.

Intel has a separate pass for fusing FFMAs selectively. We split these flags in commit 1b72c31e1f and the reasoning still stands. The patch being reverted was just a cleanup, so there should be no issue with reverting it.

Acked-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6849>
This commit is contained in:
@@ -70,6 +70,9 @@ static const struct nir_shader_compiler_options nir_options_llvm = {
|
|||||||
.lower_unpack_unorm_4x8 = true,
|
.lower_unpack_unorm_4x8 = true,
|
||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_mul_2x32_64 = true,
|
.lower_mul_2x32_64 = true,
|
||||||
.lower_rotate = true,
|
.lower_rotate = true,
|
||||||
@@ -112,6 +115,9 @@ static const struct nir_shader_compiler_options nir_options_aco = {
|
|||||||
.lower_unpack_half_2x16 = true,
|
.lower_unpack_half_2x16 = true,
|
||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_mul_2x32_64 = true,
|
.lower_mul_2x32_64 = true,
|
||||||
.lower_rotate = true,
|
.lower_rotate = true,
|
||||||
|
@@ -2874,6 +2874,9 @@ const nir_shader_compiler_options v3d_nir_options = {
|
|||||||
.lower_unpack_half_2x16 = true,
|
.lower_unpack_half_2x16 = true,
|
||||||
.lower_fdiv = true,
|
.lower_fdiv = true,
|
||||||
.lower_find_lsb = true,
|
.lower_find_lsb = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_fsat = true,
|
.lower_fsat = true,
|
||||||
|
@@ -3055,9 +3055,12 @@ typedef enum {
|
|||||||
|
|
||||||
typedef struct nir_shader_compiler_options {
|
typedef struct nir_shader_compiler_options {
|
||||||
bool lower_fdiv;
|
bool lower_fdiv;
|
||||||
bool has_ffma16;
|
bool lower_ffma16;
|
||||||
bool has_ffma32;
|
bool lower_ffma32;
|
||||||
bool has_ffma64;
|
bool lower_ffma64;
|
||||||
|
bool fuse_ffma16;
|
||||||
|
bool fuse_ffma32;
|
||||||
|
bool fuse_ffma64;
|
||||||
bool lower_flrp16;
|
bool lower_flrp16;
|
||||||
bool lower_flrp32;
|
bool lower_flrp32;
|
||||||
/** Lowers flrp when it does not support doubles */
|
/** Lowers flrp when it does not support doubles */
|
||||||
|
@@ -370,11 +370,11 @@ convert_flrp_instruction(nir_builder *bld,
|
|||||||
unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
|
unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
|
||||||
|
|
||||||
if (bit_size == 16)
|
if (bit_size == 16)
|
||||||
have_ffma = bld->shader->options->has_ffma16;
|
have_ffma = !bld->shader->options->lower_ffma16;
|
||||||
else if (bit_size == 32)
|
else if (bit_size == 32)
|
||||||
have_ffma = bld->shader->options->has_ffma32;
|
have_ffma = !bld->shader->options->lower_ffma32;
|
||||||
else if (bit_size == 64)
|
else if (bit_size == 64)
|
||||||
have_ffma = bld->shader->options->has_ffma64;
|
have_ffma = !bld->shader->options->lower_ffma64;
|
||||||
else
|
else
|
||||||
unreachable("invalid bit_size");
|
unreachable("invalid bit_size");
|
||||||
|
|
||||||
|
@@ -193,13 +193,13 @@ optimizations.extend([
|
|||||||
(('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
|
(('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
|
||||||
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
|
||||||
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
|
||||||
(('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma16'),
|
(('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
|
||||||
(('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma32'),
|
(('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
|
||||||
(('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma64'),
|
(('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
|
||||||
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
|
||||||
(('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma16'),
|
(('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
|
||||||
(('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma32'),
|
(('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
|
||||||
(('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma64'),
|
(('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
|
||||||
|
|
||||||
(('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
|
(('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
|
||||||
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
|
('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
|
||||||
@@ -2032,9 +2032,9 @@ late_optimizations = [
|
|||||||
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
|
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
|
||||||
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
|
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
|
||||||
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
|
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
|
||||||
(('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma16'),
|
(('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
|
||||||
(('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma32'),
|
(('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
|
||||||
(('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma64'),
|
(('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
|
||||||
|
|
||||||
# These are duplicated from the main optimizations table. The late
|
# These are duplicated from the main optimizations table. The late
|
||||||
# patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
|
# patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
|
||||||
|
@@ -47,9 +47,9 @@ static const nir_shader_compiler_options options = {
|
|||||||
.lower_usub_borrow = true,
|
.lower_usub_borrow = true,
|
||||||
.lower_mul_high = true,
|
.lower_mul_high = true,
|
||||||
.lower_mul_2x32_64 = true,
|
.lower_mul_2x32_64 = true,
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.vertex_id_zero_based = true,
|
.vertex_id_zero_based = true,
|
||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
@@ -99,9 +99,9 @@ static const nir_shader_compiler_options options_a6xx = {
|
|||||||
.lower_usub_borrow = true,
|
.lower_usub_borrow = true,
|
||||||
.lower_mul_high = true,
|
.lower_mul_high = true,
|
||||||
.lower_mul_2x32_64 = true,
|
.lower_mul_2x32_64 = true,
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.vertex_id_zero_based = false,
|
.vertex_id_zero_based = false,
|
||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
|
@@ -1004,9 +1004,9 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
|||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_sub = true,
|
.lower_sub = true,
|
||||||
.lower_ftrunc = true,
|
.lower_ftrunc = true,
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.lower_bitops = true,
|
.lower_bitops = true,
|
||||||
.lower_all_io_to_temps = true,
|
.lower_all_io_to_temps = true,
|
||||||
.vertex_id_zero_based = true,
|
.vertex_id_zero_based = true,
|
||||||
|
@@ -35,9 +35,9 @@ static const nir_shader_compiler_options options = {
|
|||||||
.lower_fmod = true,
|
.lower_fmod = true,
|
||||||
.lower_fdiv = true,
|
.lower_fdiv = true,
|
||||||
.lower_fceil = true,
|
.lower_fceil = true,
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
/* .fdot_replicates = true, it is replicated, but it makes things worse */
|
/* .fdot_replicates = true, it is replicated, but it makes things worse */
|
||||||
.lower_all_io_to_temps = true,
|
.lower_all_io_to_temps = true,
|
||||||
.vertex_id_zero_based = true, /* its not implemented anyway */
|
.vertex_id_zero_based = true, /* its not implemented anyway */
|
||||||
|
@@ -42,6 +42,9 @@
|
|||||||
#include "ir/lima_ir.h"
|
#include "ir/lima_ir.h"
|
||||||
|
|
||||||
static const nir_shader_compiler_options vs_nir_options = {
|
static const nir_shader_compiler_options vs_nir_options = {
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_ffract = true,
|
.lower_ffract = true,
|
||||||
.lower_fdiv = true,
|
.lower_fdiv = true,
|
||||||
@@ -59,6 +62,9 @@ static const nir_shader_compiler_options vs_nir_options = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const nir_shader_compiler_options fs_nir_options = {
|
static const nir_shader_compiler_options fs_nir_options = {
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
.lower_fdiv = true,
|
.lower_fdiv = true,
|
||||||
.lower_fmod = true,
|
.lower_fmod = true,
|
||||||
|
@@ -549,6 +549,9 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
|||||||
.lower_bitfield_insert_to_shifts = true,
|
.lower_bitfield_insert_to_shifts = true,
|
||||||
.lower_bitfield_extract_to_shifts = true,
|
.lower_bitfield_extract_to_shifts = true,
|
||||||
.lower_sub = true,
|
.lower_sub = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fmod = true,
|
.lower_fmod = true,
|
||||||
.lower_hadd = true,
|
.lower_hadd = true,
|
||||||
.lower_add_sat = true,
|
.lower_add_sat = true,
|
||||||
|
@@ -3206,6 +3206,12 @@ nvir_nir_shader_compiler_options(int chipset)
|
|||||||
{
|
{
|
||||||
nir_shader_compiler_options op = {};
|
nir_shader_compiler_options op = {};
|
||||||
op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
|
op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
|
||||||
|
op.lower_ffma16 = false;
|
||||||
|
op.lower_ffma32 = false;
|
||||||
|
op.lower_ffma64 = false;
|
||||||
|
op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
|
||||||
|
op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
|
||||||
|
op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
|
||||||
op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
|
op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
|
||||||
op.lower_flrp32 = true;
|
op.lower_flrp32 = true;
|
||||||
op.lower_flrp64 = true;
|
op.lower_flrp64 = true;
|
||||||
|
@@ -923,6 +923,9 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const nir_shader_compiler_options nir_options = {
|
static const nir_shader_compiler_options nir_options = {
|
||||||
|
.fuse_ffma16 = false, /* nir doesn't track mad vs fma */
|
||||||
|
.fuse_ffma32 = false, /* nir doesn't track mad vs fma */
|
||||||
|
.fuse_ffma64 = false, /* nir doesn't track mad vs fma */
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_flrp64 = true,
|
.lower_flrp64 = true,
|
||||||
.lower_fpow = false,
|
.lower_fpow = false,
|
||||||
|
@@ -1179,9 +1179,9 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const struct nir_shader_compiler_options r600_nir_fs_options = {
|
const struct nir_shader_compiler_options r600_nir_fs_options = {
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.lower_scmp = true,
|
.lower_scmp = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_flrp64 = true,
|
.lower_flrp64 = true,
|
||||||
@@ -1205,9 +1205,9 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const struct nir_shader_compiler_options r600_nir_options = {
|
const struct nir_shader_compiler_options r600_nir_options = {
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.lower_scmp = true,
|
.lower_scmp = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_flrp64 = true,
|
.lower_flrp64 = true,
|
||||||
|
@@ -953,9 +953,12 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
|||||||
* gfx9 and newer prefer FMA for F16 because of the packed instruction.
|
* gfx9 and newer prefer FMA for F16 because of the packed instruction.
|
||||||
* gfx10 and older prefer MAD for F32 because of the legacy instruction.
|
* gfx10 and older prefer MAD for F32 because of the legacy instruction.
|
||||||
*/
|
*/
|
||||||
.has_ffma16 = sscreen->info.chip_class >= GFX9,
|
.lower_ffma16 = sscreen->info.chip_class < GFX9,
|
||||||
.has_ffma32 = sscreen->info.chip_class >= GFX10_3,
|
.lower_ffma32 = sscreen->info.chip_class < GFX10_3,
|
||||||
.has_ffma64 = true,
|
.lower_ffma64 = false,
|
||||||
|
.fuse_ffma16 = sscreen->info.chip_class >= GFX9,
|
||||||
|
.fuse_ffma32 = sscreen->info.chip_class >= GFX10_3,
|
||||||
|
.fuse_ffma64 = true,
|
||||||
.lower_fmod = true,
|
.lower_fmod = true,
|
||||||
.lower_pack_snorm_4x8 = true,
|
.lower_pack_snorm_4x8 = true,
|
||||||
.lower_pack_unorm_4x8 = true,
|
.lower_pack_unorm_4x8 = true,
|
||||||
|
@@ -2179,6 +2179,9 @@ static const nir_shader_compiler_options nir_options = {
|
|||||||
.lower_extract_byte = true,
|
.lower_extract_byte = true,
|
||||||
.lower_extract_word = true,
|
.lower_extract_word = true,
|
||||||
.lower_fdiv = true,
|
.lower_fdiv = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_fmod = true,
|
.lower_fmod = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
|
@@ -126,6 +126,9 @@ lower_discard_if(nir_shader *shader)
|
|||||||
|
|
||||||
static const struct nir_shader_compiler_options nir_options = {
|
static const struct nir_shader_compiler_options nir_options = {
|
||||||
.lower_all_io_to_temps = true,
|
.lower_all_io_to_temps = true,
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_fdph = true,
|
.lower_fdph = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
.lower_fpow = true,
|
.lower_fpow = true,
|
||||||
|
@@ -183,9 +183,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
|
|||||||
/* Prior to Gen6, there are no three source operations, and Gen11 loses
|
/* Prior to Gen6, there are no three source operations, and Gen11 loses
|
||||||
* LRP.
|
* LRP.
|
||||||
*/
|
*/
|
||||||
nir_options->has_ffma16 = devinfo->gen >= 6;
|
nir_options->lower_ffma16 = devinfo->gen < 6;
|
||||||
nir_options->has_ffma32 = devinfo->gen >= 6;
|
nir_options->lower_ffma32 = devinfo->gen < 6;
|
||||||
nir_options->has_ffma64 = devinfo->gen >= 6;
|
nir_options->lower_ffma64 = devinfo->gen < 6;
|
||||||
nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11;
|
nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11;
|
||||||
nir_options->lower_fpow = devinfo->gen >= 12;
|
nir_options->lower_fpow = devinfo->gen >= 12;
|
||||||
|
|
||||||
|
@@ -69,9 +69,9 @@ static const nir_shader_compiler_options bifrost_nir_options = {
|
|||||||
|
|
||||||
.lower_bitfield_extract_to_shifts = true,
|
.lower_bitfield_extract_to_shifts = true,
|
||||||
.vectorize_io = true,
|
.vectorize_io = true,
|
||||||
.has_ffma16 = true,
|
.fuse_ffma16 = true,
|
||||||
.has_ffma32 = true,
|
.fuse_ffma32 = true,
|
||||||
.has_ffma64 = true,
|
.fuse_ffma64 = true,
|
||||||
.use_interpolated_input_intrinsics = true
|
.use_interpolated_input_intrinsics = true
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -36,6 +36,9 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
|
|||||||
* solution. */
|
* solution. */
|
||||||
|
|
||||||
static const nir_shader_compiler_options midgard_nir_options = {
|
static const nir_shader_compiler_options midgard_nir_options = {
|
||||||
|
.lower_ffma16 = true,
|
||||||
|
.lower_ffma32 = true,
|
||||||
|
.lower_ffma64 = true,
|
||||||
.lower_scmp = true,
|
.lower_scmp = true,
|
||||||
.lower_flrp16 = true,
|
.lower_flrp16 = true,
|
||||||
.lower_flrp32 = true,
|
.lower_flrp32 = true,
|
||||||
|
Reference in New Issue
Block a user