From 305fdfddb5efc4ee064a055a953948fd00c5438c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <timur.kristof@gmail.com>
Date: Thu, 9 Jan 2025 16:03:02 -0600
Subject: [PATCH] ac/nir: Move ac_set_nir_options to ac_nir.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also rename it to ac_nir_set_options to match the naming of the other ac_nir functions.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32966>
---
 src/amd/common/ac_nir.c               | 100 ++++++++++++++++++++++++++
 src/amd/common/ac_nir.h               |   4 ++
 src/amd/common/ac_shader_util.c       |  99 -------------------------
 src/amd/common/ac_shader_util.h       |   3 -
 src/amd/compiler/tests/helpers.cpp    |   3 +-
 src/amd/vulkan/radv_shader.c          |   2 +-
 src/gallium/drivers/radeonsi/si_get.c |   2 +-
 7 files changed, 108 insertions(+), 105 deletions(-)

diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index 88af6084208..6cf8a290b19 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -11,6 +11,106 @@
 #include "nir_builder.h"
 #include "nir_xfb_info.h"
 
+
+/* Set NIR options shared by ACO, LLVM, RADV, and radeonsi. Zeroes *options, then fills it from info and use_llvm. */
+void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
+                        nir_shader_compiler_options *options)
+{
+   /*        |---------------------------------- Performance & Availability --------------------------------|
+    *        |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY|    FMA     |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
+    * Arch   |    F32,F16,F64    | F32,F16  | F32,F16  |F32,F16,F64 |    F32,F16     |   F32    |PK_FMAC_F16|F16,F32,F64
+    * ------------------------------------------------------------------------------------------------------------------
+    * gfx6,7 |     1 , - , -     |  1 , -   |  1 , -   |1/4, - ,1/16|     - , -      |    -     |   - , -   | - ,MAD,FMA
+    * gfx8   |     1 , 1 , -     |  1 , -   |  - , -   |1/4, 1 ,1/16|     - , -      |    -     |   - , -   |MAD,MAD,FMA
+    * gfx9   |     1 ,1|0, -     |  1 , -   |  - , -   | 1 , 1 ,1/16|    0|1, -      |    -     |   2 , -   |FMA,MAD,FMA
+    * gfx10  |     1 , - , -     |  1 , -   |  1 , -   | 1 , 1 ,1/16|     1 , 1      |    -     |   2 , 2   |FMA,MAD,FMA
+    * gfx10.3|     - , - , -     |  - , -   |  - , -   | 1 , 1 ,1/16|     1 , 1      |    1     |   2 , 2   |  all FMA
+    * gfx11  |     - , - , -     |  - , -   |  - , -   | 2 , 2 ,1/16|     2 , 2      |    2     |   2 , 2   |  all FMA
+    *
+    * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
+    * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
+    * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
+    *
+    * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
+    * gfx9 and newer prefer FMA for F16 because of the packed instruction.
+    * gfx10 and older prefer MAD for F32 because of the legacy instruction.
+    */
+
+   memset(options, 0, sizeof(*options)); /* Every option not assigned below stays 0/false. */
+   options->vertex_id_zero_based = true;
+   options->lower_scmp = true;
+   options->lower_flrp16 = true;
+   options->lower_flrp32 = true;
+   options->lower_flrp64 = true;
+   options->lower_device_index_to_zero = true;
+   options->lower_fdiv = true;
+   options->lower_fmod = true;
+   options->lower_ineg = true;
+   options->lower_bitfield_insert = true;
+   options->lower_bitfield_extract = true;
+   options->lower_pack_snorm_4x8 = true;
+   options->lower_pack_unorm_4x8 = true;
+   options->lower_pack_half_2x16 = true;
+   options->lower_pack_64_2x32 = true;
+   options->lower_pack_64_4x16 = true;
+   options->lower_pack_32_2x16 = true;
+   options->lower_unpack_snorm_2x16 = true;
+   options->lower_unpack_snorm_4x8 = true;
+   options->lower_unpack_unorm_2x16 = true;
+   options->lower_unpack_unorm_4x8 = true;
+   options->lower_unpack_half_2x16 = true;
+   options->lower_fpow = true;
+   options->lower_mul_2x32_64 = true;
+   options->lower_iadd_sat = info->gfx_level <= GFX8;
+   options->lower_hadd = true;
+   options->lower_mul_32x16 = true;
+   options->has_bfe = true;
+   options->has_bfm = true;
+   options->has_bitfield_select = true;
+   options->has_fneo_fcmpu = true;
+   options->has_ford_funord = true;
+   options->has_fsub = true;
+   options->has_isub = true;
+   options->has_sdot_4x8 = info->has_accelerated_dot_product;
+   options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
+   options->has_udot_4x8 = info->has_accelerated_dot_product;
+   options->has_sdot_4x8_sat = info->has_accelerated_dot_product;
+   options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
+   options->has_udot_4x8_sat = info->has_accelerated_dot_product;
+   options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11;
+   options->has_find_msb_rev = true;
+   options->has_pack_32_4x8 = true;
+   options->has_pack_half_2x16_rtz = true;
+   options->has_bit_test = !use_llvm; /* Only enabled when use_llvm is false. */
+   options->has_fmulz = true;
+   options->has_msad = true;
+   options->has_shfr32 = true;
+   options->lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 |
+                                  nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64 | nir_lower_conv64;
+   options->divergence_analysis_options = nir_divergence_view_index_uniform;
+   options->optimize_quad_vote_to_reduce = !use_llvm; /* Only enabled when use_llvm is false. */
+   options->lower_fisnormal = true;
+   options->support_16bit_alu = info->gfx_level >= GFX8;
+   options->vectorize_vec2_16bit = info->has_packed_math_16bit;
+   options->discard_is_demote = true;
+   options->optimize_sample_mask_in = true;
+   options->optimize_load_front_face_fsign = true;
+   options->io_options = nir_io_has_flexible_input_interpolation_except_flat |
+                         (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) |
+                         nir_io_prefer_scalar_fs_inputs |
+                         nir_io_mix_convergent_flat_with_interpolated |
+                         nir_io_vectorizer_ignores_types |
+                         nir_io_compaction_rotates_color_channels;
+   options->lower_layer_fs_input_to_sysval = true;
+   options->scalarize_ddx = true;
+   options->skip_lower_packing_ops =
+      BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32) |
+      BITFIELD_BIT(nir_lower_packing_op_unpack_64_4x16) |
+      BITFIELD_BIT(nir_lower_packing_op_unpack_32_2x16) |
+      BITFIELD_BIT(nir_lower_packing_op_pack_32_4x8) |
+      BITFIELD_BIT(nir_lower_packing_op_unpack_32_4x8);
+}
+
 /* Sleep for the given number of clock cycles. */
 void
 ac_nir_sleep(nir_builder *b, unsigned num_cycles)
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index bd41b3c8b67..bcc89ca3431 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -60,6 +60,10 @@ typedef struct nir_xfb_info nir_xfb_info;
 /* Executed by ac_nir_cull when the current primitive is accepted. */
 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
 
+void
+ac_nir_set_options(struct radeon_info *info, bool use_llvm,
+                   nir_shader_compiler_options *options);
+
 nir_def *
 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
                           struct ac_arg arg, unsigned relative_index);
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 91810359008..e9c5eaec364 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -15,105 +15,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* Set NIR options shared by ACO, LLVM, RADV, and radeonsi. */
-void ac_set_nir_options(struct radeon_info *info, bool use_llvm,
-                        nir_shader_compiler_options *options)
-{
-   /*        |---------------------------------- Performance & Availability --------------------------------|
-    *        |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY|    FMA     |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
-    * Arch   |    F32,F16,F64    | F32,F16  | F32,F16  |F32,F16,F64 |    F32,F16     |   F32    |PK_FMAC_F16|F16,F32,F64
-    * ------------------------------------------------------------------------------------------------------------------
-    * gfx6,7 |     1 , - , -     |  1 , -   |  1 , -   |1/4, - ,1/16|     - , -      |    -     |   - , -   | - ,MAD,FMA
-    * gfx8   |     1 , 1 , -     |  1 , -   |  - , -   |1/4, 1 ,1/16|     - , -      |    -     |   - , -   |MAD,MAD,FMA
-    * gfx9   |     1 ,1|0, -     |  1 , -   |  - , -   | 1 , 1 ,1/16|    0|1, -      |    -     |   2 , -   |FMA,MAD,FMA
-    * gfx10  |     1 , - , -     |  1 , -   |  1 , -   | 1 , 1 ,1/16|     1 , 1      |    -     |   2 , 2   |FMA,MAD,FMA
-    * gfx10.3|     - , - , -     |  - , -   |  - , -   | 1 , 1 ,1/16|     1 , 1      |    1     |   2 , 2   |  all FMA
-    * gfx11  |     - , - , -     |  - , -   |  - , -   | 2 , 2 ,1/16|     2 , 2      |    2     |   2 , 2   |  all FMA
-    *
-    * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
-    * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
-    * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
-    *
-    * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
-    * gfx9 and newer prefer FMA for F16 because of the packed instruction.
-    * gfx10 and older prefer MAD for F32 because of the legacy instruction.
-    */
-
-   memset(options, 0, sizeof(*options));
-   options->vertex_id_zero_based = true;
-   options->lower_scmp = true;
-   options->lower_flrp16 = true;
-   options->lower_flrp32 = true;
-   options->lower_flrp64 = true;
-   options->lower_device_index_to_zero = true;
-   options->lower_fdiv = true;
-   options->lower_fmod = true;
-   options->lower_ineg = true;
-   options->lower_bitfield_insert = true;
-   options->lower_bitfield_extract = true;
-   options->lower_pack_snorm_4x8 = true;
-   options->lower_pack_unorm_4x8 = true;
-   options->lower_pack_half_2x16 = true;
-   options->lower_pack_64_2x32 = true;
-   options->lower_pack_64_4x16 = true;
-   options->lower_pack_32_2x16 = true;
-   options->lower_unpack_snorm_2x16 = true;
-   options->lower_unpack_snorm_4x8 = true;
-   options->lower_unpack_unorm_2x16 = true;
-   options->lower_unpack_unorm_4x8 = true;
-   options->lower_unpack_half_2x16 = true;
-   options->lower_fpow = true;
-   options->lower_mul_2x32_64 = true;
-   options->lower_iadd_sat = info->gfx_level <= GFX8;
-   options->lower_hadd = true;
-   options->lower_mul_32x16 = true;
-   options->has_bfe = true;
-   options->has_bfm = true;
-   options->has_bitfield_select = true;
-   options->has_fneo_fcmpu = true;
-   options->has_ford_funord = true;
-   options->has_fsub = true;
-   options->has_isub = true;
-   options->has_sdot_4x8 = info->has_accelerated_dot_product;
-   options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
-   options->has_udot_4x8 = info->has_accelerated_dot_product;
-   options->has_sdot_4x8_sat = info->has_accelerated_dot_product;
-   options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11;
-   options->has_udot_4x8_sat = info->has_accelerated_dot_product;
-   options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11;
-   options->has_find_msb_rev = true;
-   options->has_pack_32_4x8 = true;
-   options->has_pack_half_2x16_rtz = true;
-   options->has_bit_test = !use_llvm;
-   options->has_fmulz = true;
-   options->has_msad = true;
-   options->has_shfr32 = true;
-   options->lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 |
-                                  nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64 | nir_lower_conv64;
-   options->divergence_analysis_options = nir_divergence_view_index_uniform;
-   options->optimize_quad_vote_to_reduce = !use_llvm;
-   options->lower_fisnormal = true;
-   options->support_16bit_alu = info->gfx_level >= GFX8;
-   options->vectorize_vec2_16bit = info->has_packed_math_16bit;
-   options->discard_is_demote = true;
-   options->optimize_sample_mask_in = true;
-   options->optimize_load_front_face_fsign = true;
-   options->io_options = nir_io_has_flexible_input_interpolation_except_flat |
-                         (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) |
-                         nir_io_prefer_scalar_fs_inputs |
-                         nir_io_mix_convergent_flat_with_interpolated |
-                         nir_io_vectorizer_ignores_types |
-                         nir_io_compaction_rotates_color_channels;
-   options->lower_layer_fs_input_to_sysval = true;
-   options->scalarize_ddx = true;
-   options->skip_lower_packing_ops =
-      BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32) |
-      BITFIELD_BIT(nir_lower_packing_op_unpack_64_4x16) |
-      BITFIELD_BIT(nir_lower_packing_op_unpack_32_2x16) |
-      BITFIELD_BIT(nir_lower_packing_op_pack_32_4x8) |
-      BITFIELD_BIT(nir_lower_packing_op_unpack_32_4x8);
-}
-
 unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask,
                                     bool writes_mrt0_alpha)
 {
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index ceb14e76ca8..ccee4839e6e 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -246,9 +246,6 @@ struct ac_nir_config {
    bool uses_aco;
 };
 
-void ac_set_nir_options(struct radeon_info *info, bool use_llvm,
-                        nir_shader_compiler_options *options);
-
 unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask,
                                     bool writes_mrt0_alpha);
 
diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp
index fe546156a7c..0825c3f4be4 100644
--- a/src/amd/compiler/tests/helpers.cpp
+++ b/src/amd/compiler/tests/helpers.cpp
@@ -6,6 +6,7 @@
 #include "helpers.h"
 
 #include "common/amd_family.h"
+#include "common/ac_nir.h"
 #include "vk_format.h"
 
 #include <llvm-c/Target.h>
@@ -147,7 +148,7 @@ setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_fa
    rad_info.family = family;
 
    memset(&nir_options, 0, sizeof(nir_options));
-   ac_set_nir_options(&rad_info, false, &nir_options);
+   ac_nir_set_options(&rad_info, false, &nir_options);
 
    glsl_type_singleton_init_or_ref();
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index b94bec51e5d..f7436d05e40 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -55,7 +55,7 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
    bool split_fma =
       (stage <= MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH) && instance->debug_flags & RADV_DEBUG_SPLIT_FMA;
 
-   ac_set_nir_options(&pdev->info, pdev->use_llvm, options);
+   ac_nir_set_options(&pdev->info, pdev->use_llvm, options);
 
    options->lower_ffma16 = split_fma || pdev->info.gfx_level < GFX9;
    options->lower_ffma32 = split_fma || pdev->info.gfx_level < GFX10_3;
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index e69311ff107..0a988fd9f30 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1565,7 +1565,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
    bool has_mediump = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16;
 
    nir_shader_compiler_options *options = sscreen->nir_options;
-   ac_set_nir_options(&sscreen->info, !sscreen->use_aco, options);
+   ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options);
 
    options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
    options->lower_ffma32 = !use_fma32;