From 305fdfddb5efc4ee064a055a953948fd00c5438c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 9 Jan 2025 16:03:02 -0600 Subject: [PATCH] ac/nir: Move ac_set_nir_options to ac_nir.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And rename it to ac_nir_set_options to match other functions. Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_nir.c | 100 ++++++++++++++++++++++++++ src/amd/common/ac_nir.h | 4 ++ src/amd/common/ac_shader_util.c | 99 ------------------------- src/amd/common/ac_shader_util.h | 3 - src/amd/compiler/tests/helpers.cpp | 3 +- src/amd/vulkan/radv_shader.c | 2 +- src/gallium/drivers/radeonsi/si_get.c | 2 +- 7 files changed, 108 insertions(+), 105 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 88af6084208..6cf8a290b19 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -11,6 +11,106 @@ #include "nir_builder.h" #include "nir_xfb_info.h" + +/* Set NIR options shared by ACO, LLVM, RADV, and radeonsi. */ +void ac_nir_set_options(struct radeon_info *info, bool use_llvm, + nir_shader_compiler_options *options) +{ + /* |---------------------------------- Performance & Availability --------------------------------| + * |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice + * Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32 |PK_FMAC_F16|F16,F32,F64 + * ------------------------------------------------------------------------------------------------------------------ + * gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - | - , - | - ,MAD,FMA + * gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - | - , - |MAD,MAD,FMA + * gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - | 2 , - |FMA,MAD,FMA + * gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - | 2 , 2 |FMA,MAD,FMA + * gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 | 2 , 2 | all FMA + * gfx11 | - , - , - | - , - | - , - | 2 , 2 ,1/16| 2 , 2 | 2 | 2 , 2 | all FMA + * + * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4 + * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir. + * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK. + * + * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK. + * gfx9 and newer prefer FMA for F16 because of the packed instruction. + * gfx10 and older prefer MAD for F32 because of the legacy instruction. + */ + + memset(options, 0, sizeof(*options)); + options->vertex_id_zero_based = true; + options->lower_scmp = true; + options->lower_flrp16 = true; + options->lower_flrp32 = true; + options->lower_flrp64 = true; + options->lower_device_index_to_zero = true; + options->lower_fdiv = true; + options->lower_fmod = true; + options->lower_ineg = true; + options->lower_bitfield_insert = true; + options->lower_bitfield_extract = true; + options->lower_pack_snorm_4x8 = true; + options->lower_pack_unorm_4x8 = true; + options->lower_pack_half_2x16 = true; + options->lower_pack_64_2x32 = true; + options->lower_pack_64_4x16 = true; + options->lower_pack_32_2x16 = true; + options->lower_unpack_snorm_2x16 = true; + options->lower_unpack_snorm_4x8 = true; + options->lower_unpack_unorm_2x16 = true; + options->lower_unpack_unorm_4x8 = true; + options->lower_unpack_half_2x16 = true; + options->lower_fpow = true; + options->lower_mul_2x32_64 = true; + options->lower_iadd_sat = info->gfx_level <= GFX8; + options->lower_hadd = true; + options->lower_mul_32x16 = true; + options->has_bfe = true; + options->has_bfm = true; + options->has_bitfield_select = true; + options->has_fneo_fcmpu = true; + options->has_ford_funord = true; + options->has_fsub = true; + options->has_isub = true; + options->has_sdot_4x8 = info->has_accelerated_dot_product; + options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11; + options->has_udot_4x8 = info->has_accelerated_dot_product; + options->has_sdot_4x8_sat = info->has_accelerated_dot_product; + options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11; + options->has_udot_4x8_sat = info->has_accelerated_dot_product; + options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11; + options->has_find_msb_rev = true; + options->has_pack_32_4x8 = true; + options->has_pack_half_2x16_rtz = true; + options->has_bit_test = !use_llvm; + options->has_fmulz = true; + options->has_msad = true; + options->has_shfr32 = true; + options->lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 | + nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64 | nir_lower_conv64; + options->divergence_analysis_options = nir_divergence_view_index_uniform; + options->optimize_quad_vote_to_reduce = !use_llvm; + options->lower_fisnormal = true; + options->support_16bit_alu = info->gfx_level >= GFX8; + options->vectorize_vec2_16bit = info->has_packed_math_16bit; + options->discard_is_demote = true; + options->optimize_sample_mask_in = true; + options->optimize_load_front_face_fsign = true; + options->io_options = nir_io_has_flexible_input_interpolation_except_flat | + (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) | + nir_io_prefer_scalar_fs_inputs | + nir_io_mix_convergent_flat_with_interpolated | + nir_io_vectorizer_ignores_types | + nir_io_compaction_rotates_color_channels; + options->lower_layer_fs_input_to_sysval = true; + options->scalarize_ddx = true; + options->skip_lower_packing_ops = + BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32) | + BITFIELD_BIT(nir_lower_packing_op_unpack_64_4x16) | + BITFIELD_BIT(nir_lower_packing_op_unpack_32_2x16) | + BITFIELD_BIT(nir_lower_packing_op_pack_32_4x8) | + BITFIELD_BIT(nir_lower_packing_op_unpack_32_4x8); +} + /* Sleep for the given number of clock cycles. */ void ac_nir_sleep(nir_builder *b, unsigned num_cycles) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index bd41b3c8b67..bcc89ca3431 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -60,6 +60,10 @@ typedef struct nir_xfb_info nir_xfb_info; /* Executed by ac_nir_cull when the current primitive is accepted. */ typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state); +void +ac_nir_set_options(struct radeon_info *info, bool use_llvm, + nir_shader_compiler_options *options); + nir_def * ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg, unsigned relative_index); diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 91810359008..e9c5eaec364 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -15,105 +15,6 @@ #include #include -/* Set NIR options shared by ACO, LLVM, RADV, and radeonsi. */ -void ac_set_nir_options(struct radeon_info *info, bool use_llvm, - nir_shader_compiler_options *options) -{ - /* |---------------------------------- Performance & Availability --------------------------------| - * |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice - * Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32 |PK_FMAC_F16|F16,F32,F64 - * ------------------------------------------------------------------------------------------------------------------ - * gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - | - , - | - ,MAD,FMA - * gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - | - , - |MAD,MAD,FMA - * gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - | 2 , - |FMA,MAD,FMA - * gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - | 2 , 2 |FMA,MAD,FMA - * gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 | 2 , 2 | all FMA - * gfx11 | - , - , - | - , - | - , - | 2 , 2 ,1/16| 2 , 2 | 2 | 2 , 2 | all FMA - * - * Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4 - * gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir. - * gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK. - * - * gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK. - * gfx9 and newer prefer FMA for F16 because of the packed instruction. - * gfx10 and older prefer MAD for F32 because of the legacy instruction. - */ - - memset(options, 0, sizeof(*options)); - options->vertex_id_zero_based = true; - options->lower_scmp = true; - options->lower_flrp16 = true; - options->lower_flrp32 = true; - options->lower_flrp64 = true; - options->lower_device_index_to_zero = true; - options->lower_fdiv = true; - options->lower_fmod = true; - options->lower_ineg = true; - options->lower_bitfield_insert = true; - options->lower_bitfield_extract = true; - options->lower_pack_snorm_4x8 = true; - options->lower_pack_unorm_4x8 = true; - options->lower_pack_half_2x16 = true; - options->lower_pack_64_2x32 = true; - options->lower_pack_64_4x16 = true; - options->lower_pack_32_2x16 = true; - options->lower_unpack_snorm_2x16 = true; - options->lower_unpack_snorm_4x8 = true; - options->lower_unpack_unorm_2x16 = true; - options->lower_unpack_unorm_4x8 = true; - options->lower_unpack_half_2x16 = true; - options->lower_fpow = true; - options->lower_mul_2x32_64 = true; - options->lower_iadd_sat = info->gfx_level <= GFX8; - options->lower_hadd = true; - options->lower_mul_32x16 = true; - options->has_bfe = true; - options->has_bfm = true; - options->has_bitfield_select = true; - options->has_fneo_fcmpu = true; - options->has_ford_funord = true; - options->has_fsub = true; - options->has_isub = true; - options->has_sdot_4x8 = info->has_accelerated_dot_product; - options->has_sudot_4x8 = info->has_accelerated_dot_product && info->gfx_level >= GFX11; - options->has_udot_4x8 = info->has_accelerated_dot_product; - options->has_sdot_4x8_sat = info->has_accelerated_dot_product; - options->has_sudot_4x8_sat = info->has_accelerated_dot_product && info->gfx_level >= GFX11; - options->has_udot_4x8_sat = info->has_accelerated_dot_product; - options->has_dot_2x16 = info->has_accelerated_dot_product && info->gfx_level < GFX11; - options->has_find_msb_rev = true; - options->has_pack_32_4x8 = true; - options->has_pack_half_2x16_rtz = true; - options->has_bit_test = !use_llvm; - options->has_fmulz = true; - options->has_msad = true; - options->has_shfr32 = true; - options->lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 | nir_lower_divmod64 | - nir_lower_minmax64 | nir_lower_iabs64 | nir_lower_iadd_sat64 | nir_lower_conv64; - options->divergence_analysis_options = nir_divergence_view_index_uniform; - options->optimize_quad_vote_to_reduce = !use_llvm; - options->lower_fisnormal = true; - options->support_16bit_alu = info->gfx_level >= GFX8; - options->vectorize_vec2_16bit = info->has_packed_math_16bit; - options->discard_is_demote = true; - options->optimize_sample_mask_in = true; - options->optimize_load_front_face_fsign = true; - options->io_options = nir_io_has_flexible_input_interpolation_except_flat | - (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) | - nir_io_prefer_scalar_fs_inputs | - nir_io_mix_convergent_flat_with_interpolated | - nir_io_vectorizer_ignores_types | - nir_io_compaction_rotates_color_channels; - options->lower_layer_fs_input_to_sysval = true; - options->scalarize_ddx = true; - options->skip_lower_packing_ops = - BITFIELD_BIT(nir_lower_packing_op_unpack_64_2x32) | - BITFIELD_BIT(nir_lower_packing_op_unpack_64_4x16) | - BITFIELD_BIT(nir_lower_packing_op_unpack_32_2x16) | - BITFIELD_BIT(nir_lower_packing_op_pack_32_4x8) | - BITFIELD_BIT(nir_lower_packing_op_unpack_32_4x8); -} - unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask, bool writes_mrt0_alpha) { diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index ceb14e76ca8..ccee4839e6e 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -246,9 +246,6 @@ struct ac_nir_config { bool uses_aco; }; -void ac_set_nir_options(struct radeon_info *info, bool use_llvm, - nir_shader_compiler_options *options); - unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask, bool writes_mrt0_alpha); diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index fe546156a7c..0825c3f4be4 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -6,6 +6,7 @@ #include "helpers.h" #include "common/amd_family.h" +#include "common/ac_nir.h" #include "vk_format.h" #include @@ -147,7 +148,7 @@ setup_nir_cs(enum amd_gfx_level gfx_level, gl_shader_stage stage, enum radeon_fa rad_info.family = family; memset(&nir_options, 0, sizeof(nir_options)); - ac_set_nir_options(&rad_info, false, &nir_options); + ac_nir_set_options(&rad_info, false, &nir_options); glsl_type_singleton_init_or_ref(); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index b94bec51e5d..f7436d05e40 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -55,7 +55,7 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta bool split_fma = (stage <= MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH) && instance->debug_flags & RADV_DEBUG_SPLIT_FMA; - ac_set_nir_options(&pdev->info, pdev->use_llvm, options); + ac_nir_set_options(&pdev->info, pdev->use_llvm, options); options->lower_ffma16 = split_fma || pdev->info.gfx_level < GFX9; options->lower_ffma32 = split_fma || pdev->info.gfx_level < GFX10_3; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index e69311ff107..0a988fd9f30 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1565,7 +1565,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) bool has_mediump = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16; nir_shader_compiler_options *options = sscreen->nir_options; - ac_set_nir_options(&sscreen->info, !sscreen->use_aco, options); + ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options); options->lower_ffma16 = sscreen->info.gfx_level < GFX9; options->lower_ffma32 = !use_fma32;