From 85eab189eebc4ecb2629325c75147a09df9c116e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 9 Jan 2025 18:31:22 -0600 Subject: [PATCH] ac/nir: Move ac_nir_opt_pack_half to separate file. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_nir.c | 122 ------------------------ src/amd/common/ac_nir_opt_pack_half.c | 132 ++++++++++++++++++++++++++ src/amd/common/meson.build | 1 + 3 files changed, 133 insertions(+), 122 deletions(-) create mode 100644 src/amd/common/ac_nir_opt_pack_half.c diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 5da74995e14..a732d0c93ab 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -1050,128 +1050,6 @@ ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4) nir_store_buffer_amd(b, uvec4, buf, offset, zero, zero); } -static bool -needs_rounding_mode_16_64(nir_instr *instr) -{ - if (instr->type != nir_instr_type_alu) - return false; - nir_alu_instr *alu = nir_instr_as_alu(instr); - if (alu->op == nir_op_fquantize2f16) - return true; - if (alu->def.bit_size != 16 && alu->def.bit_size != 64) - return false; - if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) != nir_type_float) - return false; - - switch (alu->op) { - case nir_op_f2f64: - case nir_op_b2f64: - case nir_op_f2f16_rtz: - case nir_op_b2f16: - case nir_op_fsat: - case nir_op_fabs: - case nir_op_fneg: - case nir_op_fsign: - case nir_op_ftrunc: - case nir_op_fceil: - case nir_op_ffloor: - case nir_op_ffract: - case nir_op_fround_even: - case nir_op_fmin: - case nir_op_fmax: - return false; - default: - return true; - } -} - -static bool -can_use_fmamix(nir_scalar s, enum amd_gfx_level gfx_level) -{ - s = nir_scalar_chase_movs(s); - if (!list_is_singular(&s.def->uses)) - return false; - - if (nir_scalar_is_intrinsic(s) && - nir_scalar_intrinsic_op(s) == nir_intrinsic_load_interpolated_input) - return gfx_level >= GFX11; - - if (!nir_scalar_is_alu(s)) - return false; - - switch (nir_scalar_alu_op(s)) { - case nir_op_fmul: - case nir_op_ffma: - case nir_op_fadd: - case nir_op_fsub: - return true; - case nir_op_fsat: - return can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level); - default: - return false; - } -} - -static bool -split_pack_half(nir_builder *b, nir_instr *instr, void *param) -{ - enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)param; - - if (instr->type != nir_instr_type_alu) - return false; - nir_alu_instr *alu = nir_instr_as_alu(instr); - if (alu->op != nir_op_pack_half_2x16_rtz_split && alu->op != nir_op_pack_half_2x16_split) - return false; - - nir_scalar s = nir_get_scalar(&alu->def, 0); - - if (!can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level) || - !can_use_fmamix(nir_scalar_chase_alu_src(s, 1), gfx_level)) - return false; - - b->cursor = nir_before_instr(instr); - - /* Split pack_half into two f2f16 to create v_fma_mix{lo,hi}_f16 - * in the backend. - */ - nir_def *lo = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 0)); - nir_def *hi = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 1)); - nir_def_replace(&alu->def, nir_pack_32_2x16_split(b, lo, hi)); - return true; -} - -bool -ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level) -{ - if (gfx_level < GFX10) - return false; - - unsigned exec_mode = shader->info.float_controls_execution_mode; - bool set_mode = false; - if (!nir_is_rounding_mode_rtz(exec_mode, 16)) { - nir_foreach_function_impl(impl, shader) { - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { - if (needs_rounding_mode_16_64(instr)) - return false; - } - } - } - set_mode = true; - } - - bool progress = nir_shader_instructions_pass(shader, split_pack_half, - nir_metadata_control_flow, - &gfx_level); - - if (set_mode && progress) { - exec_mode &= ~(FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); - exec_mode |= FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; - shader->info.float_controls_execution_mode = exec_mode; - } - return progress; -} - nir_def * ac_average_samples(nir_builder *b, nir_def **samples, unsigned num_samples) { diff --git a/src/amd/common/ac_nir_opt_pack_half.c b/src/amd/common/ac_nir_opt_pack_half.c new file mode 100644 index 00000000000..d4425f047d4 --- /dev/null +++ b/src/amd/common/ac_nir_opt_pack_half.c @@ -0,0 +1,132 @@ +/* + * Copyright © 2021 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include "ac_nir.h" +#include "ac_nir_helpers.h" + +#include "nir_builder.h" + +static bool +needs_rounding_mode_16_64(nir_instr *instr) +{ + if (instr->type != nir_instr_type_alu) + return false; + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op == nir_op_fquantize2f16) + return true; + if (alu->def.bit_size != 16 && alu->def.bit_size != 64) + return false; + if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) != nir_type_float) + return false; + + switch (alu->op) { + case nir_op_f2f64: + case nir_op_b2f64: + case nir_op_f2f16_rtz: + case nir_op_b2f16: + case nir_op_fsat: + case nir_op_fabs: + case nir_op_fneg: + case nir_op_fsign: + case nir_op_ftrunc: + case nir_op_fceil: + case nir_op_ffloor: + case nir_op_ffract: + case nir_op_fround_even: + case nir_op_fmin: + case nir_op_fmax: + return false; + default: + return true; + } +} + +static bool +can_use_fmamix(nir_scalar s, enum amd_gfx_level gfx_level) +{ + s = nir_scalar_chase_movs(s); + if (!list_is_singular(&s.def->uses)) + return false; + + if (nir_scalar_is_intrinsic(s) && + nir_scalar_intrinsic_op(s) == nir_intrinsic_load_interpolated_input) + return gfx_level >= GFX11; + + if (!nir_scalar_is_alu(s)) + return false; + + switch (nir_scalar_alu_op(s)) { + case nir_op_fmul: + case nir_op_ffma: + case nir_op_fadd: + case nir_op_fsub: + return true; + case nir_op_fsat: + return can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level); + default: + return false; + } +} + +static bool +split_pack_half(nir_builder *b, nir_instr *instr, void *param) +{ + enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)param; + + if (instr->type != nir_instr_type_alu) + return false; + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op != nir_op_pack_half_2x16_rtz_split && alu->op != nir_op_pack_half_2x16_split) + return false; + + nir_scalar s = nir_get_scalar(&alu->def, 0); + + if (!can_use_fmamix(nir_scalar_chase_alu_src(s, 0), gfx_level) || + !can_use_fmamix(nir_scalar_chase_alu_src(s, 1), gfx_level)) + return false; + + b->cursor = nir_before_instr(instr); + + /* Split pack_half into two f2f16 to create v_fma_mix{lo,hi}_f16 + * in the backend. + */ + nir_def *lo = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 0)); + nir_def *hi = nir_f2f16(b, nir_ssa_for_alu_src(b, alu, 1)); + nir_def_replace(&alu->def, nir_pack_32_2x16_split(b, lo, hi)); + return true; +} + +bool +ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level) +{ + if (gfx_level < GFX10) + return false; + + unsigned exec_mode = shader->info.float_controls_execution_mode; + bool set_mode = false; + if (!nir_is_rounding_mode_rtz(exec_mode, 16)) { + nir_foreach_function_impl(impl, shader) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (needs_rounding_mode_16_64(instr)) + return false; + } + } + } + set_mode = true; + } + + bool progress = nir_shader_instructions_pass(shader, split_pack_half, + nir_metadata_control_flow, + &gfx_level); + + if (set_mode && progress) { + exec_mode &= ~(FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); + exec_mode |= FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 | FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64; + shader->info.float_controls_execution_mode = exec_mode; + } + return progress; +} diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 0a9a57468ce..de934ea998b 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -105,6 +105,7 @@ amd_common_files = files( 'ac_nir_meta_cs_blit.c', 'ac_nir_meta_cs_clear_copy_buffer.c', 'ac_nir_meta_ps_resolve.c', + 'ac_nir_opt_pack_half.c', 'ac_nir_opt_shared_append.c', 'amd_family.c', 'ac_parse_ib.c',