From d8468d546352b5eaba3591dae2e7afd9845450e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 30 Nov 2024 22:55:41 -0500 Subject: [PATCH] amd,zink: remove options.varying_estimate_instr_cost callbacks They are a maintenance burden since they would need changes to support more instruction types that nir_opt_varyings will be able to move between shaders, and they are almost identical to default_varying_estimate_instr_cost, so just use that. The cost threshold is adjusted for AMD because default_varying_estimate_instr_cost is slightly different. Part-of: --- src/amd/common/ac_nir.c | 101 +--------------------- src/amd/common/ac_nir.h | 3 - src/amd/vulkan/radv_shader.c | 1 - src/gallium/drivers/radeonsi/si_get.c | 1 - src/gallium/drivers/zink/zink_compiler.c | 102 ----------------------- 5 files changed, 1 insertion(+), 207 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index e381c6319ad..eddb6ce2825 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -1601,112 +1601,13 @@ ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer) /* TCS->TES and VS->TES (OpenGL only) */ case MESA_SHADER_FRAGMENT: /* Up to 3 uniforms and 5 ALUs. */ - return 14; + return 12; default: unreachable("unexpected shader stage"); } } -unsigned -ac_nir_varying_estimate_instr_cost(nir_instr *instr) -{ - unsigned dst_bit_size, src_bit_size, num_dst_dwords; - nir_op alu_op; - - /* This is a very loose approximation based on gfx10. 
*/ - switch (instr->type) { - case nir_instr_type_alu: - dst_bit_size = nir_instr_as_alu(instr)->def.bit_size; - src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size; - alu_op = nir_instr_as_alu(instr)->op; - num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); - - switch (alu_op) { - case nir_op_mov: - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - case nir_op_vec5: - case nir_op_vec8: - case nir_op_vec16: - case nir_op_fabs: - case nir_op_fneg: - case nir_op_fsat: - return 0; - - case nir_op_imul: - case nir_op_umul_low: - return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords; - - case nir_op_imul_high: - case nir_op_umul_high: - case nir_op_imul_2x32_64: - case nir_op_umul_2x32_64: - return 4; - - case nir_op_fexp2: - case nir_op_flog2: - case nir_op_frcp: - case nir_op_frsq: - case nir_op_fsqrt: - case nir_op_fsin: - case nir_op_fcos: - case nir_op_fsin_amd: - case nir_op_fcos_amd: - return 4; /* FP16 & FP32. */ - - case nir_op_fpow: - return 4 + 1 + 4; /* log2 + mul + exp2 */ - - case nir_op_fsign: - return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */ - - case nir_op_idiv: - case nir_op_udiv: - case nir_op_imod: - case nir_op_umod: - case nir_op_irem: - return dst_bit_size == 64 ? 80 : 40; - - case nir_op_fdiv: - return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */ - - case nir_op_fmod: - case nir_op_frem: - return dst_bit_size == 64 ? 80 : 8; - - default: - /* Double opcodes. Comparisons have always full performance. 
*/ - if ((dst_bit_size == 64 && - nir_op_infos[alu_op].output_type & nir_type_float) || - (dst_bit_size >= 8 && src_bit_size == 64 && - nir_op_infos[alu_op].input_types[0] & nir_type_float)) - return 16; - - return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32); - } - - case nir_instr_type_intrinsic: - dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size; - num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); - - switch (nir_instr_as_intrinsic(instr)->intrinsic) { - case nir_intrinsic_load_deref: - /* Uniform or UBO load. - * Set a low cost to balance the number of scalar loads and ALUs. - */ - return 3 * num_dst_dwords; - - default: - unreachable("unexpected intrinsic"); - } - - default: - unreachable("unexpected instr type"); - } -} - typedef struct { enum amd_gfx_level gfx_level; bool use_llvm; diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index e739c83c153..61e16c0d556 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -317,9 +317,6 @@ ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level); unsigned ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer); -unsigned -ac_nir_varying_estimate_instr_cost(nir_instr *instr); - bool ac_nir_opt_shared_append(nir_shader *shader); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 243a9c664e0..ca2c95adb14 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -64,7 +64,6 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta options->max_unroll_iterations_aggressive = 128; options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv; options->io_options |= nir_io_mediump_is_32bit; - options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost; options->varying_expression_max_cost = ac_nir_varying_expression_max_cost; } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c 
index db16f04531b..72e540dcf39 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1595,5 +1595,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen) BITFIELD_BIT(MESA_SHADER_TESS_EVAL); options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL); options->varying_expression_max_cost = si_varying_expression_max_cost; - options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost; } diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 110b1eccf30..fea48d90fe0 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -1265,106 +1265,6 @@ amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer) } } -/* from radeonsi */ -static unsigned -amd_varying_estimate_instr_cost(nir_instr *instr) -{ - unsigned dst_bit_size, src_bit_size, num_dst_dwords; - nir_op alu_op; - - /* This is a very loose approximation based on gfx10. */ - switch (instr->type) { - case nir_instr_type_alu: - dst_bit_size = nir_instr_as_alu(instr)->def.bit_size; - src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size; - alu_op = nir_instr_as_alu(instr)->op; - num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); - - switch (alu_op) { - case nir_op_mov: - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - case nir_op_vec5: - case nir_op_vec8: - case nir_op_vec16: - case nir_op_fabs: - case nir_op_fneg: - case nir_op_fsat: - return 0; - - case nir_op_imul: - case nir_op_umul_low: - return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords; - - case nir_op_imul_high: - case nir_op_umul_high: - case nir_op_imul_2x32_64: - case nir_op_umul_2x32_64: - return 4; - - case nir_op_fexp2: - case nir_op_flog2: - case nir_op_frcp: - case nir_op_frsq: - case nir_op_fsqrt: - case nir_op_fsin: - case nir_op_fcos: - case nir_op_fsin_amd: - case nir_op_fcos_amd: - return 4; /* FP16 & FP32. 
*/ - - case nir_op_fpow: - return 4 + 1 + 4; /* log2 + mul + exp2 */ - - case nir_op_fsign: - return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */ - - case nir_op_idiv: - case nir_op_udiv: - case nir_op_imod: - case nir_op_umod: - case nir_op_irem: - return dst_bit_size == 64 ? 80 : 40; - - case nir_op_fdiv: - return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */ - - case nir_op_fmod: - case nir_op_frem: - return dst_bit_size == 64 ? 80 : 8; - - default: - /* Double opcodes. Comparisons have always full performance. */ - if ((dst_bit_size == 64 && - nir_op_infos[alu_op].output_type & nir_type_float) || - (dst_bit_size >= 8 && src_bit_size == 64 && - nir_op_infos[alu_op].input_types[0] & nir_type_float)) - return 16; - - return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32); - } - - case nir_instr_type_intrinsic: - dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size; - num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); - - switch (nir_instr_as_intrinsic(instr)->intrinsic) { - case nir_intrinsic_load_deref: - /* Uniform or UBO load. - * Set a low cost to balance the number of scalar loads and ALUs. - */ - return 3 * num_dst_dwords; - - default: - unreachable("unexpected intrinsic"); - } - - default: - unreachable("unexpected instr type"); - } -} - void zink_screen_init_compiler(struct zink_screen *screen) { @@ -1438,12 +1338,10 @@ zink_screen_init_compiler(struct zink_screen *screen) case VK_DRIVER_ID_AMD_OPEN_SOURCE: case VK_DRIVER_ID_AMD_PROPRIETARY: screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost; - screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost; break; default: mesa_logw("zink: instruction costs not implemented for this implementation!"); screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost; - screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost; } } else { screen->nir_options.io_options |= nir_io_dont_optimize;