amd,zink: remove options.varying_estimate_instr_cost callbacks
They are a maintenance burden because they would need changes to support the additional instruction types that nir_opt_varyings will be able to move between shaders, and they are almost identical to default_varying_estimate_instr_cost, so just use that. The cost threshold is adjusted for AMD because default_varying_estimate_instr_cost is slightly different. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32424>
This commit is contained in:
@@ -1601,112 +1601,13 @@ ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
|
||||
/* TCS->TES and VS->TES (OpenGL only) */
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
/* Up to 3 uniforms and 5 ALUs. */
|
||||
return 14;
|
||||
return 12;
|
||||
|
||||
default:
|
||||
unreachable("unexpected shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_nir_varying_estimate_instr_cost(nir_instr *instr)
|
||||
{
|
||||
unsigned dst_bit_size, src_bit_size, num_dst_dwords;
|
||||
nir_op alu_op;
|
||||
|
||||
/* This is a very loose approximation based on gfx10. */
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu:
|
||||
dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
|
||||
src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
|
||||
alu_op = nir_instr_as_alu(instr)->op;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (alu_op) {
|
||||
case nir_op_mov:
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
case nir_op_fabs:
|
||||
case nir_op_fneg:
|
||||
case nir_op_fsat:
|
||||
return 0;
|
||||
|
||||
case nir_op_imul:
|
||||
case nir_op_umul_low:
|
||||
return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
|
||||
|
||||
case nir_op_imul_high:
|
||||
case nir_op_umul_high:
|
||||
case nir_op_imul_2x32_64:
|
||||
case nir_op_umul_2x32_64:
|
||||
return 4;
|
||||
|
||||
case nir_op_fexp2:
|
||||
case nir_op_flog2:
|
||||
case nir_op_frcp:
|
||||
case nir_op_frsq:
|
||||
case nir_op_fsqrt:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd:
|
||||
return 4; /* FP16 & FP32. */
|
||||
|
||||
case nir_op_fpow:
|
||||
return 4 + 1 + 4; /* log2 + mul + exp2 */
|
||||
|
||||
case nir_op_fsign:
|
||||
return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
|
||||
|
||||
case nir_op_idiv:
|
||||
case nir_op_udiv:
|
||||
case nir_op_imod:
|
||||
case nir_op_umod:
|
||||
case nir_op_irem:
|
||||
return dst_bit_size == 64 ? 80 : 40;
|
||||
|
||||
case nir_op_fdiv:
|
||||
return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
|
||||
|
||||
case nir_op_fmod:
|
||||
case nir_op_frem:
|
||||
return dst_bit_size == 64 ? 80 : 8;
|
||||
|
||||
default:
|
||||
/* Double opcodes. Comparisons have always full performance. */
|
||||
if ((dst_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].output_type & nir_type_float) ||
|
||||
(dst_bit_size >= 8 && src_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].input_types[0] & nir_type_float))
|
||||
return 16;
|
||||
|
||||
return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
|
||||
}
|
||||
|
||||
case nir_instr_type_intrinsic:
|
||||
dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
/* Uniform or UBO load.
|
||||
* Set a low cost to balance the number of scalar loads and ALUs.
|
||||
*/
|
||||
return 3 * num_dst_dwords;
|
||||
|
||||
default:
|
||||
unreachable("unexpected intrinsic");
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("unexpected instr type");
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
enum amd_gfx_level gfx_level;
|
||||
bool use_llvm;
|
||||
|
@@ -317,9 +317,6 @@ ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
|
||||
unsigned
|
||||
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
|
||||
|
||||
unsigned
|
||||
ac_nir_varying_estimate_instr_cost(nir_instr *instr);
|
||||
|
||||
bool
|
||||
ac_nir_opt_shared_append(nir_shader *shader);
|
||||
|
||||
|
@@ -64,7 +64,6 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
|
||||
options->max_unroll_iterations_aggressive = 128;
|
||||
options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
|
||||
options->io_options |= nir_io_mediump_is_32bit;
|
||||
options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
|
||||
options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
|
||||
}
|
||||
|
||||
|
@@ -1595,5 +1595,4 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
BITFIELD_BIT(MESA_SHADER_TESS_EVAL);
|
||||
options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
|
||||
options->varying_expression_max_cost = si_varying_expression_max_cost;
|
||||
options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
|
||||
}
|
||||
|
@@ -1265,106 +1265,6 @@ amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
|
||||
}
|
||||
}
|
||||
|
||||
/* from radeonsi */
|
||||
static unsigned
|
||||
amd_varying_estimate_instr_cost(nir_instr *instr)
|
||||
{
|
||||
unsigned dst_bit_size, src_bit_size, num_dst_dwords;
|
||||
nir_op alu_op;
|
||||
|
||||
/* This is a very loose approximation based on gfx10. */
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu:
|
||||
dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
|
||||
src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
|
||||
alu_op = nir_instr_as_alu(instr)->op;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (alu_op) {
|
||||
case nir_op_mov:
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
case nir_op_fabs:
|
||||
case nir_op_fneg:
|
||||
case nir_op_fsat:
|
||||
return 0;
|
||||
|
||||
case nir_op_imul:
|
||||
case nir_op_umul_low:
|
||||
return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
|
||||
|
||||
case nir_op_imul_high:
|
||||
case nir_op_umul_high:
|
||||
case nir_op_imul_2x32_64:
|
||||
case nir_op_umul_2x32_64:
|
||||
return 4;
|
||||
|
||||
case nir_op_fexp2:
|
||||
case nir_op_flog2:
|
||||
case nir_op_frcp:
|
||||
case nir_op_frsq:
|
||||
case nir_op_fsqrt:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd:
|
||||
return 4; /* FP16 & FP32. */
|
||||
|
||||
case nir_op_fpow:
|
||||
return 4 + 1 + 4; /* log2 + mul + exp2 */
|
||||
|
||||
case nir_op_fsign:
|
||||
return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
|
||||
|
||||
case nir_op_idiv:
|
||||
case nir_op_udiv:
|
||||
case nir_op_imod:
|
||||
case nir_op_umod:
|
||||
case nir_op_irem:
|
||||
return dst_bit_size == 64 ? 80 : 40;
|
||||
|
||||
case nir_op_fdiv:
|
||||
return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
|
||||
|
||||
case nir_op_fmod:
|
||||
case nir_op_frem:
|
||||
return dst_bit_size == 64 ? 80 : 8;
|
||||
|
||||
default:
|
||||
/* Double opcodes. Comparisons have always full performance. */
|
||||
if ((dst_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].output_type & nir_type_float) ||
|
||||
(dst_bit_size >= 8 && src_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].input_types[0] & nir_type_float))
|
||||
return 16;
|
||||
|
||||
return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
|
||||
}
|
||||
|
||||
case nir_instr_type_intrinsic:
|
||||
dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
/* Uniform or UBO load.
|
||||
* Set a low cost to balance the number of scalar loads and ALUs.
|
||||
*/
|
||||
return 3 * num_dst_dwords;
|
||||
|
||||
default:
|
||||
unreachable("unexpected intrinsic");
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("unexpected instr type");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zink_screen_init_compiler(struct zink_screen *screen)
|
||||
{
|
||||
@@ -1438,12 +1338,10 @@ zink_screen_init_compiler(struct zink_screen *screen)
|
||||
case VK_DRIVER_ID_AMD_OPEN_SOURCE:
|
||||
case VK_DRIVER_ID_AMD_PROPRIETARY:
|
||||
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
|
||||
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
|
||||
break;
|
||||
default:
|
||||
mesa_logw("zink: instruction costs not implemented for this implementation!");
|
||||
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
|
||||
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
|
||||
}
|
||||
} else {
|
||||
screen->nir_options.io_options |= nir_io_dont_optimize;
|
||||
|
Reference in New Issue
Block a user