nir,amd: remove trinary_minmax opcodes

These are the variations nir_op_{i|u|f}{min|max|med}3, which are either
lowered in the backend (LLVM) anyway or can be recombined by the backend (ACO).

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6421>
This commit is contained in:
Daniel Schürmann
2020-06-18 15:14:20 +01:00
committed by Marge Bot
parent 1fa43a4a8e
commit a79dad950b
9 changed files with 20 additions and 239 deletions

View File

@@ -838,12 +838,6 @@ nir_lower_int64_op_to_options_mask(nir_op opcode)
case nir_op_imax:
case nir_op_umin:
case nir_op_umax:
case nir_op_imin3:
case nir_op_imax3:
case nir_op_umin3:
case nir_op_umax3:
case nir_op_imed3:
case nir_op_umed3:
return nir_lower_minmax64;
case nir_op_iabs:
return nir_lower_iabs64;
@@ -944,18 +938,6 @@ lower_int64_alu_instr(nir_builder *b, nir_instr *instr, void *_state)
return lower_umin64(b, src[0], src[1]);
case nir_op_umax:
return lower_umax64(b, src[0], src[1]);
case nir_op_imin3:
return lower_imin64(b, src[0], lower_imin64(b, src[1], src[2]));
case nir_op_imax3:
return lower_imax64(b, src[0], lower_imax64(b, src[1], src[2]));
case nir_op_umin3:
return lower_umin64(b, src[0], lower_umin64(b, src[1], src[2]));
case nir_op_umax3:
return lower_umax64(b, src[0], lower_umax64(b, src[1], src[2]));
case nir_op_imed3:
return lower_imax64(b, lower_imin64(b, lower_imax64(b, src[0], src[1]), src[2]), lower_imin64(b, src[0], src[1]));
case nir_op_umed3:
return lower_umax64(b, lower_umin64(b, lower_umax64(b, src[0], src[1]), src[2]), lower_umin64(b, src[0], src[1]));
case nir_op_iabs:
return lower_iabs64(b, src[0]);
case nir_op_ineg:

View File

@@ -950,22 +950,8 @@ triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
# component on vectors). There are two versions, one for floating point
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
triop("fcsel", tfloat32, "", "(src0 != 0.0f) ? src1 : src2")
# 3 way min/max/med
triop("fmin3", tfloat, "", "fminf(src0, fminf(src1, src2))")
triop("imin3", tint, "", "MIN2(src0, MIN2(src1, src2))")
triop("umin3", tuint, "", "MIN2(src0, MIN2(src1, src2))")
triop("fmax3", tfloat, "", "fmaxf(src0, fmaxf(src1, src2))")
triop("imax3", tint, "", "MAX2(src0, MAX2(src1, src2))")
triop("umax3", tuint, "", "MAX2(src0, MAX2(src1, src2))")
triop("fmed3", tfloat, "", "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1))")
triop("imed3", tint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
triop("umed3", tuint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
opcode("bcsel", 0, tuint, [0, 0, 0],
[tbool1, tuint, tuint], False, "", "src0 ? src1 : src2")
opcode("b8csel", 0, tuint, [0, 0, 0],

View File

@@ -1153,10 +1153,6 @@ optimizations.extend([
(('bcsel', a, ('bcsel', b, c, d), d), ('bcsel', ('iand', a, b), c, d)),
(('bcsel', a, b, ('bcsel', c, b, d)), ('bcsel', ('ior', a, c), b, d)),
(('fmin3@64', a, b, c), ('fmin@64', a, ('fmin@64', b, c))),
(('fmax3@64', a, b, c), ('fmax@64', a, ('fmax@64', b, c))),
(('fmed3@64', a, b, c), ('fmax@64', ('fmin@64', ('fmax@64', a, b), c), ('fmin@64', a, b))),
# Misc. lowering
(('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'),
(('frem', a, b), ('fsub', a, ('fmul', b, ('ftrunc', ('fdiv', a, b)))), 'options->lower_fmod'),

View File

@@ -1319,10 +1319,6 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_op_udiv:
case nir_op_bcsel:
case nir_op_b32csel:
case nir_op_imax3:
case nir_op_imin3:
case nir_op_umax3:
case nir_op_umin3:
case nir_op_ubfe:
case nir_op_bfm:
case nir_op_f2u32:
@@ -1405,16 +1401,6 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_op_b32csel:
res = src1 > src2 ? src1 : src2;
break;
case nir_op_imax3:
case nir_op_imin3:
case nir_op_umax3:
src0 = src0 > src1 ? src0 : src1;
res = src0 > src2 ? src0 : src2;
break;
case nir_op_umin3:
src0 = src0 < src1 ? src0 : src1;
res = src0 < src2 ? src0 : src2;
break;
case nir_op_ubfe:
res = bitmask(MIN2(src2, scalar.def->bit_size));
break;

View File

@@ -126,34 +126,45 @@ vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ex
for (unsigned i = 0; i < num_inputs; i++)
src[i] = vtn_get_nir_ssa(b, w[i + 5]);
/* place constants at src[1-2] for easier constant-folding */
for (unsigned i = 1; i <= 2; i++) {
if (nir_src_as_const_value(nir_src_for_ssa(src[0]))) {
nir_ssa_def* tmp = src[i];
src[i] = src[0];
src[0] = tmp;
}
}
nir_ssa_def *def;
switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
case FMin3AMD:
def = nir_fmin3(nb, src[0], src[1], src[2]);
def = nir_fmin(nb, src[0], nir_fmin(nb, src[1], src[2]));
break;
case UMin3AMD:
def = nir_umin3(nb, src[0], src[1], src[2]);
def = nir_umin(nb, src[0], nir_umin(nb, src[1], src[2]));
break;
case SMin3AMD:
def = nir_imin3(nb, src[0], src[1], src[2]);
def = nir_imin(nb, src[0], nir_imin(nb, src[1], src[2]));
break;
case FMax3AMD:
def = nir_fmax3(nb, src[0], src[1], src[2]);
def = nir_fmax(nb, src[0], nir_fmax(nb, src[1], src[2]));
break;
case UMax3AMD:
def = nir_umax3(nb, src[0], src[1], src[2]);
def = nir_umax(nb, src[0], nir_umax(nb, src[1], src[2]));
break;
case SMax3AMD:
def = nir_imax3(nb, src[0], src[1], src[2]);
def = nir_imax(nb, src[0], nir_imax(nb, src[1], src[2]));
break;
case FMid3AMD:
def = nir_fmed3(nb, src[0], src[1], src[2]);
def = nir_fmin(nb, nir_fmax(nb, src[0], nir_fmin(nb, src[1], src[2])),
nir_fmax(nb, src[1], src[2]));
break;
case UMid3AMD:
def = nir_umed3(nb, src[0], src[1], src[2]);
def = nir_umin(nb, nir_umax(nb, src[0], nir_umin(nb, src[1], src[2])),
nir_umax(nb, src[1], src[2]));
break;
case SMid3AMD:
def = nir_imed3(nb, src[0], src[1], src[2]);
def = nir_imin(nb, nir_imax(nb, src[0], nir_imin(nb, src[1], src[2])),
nir_imax(nb, src[1], src[2]));
break;
default:
unreachable("unknown opcode\n");