nir: remove redundant opcode u2ump

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6283>
This commit is contained in:
Marek Olšák
2020-09-04 01:51:49 -04:00
committed by Marge Bot
parent 26fc5e1f4a
commit 3d3df8dbff
5 changed files with 15 additions and 11 deletions

View File

@@ -971,7 +971,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
break; break;
case nir_op_u2u8: case nir_op_u2u8:
case nir_op_u2u16: case nir_op_u2u16:
case nir_op_u2ump:
case nir_op_u2u32: case nir_op_u2u32:
case nir_op_u2u64: case nir_op_u2u64:
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))

View File

@@ -1990,7 +1990,7 @@ nir_visitor::visit(ir_expression *ir)
} }
case ir_unop_u2ump: { case ir_unop_u2ump: {
result = nir_build_alu(&b, nir_op_u2ump, srcs[0], NULL, NULL, NULL); result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
break; break;
} }

View File

@@ -274,7 +274,8 @@ for src_t in [tint, tuint, tfloat, tbool]:
# precision. # precision.
unop_numeric_convert("f2fmp", tfloat16, tfloat, opcodes["f2f16"].const_expr) unop_numeric_convert("f2fmp", tfloat16, tfloat, opcodes["f2f16"].const_expr)
unop_numeric_convert("i2imp", tint16, tint, opcodes["i2i16"].const_expr) unop_numeric_convert("i2imp", tint16, tint, opcodes["i2i16"].const_expr)
unop_numeric_convert("u2ump", tuint16, tuint, opcodes["u2u16"].const_expr) # u2ump isn't defined, because the behavior is equal to i2imp if src has more
# than 16 bits.
# Unary floating-point rounding operations. # Unary floating-point rounding operations.

View File

@@ -967,15 +967,15 @@ optimizations.extend([
# Conversions from 16 bits to 32 bits and back can always be removed # Conversions from 16 bits to 32 bits and back can always be removed
(('f2fmp', ('f2f32', 'a@16')), a), (('f2fmp', ('f2f32', 'a@16')), a),
(('i2imp', ('i2i32', 'a@16')), a), (('i2imp', ('i2i32', 'a@16')), a),
(('u2ump', ('u2u32', 'a@16')), a), (('i2imp', ('u2u32', 'a@16')), a),
(('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)), (('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)),
(('i2imp', ('b2i32', 'a@1')), ('b2i16', a)), (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
(('u2ump', ('b2i32', 'a@1')), ('b2i16', a)), (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
# Conversions to 16 bits would be lossy so they should only be removed if # Conversions to 16 bits would be lossy so they should only be removed if
# the instruction was generated by the precision lowering pass. # the instruction was generated by the precision lowering pass.
(('f2f32', ('f2fmp', 'a@32')), a), (('f2f32', ('f2fmp', 'a@32')), a),
(('i2i32', ('i2imp', 'a@32')), a), (('i2i32', ('i2imp', 'a@32')), a),
(('u2u32', ('u2ump', 'a@32')), a), (('u2u32', ('i2imp', 'a@32')), a),
(('ffloor', 'a(is_integral)'), a), (('ffloor', 'a(is_integral)'), a),
(('fceil', 'a(is_integral)'), a), (('fceil', 'a(is_integral)'), a),
@@ -1899,6 +1899,10 @@ for i in range(2, 4 + 1):
optimizations += [ optimizations += [
((to_16, vec_inst + suffix_in), vec_inst + out_16, '!options->vectorize_vec2_16bit'), ((to_16, vec_inst + suffix_in), vec_inst + out_16, '!options->vectorize_vec2_16bit'),
]
# u2ump doesn't exist, because it's equal to i2imp
if T in ['f', 'i']:
optimizations += [
((to_mp, vec_inst + suffix_in), vec_inst + out_mp, '!options->vectorize_vec2_16bit') ((to_mp, vec_inst + suffix_in), vec_inst + out_mp, '!options->vectorize_vec2_16bit')
] ]
@@ -2065,7 +2069,7 @@ late_optimizations = [
# nir_opt_algebraic so any remaining ones are required. # nir_opt_algebraic so any remaining ones are required.
(('f2fmp', a), ('f2f16', a)), (('f2fmp', a), ('f2f16', a)),
(('i2imp', a), ('i2i16', a)), (('i2imp', a), ('i2i16', a)),
(('u2ump', a), ('u2u16', a)), (('i2imp', a), ('u2u16', a)),
# Section 8.8 (Integer Functions) of the GLSL 4.60 spec says: # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
# #

View File

@@ -315,7 +315,7 @@ pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
nir_ssa_def *scales, nir_ssa_def *shifts) nir_ssa_def *scales, nir_ssa_def *shifts)
{ {
nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0)); nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts); nir_ssa_def *raw = nir_ushr(b, nir_i2imp(b, channels), shifts);
return nir_fmul(b, nir_u2f16(b, raw), scales); return nir_fmul(b, nir_u2f16(b, raw), scales);
} }
@@ -402,7 +402,7 @@ pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
{ {
nir_ssa_def *p = nir_channel(b, packed, 0); nir_ssa_def *p = nir_channel(b, packed, 0);
nir_ssa_def *bytes = nir_unpack_32_4x8(b, p); nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
nir_ssa_def *ubytes = nir_u2ump(b, bytes); nir_ssa_def *ubytes = nir_i2imp(b, bytes);
nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)), nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
nir_imm_ivec4(b, 0, 2, 4, 6)); nir_imm_ivec4(b, 0, 2, 4, 6));
@@ -449,7 +449,7 @@ pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
nir_ssa_def *mask = nir_iand(b, shift, nir_ssa_def *mask = nir_iand(b, shift,
nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3)); nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));
return nir_u2ump(b, mask); return nir_i2imp(b, mask);
} }
/* NIR means we can *finally* catch a break */ /* NIR means we can *finally* catch a break */