nir: Eliminate nir_op_i2b

There are a lot of optimizations in opt_algebraic that match ('ine', a,
0), but there are almost none that match i2b.  Instead of adding a huge
pile of additional patterns (including variations that include both ine
and i2b), always lower i2b to a != 0.

At this point in the series, it should be impossible for anything to
generate i2b, so there /should not/ be any changes.

The failing test on d3d12 is a pre-existing bug that is triggered by
this change.  I talked to Jesse about it, and, after some analysis, he
suggested just adding it to the list of known failures.

v2: Don't rematerialize i2b instructions in dxil_nir_lower_x2b.

v3: Don't rematerialize i2b instructions in zink_nir_algebraic.py.

v4: Fix zink-on-TGL CI failures by calling nir_opt_algebraic after
nir_lower_doubles makes progress.  The latter can generate i2b
instructions, but nir_lower_int64 can't handle them (anymore).

v5: Add back most of the hunk at line 2125 of nir_opt_algebraic.py. I
had accidentally removed the f2b(b2f(x)) optimization.

v6: Just eliminate the i2b instruction.

v7: Remove missed i2b32 in midgard_compile.c. Remove (now unused)
emit_alu_i2orf2_b1 function from sfn_instr_alu.cpp. Previously this
function was still used. 🤷

No shader-db changes on any Intel platform.

All Intel platforms had similar results. (Ice Lake shown)
Instructions in all programs: 141165875 -> 141165873 (-0.0%)
Instructions helped: 2

Cycles in all programs: 9098956382 -> 9098956350 (-0.0%)
Cycles helped: 2

The two Vulkan shaders are helped because of the "new" (('b2i32',
('ine', ('ubfe', a, b, 1), 0)), ('ubfe', a, b, 1)) algebraic pattern.

Acked-by: Jesse Natalie <jenatali@microsoft.com> [earlier version]
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Daniel Schürmann <daniel@schuermann.dev> [earlier version]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15121>
This commit is contained in:
Ian Romanick
2022-02-15 09:35:47 -08:00
committed by Marge Bot
parent 8b37046765
commit eb76cee9f8
31 changed files with 23 additions and 180 deletions

View File

@@ -3471,8 +3471,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero()); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
break; break;
} }
case nir_op_b2b1: case nir_op_b2b1: {
case nir_op_i2b1: {
Temp src = get_alu_src(ctx, instr->src[0]); Temp src = get_alu_src(ctx, instr->src[0]);
assert(dst.regClass() == bld.lm); assert(dst.regClass() == bld.lm);

View File

@@ -1112,7 +1112,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_b2i64: case nir_op_b2i64:
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break; break;
case nir_op_i2b1:
case nir_op_b2b1: /* after loads */ case nir_op_b2b1: /* after loads */
result = emit_i2b(&ctx->ac, src[0]); result = emit_i2b(&ctx->ac, src[0]);
break; break;

View File

@@ -3086,7 +3086,6 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
case nir_op_bit_count: case nir_op_bit_count:
case nir_op_find_lsb: case nir_op_find_lsb:
case nir_op_ufind_msb: case nir_op_ufind_msb:
case nir_op_i2b1:
return 32; return 32;
case nir_op_ilt: case nir_op_ilt:
case nir_op_ige: case nir_op_ige:

View File

@@ -805,7 +805,6 @@ agx_emit_alu_bool(agx_builder *b, nir_op op,
case nir_op_inot: return agx_xor_to(b, dst, s0, t); case nir_op_inot: return agx_xor_to(b, dst, s0, t);
case nir_op_f2b1: return agx_fcmpsel_to(b, dst, s0, f, f, t, AGX_FCOND_EQ); case nir_op_f2b1: return agx_fcmpsel_to(b, dst, s0, f, f, t, AGX_FCOND_EQ);
case nir_op_i2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
case nir_op_b2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ); case nir_op_b2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
case nir_op_bcsel: case nir_op_bcsel:

View File

@@ -1210,11 +1210,6 @@ ntq_emit_comparison(struct v3d_compile *c,
vir_set_pf(c, vir_SUB_dest(c, nop, src0, src1), V3D_QPU_PF_PUSHC); vir_set_pf(c, vir_SUB_dest(c, nop, src0, src1), V3D_QPU_PF_PUSHC);
break; break;
case nir_op_i2b32:
vir_set_pf(c, vir_MOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
cond_invert = true;
break;
case nir_op_f2b32: case nir_op_f2b32:
vir_set_pf(c, vir_FMOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ); vir_set_pf(c, vir_FMOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
cond_invert = true; cond_invert = true;
@@ -1656,7 +1651,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
break; break;
} }
case nir_op_i2b32:
case nir_op_f2b32: case nir_op_f2b32:
case nir_op_feq32: case nir_op_feq32:
case nir_op_fneu32: case nir_op_fneu32:

View File

@@ -3108,10 +3108,6 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
CASE_ALL_SIZES(nir_op_uge) CASE_ALL_SIZES(nir_op_uge)
CASE_ALL_SIZES(nir_op_ieq) CASE_ALL_SIZES(nir_op_ieq)
CASE_ALL_SIZES(nir_op_ine) CASE_ALL_SIZES(nir_op_ine)
case nir_op_i2b1:
case nir_op_i2b8:
case nir_op_i2b16:
case nir_op_i2b32:
case nir_op_f2b1: case nir_op_f2b1:
case nir_op_f2b8: case nir_op_f2b8:
case nir_op_f2b16: case nir_op_f2b16:

View File

@@ -168,11 +168,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
bit_size == 16 ? nir_op_f2b16 : nir_op_f2b32; bit_size == 16 ? nir_op_f2b16 : nir_op_f2b32;
break; break;
case nir_op_i2b1:
opcode = bit_size == 8 ? nir_op_i2b8 :
bit_size == 16 ? nir_op_i2b16 : nir_op_i2b32;
break;
case nir_op_b2b1: case nir_op_b2b1:
/* Since the canonical bit size is the size of the src, it's a no-op */ /* Since the canonical bit size is the size of the src, it's a no-op */
opcode = nir_op_mov; opcode = nir_op_mov;

View File

@@ -67,7 +67,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
case nir_op_b2f32: alu->op = nir_op_mov; break; case nir_op_b2f32: alu->op = nir_op_mov; break;
case nir_op_b2i32: alu->op = nir_op_mov; break; case nir_op_b2i32: alu->op = nir_op_mov; break;
case nir_op_f2b1: case nir_op_f2b1:
case nir_op_i2b1:
rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0), rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0),
nir_imm_float(b, 0)); nir_imm_float(b, 0));
break; break;

View File

@@ -67,7 +67,6 @@ lower_alu_instr(nir_alu_instr *alu)
break; break;
case nir_op_f2b1: alu->op = nir_op_f2b32; break; case nir_op_f2b1: alu->op = nir_op_f2b32; break;
case nir_op_i2b1: alu->op = nir_op_i2b32; break;
case nir_op_b2b32: case nir_op_b2b32:
case nir_op_b2b1: case nir_op_b2b1:

View File

@@ -47,14 +47,6 @@ lower_b2i64(nir_builder *b, nir_ssa_def *x)
return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0)); return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0));
} }
static nir_ssa_def *
lower_i2b(nir_builder *b, nir_ssa_def *x)
{
return nir_ine(b, nir_ior(b, nir_unpack_64_2x32_split_x(b, x),
nir_unpack_64_2x32_split_y(b, x)),
nir_imm_int(b, 0));
}
static nir_ssa_def * static nir_ssa_def *
lower_i2i8(nir_builder *b, nir_ssa_def *x) lower_i2i8(nir_builder *b, nir_ssa_def *x)
{ {
@@ -871,7 +863,6 @@ nir_lower_int64_op_to_options_mask(nir_op opcode)
case nir_op_irem: case nir_op_irem:
return nir_lower_divmod64; return nir_lower_divmod64;
case nir_op_b2i64: case nir_op_b2i64:
case nir_op_i2b1:
case nir_op_i2i8: case nir_op_i2i8:
case nir_op_i2i16: case nir_op_i2i16:
case nir_op_i2i32: case nir_op_i2i32:
@@ -965,8 +956,6 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
return lower_irem64(b, src[0], src[1]); return lower_irem64(b, src[0], src[1]);
case nir_op_b2i64: case nir_op_b2i64:
return lower_b2i64(b, src[0]); return lower_b2i64(b, src[0]);
case nir_op_i2b1:
return lower_i2b(b, src[0]);
case nir_op_i2i8: case nir_op_i2i8:
return lower_i2i8(b, src[0]); return lower_i2i8(b, src[0]);
case nir_op_i2i16: case nir_op_i2i16:
@@ -1052,7 +1041,6 @@ should_lower_int64_alu_instr(const nir_alu_instr *alu,
const nir_shader_compiler_options *options) const nir_shader_compiler_options *options)
{ {
switch (alu->op) { switch (alu->op) {
case nir_op_i2b1:
case nir_op_i2i8: case nir_op_i2i8:
case nir_op_i2i16: case nir_op_i2i16:
case nir_op_i2i32: case nir_op_i2i32:

View File

@@ -89,7 +89,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
} }
case nir_op_f2u32: alu->op = nir_op_ffloor; break; case nir_op_f2u32: alu->op = nir_op_ffloor; break;
case nir_op_i2b1: alu->op = nir_op_f2b1; break;
case nir_op_ilt: alu->op = nir_op_flt; break; case nir_op_ilt: alu->op = nir_op_flt; break;
case nir_op_ige: alu->op = nir_op_fge; break; case nir_op_ige: alu->op = nir_op_fge; break;

View File

@@ -229,7 +229,7 @@ for src_t in [tint, tuint, tfloat, tbool]:
if src_t == tbool: if src_t == tbool:
dst_types = [tfloat, tint, tbool] dst_types = [tfloat, tint, tbool]
elif src_t == tint: elif src_t == tint:
dst_types = [tfloat, tint, tbool] dst_types = [tfloat, tint]
elif src_t == tuint: elif src_t == tuint:
dst_types = [tfloat, tuint] dst_types = [tfloat, tuint]
elif src_t == tfloat: elif src_t == tfloat:

View File

@@ -50,6 +50,10 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
return nir_op_mov; return nir_op_mov;
} }
/* i2b and u2b do not exist. Use ine (via nir_type_conversion) instead */
assert((src_base != nir_type_int && src_base != nir_type_uint) ||
dst_base != nir_type_bool);
switch (src_base) { switch (src_base) {
% for src_t in ['int', 'uint', 'float', 'bool']: % for src_t in ['int', 'uint', 'float', 'bool']:
case nir_type_${src_t}: case nir_type_${src_t}:
@@ -68,8 +72,8 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
% else: % else:
<% dst_t = 'int' %> <% dst_t = 'int' %>
% endif % endif
% elif src_t == 'uint' and dst_t == 'bool': % elif src_t in ['int', 'uint'] and dst_t == 'bool':
<% src_t = 'int' %> <% continue %>
% endif % endif
switch (dst_bit_size) { switch (dst_bit_size) {
% for dst_bits in type_sizes(dst_t): % for dst_bits in type_sizes(dst_t):

View File

@@ -719,7 +719,6 @@ optimizations.extend([
(('bcsel', ('ilt', a, b), b, a), ('imax', a, b)), (('bcsel', ('ilt', a, b), b, a), ('imax', a, b)),
(('bcsel', ('ige', a, b), b, a), ('imin', a, b)), (('bcsel', ('ige', a, b), b, a), ('imin', a, b)),
(('bcsel', ('ige', b, a), b, a), ('imax', a, b)), (('bcsel', ('ige', b, a), b, a), ('imax', a, b)),
(('bcsel', ('i2b', a), b, c), ('bcsel', ('ine', a, 0), b, c)),
(('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)), (('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
(('bcsel', a, b, ('bcsel', a, c, d)), ('bcsel', a, b, d)), (('bcsel', a, b, ('bcsel', a, c, d)), ('bcsel', a, b, d)),
@@ -1391,9 +1390,6 @@ optimizations.extend([
(('fsin', a), lowered_sincos(0.5), 'options->lower_sincos'), (('fsin', a), lowered_sincos(0.5), 'options->lower_sincos'),
(('fcos', a), lowered_sincos(0.75), 'options->lower_sincos'), (('fcos', a), lowered_sincos(0.75), 'options->lower_sincos'),
# Boolean simplifications # Boolean simplifications
(('i2b16(is_used_by_if)', a), ('ine16', a, 0)),
(('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
(('i2b1(is_used_by_if)', a), ('ine', a, 0)),
(('ieq', a, True), a), (('ieq', a, True), a),
(('ine(is_not_used_by_if)', a, True), ('inot', a)), (('ine(is_not_used_by_if)', a, True), ('inot', a)),
(('ine', a, False), a), (('ine', a, False), a),
@@ -1454,8 +1450,6 @@ optimizations.extend([
# Conversions # Conversions
(('f2i', ('ftrunc', a)), ('f2i', a)), (('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)), (('f2u', ('ftrunc', a)), ('f2u', a)),
(('i2b', ('ineg', a)), ('i2b', a)),
(('i2b', ('iabs', a)), ('i2b', a)),
(('inot', ('f2b1', a)), ('feq', a, 0.0)), (('inot', ('f2b1', a)), ('feq', a, 0.0)),
# Conversions from 16 bits to 32 bits and back can always be removed # Conversions from 16 bits to 32 bits and back can always be removed
@@ -1967,8 +1961,8 @@ optimizations.extend([
(('ubfe', a, 0, '#b'), ('iand', a, ('ushr', 0xffffffff, ('ineg', b)))), (('ubfe', a, 0, '#b'), ('iand', a, ('ushr', 0xffffffff, ('ineg', b)))),
(('b2i32', ('i2b', ('ubfe', a, b, 1))), ('ubfe', a, b, 1)), (('b2i32', ('ine', ('ubfe', a, b, 1), 0)), ('ubfe', a, b, 1)),
(('b2i32', ('i2b', ('ibfe', a, b, 1))), ('ubfe', a, b, 1)), # ubfe in the replacement is correct (('b2i32', ('ine', ('ibfe', a, b, 1), 0)), ('ubfe', a, b, 1)), # ubfe in the replacement is correct
(('ine', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)), (('ine', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
(('ieq', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ieq', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)), (('ieq', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ieq', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
(('ine', ('ubfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)), (('ine', ('ubfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
@@ -2194,13 +2188,11 @@ for left, right in itertools.combinations_with_replacement(invert.keys(), 2):
optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, c, d))), optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, c, d))),
('ior', (invert[left], a, b), (invert[right], c, d)))) ('ior', (invert[left], a, b), (invert[right], c, d))))
# Optimize x2bN(b2x(x)) -> x # Optimize f2bN(b2f(x)) -> x
for size in type_sizes('bool'): for size in type_sizes('bool'):
aN = 'a@' + str(size) aN = 'a@' + str(size)
f2bN = 'f2b' + str(size) f2bN = 'f2b' + str(size)
i2bN = 'i2b' + str(size)
optimizations.append(((f2bN, ('b2f', aN)), a)) optimizations.append(((f2bN, ('b2f', aN)), a))
optimizations.append(((i2bN, ('b2i', aN)), a))
# Optimize x2yN(b2x(x)) -> b2y # Optimize x2yN(b2x(x)) -> b2y
for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']): for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):

View File

@@ -144,7 +144,6 @@ nir_op_matches_search_op(nir_op nop, uint16_t sop)
MATCH_ICONV_CASE(i2i) MATCH_ICONV_CASE(i2i)
MATCH_FCONV_CASE(b2f) MATCH_FCONV_CASE(b2f)
MATCH_ICONV_CASE(b2i) MATCH_ICONV_CASE(b2i)
MATCH_BCONV_CASE(i2b)
MATCH_BCONV_CASE(f2b) MATCH_BCONV_CASE(f2b)
default: default:
unreachable("Invalid nir_search_op"); unreachable("Invalid nir_search_op");
@@ -187,7 +186,6 @@ nir_search_op_for_nir_op(nir_op nop)
MATCH_ICONV_CASE(i2i) MATCH_ICONV_CASE(i2i)
MATCH_FCONV_CASE(b2f) MATCH_FCONV_CASE(b2f)
MATCH_ICONV_CASE(b2i) MATCH_ICONV_CASE(b2i)
MATCH_BCONV_CASE(i2b)
MATCH_BCONV_CASE(f2b) MATCH_BCONV_CASE(f2b)
default: default:
return nop; return nop;
@@ -241,7 +239,6 @@ nir_op_for_search_op(uint16_t sop, unsigned bit_size)
RET_ICONV_CASE(i2i) RET_ICONV_CASE(i2i)
RET_FCONV_CASE(b2f) RET_FCONV_CASE(b2f)
RET_ICONV_CASE(b2i) RET_ICONV_CASE(b2i)
RET_BCONV_CASE(i2b)
RET_BCONV_CASE(f2b) RET_BCONV_CASE(f2b)
default: default:
unreachable("Invalid nir_search_op"); unreachable("Invalid nir_search_op");
@@ -621,7 +618,6 @@ UNUSED static void dump_value(const nir_algebraic_table *table, const nir_search
CASE(f2b) CASE(f2b)
CASE(b2f) CASE(b2f)
CASE(b2i) CASE(b2i)
CASE(i2b)
CASE(i2i) CASE(i2i)
CASE(f2i) CASE(f2i)
CASE(i2f) CASE(i2f)

View File

@@ -123,7 +123,6 @@ enum nir_search_op {
nir_search_op_i2i, nir_search_op_i2i,
nir_search_op_b2f, nir_search_op_b2f,
nir_search_op_b2i, nir_search_op_b2i,
nir_search_op_i2b,
nir_search_op_f2b, nir_search_op_f2b,
nir_num_search_ops, nir_num_search_ops,
}; };

View File

@@ -476,16 +476,6 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
dst[0]->cat2.condition = IR3_COND_NE; dst[0]->cat2.condition = IR3_COND_NE;
break; break;
case nir_op_i2b1:
/* i2b1 will appear when translating from nir_load_ubo or
* nir_intrinsic_load_ssbo, where any non-zero value is true.
*/
dst[0] = ir3_CMPS_S(
b, src[0], 0,
create_immed_typed(b, 0, type_uint_size(bs[0])), 0);
dst[0]->cat2.condition = IR3_COND_NE;
break;
case nir_op_b2b1: case nir_op_b2b1:
/* b2b1 will appear when translating from /* b2b1 will appear when translating from
* *

View File

@@ -244,27 +244,6 @@ assign_alu_dest(struct lp_build_nir_context *bld_base,
assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals); assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
} }
static LLVMValueRef
int_to_bool32(struct lp_build_nir_context *bld_base,
uint32_t src_bit_size,
bool is_unsigned,
LLVMValueRef val)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
struct lp_build_context *int_bld =
get_int_bld(bld_base, is_unsigned, src_bit_size);
LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
int_bld->type, PIPE_FUNC_NOTEQUAL,
val, int_bld->zero);
if (src_bit_size == 16)
result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
else if (src_bit_size == 64)
result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
return result;
}
static LLVMValueRef static LLVMValueRef
flt_to_bool32(struct lp_build_nir_context *bld_base, flt_to_bool32(struct lp_build_nir_context *bld_base,
uint32_t src_bit_size, uint32_t src_bit_size,
@@ -964,9 +943,6 @@ do_alu_action(struct lp_build_nir_context *bld_base,
case nir_op_ftrunc: case nir_op_ftrunc:
result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]); result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break; break;
case nir_op_i2b32:
result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
break;
case nir_op_i2f16: case nir_op_i2f16:
result = LLVMBuildSIToFP(builder, src[0], result = LLVMBuildSIToFP(builder, src[0],
bld_base->half_bld.vec_type, ""); bld_base->half_bld.vec_type, "");

View File

@@ -1568,13 +1568,6 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0)); ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
break; break;
case nir_op_i2b32:
if (src_64) {
ntt_U64SNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
} else
ntt_USNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
break;
case nir_op_b2i32: case nir_op_b2i32:
ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1)); ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1));
break; break;

View File

@@ -524,6 +524,7 @@ spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec3-index-wr: crash spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec3-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec4-index-wr: crash spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec4-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-vec4-index-wr: crash spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-vec4-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-vec4-swiz-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-float-index-rd: crash spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-float-index-rd: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec2-index-rd: crash spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec2-index-rd: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec3-index-rd: crash spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec3-index-rd: crash
@@ -3011,9 +3012,9 @@ spec/oes_viewport_array/viewport-gs-writes-out-of-range: skip
summary: summary:
name: results name: results
---- -------- ---- --------
pass: 17231 pass: 17230
fail: 20 fail: 20
crash: 40 crash: 41
skip: 2925 skip: 2925
timeout: 0 timeout: 0
warn: 25 warn: 25

View File

@@ -83,7 +83,6 @@ static const struct etna_op_info etna_ops[] = {
UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */ UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */
UOP(b2i32, AND, 0_X_X), /* AND with 1 */ UOP(b2i32, AND, 0_X_X), /* AND with 1 */
OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */ OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */
UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */
/* arithmetic */ /* arithmetic */
IOP(iadd, ADD, 0_X_1), IOP(iadd, ADD, 0_X_1),
@@ -167,9 +166,6 @@ etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
case nir_op_f2b32: case nir_op_f2b32:
inst.src[1] = etna_immediate_float(0.0f); inst.src[1] = etna_immediate_float(0.0f);
break; break;
case nir_op_i2b32:
inst.src[1] = etna_immediate_int(0);
break;
case nir_op_ineg: case nir_op_ineg:
inst.src[0] = etna_immediate_int(0); inst.src[0] = etna_immediate_int(0);
src[0].neg = 1; src[0].neg = 1;

View File

@@ -1248,9 +1248,6 @@ static bool
emit_any_all_icomp( emit_any_all_icomp(
const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader); const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
static bool
emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader); emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool static bool
@@ -1614,9 +1611,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1); return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
case nir_op_ftrunc: case nir_op_ftrunc:
return emit_alu_op1(*alu, op1_trunc, shader); return emit_alu_op1(*alu, op1_trunc, shader);
case nir_op_i2b1:
case nir_op_i2b32:
return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader);
case nir_op_iadd: case nir_op_iadd:
return emit_alu_op2_int(*alu, op2_add_int, shader); return emit_alu_op2_int(*alu, op2_add_int, shader);
case nir_op_iand: case nir_op_iand:
@@ -2643,28 +2637,6 @@ emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
return true; return true;
} }
static bool
emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
{
auto& value_factory = shader.value_factory();
AluInstr *ir = nullptr;
Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none;
for (int i = 0; i < 4; ++i) {
if (alu.dest.write_mask & (1 << i)) {
ir = new AluInstr(opcode,
value_factory.dest(alu.dest, i, pin),
value_factory.src(alu.src[0], i),
value_factory.zero(),
AluInstr::write);
shader.emit_instruction(ir);
}
}
if (ir)
ir->set_alu_flag(alu_last_instr);
return true;
}
static bool static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader) emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
{ {

View File

@@ -1146,7 +1146,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
case nir_op_b2i32: case nir_op_b2i32:
result = qir_AND(c, src[0], qir_uniform_ui(c, 1)); result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
break; break;
case nir_op_i2b32:
case nir_op_f2b32: case nir_op_f2b32:
qir_SF(c, src[0]); qir_SF(c, src[0]);
result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC, result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,

View File

@@ -2185,13 +2185,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
nir_src_bit_size(alu->src[0].src), nir_src_bit_size(alu->src[0].src),
num_components, 0)); num_components, 0));
break; break;
case nir_op_i2b1:
assert(nir_op_infos[alu->op].num_inputs == 1);
result = emit_binop(ctx, SpvOpINotEqual, dest_type, src[0],
get_ivec_constant(ctx,
nir_src_bit_size(alu->src[0].src),
num_components, 0));
break;
#define BINOP(nir_op, spirv_op) \ #define BINOP(nir_op, spirv_op) \

View File

@@ -25,7 +25,7 @@ import sys
lower_b2b = [ lower_b2b = [
(('b2b32', 'a'), ('b2i32', 'a')), (('b2b32', 'a'), ('b2i32', 'a')),
(('b2b1', 'a'), ('i2b1', 'a')), (('b2b1', 'a'), ('ine', 'a', 0)),
] ]
def main(): def main():

View File

@@ -1569,23 +1569,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]); inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
break; break;
case nir_op_i2b32:
case nir_op_f2b32: { case nir_op_f2b32: {
uint32_t bit_size = nir_src_bit_size(instr->src[0].src); uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
if (bit_size == 64) { if (bit_size == 64) {
/* two-argument instructions can't take 64-bit immediates */ /* two-argument instructions can't take 64-bit immediates */
fs_reg zero; fs_reg zero = vgrf(glsl_type::double_type);
fs_reg tmp; fs_reg tmp = vgrf(glsl_type::double_type);
if (instr->op == nir_op_f2b32) { bld.MOV(zero, setup_imm_df(bld, 0.0));
zero = vgrf(glsl_type::double_type);
tmp = vgrf(glsl_type::double_type);
bld.MOV(zero, setup_imm_df(bld, 0.0));
} else {
zero = vgrf(glsl_type::int64_t_type);
tmp = vgrf(glsl_type::int64_t_type);
bld.MOV(zero, brw_imm_q(0));
}
/* A SIMD16 execution needs to be split in two instructions, so use /* A SIMD16 execution needs to be split in two instructions, so use
* a vgrf instead of the flag register as dst so instruction splitting * a vgrf instead of the flag register as dst so instruction splitting
@@ -1596,11 +1587,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
} else { } else {
fs_reg zero; fs_reg zero;
if (bit_size == 32) { if (bit_size == 32) {
zero = instr->op == nir_op_f2b32 ? brw_imm_f(0.0f) : brw_imm_d(0); zero = brw_imm_f(0.0f);
} else { } else {
assert(bit_size == 16); assert(bit_size == 16);
zero = instr->op == nir_op_f2b32 ? zero = retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF);
retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);
} }
bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ); bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
} }

View File

@@ -1576,10 +1576,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
} }
break; break;
case nir_op_i2b32:
emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
break;
case nir_op_unpack_half_2x16_split_x: case nir_op_unpack_half_2x16_split_x:
case nir_op_unpack_half_2x16_split_y: case nir_op_unpack_half_2x16_split_y:
case nir_op_pack_half_2x16_split: case nir_op_pack_half_2x16_split:

View File

@@ -93,8 +93,7 @@ remove_unsupported_casts(no_16bit_conv, 16, 0xffff, 65535.0, -32768.0, 32767.0)
lower_x2b = [ lower_x2b = [
(('b2b32', 'a'), ('b2i32', 'a')), (('b2b32', 'a'), ('b2i32', 'a')),
(('b2b1', 'a'), ('i2b1', 'a')), (('b2b1', 'a'), ('ine', ('b2i32', a), 0)),
(('i2b1', 'a'), ('ine', a, 0)),
(('f2b1', 'a'), ('fneu', a, 0)), (('f2b1', 'a'), ('fneu', a, 0)),
] ]

View File

@@ -2921,18 +2921,10 @@ Converter::visit(nir_alu_instr *insn)
mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp); mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
break; break;
} }
case nir_op_f2b32: case nir_op_f2b32: {
case nir_op_i2b32: {
DEFAULT_CHECKS; DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest); LValues &newDefs = convert(&insn->dest);
Value *src1; mkCmp(OP_SET, CC_NEU, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), zero);
if (typeSizeof(sTypes[0]) == 8) {
src1 = loadImm(getSSA(8), 0.0);
} else {
src1 = zero;
}
CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
break; break;
} }
case nir_op_b2i8: case nir_op_b2i8:

View File

@@ -2914,16 +2914,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO); bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO);
break; break;
case nir_op_i2b8:
bi_mux_v4i8_to(b, dst, bi_imm_u8(0), bi_imm_u8(~0), s0, BI_MUX_INT_ZERO);
break;
case nir_op_i2b16:
bi_mux_v2i16_to(b, dst, bi_imm_u16(0), bi_imm_u16(~0), s0, BI_MUX_INT_ZERO);
break;
case nir_op_i2b32:
bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_INT_ZERO);
break;
case nir_op_ieq8: case nir_op_ieq8:
case nir_op_ine8: case nir_op_ine8:
case nir_op_ilt8: case nir_op_ilt8:

View File

@@ -840,7 +840,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
* we can do a "not equal to 0.0" test. */ * we can do a "not equal to 0.0" test. */
ALU_CASE_CMP(f2b32, fne); ALU_CASE_CMP(f2b32, fne);
ALU_CASE_CMP(i2b32, ine);
ALU_CASE(frcp, frcp); ALU_CASE(frcp, frcp);
ALU_CASE(frsq, frsqrt); ALU_CASE(frsq, frsqrt);