nir: Eliminate nir_op_f2b
Builds on the work of !15121. This gets to delete even more code
because many drivers shared a lot of code for i2b and f2b.
No shader-db or fossil-db changes on any Intel platform.
v2: Rebase on 1a35acd8d9.
v3: Update a comment in nir_opcodes_c.py. Suggested by Konstantin.
v4: Another rebase. Remove f2b stuff from Midgard.
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20509>
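For context on the mechanical change: f2b(x) was defined as x != 0.0, so every former f2b site can be rebuilt as an fneu comparison. A minimal sketch, assuming the generated nir_builder helpers nir_fneu() and nir_imm_floatN_t() (float_to_bool itself is a hypothetical wrapper for illustration, not a Mesa function):

static nir_ssa_def *
float_to_bool(nir_builder *b, nir_ssa_def *x)
{
   /* Unordered not-equal: NaN also compares as "not equal to 0.0", matching
    * the LLVMRealUNE / FCMP_UNE / CC_NEU lowerings deleted below. */
   return nir_fneu(b, x, nir_imm_floatN_t(b, 0.0, x->bit_size));
}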
@@ -277,13 +277,6 @@ static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, uns
    }
 }
 
-static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
-{
-   src0 = ac_to_float(ctx, src0);
-   LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
-   return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, "");
-}
-
 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
 {
    switch (bitsize) {
@@ -1097,9 +1090,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
    case nir_op_b2f64:
       result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
       break;
-   case nir_op_f2b1:
-      result = emit_f2b(&ctx->ac, src[0]);
-      break;
    case nir_op_b2i8:
    case nir_op_b2i16:
    case nir_op_b2i32:
@@ -862,8 +862,6 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
       else
         return agx_not_to(b, dst, s0);
 
-   case nir_op_f2b1:
-      return agx_fcmpsel_to(b, dst, s0, i0, i0, i1, AGX_FCOND_EQ);
    case nir_op_b2b1:
       return agx_icmpsel_to(b, dst, s0, i0, i0, i1, AGX_ICOND_UEQ);
 
@@ -1210,11 +1210,6 @@ ntq_emit_comparison(struct v3d_compile *c,
                vir_set_pf(c, vir_SUB_dest(c, nop, src0, src1), V3D_QPU_PF_PUSHC);
                break;
 
-        case nir_op_f2b32:
-                vir_set_pf(c, vir_FMOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
-                cond_invert = true;
-                break;
-
         default:
                 return false;
         }
@@ -1589,7 +1584,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 break;
         }
 
-        case nir_op_f2b32:
         case nir_op_feq32:
         case nir_op_fneu32:
         case nir_op_fge32:
@@ -1174,12 +1174,6 @@ __int_to_fp64(int a)
    return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);
 }
 
-bool
-__fp64_to_bool(uint64_t a)
-{
-   return !__feq64_nonnan(__fabs64(a), 0ul);
-}
-
 uint64_t
 __bool_to_fp64(bool a)
 {
@@ -3109,10 +3109,6 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr)
    CASE_ALL_SIZES(nir_op_uge)
    CASE_ALL_SIZES(nir_op_ieq)
    CASE_ALL_SIZES(nir_op_ine)
-   case nir_op_f2b1:
-   case nir_op_f2b8:
-   case nir_op_f2b16:
-   case nir_op_f2b32:
    case nir_op_inot:
       return true;
    default:
@@ -939,12 +939,12 @@ class TreeAutomaton(object):
             stripped = opcode.rstrip('0123456789')
             if stripped in conv_opcode_types:
                # Matches that use conversion opcodes with a specific type,
-               # like f2b1, are tricky. Either we construct the automaton to
-               # match specific NIR opcodes like nir_op_f2b1, in which case we
+               # like f2i1, are tricky. Either we construct the automaton to
+               # match specific NIR opcodes like nir_op_f2i1, in which case we
                # need to create separate items for each possible NIR opcode
-               # for patterns that have a generic opcode like f2b, or we
+               # for patterns that have a generic opcode like f2i, or we
                # construct it to match the search opcode, in which case we
-               # need to map f2b1 to f2b when constructing the automaton. Here
+               # need to map f2i1 to f2i when constructing the automaton. Here
                # we do the latter.
                opcode = stripped
             self.opcodes.add(opcode)
@@ -163,11 +163,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
       /* Nothing to do here, we do not specialize these opcodes by bit-size */
       break;
 
-   case nir_op_f2b1:
-      opcode = bit_size == 8 ? nir_op_f2b8 :
-               bit_size == 16 ? nir_op_f2b16 : nir_op_f2b32;
-      break;
-
    case nir_op_b2b1:
       /* Since the canonical bit size is the size of the src, it's a no-op */
       opcode = nir_op_mov;
@@ -67,10 +67,6 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne,
 
    case nir_op_b2f32: alu->op = nir_op_mov; break;
    case nir_op_b2i32: alu->op = nir_op_mov; break;
-   case nir_op_f2b1:
-      rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0),
-                    nir_imm_float(b, 0));
-      break;
    case nir_op_b2b1: alu->op = nir_op_mov; break;
 
    case nir_op_flt: alu->op = nir_op_slt; break;
@@ -66,8 +66,6 @@ lower_alu_instr(nir_alu_instr *alu)
       /* These we expect to have booleans but the opcode doesn't change */
       break;
 
-   case nir_op_f2b1: alu->op = nir_op_f2b32; break;
-
    case nir_op_b2b32:
    case nir_op_b2b1:
       /* We're mutating instructions in a dominance-preserving order so our
@@ -504,12 +504,6 @@ lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
       mangled_name = "__fp64_to_uint(u641;";
       return_type = glsl_uint_type();
       break;
-   case nir_op_f2b1:
-   case nir_op_f2b32:
-      name = "__fp64_to_bool";
-      mangled_name = "__fp64_to_bool(u641;";
-      return_type = glsl_bool_type();
-      break;
    case nir_op_b2f64:
       name = "__bool_to_fp64";
       mangled_name = "__bool_to_fp64(b1;";
@@ -233,7 +233,7 @@ for src_t in [tint, tuint, tfloat, tbool]:
    elif src_t == tuint:
       dst_types = [tfloat, tuint]
    elif src_t == tfloat:
-      dst_types = [tint, tuint, tfloat, tbool]
+      dst_types = [tint, tuint, tfloat]
 
    for dst_t in dst_types:
       for dst_bit_size in type_sizes(dst_t):
@@ -50,9 +50,10 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
       return nir_op_mov;
    }
 
-   /* i2b and u2b do not exist. Use ine (via nir_type_conversion) instead */
-   assert((src_base != nir_type_int && src_base != nir_type_uint) ||
-          dst_base != nir_type_bool);
+   /* f2b, i2b, and u2b do not exist. Use ine or fne (via nir_type_conversion)
+    * instead.
+    */
+   assert(src_base == dst_base || dst_base != nir_type_bool);
 
    switch (src_base) {
 % for src_t in ['int', 'uint', 'float', 'bool']:
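With the reworked assert, nir_type_conversion_op() now only accepts a bool destination when the source is also a bool; every other x2b is emitted by the caller as a comparison. A caller-side sketch (ival and fval are placeholder nir_ssa_def pointers, not names from this commit):

/* i2b / u2b replacement: integer "is non-zero" test. */
nir_ssa_def *ib = nir_ine(b, ival, nir_imm_intN_t(b, 0, ival->bit_size));
/* f2b replacement: unordered not-equal against 0.0, so NaN maps to true. */
nir_ssa_def *fb = nir_fneu(b, fval, nir_imm_floatN_t(b, 0.0, fval->bit_size));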
@@ -72,7 +73,7 @@ nir_type_conversion_op(nir_alu_type src, nir_alu_type dst, nir_rounding_mode rnd
 % else:
       <% dst_t = 'int' %>
 % endif
-% elif src_t in ['int', 'uint'] and dst_t == 'bool':
+% elif src_t != 'bool' and dst_t == 'bool':
       <% continue %>
 % endif
       switch (dst_bit_size) {
@@ -135,7 +135,6 @@ optimizations = [
    (('fabs', ('u2f', a)), ('u2f', a)),
    (('iabs', ('iabs', a)), ('iabs', a)),
    (('iabs', ('ineg', a)), ('iabs', a)),
-   (('f2b', ('fneg', a)), ('f2b', a)),
    (('~fadd', a, 0.0), a),
    # a+0.0 is 'a' unless 'a' is denormal or -0.0. If it's only used by a
    # floating point instruction, they should flush any input denormals and we
@@ -1452,7 +1451,6 @@ optimizations.extend([
    # Conversions
    (('f2i', ('ftrunc', a)), ('f2i', a)),
    (('f2u', ('ftrunc', a)), ('f2u', a)),
-   (('inot', ('f2b1', a)), ('feq', a, 0.0)),
 
    # Conversions from 16 bits to 32 bits and back can always be removed
    (('f2fmp', ('f2f32', 'a@16')), a),
@@ -2198,12 +2196,6 @@ for left, right in itertools.combinations_with_replacement(invert.keys(), 2):
    optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, c, d))),
                         ('ior', (invert[left], a, b), (invert[right], c, d))))
 
-# Optimize f2bN(b2f(x)) -> x
-for size in type_sizes('bool'):
-   aN = 'a@' + str(size)
-   f2bN = 'f2b' + str(size)
-   optimizations.append(((f2bN, ('b2f', aN)), a))
-
 # Optimize x2yN(b2x(x)) -> b2y
 for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
    if x != 'f' and y != 'f' and x != y:
@@ -129,11 +129,6 @@ nir_op_matches_search_op(nir_op nop, uint16_t sop)
           nop == nir_op_##op##32 || \
           nop == nir_op_##op##64;
 
-#define MATCH_BCONV_CASE(op) \
-   case nir_search_op_##op: \
-      return nop == nir_op_##op##1 || \
-             nop == nir_op_##op##32;
-
    switch (sop) {
    MATCH_FCONV_CASE(i2f)
    MATCH_FCONV_CASE(u2f)
@@ -144,14 +139,12 @@ nir_op_matches_search_op(nir_op nop, uint16_t sop)
    MATCH_ICONV_CASE(i2i)
    MATCH_FCONV_CASE(b2f)
    MATCH_ICONV_CASE(b2i)
-   MATCH_BCONV_CASE(f2b)
    default:
       unreachable("Invalid nir_search_op");
    }
 
 #undef MATCH_FCONV_CASE
 #undef MATCH_ICONV_CASE
-#undef MATCH_BCONV_CASE
 }
 
 uint16_t
@@ -170,11 +163,6 @@ nir_search_op_for_nir_op(nir_op nop)
    case nir_op_##op##64: \
       return nir_search_op_##op;
 
-#define MATCH_BCONV_CASE(op) \
-   case nir_op_##op##1: \
-   case nir_op_##op##32: \
-      return nir_search_op_##op;
-
 
    switch (nop) {
    MATCH_FCONV_CASE(i2f)
@@ -186,14 +174,12 @@ nir_search_op_for_nir_op(nir_op nop)
    MATCH_ICONV_CASE(i2i)
    MATCH_FCONV_CASE(b2f)
    MATCH_ICONV_CASE(b2i)
-   MATCH_BCONV_CASE(f2b)
    default:
       return nop;
    }
 
 #undef MATCH_FCONV_CASE
 #undef MATCH_ICONV_CASE
-#undef MATCH_BCONV_CASE
 }
 
 static nir_op
@@ -221,14 +207,6 @@ nir_op_for_search_op(uint16_t sop, unsigned bit_size)
       default: unreachable("Invalid bit size"); \
       }
 
-#define RET_BCONV_CASE(op) \
-   case nir_search_op_##op: \
-      switch (bit_size) { \
-      case 1: return nir_op_##op##1; \
-      case 32: return nir_op_##op##32; \
-      default: unreachable("Invalid bit size"); \
-      }
-
    switch (sop) {
    RET_FCONV_CASE(i2f)
    RET_FCONV_CASE(u2f)
@@ -239,14 +217,12 @@ nir_op_for_search_op(uint16_t sop, unsigned bit_size)
    RET_ICONV_CASE(i2i)
    RET_FCONV_CASE(b2f)
    RET_ICONV_CASE(b2i)
-   RET_BCONV_CASE(f2b)
    default:
       unreachable("Invalid nir_search_op");
    }
 
 #undef RET_FCONV_CASE
 #undef RET_ICONV_CASE
-#undef RET_BCONV_CASE
 }
 
 static bool
@@ -615,7 +591,6 @@ UNUSED static void dump_value(const nir_algebraic_table *table, const nir_search
    switch (expr->opcode) {
 #define CASE(n) \
    case nir_search_op_##n: fprintf(stderr, #n); break;
-   CASE(f2b)
    CASE(b2f)
    CASE(b2i)
    CASE(i2i)
@@ -123,7 +123,6 @@ enum nir_search_op {
    nir_search_op_i2i,
    nir_search_op_b2f,
    nir_search_op_b2i,
-   nir_search_op_f2b,
    nir_num_search_ops,
 };
 
@@ -469,12 +469,6 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
       dst[0] = create_cov(ctx, create_cov(ctx, src[0], 32, nir_op_f2f16_rtne),
                           16, nir_op_f2f32);
       break;
-   case nir_op_f2b1:
-      dst[0] = ir3_CMPS_F(
-         b, src[0], 0,
-         create_immed_typed(b, 0, type_float_size(bs[0])), 0);
-      dst[0]->cat2.condition = IR3_COND_NE;
-      break;
 
    case nir_op_b2b1:
       /* b2b1 will appear when translating from
@@ -244,23 +244,6 @@ assign_alu_dest(struct lp_build_nir_context *bld_base,
    assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
 }
 
-static LLVMValueRef
-flt_to_bool32(struct lp_build_nir_context *bld_base,
-              uint32_t src_bit_size,
-              LLVMValueRef val)
-{
-   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
-   LLVMValueRef result =
-      lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
-   if (src_bit_size == 64)
-      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
-   if (src_bit_size == 16)
-      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
-   return result;
-}
-
-
 static LLVMValueRef
 fcmp32(struct lp_build_nir_context *bld_base,
        enum pipe_compare_func compare,
@@ -731,9 +714,6 @@ do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_bitfield_reverse:
       result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
-   case nir_op_f2b32:
-      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
-      break;
    case nir_op_f2f16:
       if (src_bit_size[0] == 64)
          src[0] = LLVMBuildFPTrunc(builder, src[0],
@@ -1561,13 +1561,6 @@ ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
                   ntt_64bit_1f(c));
       break;
 
-   case nir_op_f2b32:
-      if (src_64)
-         ntt_DSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
-      else
-         ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
-      break;
-
    case nir_op_b2i32:
       ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1));
       break;
@@ -82,7 +82,6 @@ static const struct etna_op_info etna_ops[] = {
    UOP(f2u32, F2I, 0_X_X),
    UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */
    UOP(b2i32, AND, 0_X_X), /* AND with 1 */
-   OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */
 
    /* arithmetic */
    IOP(iadd, ADD, 0_X_1),
@@ -163,9 +162,6 @@ etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
    case nir_op_b2i32:
       inst.src[2] = etna_immediate_int(1);
       break;
-   case nir_op_f2b32:
-      inst.src[1] = etna_immediate_float(0.0f);
-      break;
    case nir_op_ineg:
       inst.src[0] = etna_immediate_int(0);
       src[0].neg = 1;
@@ -1552,8 +1552,6 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
    case nir_op_b32csel:
       return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
 
-   case nir_op_f2b32:
-      return emit_alu_comb_with_zero(*alu, op2_setne_dx10, shader);
    case nir_op_fabs:
       return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_abs});
    case nir_op_fadd:
@@ -158,7 +158,6 @@ class LowerSplit64op : public NirLowerInstruction {
          switch (alu->op) {
          case nir_op_bcsel:
             return nir_dest_bit_size(alu->dest.dest) == 64;
-         case nir_op_f2b1:
          case nir_op_f2i32:
          case nir_op_f2u32:
          case nir_op_f2i64:
@@ -200,12 +199,6 @@ class LowerSplit64op : public NirLowerInstruction {
                                nir_unpack_64_2x32_split_y(b, nir_ssa_for_alu_src(b, alu, 2)));
          return nir_pack_64_2x32_split(b, lo, hi);
       }
-      case nir_op_f2b1: {
-         auto mask = nir_component_mask(nir_dest_num_components(alu->dest.dest));
-         return nir_fneu(b,
-                         nir_channels(b, nir_ssa_for_alu_src(b, alu, 0), mask),
-                         nir_imm_zero(b, nir_dest_num_components(alu->dest.dest), 64));
-      }
       case nir_op_f2i32: {
          auto src = nir_ssa_for_alu_src(b, alu, 0);
          auto gt0 = nir_flt(b, nir_imm_double(b, 0.0), src);
@@ -1146,12 +1146,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
         case nir_op_b2i32:
                 result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
                 break;
-        case nir_op_f2b32:
-                qir_SF(c, src[0]);
-                result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,
-                                            qir_uniform_ui(c, ~0),
-                                            qir_uniform_ui(c, 0)));
-                break;
 
         case nir_op_iadd:
                 result = qir_ADD(c, src[0], src[1]);
@@ -2178,14 +2178,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
                          src[0]);
       break;
 
-   case nir_op_f2b1:
-      assert(nir_op_infos[alu->op].num_inputs == 1);
-      result = emit_binop(ctx, SpvOpFOrdNotEqual, dest_type, src[0],
-                          get_fvec_constant(ctx,
-                                            nir_src_bit_size(alu->src[0].src),
-                                            num_components, 0));
-      break;
-
 
 #define BINOP(nir_op, spirv_op) \
    case nir_op: \
@@ -1571,34 +1571,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
       break;
 
-   case nir_op_f2b32: {
-      uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
-      if (bit_size == 64) {
-         /* two-argument instructions can't take 64-bit immediates */
-         fs_reg zero = vgrf(glsl_type::double_type);
-         fs_reg tmp = vgrf(glsl_type::double_type);
-
-         bld.MOV(zero, setup_imm_df(bld, 0.0));
-
-         /* A SIMD16 execution needs to be split in two instructions, so use
-          * a vgrf instead of the flag register as dst so instruction splitting
-          * works
-          */
-         bld.CMP(tmp, op[0], zero, BRW_CONDITIONAL_NZ);
-         bld.MOV(result, subscript(tmp, BRW_REGISTER_TYPE_UD, 0));
-      } else {
-         fs_reg zero;
-         if (bit_size == 32) {
-            zero = brw_imm_f(0.0f);
-         } else {
-            assert(bit_size == 16);
-            zero = retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF);
-         }
-         bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
-      }
-      break;
-   }
-
    case nir_op_ftrunc:
       inst = bld.RNDZ(result, op[0]);
       if (devinfo->ver < 6) {
@@ -1555,27 +1555,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       }
       break;
 
-   case nir_op_f2b32:
-      if (nir_src_bit_size(instr->src[0].src) == 64) {
-         /* We use a MOV with conditional_mod to check if the provided value is
-          * 0.0. We want this to flush denormalized numbers to zero, so we set a
-          * source modifier on the source operand to trigger this, as source
-          * modifiers don't affect the result of the testing against 0.0.
-          */
-         src_reg value = op[0];
-         value.abs = true;
-         vec4_instruction *inst = emit(MOV(dst_null_df(), value));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-         src_reg one = src_reg(this, glsl_type::ivec4_type);
-         emit(MOV(dst_reg(one), brw_imm_d(~0)));
-         inst = emit(BRW_OPCODE_SEL, dst, one, brw_imm_d(0));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      } else {
-         emit(CMP(dst, op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ));
-      }
-      break;
-
    case nir_op_unpack_half_2x16_split_x:
    case nir_op_unpack_half_2x16_split_y:
    case nir_op_pack_half_2x16_split:
@@ -94,7 +94,6 @@ remove_unsupported_casts(no_16bit_conv, 16, 0xffff, 65535.0, -32768.0, 32767.0)
 lower_x2b = [
   (('b2b32', 'a'), ('b2i32', 'a')),
   (('b2b1', 'a'), ('ine', ('b2i32', a), 0)),
-  (('f2b1', 'a'), ('fneu', a, 0)),
 ]
 
 no_16bit_conv += [
@@ -2397,15 +2397,6 @@ emit_b2f64(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value
    return emit_select(ctx, alu, val, c1, c0);
 }
 
-static bool
-emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)
-{
-   assert(val);
-
-   const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);
-   return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);
-}
-
 static bool
 emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val, bool shift)
 {
@@ -2716,7 +2707,6 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
    case nir_op_u2u64:
       return emit_cast(ctx, alu, src[0]);
 
-   case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);
    case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);
    case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);
    case nir_op_b2f64: return emit_b2f64(ctx, alu, src[0]);
@@ -2921,12 +2921,6 @@ Converter::visit(nir_alu_instr *insn)
       mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
       break;
    }
-   case nir_op_f2b32: {
-      DEFAULT_CHECKS;
-      LValues &newDefs = convert(&insn->dest);
-      mkCmp(OP_SET, CC_NEU, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), zero);
-      break;
-   }
    case nir_op_b2i8:
    case nir_op_b2i16:
    case nir_op_b2i32: {
@@ -2880,14 +2880,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                     BI_MUX_INT_ZERO);
       break;
 
-   case nir_op_f2b16:
-      bi_mux_v2i16_to(b, dst, bi_imm_u16(0), bi_imm_u16(~0), s0,
-                      BI_MUX_FP_ZERO);
-      break;
-   case nir_op_f2b32:
-      bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO);
-      break;
-
    case nir_op_ieq8:
    case nir_op_ine8:
    case nir_op_ilt8:
@@ -836,11 +836,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
       ALU_CASE_CMP(b2f16, iand);
       ALU_CASE_CMP(b2i32, iand);
 
-      /* Likewise, we don't have a dedicated f2b32 instruction, but
-       * we can do a "not equal to 0.0" test. */
-
-      ALU_CASE_CMP(f2b32, fne);
-
       ALU_CASE(frcp, frcp);
       ALU_CASE(frsq, frsqrt);
       ALU_CASE(fsqrt, fsqrt);