nir,vc4: Suffix a bunch of unorm 4x8 opcodes _vc4
Reviewed-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11463>
This commit is contained in:
@@ -883,51 +883,6 @@ binop("fmax", tfloat, _2src_commutative + associative, "fmax(src0, src1)")
|
|||||||
binop("imax", tint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
|
binop("imax", tint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
|
||||||
binop("umax", tuint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
|
binop("umax", tuint, _2src_commutative + associative, "src1 > src0 ? src1 : src0")
|
||||||
|
|
||||||
# Saturated vector add for 4 8bit ints.
|
|
||||||
binop("usadd_4x8", tint32, _2src_commutative + associative, """
|
|
||||||
dst = 0;
|
|
||||||
for (int i = 0; i < 32; i += 8) {
|
|
||||||
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
|
|
||||||
# Saturated vector subtract for 4 8bit ints.
|
|
||||||
binop("ussub_4x8", tint32, "", """
|
|
||||||
dst = 0;
|
|
||||||
for (int i = 0; i < 32; i += 8) {
|
|
||||||
int src0_chan = (src0 >> i) & 0xff;
|
|
||||||
int src1_chan = (src1 >> i) & 0xff;
|
|
||||||
if (src0_chan > src1_chan)
|
|
||||||
dst |= (src0_chan - src1_chan) << i;
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
|
|
||||||
# vector min for 4 8bit ints.
|
|
||||||
binop("umin_4x8", tint32, _2src_commutative + associative, """
|
|
||||||
dst = 0;
|
|
||||||
for (int i = 0; i < 32; i += 8) {
|
|
||||||
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
|
|
||||||
# vector max for 4 8bit ints.
|
|
||||||
binop("umax_4x8", tint32, _2src_commutative + associative, """
|
|
||||||
dst = 0;
|
|
||||||
for (int i = 0; i < 32; i += 8) {
|
|
||||||
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
|
|
||||||
# unorm multiply: (a * b) / 255.
|
|
||||||
binop("umul_unorm_4x8", tint32, _2src_commutative + associative, """
|
|
||||||
dst = 0;
|
|
||||||
for (int i = 0; i < 32; i += 8) {
|
|
||||||
int src0_chan = (src0 >> i) & 0xff;
|
|
||||||
int src1_chan = (src1 >> i) & 0xff;
|
|
||||||
dst |= ((src0_chan * src1_chan) / 255) << i;
|
|
||||||
}
|
|
||||||
""")
|
|
||||||
|
|
||||||
binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
|
binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
|
||||||
|
|
||||||
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
|
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
|
||||||
@@ -1286,6 +1241,53 @@ binop("umul24_relaxed", tuint32, _2src_commutative + associative, "src0 * src1")
|
|||||||
unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
|
unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)")
|
||||||
unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
|
unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)")
|
||||||
|
|
||||||
|
# vc4-specific opcodes
|
||||||
|
|
||||||
|
# Saturated vector add for 4 8bit ints.
|
||||||
|
binop("usadd_4x8_vc4", tint32, _2src_commutative + associative, """
|
||||||
|
dst = 0;
|
||||||
|
for (int i = 0; i < 32; i += 8) {
|
||||||
|
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Saturated vector subtract for 4 8bit ints.
|
||||||
|
binop("ussub_4x8_vc4", tint32, "", """
|
||||||
|
dst = 0;
|
||||||
|
for (int i = 0; i < 32; i += 8) {
|
||||||
|
int src0_chan = (src0 >> i) & 0xff;
|
||||||
|
int src1_chan = (src1 >> i) & 0xff;
|
||||||
|
if (src0_chan > src1_chan)
|
||||||
|
dst |= (src0_chan - src1_chan) << i;
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
|
||||||
|
# vector min for 4 8bit ints.
|
||||||
|
binop("umin_4x8_vc4", tint32, _2src_commutative + associative, """
|
||||||
|
dst = 0;
|
||||||
|
for (int i = 0; i < 32; i += 8) {
|
||||||
|
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
|
||||||
|
# vector max for 4 8bit ints.
|
||||||
|
binop("umax_4x8_vc4", tint32, _2src_commutative + associative, """
|
||||||
|
dst = 0;
|
||||||
|
for (int i = 0; i < 32; i += 8) {
|
||||||
|
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
|
||||||
|
# unorm multiply: (a * b) / 255.
|
||||||
|
binop("umul_unorm_4x8_vc4", tint32, _2src_commutative + associative, """
|
||||||
|
dst = 0;
|
||||||
|
for (int i = 0; i < 32; i += 8) {
|
||||||
|
int src0_chan = (src0 >> i) & 0xff;
|
||||||
|
int src1_chan = (src1 >> i) & 0xff;
|
||||||
|
dst |= ((src0_chan * src1_chan) / 255) << i;
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
|
||||||
# Mali-specific opcodes
|
# Mali-specific opcodes
|
||||||
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
|
unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
|
||||||
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
|
unop("fclamp_pos_mali", tfloat, ("fmax(src0, 0.0)"))
|
||||||
|
@@ -133,8 +133,8 @@ optimizations = [
|
|||||||
(('fadd(is_only_used_as_float)', 'a@16', 0.0), a, '!'+signed_zero_inf_nan_preserve_16),
|
(('fadd(is_only_used_as_float)', 'a@16', 0.0), a, '!'+signed_zero_inf_nan_preserve_16),
|
||||||
(('fadd(is_only_used_as_float)', 'a@32', 0.0), a, '!'+signed_zero_inf_nan_preserve_32),
|
(('fadd(is_only_used_as_float)', 'a@32', 0.0), a, '!'+signed_zero_inf_nan_preserve_32),
|
||||||
(('iadd', a, 0), a),
|
(('iadd', a, 0), a),
|
||||||
(('usadd_4x8', a, 0), a),
|
(('usadd_4x8_vc4', a, 0), a),
|
||||||
(('usadd_4x8', a, ~0), ~0),
|
(('usadd_4x8_vc4', a, ~0), ~0),
|
||||||
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
|
||||||
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
|
||||||
(('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
|
(('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))),
|
||||||
@@ -151,8 +151,8 @@ optimizations = [
|
|||||||
(('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16),
|
(('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16),
|
||||||
(('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32),
|
(('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32),
|
||||||
(('imul', a, 0), 0),
|
(('imul', a, 0), 0),
|
||||||
(('umul_unorm_4x8', a, 0), 0),
|
(('umul_unorm_4x8_vc4', a, 0), 0),
|
||||||
(('umul_unorm_4x8', a, ~0), a),
|
(('umul_unorm_4x8_vc4', a, ~0), a),
|
||||||
(('~fmul', a, 1.0), a),
|
(('~fmul', a, 1.0), a),
|
||||||
# The only effect a*1.0 can have is flushing denormals. If it's only used by
|
# The only effect a*1.0 can have is flushing denormals. If it's only used by
|
||||||
# a floating point instruction, they should flush any input denormals and
|
# a floating point instruction, they should flush any input denormals and
|
||||||
@@ -1333,8 +1333,8 @@ for op in ('extract_u8', 'extract_i8'):
|
|||||||
|
|
||||||
optimizations.extend([
|
optimizations.extend([
|
||||||
# Subtracts
|
# Subtracts
|
||||||
(('ussub_4x8', a, 0), a),
|
(('ussub_4x8_vc4', a, 0), a),
|
||||||
(('ussub_4x8', a, ~0), 0),
|
(('ussub_4x8_vc4', a, ~0), 0),
|
||||||
# Lower all Subtractions first - they can get recombined later
|
# Lower all Subtractions first - they can get recombined later
|
||||||
(('fsub', a, b), ('fadd', a, ('fneg', b))),
|
(('fsub', a, b), ('fadd', a, ('fneg', b))),
|
||||||
(('isub', a, b), ('iadd', a, ('ineg', b))),
|
(('isub', a, b), ('iadd', a, ('ineg', b))),
|
||||||
|
@@ -159,7 +159,7 @@ vc4_blend_channel_i(nir_builder *b,
|
|||||||
return dst;
|
return dst;
|
||||||
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||||
return vc4_nir_set_packed_chan(b,
|
return vc4_nir_set_packed_chan(b,
|
||||||
nir_umin_4x8(b,
|
nir_umin_4x8_vc4(b,
|
||||||
src_a,
|
src_a,
|
||||||
nir_inot(b, dst_a)),
|
nir_inot(b, dst_a)),
|
||||||
nir_imm_int(b, ~0),
|
nir_imm_int(b, ~0),
|
||||||
@@ -226,15 +226,15 @@ vc4_blend_func_i(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
|
|||||||
{
|
{
|
||||||
switch (func) {
|
switch (func) {
|
||||||
case PIPE_BLEND_ADD:
|
case PIPE_BLEND_ADD:
|
||||||
return nir_usadd_4x8(b, src, dst);
|
return nir_usadd_4x8_vc4(b, src, dst);
|
||||||
case PIPE_BLEND_SUBTRACT:
|
case PIPE_BLEND_SUBTRACT:
|
||||||
return nir_ussub_4x8(b, src, dst);
|
return nir_ussub_4x8_vc4(b, src, dst);
|
||||||
case PIPE_BLEND_REVERSE_SUBTRACT:
|
case PIPE_BLEND_REVERSE_SUBTRACT:
|
||||||
return nir_ussub_4x8(b, dst, src);
|
return nir_ussub_4x8_vc4(b, dst, src);
|
||||||
case PIPE_BLEND_MIN:
|
case PIPE_BLEND_MIN:
|
||||||
return nir_umin_4x8(b, src, dst);
|
return nir_umin_4x8_vc4(b, src, dst);
|
||||||
case PIPE_BLEND_MAX:
|
case PIPE_BLEND_MAX:
|
||||||
return nir_umax_4x8(b, src, dst);
|
return nir_umax_4x8_vc4(b, src, dst);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
/* Unsupported. */
|
/* Unsupported. */
|
||||||
@@ -353,8 +353,8 @@ vc4_do_blending_i(struct vc4_compile *c, nir_builder *b,
|
|||||||
dst_alpha_factor,
|
dst_alpha_factor,
|
||||||
alpha_chan);
|
alpha_chan);
|
||||||
}
|
}
|
||||||
nir_ssa_def *src_blend = nir_umul_unorm_4x8(b, src_color, src_factor);
|
nir_ssa_def *src_blend = nir_umul_unorm_4x8_vc4(b, src_color, src_factor);
|
||||||
nir_ssa_def *dst_blend = nir_umul_unorm_4x8(b, dst_color, dst_factor);
|
nir_ssa_def *dst_blend = nir_umul_unorm_4x8_vc4(b, dst_color, dst_factor);
|
||||||
|
|
||||||
nir_ssa_def *result =
|
nir_ssa_def *result =
|
||||||
vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
|
vc4_blend_func_i(b, src_blend, dst_blend, blend->rgb_func);
|
||||||
|
@@ -1276,23 +1276,23 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
|
|||||||
result = ntq_emit_ubfe(c, src[0], src[1], src[2]);
|
result = ntq_emit_ubfe(c, src[0], src[1], src[2]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_usadd_4x8:
|
case nir_op_usadd_4x8_vc4:
|
||||||
result = qir_V8ADDS(c, src[0], src[1]);
|
result = qir_V8ADDS(c, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_ussub_4x8:
|
case nir_op_ussub_4x8_vc4:
|
||||||
result = qir_V8SUBS(c, src[0], src[1]);
|
result = qir_V8SUBS(c, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_umin_4x8:
|
case nir_op_umin_4x8_vc4:
|
||||||
result = qir_V8MIN(c, src[0], src[1]);
|
result = qir_V8MIN(c, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_umax_4x8:
|
case nir_op_umax_4x8_vc4:
|
||||||
result = qir_V8MAX(c, src[0], src[1]);
|
result = qir_V8MAX(c, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_umul_unorm_4x8:
|
case nir_op_umul_unorm_4x8_vc4:
|
||||||
result = qir_V8MULD(c, src[0], src[1]);
|
result = qir_V8MULD(c, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user