zink: do not open-code vector-compares

We already have code to lower these away to something that doesn't need
so much special-case code to handle.

The sad part here is that we generate slightly worse code; trees of
OpLogicalAnd / OpLogicalOr instead of OpAny / OpAll. But I would imagine
that for any GPU where this matters, these would be easy to combine
back, so I'm not losing a lot of sleep over this.

But this makes things simpler. And if we *really* care about OpAny or
OpAll, we should add NIR ALU instructions for them so we can optimize
them in other places as well, rather than open-coding every place where
they could improve things.

Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9797>
This commit is contained in:
Erik Faye-Lund
2021-03-23 18:01:41 +01:00
committed by Marge Bot
parent 296a2fb350
commit e87ff43fb3
2 changed files with 2 additions and 62 deletions

View File

@@ -1532,11 +1532,8 @@ static void
emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
{
SpvId src[nir_op_infos[alu->op].num_inputs];
unsigned in_bit_sizes[nir_op_infos[alu->op].num_inputs];
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
src[i] = get_alu_src(ctx, alu, i);
in_bit_sizes[i] = nir_src_bit_size(alu->src[i].src);
}
SpvId dest_type = get_dest_type(ctx, &alu->dest.dest,
nir_op_infos[alu->op].output_type);
@@ -1743,64 +1740,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
result = emit_select(ctx, dest_type, src[0], src[1], src[2]);
break;
case nir_op_bany_fnequal2:
case nir_op_bany_fnequal3:
case nir_op_bany_fnequal4: {
assert(nir_op_infos[alu->op].num_inputs == 2);
assert(alu_instr_src_components(alu, 0) ==
alu_instr_src_components(alu, 1));
result = emit_binop(ctx, SpvOpFUnordNotEqual,
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
src[0], src[1]);
result = emit_unop(ctx, SpvOpAny, dest_type, result);
break;
}
case nir_op_ball_fequal2:
case nir_op_ball_fequal3:
case nir_op_ball_fequal4: {
assert(nir_op_infos[alu->op].num_inputs == 2);
assert(alu_instr_src_components(alu, 0) ==
alu_instr_src_components(alu, 1));
result = emit_binop(ctx, SpvOpFOrdEqual,
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
src[0], src[1]);
result = emit_unop(ctx, SpvOpAll, dest_type, result);
break;
}
case nir_op_bany_inequal2:
case nir_op_bany_inequal3:
case nir_op_bany_inequal4: {
assert(nir_op_infos[alu->op].num_inputs == 2);
assert(alu_instr_src_components(alu, 0) ==
alu_instr_src_components(alu, 1));
assert(in_bit_sizes[0] == in_bit_sizes[1]);
/* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */
SpvOp op = in_bit_sizes[0] == 1 ? SpvOpLogicalNotEqual : SpvOpINotEqual;
result = emit_binop(ctx, op,
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
src[0], src[1]);
result = emit_unop(ctx, SpvOpAny, dest_type, result);
break;
}
case nir_op_ball_iequal2:
case nir_op_ball_iequal3:
case nir_op_ball_iequal4: {
assert(nir_op_infos[alu->op].num_inputs == 2);
assert(alu_instr_src_components(alu, 0) ==
alu_instr_src_components(alu, 1));
assert(in_bit_sizes[0] == in_bit_sizes[1]);
/* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */
SpvOp op = in_bit_sizes[0] == 1 ? SpvOpLogicalEqual : SpvOpIEqual;
result = emit_binop(ctx, op,
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
src[0], src[1]);
result = emit_unop(ctx, SpvOpAll, dest_type, result);
break;
}
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {

View File

@@ -328,6 +328,7 @@ zink_screen_init_compiler(struct zink_screen *screen)
.lower_uadd_carry = true,
.lower_pack_64_2x32_split = true,
.lower_unpack_64_2x32_split = true,
.lower_vector_cmp = true,
.use_scoped_barrier = true,
.lower_int64_options = 0,
.lower_doubles_options = ~nir_lower_fp64_full_software,