zink: do not open-code vector-compares
We already have code to lower these away to something we don't need so much special-case code to handle. The sad part here is that we generate slightly worse code: trees of OpLogicalAnd / OpLogicalOr instead of OpAny / OpAll. But I would imagine that for any GPU where this matters, these would be easy to combine back, so I'm not losing a lot of sleep over this. But this makes things simpler. And if we *really* care about OpAny or OpAll, we should add NIR ALU instructions for them so we can optimize them in other places as well, rather than open-coding every place where it could improve things. Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9797>
This commit is contained in:

committed by
Marge Bot

parent
296a2fb350
commit
e87ff43fb3
@@ -1532,11 +1532,8 @@ static void
|
||||
emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
|
||||
{
|
||||
SpvId src[nir_op_infos[alu->op].num_inputs];
|
||||
unsigned in_bit_sizes[nir_op_infos[alu->op].num_inputs];
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
|
||||
src[i] = get_alu_src(ctx, alu, i);
|
||||
in_bit_sizes[i] = nir_src_bit_size(alu->src[i].src);
|
||||
}
|
||||
|
||||
SpvId dest_type = get_dest_type(ctx, &alu->dest.dest,
|
||||
nir_op_infos[alu->op].output_type);
|
||||
@@ -1743,64 +1740,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
|
||||
result = emit_select(ctx, dest_type, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case nir_op_bany_fnequal2:
|
||||
case nir_op_bany_fnequal3:
|
||||
case nir_op_bany_fnequal4: {
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
assert(alu_instr_src_components(alu, 0) ==
|
||||
alu_instr_src_components(alu, 1));
|
||||
result = emit_binop(ctx, SpvOpFUnordNotEqual,
|
||||
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
|
||||
src[0], src[1]);
|
||||
result = emit_unop(ctx, SpvOpAny, dest_type, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_ball_fequal2:
|
||||
case nir_op_ball_fequal3:
|
||||
case nir_op_ball_fequal4: {
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
assert(alu_instr_src_components(alu, 0) ==
|
||||
alu_instr_src_components(alu, 1));
|
||||
result = emit_binop(ctx, SpvOpFOrdEqual,
|
||||
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
|
||||
src[0], src[1]);
|
||||
result = emit_unop(ctx, SpvOpAll, dest_type, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_bany_inequal2:
|
||||
case nir_op_bany_inequal3:
|
||||
case nir_op_bany_inequal4: {
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
assert(alu_instr_src_components(alu, 0) ==
|
||||
alu_instr_src_components(alu, 1));
|
||||
assert(in_bit_sizes[0] == in_bit_sizes[1]);
|
||||
/* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */
|
||||
SpvOp op = in_bit_sizes[0] == 1 ? SpvOpLogicalNotEqual : SpvOpINotEqual;
|
||||
result = emit_binop(ctx, op,
|
||||
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
|
||||
src[0], src[1]);
|
||||
result = emit_unop(ctx, SpvOpAny, dest_type, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_ball_iequal2:
|
||||
case nir_op_ball_iequal3:
|
||||
case nir_op_ball_iequal4: {
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
assert(alu_instr_src_components(alu, 0) ==
|
||||
alu_instr_src_components(alu, 1));
|
||||
assert(in_bit_sizes[0] == in_bit_sizes[1]);
|
||||
/* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */
|
||||
SpvOp op = in_bit_sizes[0] == 1 ? SpvOpLogicalEqual : SpvOpIEqual;
|
||||
result = emit_binop(ctx, op,
|
||||
get_bvec_type(ctx, alu_instr_src_components(alu, 0)),
|
||||
src[0], src[1]);
|
||||
result = emit_unop(ctx, SpvOpAll, dest_type, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4: {
|
||||
|
@@ -328,6 +328,7 @@ zink_screen_init_compiler(struct zink_screen *screen)
|
||||
.lower_uadd_carry = true,
|
||||
.lower_pack_64_2x32_split = true,
|
||||
.lower_unpack_64_2x32_split = true,
|
||||
.lower_vector_cmp = true,
|
||||
.use_scoped_barrier = true,
|
||||
.lower_int64_options = 0,
|
||||
.lower_doubles_options = ~nir_lower_fp64_full_software,
|
||||
|
Reference in New Issue
Block a user