From e87ff43fb38acbf74e53495a08f0b1d52b7956d8 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Tue, 23 Mar 2021 18:01:41 +0100 Subject: [PATCH] zink: do not open-code vector-compares We already have code to lower these away into something that doesn't need so much special-case code to handle. The sad part here is that we generate slightly worse code: trees of OpLogicalAnd / OpLogicalOr instead of OpAny / OpAll. But I would imagine that for any GPU where this matters, these would be easy to combine back, so I'm not losing a lot of sleep over this. But this makes things simpler. And if we *really* care about OpAny or OpAll, we should add NIR ALU instructions for them so we can optimize them in other places as well, rather than open-coding them everywhere they could improve things. Reviewed-By: Mike Blumenkrantz Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 63 +------------------ src/gallium/drivers/zink/zink_compiler.c | 1 + 2 files changed, 2 insertions(+), 62 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 9bb2ecbc98c..953b42b5c64 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -1532,11 +1532,8 @@ static void emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) { SpvId src[nir_op_infos[alu->op].num_inputs]; - unsigned in_bit_sizes[nir_op_infos[alu->op].num_inputs]; - for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) src[i] = get_alu_src(ctx, alu, i); - in_bit_sizes[i] = nir_src_bit_size(alu->src[i].src); - } SpvId dest_type = get_dest_type(ctx, &alu->dest.dest, nir_op_infos[alu->op].output_type); @@ -1743,64 +1740,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) result = emit_select(ctx, dest_type, src[0], src[1], src[2]); break; - case nir_op_bany_fnequal2: - case nir_op_bany_fnequal3: - case 
nir_op_bany_fnequal4: { - assert(nir_op_infos[alu->op].num_inputs == 2); - assert(alu_instr_src_components(alu, 0) == - alu_instr_src_components(alu, 1)); - result = emit_binop(ctx, SpvOpFUnordNotEqual, - get_bvec_type(ctx, alu_instr_src_components(alu, 0)), - src[0], src[1]); - result = emit_unop(ctx, SpvOpAny, dest_type, result); - break; - } - - case nir_op_ball_fequal2: - case nir_op_ball_fequal3: - case nir_op_ball_fequal4: { - assert(nir_op_infos[alu->op].num_inputs == 2); - assert(alu_instr_src_components(alu, 0) == - alu_instr_src_components(alu, 1)); - result = emit_binop(ctx, SpvOpFOrdEqual, - get_bvec_type(ctx, alu_instr_src_components(alu, 0)), - src[0], src[1]); - result = emit_unop(ctx, SpvOpAll, dest_type, result); - break; - } - - case nir_op_bany_inequal2: - case nir_op_bany_inequal3: - case nir_op_bany_inequal4: { - assert(nir_op_infos[alu->op].num_inputs == 2); - assert(alu_instr_src_components(alu, 0) == - alu_instr_src_components(alu, 1)); - assert(in_bit_sizes[0] == in_bit_sizes[1]); - /* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */ - SpvOp op = in_bit_sizes[0] == 1 ? SpvOpLogicalNotEqual : SpvOpINotEqual; - result = emit_binop(ctx, op, - get_bvec_type(ctx, alu_instr_src_components(alu, 0)), - src[0], src[1]); - result = emit_unop(ctx, SpvOpAny, dest_type, result); - break; - } - - case nir_op_ball_iequal2: - case nir_op_ball_iequal3: - case nir_op_ball_iequal4: { - assert(nir_op_infos[alu->op].num_inputs == 2); - assert(alu_instr_src_components(alu, 0) == - alu_instr_src_components(alu, 1)); - assert(in_bit_sizes[0] == in_bit_sizes[1]); - /* The type of Operand 1 and Operand 2 must be a scalar or vector of integer type. */ - SpvOp op = in_bit_sizes[0] == 1 ? 
SpvOpLogicalEqual : SpvOpIEqual; - result = emit_binop(ctx, op, - get_bvec_type(ctx, alu_instr_src_components(alu, 0)), - src[0], src[1]); - result = emit_unop(ctx, SpvOpAll, dest_type, result); - break; - } - case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: { diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index fc2c74072e2..09d90dee267 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -328,6 +328,7 @@ zink_screen_init_compiler(struct zink_screen *screen) .lower_uadd_carry = true, .lower_pack_64_2x32_split = true, .lower_unpack_64_2x32_split = true, + .lower_vector_cmp = true, .use_scoped_barrier = true, .lower_int64_options = 0, .lower_doubles_options = ~nir_lower_fp64_full_software,