ir3/cf: Rewrite pass

The old pass had a few bugs:

- It tried to avoid folding f2f32 into f2f16, but didn't consider conversions that were already folded in.
- It didn't prevent folding an f2f16 or f2f32 into a non-floating-point op.

In addition, it wasn't written in a manner which made handling integer conversions practical. This rewrites the pass to instead calculate the "type" of the conversion source and then check whether folding the conversion is allowed. This allows us to cleanly separate the declarative part where we describe how the HW works from the policy part where we decide whether the transform is allowed, and makes it simple to add support for folding integer conversions.

Closes: #3208
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10859>
2021-05-17 18:57:58 +02:00
parent 17861aff96
commit e894e83e47
3 changed files with 187 additions and 77 deletions
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -162,36 +162,11 @@ dEQP-VK.spirv_assembly.instruction.compute.opcopymemory.array,Fail
 dEQP-VK.spirv_assembly.instruction.compute.opquantize.infinities,Fail

 # https://gitlab.freedesktop.org/mesa/mesa/-/issues/3208
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_tessc,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_frag,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_tesse,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_vert,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_frag,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_tesse,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_vert,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_frag,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_tesse,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_vert,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_geom,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_tessc,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_frag,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_tesse,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_vert,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_geom,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_tessc,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_geom,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_tessc,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_frag,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_tesse,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_vert,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_frag,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_tesse,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_vert,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_geom,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_tessc,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_geom,Fail
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_tessc,Fail

 dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail
 dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1159,6 +1159,137 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
 	}
 }

+/* Return the type (float, int, or uint) the op uses when converting from the
+ * internal result of the op (which is assumed to be the same size as the
+ * sources) to the destination when they are not the same size. If F32 it does
+ * a floating-point conversion, if U32 it does a truncation/zero-extension, if
+ * S32 it does a truncation/sign-extension. "can_fold" will be false if it
+ * doesn't do anything sensible or is unknown.
+ *
+ * "can_fold" is always written: it is set to true on entry and cleared only
+ * for OPC_MOV and for every opcode that is not listed in the switch below.
+ * TYPE_U32 is returned alongside can_fold = false, so callers should check
+ * "can_fold" before making any use of the returned type.
+ */
+static inline type_t
+ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
+{
+	*can_fold = true;
+	switch (instr->opc) {
+	case OPC_ADD_F:
+	case OPC_MUL_F:
+	case OPC_BARY_F:
+	case OPC_MAD_F32:
+	case OPC_MAD_F16:
+		return TYPE_F32;
+
+	case OPC_ADD_U:
+	case OPC_SUB_U:
+	case OPC_MIN_U:
+	case OPC_MAX_U:
+	case OPC_AND_B:
+	case OPC_OR_B:
+	case OPC_NOT_B:
+	case OPC_XOR_B:
+	case OPC_MUL_U24:
+	case OPC_MULL_U:
+	case OPC_SHL_B:
+	case OPC_SHR_B:
+	case OPC_ASHR_B:
+	case OPC_MAD_U24:
+	/* Comparison ops zero-extend/truncate their results, so consider them as
+	 * unsigned here.
+	 */
+	case OPC_CMPS_F:
+	case OPC_CMPV_F:
+	case OPC_CMPS_U:
+	case OPC_CMPS_S:
+		return TYPE_U32;
+
+	case OPC_ADD_S:
+	case OPC_SUB_S:
+	case OPC_MIN_S:
+	case OPC_MAX_S:
+	case OPC_ABSNEG_S:
+	case OPC_MUL_S24:
+	case OPC_MAD_S24:
+		return TYPE_S32;
+
+	/* We assume that any move->move folding that could be done was done by
+	 * NIR, so moves are deliberately reported as not foldable, exactly like
+	 * opcodes this table knows nothing about.
+	 */
+	case OPC_MOV:
+	default:
+		*can_fold = false;
+		return TYPE_U32;
+	}
+}
+
+/* Source type of the conversion that is already folded into "instr": the op
+ * can be assumed to convert from ir3_output_conv_src_type() to
+ * ir3_output_conv_dst_type(). "base_type" is the type reported by
+ * ir3_output_conv_type(); calling this only makes sense when that function
+ * returned can_fold = true.
+ *
+ * The result is "base_type" at the width of whichever register determines
+ * the size of the op's internal result, except for bary.f which is always
+ * treated as producing fp32.
+ */
+static inline type_t
+ir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type)
+{
+	struct ir3_register *sized_by;
+
+	switch (instr->opc) {
+	case OPC_BARY_F:
+		/* bary.f doesn't have an explicit source, but we can assume here that
+		 * the varying data it reads is in fp32.
+		 *
+		 * This may be fp16 on older gen's depending on some register
+		 * settings, but it's probably not worth plumbing that through for a
+		 * small improvement that NIR would hopefully handle for us anyway.
+		 */
+		return TYPE_F32;
+
+	case OPC_CMPS_F:
+	case OPC_CMPV_F:
+	case OPC_CMPS_U:
+	case OPC_CMPS_S:
+		/* Comparisons only return 0/1 and the size of the comparison sources
+		 * is irrelevant. Size the "source" by the destination so that a
+		 * comparison is never seen as having an output conversion.
+		 */
+		sized_by = instr->regs[0];
+		break;
+
+	default:
+		/* Everything else produces a result as wide as its first source. */
+		sized_by = instr->regs[1];
+		break;
+	}
+
+	if (sized_by->flags & IR3_REG_HALF)
+		return half_type(base_type);
+	return full_type(base_type);
+}
+
+/* Destination type of the conversion that is already folded into "instr":
+ * "base_type" at the width of the destination register (regs[0]).
+ */
+static inline type_t
+ir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type)
+{
+	const bool half_dst = (instr->regs[0]->flags & IR3_REG_HALF) != 0;
+
+	if (half_dst)
+		return half_type(base_type);
+	return full_type(base_type);
+}
+
+/* Some instructions have signed/unsigned variants which are identical except
+ * for whether the folded conversion sign-extends or zero-extends, and we can
+ * fold in a mismatching move by rewriting the opcode. Return the opcode to
+ * switch signedness, and whether one exists.
+ *
+ * "can_swap" is only ever cleared by this function: it is set to false when
+ * "opc" has no counterpart (in which case "opc" is returned unchanged) and is
+ * left untouched otherwise. The caller must therefore initialize it to true
+ * before the call.
+ */
+static inline opc_t
+ir3_try_swap_signedness(opc_t opc, bool *can_swap)
+{
+	switch (opc) {
+#define PAIR(u, s)		\
+	case OPC_##u:		\
+		return OPC_##s;	\
+	case OPC_##s:		\
+		return OPC_##u;
+	PAIR(ADD_U, ADD_S)
+	PAIR(SUB_U, SUB_S)
+	/* Note: these are only identical when the sources are half, but that's
+	 * the only case we call this function for anyway.
+	 */
+	PAIR(MUL_U24, MUL_S24)
+/* PAIR is a local shorthand only; this is a header, so make sure the macro
+ * does not leak into every file that includes it.
+ */
+#undef PAIR
+
+	default:
+		*can_swap = false;
+		return opc;
+	}
+}
+
 #define MASK(n) ((1 << (n)) - 1)

 /* iterator for an instructions's sources (reg), also returns src #: */
--- a/src/freedreno/ir3/ir3_cf.c
+++ b/src/freedreno/ir3/ir3_cf.c
@@ -26,11 +26,19 @@
 #include "ir3.h"

 static bool
-is_fp16_conv(struct ir3_instruction *instr)
+is_safe_conv(struct ir3_instruction *instr, type_t src_type,
+			 opc_t *src_opc)
 {
 	if (instr->opc != OPC_MOV)
 		return false;

+	/* Only allow half->full or full->half without any type conversion (like
+	 * int to float).
+	 */
+	if (type_size(instr->cat1.src_type) == type_size(instr->cat1.dst_type) ||
+		full_type(instr->cat1.src_type) != full_type(instr->cat1.dst_type))
+		return false;
+
 	struct ir3_register *dst = instr->regs[0];
 	struct ir3_register *src = instr->regs[1];

@@ -45,23 +53,47 @@ is_fp16_conv(struct ir3_instruction *instr)
 	if (src->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
 		return false;

-	if (instr->cat1.src_type == TYPE_F32 &&
-			instr->cat1.dst_type == TYPE_F16)
+	/* Check that the source of the conv matches the type of the src
+	 * instruction.
+	 */
+	if (src_type == instr->cat1.src_type)
 		return true;

-	if (instr->cat1.src_type == TYPE_F16 &&
-			instr->cat1.dst_type == TYPE_F32)
+	/* We can handle mismatches with integer types by converting the opcode
+	 * but not when an integer is reinterpreted as a float or vice-versa.
+	 */
+	if (type_float(src_type) != type_float(instr->cat1.src_type))
+		return false;
+
+	/* We have types with mismatched signedness. Mismatches on the signedness
+	 * don't matter when narrowing:
+	 */
+	if (type_size(instr->cat1.dst_type) < type_size(instr->cat1.src_type))
 		return true;

-	return false;
+	/* Try swapping the opcode: */
+	bool can_swap = true;
+	*src_opc = ir3_try_swap_signedness(*src_opc, &can_swap);
+	return can_swap;
 }

 static bool
-all_uses_fp16_conv(struct ir3_instruction *conv_src)
+all_uses_safe_conv(struct ir3_instruction *conv_src, type_t src_type)
 {
-	foreach_ssa_use (use, conv_src)
-		if (!is_fp16_conv(use))
+	opc_t opc = conv_src->opc;
+	bool first = true;
+	foreach_ssa_use (use, conv_src) {
+		opc_t new_opc = opc;
+		if (!is_safe_conv(use, src_type, &new_opc))
 			return false;
+		/* Check if multiple uses have conflicting requirements on the opcode.
+		 */
+		if (!first && opc != new_opc)
+			return false;
+		first = false;
+		opc = new_opc;
+	}
+	conv_src->opc = opc;
 	return true;
 }

@@ -74,7 +106,7 @@ static void
 rewrite_src_uses(struct ir3_instruction *src)
 {
 	foreach_ssa_use (use, src) {
-		assert(is_fp16_conv(use));
+		assert(use->opc == OPC_MOV);

 		if (is_half(src)) {
 			use->regs[1]->flags |= IR3_REG_HALF;
@@ -91,7 +123,7 @@ try_conversion_folding(struct ir3_instruction *conv)
 {
 	struct ir3_instruction *src;

-	if (!is_fp16_conv(conv))
+	if (conv->opc != OPC_MOV)
 		return false;

 	/* NOTE: we can have non-ssa srcs after copy propagation: */
@@ -102,51 +134,23 @@ try_conversion_folding(struct ir3_instruction *conv)
 	if (!is_alu(src))
 		return false;

-	/* avoid folding f2f32(f2f16) together, in cases where this is legal to
-	 * do (glsl) nir should have handled that for us already:
+	bool can_fold;
+	type_t base_type = ir3_output_conv_type(src, &can_fold);
+	if (!can_fold)
+		return false;
+
+	type_t src_type = ir3_output_conv_src_type(src, base_type);
+	type_t dst_type = ir3_output_conv_dst_type(src, base_type);
+
+	/* Avoid cases where we've already folded in a conversion. We assume that
+	 * if there is a chain of conversions that's foldable then it's been
+	 * folded in NIR already.
 	 */
-	if (is_fp16_conv(src))
+	if (src_type != dst_type)
 		return false;

-	switch (src->opc) {
-	case OPC_SEL_B32:
-	case OPC_SEL_B16:
-	case OPC_MAX_F:
-	case OPC_MIN_F:
-	case OPC_SIGN_F:
-	case OPC_ABSNEG_F:
+	if (!all_uses_safe_conv(src, src_type))
 		return false;
-	case OPC_MOV:
-		/* if src is a "cov" and type doesn't match, then it can't be folded
-		 * for example cov.u32u16+cov.f16f32 can't be folded to cov.u32f32
-		 */
-		if (src->cat1.dst_type != src->cat1.src_type &&
-			conv->cat1.src_type != src->cat1.dst_type)
-			return false;
-		break;
-	default:
-		break;
-	}
-
-	if (!all_uses_fp16_conv(src))
-		return false;
-
-	if (src->opc == OPC_MOV) {
-		if (src->cat1.dst_type == src->cat1.src_type) {
-			/* If we're folding a conversion into a bitwise move, we need to
-			 * change the dst type to F32 to get the right behavior, since we
-			 * could be moving a float with a u32.u32 move.
-			 */
-			src->cat1.dst_type = conv->cat1.dst_type;
-			src->cat1.src_type = conv->cat1.src_type;
-		} else {
-			/* Otherwise, for typechanging movs, we can just change the dst
-			 * type to F16 to collaps the two conversions.  For example
-			 * cov.s32f32 follwed by cov.f32f16 becomes cov.s32f16.
-			 */
-			src->cat1.dst_type = conv->cat1.dst_type;
-		}
-	}

 	ir3_set_dst_type(src, is_half(conv));
 	rewrite_src_uses(src);