i965/vs: Fix multiplies to actually do 32-bit multiplies.

Fixes vs-op-mult-int-int and friends.
2011-08-15 21:02:10 -07:00
parent 7bf70c29ad
commit 0ddf0f1c34
2 changed files with 22 additions and 1 deletions
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -633,6 +633,11 @@ vec4_visitor::generate_code()
      case BRW_OPCODE_MUL:
 	 brw_MUL(p, dst, src[0], src[1]);
 	 break;
+      case BRW_OPCODE_MACH:
+	 brw_set_acc_write_control(p, 1);
+	 brw_MACH(p, dst, src[0], src[1]);
+	 brw_set_acc_write_control(p, 0);
+	 break;

      case BRW_OPCODE_FRC:
 	 brw_FRC(p, dst, src[0]);
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -945,7 +945,23 @@ vec4_visitor::visit(ir_expression *ir)
      break;

   case ir_binop_mul:
-      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      if (ir->type->is_integer()) {
+	 /* For integer multiplication, the MUL uses the low 16 bits
+	  * of one of the operands (src0 on gen6, src1 on gen7).  The
+	  * MACH accumulates in the contribution of the upper 16 bits
+	  * of that operand.
+	  *
+	  * FINISHME: Emit just the MUL if we know an operand is small
+	  * enough.
+	  */
+	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+
+	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
+	 emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]);
+	 emit(BRW_OPCODE_MOV, result_dst, src_reg(acc));
+      } else {
+	 emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+      }
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");