intel/fs: Handle source modifiers in lower_integer_multiplication().
lower_integer_multiplication() implements 32x32-bit multiplication on some platforms by bit-casting one of the 32-bit sources into two 16-bit unsigned integer portions. This can give incorrect results if the original instruction specified a source modifier. Fix it by emitting an additional MOV instruction implementing the source modifiers where necessary. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
@@ -3862,6 +3862,9 @@ fs_visitor::lower_integer_multiplication()
|
|||||||
high.offset = inst->dst.offset % REG_SIZE;
|
high.offset = inst->dst.offset % REG_SIZE;
|
||||||
|
|
||||||
if (devinfo->gen >= 7) {
|
if (devinfo->gen >= 7) {
|
||||||
|
if (inst->src[1].abs)
|
||||||
|
lower_src_modifiers(this, block, inst, 1);
|
||||||
|
|
||||||
if (inst->src[1].file == IMM) {
|
if (inst->src[1].file == IMM) {
|
||||||
ibld.MUL(low, inst->src[0],
|
ibld.MUL(low, inst->src[0],
|
||||||
brw_imm_uw(inst->src[1].ud & 0xffff));
|
brw_imm_uw(inst->src[1].ud & 0xffff));
|
||||||
@@ -3874,6 +3877,9 @@ fs_visitor::lower_integer_multiplication()
|
|||||||
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (inst->src[0].abs)
|
||||||
|
lower_src_modifiers(this, block, inst, 0);
|
||||||
|
|
||||||
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
|
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
|
||||||
inst->src[1]);
|
inst->src[1]);
|
||||||
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
|
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
|
||||||
@@ -3891,6 +3897,18 @@ fs_visitor::lower_integer_multiplication()
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else if (inst->opcode == SHADER_OPCODE_MULH) {
|
} else if (inst->opcode == SHADER_OPCODE_MULH) {
|
||||||
|
/* According to the BDW+ BSpec page for the "Multiply Accumulate
|
||||||
|
* High" instruction:
|
||||||
|
*
|
||||||
|
* "An added preliminary mov is required for source modification on
|
||||||
|
* src1:
|
||||||
|
* mov (8) r3.0<1>:d -r3<8;8,1>:d
|
||||||
|
* mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
|
||||||
|
* mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
|
||||||
|
*/
|
||||||
|
if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs))
|
||||||
|
lower_src_modifiers(this, block, inst, 1);
|
||||||
|
|
||||||
/* Should have been lowered to 8-wide. */
|
/* Should have been lowered to 8-wide. */
|
||||||
assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
|
assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
|
||||||
const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
|
const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
|
||||||
@@ -3906,8 +3924,6 @@ fs_visitor::lower_integer_multiplication()
|
|||||||
* On Gen8, the multiply instruction does a full 32x32-bit
|
* On Gen8, the multiply instruction does a full 32x32-bit
|
||||||
* multiply, but in order to do a 64-bit multiply we can simulate
|
* multiply, but in order to do a 64-bit multiply we can simulate
|
||||||
* the previous behavior and then use a MACH instruction.
|
* the previous behavior and then use a MACH instruction.
|
||||||
*
|
|
||||||
* FINISHME: Don't use source modifiers on src1.
|
|
||||||
*/
|
*/
|
||||||
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
|
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
|
||||||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
|
mul->src[1].type == BRW_REGISTER_TYPE_UD);
|
||||||
|
@@ -531,6 +531,25 @@ namespace brw {
|
|||||||
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove any modifiers from the \p i-th source region of the instruction,
|
||||||
|
* including negate, abs and any implicit type conversion to the execution
|
||||||
|
* type. Instead any source modifiers will be implemented as a separate
|
||||||
|
* MOV instruction prior to the original instruction.
|
||||||
|
*/
|
||||||
|
inline bool
|
||||||
|
lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
|
||||||
|
{
|
||||||
|
assert(inst->components_read(i) == 1);
|
||||||
|
const fs_builder ibld(v, block, inst);
|
||||||
|
const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
|
||||||
|
|
||||||
|
ibld.MOV(tmp, inst->src[i]);
|
||||||
|
inst->src[i] = tmp;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void shuffle_from_32bit_read(const brw::fs_builder &bld,
|
void shuffle_from_32bit_read(const brw::fs_builder &bld,
|
||||||
|
Reference in New Issue
Block a user