intel/fs: Remove nasty open-coded CHV/BXT 64-bit workarounds.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
@@ -451,43 +451,13 @@ namespace brw {
|
||||
|
||||
if (cluster_size > 1) {
|
||||
const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
|
||||
dst_reg left = horiz_stride(tmp, 2);
|
||||
dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
|
||||
|
||||
/* From the Cherryview PRM Vol. 7, "Register Region Restrictions":
|
||||
*
|
||||
* "When source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply, regioning in Align1 must follow
|
||||
* these rules:
|
||||
*
|
||||
* [...]
|
||||
*
|
||||
* 3. Source and Destination offset must be the same, except
|
||||
* the case of scalar source."
|
||||
*
|
||||
* In order to work around this, we create a temporary register
|
||||
* and shift left over to match right. If we have a 64-bit type,
|
||||
* we have to use two integer MOVs instead of a 64-bit MOV.
|
||||
*/
|
||||
if (need_matching_subreg_offset(opcode, tmp.type)) {
|
||||
dst_reg tmp2 = vgrf(tmp.type);
|
||||
dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2);
|
||||
if (type_sz(tmp.type) > 4) {
|
||||
ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0),
|
||||
subscript(left, BRW_REGISTER_TYPE_D, 0));
|
||||
ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1),
|
||||
subscript(left, BRW_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
ubld.MOV(new_left, left);
|
||||
}
|
||||
left = new_left;
|
||||
}
|
||||
const dst_reg left = horiz_stride(tmp, 2);
|
||||
const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
|
||||
set_condmod(mod, ubld.emit(opcode, right, left, right));
|
||||
}
|
||||
|
||||
if (cluster_size > 2) {
|
||||
if (type_sz(tmp.type) <= 4 &&
|
||||
!need_matching_subreg_offset(opcode, tmp.type)) {
|
||||
if (type_sz(tmp.type) <= 4) {
|
||||
const fs_builder ubld =
|
||||
exec_all().group(dispatch_width() / 4, 0);
|
||||
src_reg left = horiz_stride(horiz_offset(tmp, 1), 4);
|
||||
@@ -787,38 +757,6 @@ namespace brw {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* From the Cherryview PRM Vol. 7, "Register Region Restrictions":
|
||||
*
|
||||
* "When source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply, regioning in Align1 must follow
|
||||
* these rules:
|
||||
*
|
||||
* [...]
|
||||
*
|
||||
* 3. Source and Destination offset must be the same, except
|
||||
* the case of scalar source."
|
||||
*
|
||||
* This helper just detects when we're in this case.
|
||||
*/
|
||||
bool
|
||||
need_matching_subreg_offset(enum opcode opcode,
|
||||
enum brw_reg_type type) const
|
||||
{
|
||||
if (!shader->devinfo->is_cherryview &&
|
||||
!gen_device_info_is_9lp(shader->devinfo))
|
||||
return false;
|
||||
|
||||
if (type_sz(type) > 4)
|
||||
return true;
|
||||
|
||||
if (opcode == BRW_OPCODE_MUL &&
|
||||
!brw_reg_type_is_floating_point(type))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bblock_t *block;
|
||||
exec_node *cursor;
|
||||
|
||||
|
@@ -805,30 +805,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||
case nir_op_i2i64:
|
||||
case nir_op_u2f64:
|
||||
case nir_op_u2u64:
|
||||
/* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
|
||||
*
|
||||
* "When source or destination is 64b (...), regioning in Align1
|
||||
* must follow these rules:
|
||||
*
|
||||
* 1. Source and destination horizontal stride must be aligned to
|
||||
* the same qword.
|
||||
* (...)"
|
||||
*
|
||||
* This means that conversions from bit-sizes smaller than 64-bit to
|
||||
* 64-bit need to have the source data elements aligned to 64-bit.
|
||||
* This restriction does not apply to BDW and later.
|
||||
*/
|
||||
if (nir_dest_bit_size(instr->dest.dest) == 64 &&
|
||||
nir_src_bit_size(instr->src[0].src) < 64 &&
|
||||
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
|
||||
fs_reg tmp = bld.vgrf(result.type, 1);
|
||||
tmp = subscript(tmp, op[0].type, 0);
|
||||
inst = bld.MOV(tmp, op[0]);
|
||||
inst = bld.MOV(result, tmp);
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case nir_op_f2f32:
|
||||
case nir_op_f2i32:
|
||||
case nir_op_f2u32:
|
||||
@@ -1463,36 +1439,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||
unreachable("not reached: should have been lowered");
|
||||
|
||||
case nir_op_ishl:
|
||||
case nir_op_ishr:
|
||||
case nir_op_ushr: {
|
||||
fs_reg shift_count = op[1];
|
||||
|
||||
if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
|
||||
if (op[1].file == VGRF &&
|
||||
(result.type == BRW_REGISTER_TYPE_Q ||
|
||||
result.type == BRW_REGISTER_TYPE_UQ)) {
|
||||
shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
shift_count.stride = 2;
|
||||
bld.MOV(shift_count, op[1]);
|
||||
}
|
||||
}
|
||||
|
||||
switch (instr->op) {
|
||||
case nir_op_ishl:
|
||||
bld.SHL(result, op[0], shift_count);
|
||||
break;
|
||||
case nir_op_ishr:
|
||||
bld.ASR(result, op[0], shift_count);
|
||||
break;
|
||||
case nir_op_ushr:
|
||||
bld.SHR(result, op[0], shift_count);
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
bld.SHL(result, op[0], op[1]);
|
||||
break;
|
||||
case nir_op_ishr:
|
||||
bld.ASR(result, op[0], op[1]);
|
||||
break;
|
||||
case nir_op_ushr:
|
||||
bld.SHR(result, op[0], op[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_pack_half_2x16_split:
|
||||
bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
|
||||
@@ -4414,34 +4368,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
const fs_reg tmp_left = horiz_stride(tmp, 2);
|
||||
const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
|
||||
|
||||
/* From the Cherryview PRM Vol. 7, "Register Region Restrictions":
|
||||
*
|
||||
* "When source or destination datatype is 64b or operation is
|
||||
* integer DWord multiply, regioning in Align1 must follow
|
||||
* these rules:
|
||||
*
|
||||
* [...]
|
||||
*
|
||||
* 3. Source and Destination offset must be the same, except
|
||||
* the case of scalar source."
|
||||
*
|
||||
* In order to work around this, we have to emit two 32-bit MOVs instead
|
||||
* of a single 64-bit MOV to do the shuffle.
|
||||
*/
|
||||
if (type_sz(value.type) > 4 &&
|
||||
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
|
||||
ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0),
|
||||
subscript(src_right, BRW_REGISTER_TYPE_D, 0));
|
||||
ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1),
|
||||
subscript(src_right, BRW_REGISTER_TYPE_D, 1));
|
||||
ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0),
|
||||
subscript(src_left, BRW_REGISTER_TYPE_D, 0));
|
||||
ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1),
|
||||
subscript(src_left, BRW_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
ubld.MOV(tmp_left, src_right);
|
||||
ubld.MOV(tmp_right, src_left);
|
||||
}
|
||||
ubld.MOV(tmp_left, src_right);
|
||||
ubld.MOV(tmp_right, src_left);
|
||||
|
||||
bld.MOV(retype(dest, value.type), tmp);
|
||||
break;
|
||||
}
|
||||
|
Reference in New Issue
Block a user