intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.
Currently the visitor attempts to enforce the regioning restrictions that apply to double-precision instructions on CHV/BXT at NIR-to-i965 translation time. It is possible, though, for the copy propagation pass to violate these restrictions if a strided move is propagated into one of the affected instructions. I've only reproduced this issue on a future platform, but it could affect CHV/BXT too under the right conditions.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
@@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
|
|||||||
if (stride > 4)
|
if (stride > 4)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* Bail if the channels of the source need to be aligned to the byte offset
|
||||||
|
* of the corresponding channel of the destination, and the provided stride
|
||||||
|
* would break this restriction.
|
||||||
|
*/
|
||||||
|
if (has_dst_aligned_region_restriction(devinfo, inst) &&
|
||||||
|
!(type_sz(inst->src[arg].type) * stride ==
|
||||||
|
type_sz(inst->dst.type) * inst->dst.stride ||
|
||||||
|
stride == 0))
|
||||||
|
return false;
|
||||||
|
|
||||||
/* 3-source instructions can only be Align16, which restricts what strides
|
/* 3-source instructions can only be Align16, which restricts what strides
|
||||||
* they can take. They can only take a stride of 1 (the usual case), or 0
|
* they can take. They can only take a stride of 1 (the usual case), or 0
|
||||||
* with a special "repctrl" bit. But the repctrl bit doesn't work for
|
* with a special "repctrl" bit. But the repctrl bit doesn't work for
|
||||||
|
@@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst)
|
|||||||
return type_sz(get_exec_type(inst));
|
return type_sz(get_exec_type(inst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the following regioning restriction applies to the specified
|
||||||
|
* instruction. From the Cherryview PRM Vol 7. "Register Region
|
||||||
|
* Restrictions":
|
||||||
|
*
|
||||||
|
* "When source or destination datatype is 64b or operation is integer DWord
|
||||||
|
* multiply, regioning in Align1 must follow these rules:
|
||||||
|
*
|
||||||
|
* 1. Source and Destination horizontal stride must be aligned to the same qword.
|
||||||
|
* 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
|
||||||
|
* 3. Source and Destination offset must be the same, except the case of
|
||||||
|
* scalar source."
|
||||||
|
*/
|
||||||
|
static inline bool
|
||||||
|
has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
||||||
|
const fs_inst *inst)
|
||||||
|
{
|
||||||
|
const brw_reg_type exec_type = get_exec_type(inst);
|
||||||
|
const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) &&
|
||||||
|
(inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD);
|
||||||
|
|
||||||
|
if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 ||
|
||||||
|
(type_sz(exec_type) == 4 && is_int_multiply))
|
||||||
|
return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user