i965/vec4: fix vertical stride to avoid breaking region parameter rule
From IVB PRM, vol4, part3, "General Restrictions on Regioning Parameters":

"If ExecSize = Width and HorzStride ≠ 0, VertStride must be set to Width * HorzStride."

In the next patch, we are going to modify the region parameter for uniforms and vgrf. For uniforms that are the source of DF align1 instructions, they will have <0, 4, 1> regioning and the execsize for those instructions will be 4, so they will break the regioning rule. This will be the same for VGRF sources where we use the vstride == 0 exploit.

As we know we are not going to cross the GRF boundary with that execsize and parameters (not even with the exploit), we just fix the vstride here.

v2:
- Move is_align1_df() (Curro)
- Refactor exec_size == width calculation (Curro)

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: "17.1" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -1948,6 +1948,24 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
|
||||
inst->mlen = 2;
|
||||
}
|
||||
|
||||
/* Reports whether inst->opcode is one of the opcodes listed in the switch
 * below: the double<->32-bit conversions (DOUBLE_TO_F32/D32/U32, TO_DOUBLE)
 * and the PICK/SET LOW/HIGH 32BIT opcodes.  Any other opcode yields false.
 *
 * NOTE(review): going by the function name and the regioning comment in
 * convert_to_hw_regs(), these are presumably the double-precision
 * instructions emitted in align1 mode -- confirm against the generator.
 */
static bool
|
||||
is_align1_df(vec4_instruction *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case VEC4_OPCODE_DOUBLE_TO_F32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_D32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_U32:
|
||||
case VEC4_OPCODE_TO_DOUBLE:
|
||||
case VEC4_OPCODE_PICK_LOW_32BIT:
|
||||
case VEC4_OPCODE_PICK_HIGH_32BIT:
|
||||
case VEC4_OPCODE_SET_LOW_32BIT:
|
||||
case VEC4_OPCODE_SET_HIGH_32BIT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::convert_to_hw_regs()
|
||||
{
|
||||
@@ -2005,6 +2023,20 @@ vec4_visitor::convert_to_hw_regs()
|
||||
|
||||
apply_logical_swizzle(&reg, inst, i);
|
||||
src = reg;
|
||||
|
||||
/* From IVB PRM, vol4, part3, "General Restrictions on Regioning
|
||||
* Parameters":
|
||||
*
|
||||
* "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
|
||||
* to Width * HorzStride."
|
||||
*
|
||||
* We can break this rule with DF sources on DF align1
|
||||
* instructions, because the exec_size would be 4 and width is 4.
|
||||
* As we know we are not accessing the next GRF, it is safe to
|
||||
* set vstride to the formula given by the rule itself.
|
||||
*/
|
||||
if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
|
||||
src.vstride = src.width + src.hstride;
|
||||
}
|
||||
|
||||
if (inst->is_3src(devinfo)) {
|
||||
@@ -2262,24 +2294,6 @@ vec4_visitor::lower_simd_width()
|
||||
return progress;
|
||||
}
|
||||
|
||||
/* Reports whether inst->opcode is one of the opcodes listed in the switch
 * below: the double<->32-bit conversions (DOUBLE_TO_F32/D32/U32, TO_DOUBLE)
 * and the PICK/SET LOW/HIGH 32BIT opcodes.  Any other opcode yields false.
 *
 * NOTE(review): the name suggests these are the double-precision
 * instructions emitted in align1 mode -- confirm against the generator.
 */
static bool
|
||||
is_align1_df(vec4_instruction *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case VEC4_OPCODE_DOUBLE_TO_F32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_D32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_U32:
|
||||
case VEC4_OPCODE_TO_DOUBLE:
|
||||
case VEC4_OPCODE_PICK_LOW_32BIT:
|
||||
case VEC4_OPCODE_PICK_HIGH_32BIT:
|
||||
case VEC4_OPCODE_SET_LOW_32BIT:
|
||||
case VEC4_OPCODE_SET_HIGH_32BIT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static brw_predicate
|
||||
scalarize_predicate(brw_predicate predicate, unsigned writemask)
|
||||
{
|
||||
|
Reference in New Issue
Block a user