i965/fs: Don't follow pow with an instruction with two dest regs.
Beginning with commit 7b208a73, Unigine Valley began hanging the GPU on
Gen >= 8 platforms.
Evidently that commit allowed the scheduler to make different choices
that somehow finally ran afoul of a hardware bug in which POW and FDIV
instructions may not be followed by an instruction with two destination
registers (including compressed instructions). I presume the conditions
are more complex than that, but the internal hardware bug report (BDWGFX
bug_de 1696294) does not contain much more information.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94924
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> [v1]
Tested-by: Mark Janes <mark.a.janes@intel.com> [v1]
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -1726,6 +1726,24 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
unsigned int last_insn_offset = p->next_insn_offset;
|
||||
bool multiple_instructions_emitted = false;
|
||||
|
||||
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
|
||||
* "Register Region Restrictions" section: for BDW, SKL:
|
||||
*
|
||||
* "A POW/FDIV operation must not be followed by an instruction
|
||||
* that requires two destination registers."
|
||||
*
|
||||
* The documentation is often lacking annotations for Atom parts,
|
||||
* and empirically this affects CHV as well.
|
||||
*/
|
||||
if (devinfo->gen >= 8 &&
|
||||
p->nr_insn > 1 &&
|
||||
brw_inst_opcode(devinfo, brw_last_inst) == BRW_OPCODE_MATH &&
|
||||
brw_inst_math_function(devinfo, brw_last_inst) == BRW_MATH_FUNCTION_POW &&
|
||||
inst->dst.component_size(inst->exec_size) > REG_SIZE) {
|
||||
brw_NOP(p);
|
||||
last_insn_offset = p->next_insn_offset;
|
||||
}
|
||||
|
||||
if (unlikely(debug_flag))
|
||||
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
|
||||
|
||||
|
Reference in New Issue
Block a user