i965: Make brw_reg_from_fs_reg() halve exec_size when compressed.
In commit a5d7e144ea, Connor generalized the
exec_size halving code to handle more cases. As part of this, he made
it not halve anything if the region accessed falls completely in a
single register.
Unfortunately, it started producing some invalid regions:
-add(16) g6<1>F g10<8,8,1>UW -g1<0,1,0>F { align1 compr };
-add(16) g8<1>F g12<8,8,1>UW -g1.1<0,1,0>F { align1 compr };
+add(16) g6<1>F g10<16,16,1>UW -g1<0,1,0>F { align1 compr };
+add(16) g8<1>F g12<16,16,1>UW -g1.1<0,1,0>F { align1 compr };
Here, the UW source region completely fits within a register. However,
we have to use instruction compression because the destination region
spans two registers. A <16,16,1> source region is invalid here because
the instruction is compressed.
To handle this, skip the "everything fits in one register" case and
fall through to the exec_size halving case when compressed.
Fixes hundreds of Piglit regressions on GM965.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95370
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
@@ -54,7 +54,8 @@ brw_file_from_reg(fs_reg *reg)
|
||||
}
|
||||
|
||||
static struct brw_reg
|
||||
brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
|
||||
brw_reg_from_fs_reg(const struct brw_codegen *p,
|
||||
fs_inst *inst, fs_reg *reg, unsigned gen)
|
||||
{
|
||||
struct brw_reg brw_reg;
|
||||
|
||||
@@ -65,7 +66,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
|
||||
case VGRF:
|
||||
if (reg->stride == 0) {
|
||||
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
|
||||
} else if (inst->exec_size * reg->stride * type_sz(reg->type) <= 32) {
|
||||
} else if (!p->compressed &&
|
||||
inst->exec_size * reg->stride * type_sz(reg->type) <= 32) {
|
||||
brw_reg = brw_vecn_reg(inst->exec_size, brw_file_from_reg(reg),
|
||||
reg->nr, 0);
|
||||
brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
|
||||
@@ -1763,7 +1765,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||
src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen);
|
||||
src[i] = brw_reg_from_fs_reg(p, inst, &inst->src[i], devinfo->gen);
|
||||
|
||||
/* The accumulator result appears to get used for the
|
||||
* conditional modifier generation. When negating a UD
|
||||
@@ -1775,7 +1777,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
|
||||
!inst->src[i].negate);
|
||||
}
|
||||
dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen);
|
||||
dst = brw_reg_from_fs_reg(p, inst, &inst->dst, devinfo->gen);
|
||||
|
||||
brw_set_default_predicate_control(p, inst->predicate);
|
||||
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
|
||||
|
Reference in New Issue
Block a user