i965/fs: double regioning parameters and execsize for DF in IVB/BYT
In IVB and BYT, both regioning parameters and execution sizes are measured as 32-bits element size. So when we have something like: mov(8) g2<1>DF g3<4,4,1>DF We are not actually moving 8 doubles (our intention), but 4 doubles. We need to double the parameters to cope with this issue. However, horizontal strides don't behave as they're supposed to on IVB for DF regions, they will cause each 32-bit half of DF sources to be strided individually, and doubling the value won't make any difference. v2: - Use devinfo directly (Matt). - Use Baytrail instead of Valleview (Matt). - Use IvyBridge instead of Ivy (Matt) - Double the exec_size in code emission (Curro) v3: - Change hstride doubling by an assert and fix commit log (Curro). - Substitute remaining compiler->devinfo by devinfo (Curro). v4: - Fix comment (Curro). Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:

committed by
Francisco Jerez

parent
79af256388
commit
0f1316d4db
@@ -53,13 +53,14 @@ brw_file_from_reg(fs_reg *reg)
|
||||
}
|
||||
|
||||
static struct brw_reg
|
||||
brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
|
||||
brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst,
|
||||
fs_reg *reg, bool compressed)
|
||||
{
|
||||
struct brw_reg brw_reg;
|
||||
|
||||
switch (reg->file) {
|
||||
case MRF:
|
||||
assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen));
|
||||
assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
|
||||
/* Fallthrough */
|
||||
case VGRF:
|
||||
if (reg->stride == 0) {
|
||||
@@ -92,6 +93,34 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen, bool compressed)
|
||||
const unsigned width = MIN2(reg_width, phys_width);
|
||||
brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
|
||||
brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
|
||||
/* From the IvyBridge PRM (EU Changes by Processor Generation, page 13):
|
||||
* "Each DF (Double Float) operand uses an element size of 4 rather
|
||||
* than 8 and all regioning parameters are twice what the values
|
||||
* would be based on the true element size: ExecSize, Width,
|
||||
* HorzStride, and VertStride. Each DF operand uses a pair of
|
||||
* channels and all masking and swizzing should be adjusted
|
||||
* appropriately."
|
||||
*
|
||||
* From the IvyBridge PRM (Special Requirements for Handling Double
|
||||
* Precision Data Types, page 71):
|
||||
* "In Align1 mode, all regioning parameters like stride, execution
|
||||
* size, and width must use the syntax of a pair of packed
|
||||
* floats. The offsets for these data types must be 64-bit
|
||||
* aligned. The execution size and regioning parameters are in terms
|
||||
* of floats."
|
||||
*
|
||||
* Summarized: when handling DF-typed arguments, ExecSize,
|
||||
* VertStride, and Width must be doubled.
|
||||
*
|
||||
* It applies to BayTrail too.
|
||||
*/
|
||||
if (devinfo->gen == 7 && !devinfo->is_haswell &&
|
||||
type_sz(reg->type) == 8) {
|
||||
brw_reg.width++;
|
||||
if (brw_reg.vstride > 0)
|
||||
brw_reg.vstride++;
|
||||
assert(brw_reg.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
}
|
||||
}
|
||||
|
||||
brw_reg = retype(brw_reg, reg->type);
|
||||
@@ -1585,9 +1614,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
brw_set_default_group(p, inst->group);
|
||||
|
||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||
src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
|
||||
compressed);
|
||||
|
||||
src[i] = brw_reg_from_fs_reg(devinfo, inst,
|
||||
&inst->src[i], compressed);
|
||||
/* The accumulator result appears to get used for the
|
||||
* conditional modifier generation. When negating a UD
|
||||
* value, there is a 33rd bit generated for the sign in the
|
||||
@@ -1598,7 +1626,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
|
||||
!inst->src[i].negate);
|
||||
}
|
||||
dst = brw_reg_from_fs_reg(inst, &inst->dst, devinfo->gen, compressed);
|
||||
dst = brw_reg_from_fs_reg(devinfo, inst,
|
||||
&inst->dst, compressed);
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_1);
|
||||
brw_set_default_predicate_control(p, inst->predicate);
|
||||
@@ -1607,7 +1636,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
brw_set_default_saturate(p, inst->saturate);
|
||||
brw_set_default_mask_control(p, inst->force_writemask_all);
|
||||
brw_set_default_acc_write_control(p, inst->writes_accumulator);
|
||||
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
|
||||
|
||||
unsigned exec_size = inst->exec_size;
|
||||
if (devinfo->gen == 7 && !devinfo->is_haswell &&
|
||||
(get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) {
|
||||
exec_size *= 2;
|
||||
}
|
||||
|
||||
brw_set_default_exec_size(p, cvt(exec_size) - 1);
|
||||
|
||||
assert(inst->force_writemask_all || inst->exec_size >= 4);
|
||||
assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
|
||||
|
Reference in New Issue
Block a user