i965/fs: Expose arbitrary channel execution groups to the IR.
This generalizes the current fs_inst::force_sechalf flag to allow specifying channel enable groups other than 0 or 8. At some point it will likely make sense to fix the vec4 generator to support arbitrary execution groups and then move the definition of fs_inst::group into backend_instruction (e.g. so we can do FP64 in the VEC4 back-end).

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
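Conceptually the change collapses the old boolean into a channel offset: an instruction now affects the channel enable signals in the half-open range [group, group + exec_size). A minimal sketch of the correspondence, with helper names that are purely illustrative and not part of the patch:

/* Illustrative helpers only, not part of the patch.  The legacy flag maps
 * onto the generalized field as group = force_sechalf ? 8 : 0, and an
 * instruction with a given group and exec_size touches exactly the
 * channels in [group, group + exec_size).
 */
static inline unsigned
group_from_force_sechalf(bool force_sechalf)
{
   return force_sechalf ? 8 : 0;
}

static inline bool
inst_covers_channel(unsigned group, unsigned exec_size, unsigned channel)
{
   return channel >= group && channel < group + exec_size;
}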
@@ -3621,7 +3621,7 @@ fs_visitor::lower_integer_multiplication()
             mul->src[1].stride *= 2;
 
          } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
-                    inst->force_sechalf) {
+                    inst->group > 0) {
             /* Among other things the quarter control bits influence which
              * accumulator register is used by the hardware for instructions
              * that access the accumulator implicitly (e.g. MACH).  A
@@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication()
              * to get the result masked correctly according to the current
              * channel enables.
              */
-            mach->force_sechalf = false;
+            mach->group = 0;
             mach->force_writemask_all = true;
             mach->dst = ibld.vgrf(inst->dst.type);
             ibld.MOV(inst->dst, mach->dst);
@@ -3774,8 +3774,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
          sample_mask.stride *= 2;
 
       bld.exec_all().annotate("FB write oMask")
-         .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
-                   inst->force_sechalf),
+         .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
+                           inst->group),
              sample_mask);
       length++;
    }
@@ -5008,10 +5008,10 @@ fs_visitor::lower_simd_width()
           * execution size of the builder to the highest of both for now so
           * we're sure that both cases can be handled.
           */
+         const unsigned max_width = MAX2(inst->exec_size, lower_width);
          const fs_builder ibld = bld.at(block, inst)
                                     .exec_all(inst->force_writemask_all)
-                                    .group(MAX2(inst->exec_size, lower_width),
-                                           inst->force_sechalf);
+                                    .group(max_width, inst->group / max_width);
 
          /* Split the copies in chunks of the execution width of either the
           * original or the lowered instruction, whichever is lower.
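A note on the lower_simd_width() hunk above: the builder's .group() call takes a group index rather than a raw channel offset, which is why the new code divides inst->group by max_width. The sketch below spells out the assumed arithmetic; it presumes fs_builder::group(n, i) selects the i-th group of n channels (a channel offset of i * n), which is an assumption about the builder rather than something visible in this hunk.

/* Hypothetical illustration: if group(n, i) means "n channels starting at
 * offset i * n", then passing inst->group / max_width as the index gives a
 * channel offset of (inst->group / max_width) * max_width, which equals
 * inst->group whenever the group offset is a multiple of max_width.
 */
static inline unsigned
builder_channel_offset(unsigned max_width, unsigned group)
{
   return (group / max_width) * max_width;
}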
@@ -5343,12 +5343,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    if (inst->force_writemask_all)
       fprintf(file, "NoMask ");
 
-   if (dispatch_width == 16 && inst->exec_size == 8) {
-      if (inst->force_sechalf)
-         fprintf(file, "2ndhalf ");
-      else
-         fprintf(file, "1sthalf ");
-   }
+   if (inst->exec_size != dispatch_width)
+      fprintf(file, "group%d ", inst->group);
 
    fprintf(file, "\n");
 }
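With the dump_instruction() change above, the 1sthalf/2ndhalf annotations (which only covered SIMD8 instructions in a SIMD16 program) are replaced by a generic group label whenever an instruction is narrower than the dispatch width. A small standalone sketch of the new labeling, mirroring the hunk rather than adding anything to it:

#include <cstdio>

/* Mirrors the new dump logic: any instruction that does not cover the full
 * dispatch width is annotated "groupN", where N is its channel offset, so a
 * SIMD8 instruction at offset 8 in a SIMD16 program that used to print as
 * "2ndhalf" now prints as "group8".
 */
static void
dump_group_annotation(FILE *file, int exec_size, int dispatch_width, int group)
{
   if (exec_size != dispatch_width)
      fprintf(file, "group%d ", group);
}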
@@ -72,7 +72,7 @@ namespace brw {
       fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
          shader(shader), block(block), cursor(inst),
          _dispatch_width(inst->exec_size),
-         _group(inst->force_sechalf ? 8 : 0),
+         _group(inst->group),
          force_writemask_all(inst->force_writemask_all)
       {
          annotation.str = inst->annotation;
@@ -167,6 +167,15 @@ namespace brw {
          return _dispatch_width;
       }
 
+      /**
+       * Get the channel group in use.
+       */
+      unsigned
+      group() const
+      {
+         return _group;
+      }
+
       /**
        * Allocate a virtual register of natural vector size (one for this IR)
        * and SIMD width.  \p n gives the amount of space to allocate in
@@ -353,9 +362,8 @@ namespace brw {
          assert(inst->exec_size <= 32);
          assert(inst->exec_size == dispatch_width() ||
                 force_writemask_all);
-         assert(_group == 0 || _group == 8);
 
-         inst->force_sechalf = (_group == 8);
+         inst->group = _group;
          inst->force_writemask_all = force_writemask_all;
          inst->annotation = annotation.str;
          inst->ir = annotation.ir;
@@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate)
    return a->opcode == b->opcode &&
           a->force_writemask_all == b->force_writemask_all &&
           a->exec_size == b->exec_size &&
-          a->force_sechalf == b->force_sechalf &&
+          a->group == b->group &&
           a->saturate == b->saturate &&
           a->predicate == b->predicate &&
           a->predicate_inverse == b->predicate_inverse &&
@@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
       copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
    } else {
       copy = bld.MOV(inst->dst, src);
-      copy->force_sechalf = inst->force_sechalf;
+      copy->group = inst->group;
       copy->force_writemask_all = inst->force_writemask_all;
       copy->src[0].negate = negate;
    }
@@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
    if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
    else if (prog_data->dual_src_blend) {
-      if (!inst->force_sechalf)
+      if (!inst->group)
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
       else
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
@@ -1076,7 +1076,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
    brw_set_default_compression(p, lower_size > 8);
 
    for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
-      brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i);
+      brw_set_default_group(p, inst->group + lower_size * i);
 
       brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
               retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
@@ -1620,7 +1620,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       const bool compressed =
          inst->dst.component_size(inst->exec_size) > REG_SIZE;
       brw_set_default_compression(p, compressed);
-      brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
+      brw_set_default_group(p, inst->group);
 
       for (unsigned int i = 0; i < inst->sources; i++) {
          src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
@@ -1648,6 +1648,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
 
       assert(inst->force_writemask_all || inst->exec_size >= 8);
+      assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
       assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
       assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
 
@@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel()
          /* Check that the MOVs are the right form. */
          if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
              then_mov[i]->exec_size != else_mov[i]->exec_size ||
-             then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+             then_mov[i]->group != else_mov[i]->group ||
              then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
              then_mov[i]->is_partial_write() ||
              else_mov[i]->is_partial_write() ||
@@ -291,21 +291,19 @@ public:
     */
    uint8_t exec_size;
 
+   /**
+    * Channel group from the hardware execution and predication mask that
+    * should be applied to the instruction.  The subset of channel enable
+    * signals (calculated from the EU control flow and predication state)
+    * given by [group, group + exec_size) will be used to mask GRF writes and
+    * any other side effects of the instruction.
+    */
+   uint8_t group;
+
    bool eot:1;
-   bool force_sechalf:1;
    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
 };
 
-/**
- * Set second-half quarter control on \p inst.
- */
-static inline fs_inst *
-set_sechalf(fs_inst *inst)
-{
-   inst->force_sechalf = true;
-   return inst;
-}
-
 /**
  * Make the execution of \p inst dependent on the evaluation of a possibly
  * inverted predicate.