intel/fs: Support SENDS in SHADER_OPCODE_SEND
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:

committed by
Jason Ekstrand

parent
cca199fd85
commit
eab1c55590
@@ -6759,11 +6759,61 @@ fs_visitor::optimize()
|
|||||||
OPT(lower_simd_width);
|
OPT(lower_simd_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OPT(fixup_sends_duplicate_payload);
|
||||||
|
|
||||||
lower_uniform_pull_constant_loads();
|
lower_uniform_pull_constant_loads();
|
||||||
|
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* From the Skylake PRM Vol. 2a docs for sends:
|
||||||
|
*
|
||||||
|
* "It is required that the second block of GRFs does not overlap with the
|
||||||
|
* first block."
|
||||||
|
*
|
||||||
|
* There are plenty of cases where we may accidentally violate this due to
|
||||||
|
* having, for instance, both sources be the constant 0. This little pass
|
||||||
|
* just adds a new vgrf for the second payload and copies it over.
|
||||||
|
*/
|
||||||
|
// Returns true when at least one SHADER_OPCODE_SEND had its second payload
// source redirected to a freshly allocated copy, false otherwise.
bool
|
||||||
|
fs_visitor::fixup_sends_duplicate_payload()
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
// Only SENDs that actually carry a second payload (ex_mlen > 0) and whose
// two payload regions (src[2] and src[3]) overlap are rewritten.
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||||
|
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
||||||
|
regions_overlap(inst->src[2], inst->mlen * REG_SIZE,
|
||||||
|
inst->src[3], inst->ex_mlen * REG_SIZE)) {
|
||||||
|
// New VGRF, sized by ex_mlen, that will hold a private copy of the
// second payload.
fs_reg tmp = fs_reg(VGRF, alloc.allocate(inst->ex_mlen),
|
||||||
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
/* Sadly, we've lost all notion of channels and bit sizes at this
|
||||||
|
* point. Just WE_all it.
|
||||||
|
*/
|
||||||
|
const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
|
||||||
|
fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
|
||||||
|
fs_reg copy_dst = tmp;
|
||||||
|
// The counter advances by two per iteration: each pass emits one MOV with
// the 16-wide builder, except that a trailing odd register is copied with
// an 8-wide MOV instead.
for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
|
||||||
|
if (inst->ex_mlen == i + 1) {
|
||||||
|
/* Only one register left; do SIMD8 */
|
||||||
|
ibld.group(8, 0).MOV(copy_dst, copy_src);
|
||||||
|
} else {
|
||||||
|
ibld.MOV(copy_dst, copy_src);
|
||||||
|
}
|
||||||
|
// NOTE(review): offset(..., ibld, 1) presumably steps by one full
// ibld-width (16 x UD) chunk, matching the i += 2 stride -- confirm.
copy_src = offset(copy_src, ibld, 1);
|
||||||
|
copy_dst = offset(copy_dst, ibld, 1);
|
||||||
|
}
|
||||||
|
// Point the SEND at the copy so its second payload no longer refers to
// the same registers as the first.
inst->src[3] = tmp;
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instructions were inserted and a source was rewritten, so previously
// computed live-interval information is invalidated.
if (progress)
|
||||||
|
invalidate_live_intervals();
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Three source instruction must have a GRF/MRF destination register.
|
* Three source instruction must have a GRF/MRF destination register.
|
||||||
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
|
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
|
||||||
|
@@ -103,6 +103,7 @@ public:
|
|||||||
void setup_vs_payload();
|
void setup_vs_payload();
|
||||||
void setup_gs_payload();
|
void setup_gs_payload();
|
||||||
void setup_cs_payload();
|
void setup_cs_payload();
|
||||||
|
bool fixup_sends_duplicate_payload();
|
||||||
void fixup_3src_null_dest();
|
void fixup_3src_null_dest();
|
||||||
void assign_curb_setup();
|
void assign_curb_setup();
|
||||||
void calculate_urb_setup();
|
void calculate_urb_setup();
|
||||||
|
@@ -258,11 +258,6 @@ fs_generator::generate_send(fs_inst *inst,
|
|||||||
struct brw_reg payload,
|
struct brw_reg payload,
|
||||||
struct brw_reg payload2)
|
struct brw_reg payload2)
|
||||||
{
|
{
|
||||||
/* SENDS not yet supported */
|
|
||||||
assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0);
|
|
||||||
assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
|
||||||
payload2.nr == BRW_ARF_NULL);
|
|
||||||
|
|
||||||
const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||||
dst.nr == BRW_ARF_NULL;
|
dst.nr == BRW_ARF_NULL;
|
||||||
const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
|
const unsigned rlen = dst_is_null ? 0 : inst->size_written / REG_SIZE;
|
||||||
@@ -270,11 +265,23 @@ fs_generator::generate_send(fs_inst *inst,
|
|||||||
uint32_t desc_imm = inst->desc |
|
uint32_t desc_imm = inst->desc |
|
||||||
brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
|
brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size);
|
||||||
|
|
||||||
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
|
uint32_t ex_desc_imm = brw_message_ex_desc(devinfo, inst->ex_mlen);
|
||||||
|
|
||||||
|
if (ex_desc.file != BRW_IMMEDIATE_VALUE || ex_desc.ud || ex_desc_imm) {
|
||||||
|
/* If we have any sort of extended descriptor, then we need SENDS. This
|
||||||
|
* also covers the dual-payload case because ex_mlen goes in ex_desc.
|
||||||
|
*/
|
||||||
|
brw_send_indirect_split_message(p, inst->sfid, dst, payload, payload2,
|
||||||
|
desc, desc_imm, ex_desc, ex_desc_imm);
|
||||||
|
if (inst->check_tdr)
|
||||||
|
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDSC);
|
||||||
|
} else {
|
||||||
|
brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm);
|
||||||
|
if (inst->check_tdr)
|
||||||
|
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
|
||||||
|
}
|
||||||
|
|
||||||
brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
|
brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot);
|
||||||
if (inst->check_tdr)
|
|
||||||
brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
Reference in New Issue
Block a user