intel/fs: Rework fs_inst::is_copy_payload() into multiple classification helpers.
This reworks the current fs_inst::is_copy_payload() method into a number of classification helpers with well-defined semantics. This will be useful later on in order to optimize LOAD_PAYLOAD instructions more aggressively in cases where we can determine it's safe to do so. The closest equivalent of the present fs_inst::is_copy_payload() method is the is_coalescing_payload() helper introduced here. No functional nor shader-db changes. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -427,34 +427,6 @@ fs_inst::has_source_and_destination_hazard() const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
|
||||||
fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
|
|
||||||
{
|
|
||||||
if (this->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
fs_reg reg = this->src[0];
|
|
||||||
if (reg.file != VGRF || reg.offset != 0 || reg.stride != 1)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (int i = 0; i < this->sources; i++) {
|
|
||||||
reg.type = this->src[i].type;
|
|
||||||
if (!this->src[i].equals(reg))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (i < this->header_size) {
|
|
||||||
reg.offset += REG_SIZE;
|
|
||||||
} else {
|
|
||||||
reg = horiz_offset(reg, this->exec_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const
|
fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const
|
||||||
{
|
{
|
||||||
|
@@ -105,7 +105,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
|
|||||||
case SHADER_OPCODE_COS:
|
case SHADER_OPCODE_COS:
|
||||||
return inst->mlen < 2;
|
return inst->mlen < 2;
|
||||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||||
return !inst->is_copy_payload(v->alloc);
|
return !is_coalescing_payload(v->alloc, inst);
|
||||||
default:
|
default:
|
||||||
return inst->is_send_from_grf() && !inst->has_side_effects() &&
|
return inst->is_send_from_grf() && !inst->has_side_effects() &&
|
||||||
!inst->is_volatile();
|
!inst->is_volatile();
|
||||||
|
@@ -86,7 +86,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
|
||||||
if (!inst->is_copy_payload(v->alloc)) {
|
if (!is_coalescing_payload(v->alloc, inst)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -350,7 +350,6 @@ public:
|
|||||||
bool is_send_from_grf() const;
|
bool is_send_from_grf() const;
|
||||||
bool is_payload(unsigned arg) const;
|
bool is_payload(unsigned arg) const;
|
||||||
bool is_partial_write() const;
|
bool is_partial_write() const;
|
||||||
bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
|
|
||||||
unsigned components_read(unsigned i) const;
|
unsigned components_read(unsigned i) const;
|
||||||
unsigned size_read(int arg) const;
|
unsigned size_read(int arg) const;
|
||||||
bool can_do_source_mods(const struct gen_device_info *devinfo) const;
|
bool can_do_source_mods(const struct gen_device_info *devinfo) const;
|
||||||
@@ -570,4 +569,103 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
|
||||||
|
* the specified register file into a VGRF.
|
||||||
|
*
|
||||||
|
* This implies identity register regions without any source-destination
|
||||||
|
* overlap, but otherwise has no implications on the location of sources and
|
||||||
|
* destination in the register file: Gathering any number of portions from
|
||||||
|
* multiple virtual registers in any order is allowed.
|
||||||
|
*/
|
||||||
|
inline bool
|
||||||
|
is_copy_payload(brw_reg_file file, const fs_inst *inst)
|
||||||
|
{
|
||||||
|
if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD ||
|
||||||
|
inst->is_partial_write() || inst->saturate ||
|
||||||
|
inst->dst.file != VGRF)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < inst->sources; i++) {
|
||||||
|
if (inst->src[i].file != file ||
|
||||||
|
inst->src[i].abs || inst->src[i].negate)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!inst->src[i].is_contiguous())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (regions_overlap(inst->dst, inst->size_written,
|
||||||
|
inst->src[i], inst->size_read(i)))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like is_copy_payload(), but the instruction is required to copy a single
|
||||||
|
* contiguous block of registers from the given register file into the
|
||||||
|
* destination without any reordering.
|
||||||
|
*/
|
||||||
|
inline bool
|
||||||
|
is_identity_payload(brw_reg_file file, const fs_inst *inst) {
|
||||||
|
if (is_copy_payload(file, inst)) {
|
||||||
|
fs_reg reg = inst->src[0];
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < inst->sources; i++) {
|
||||||
|
reg.type = inst->src[i].type;
|
||||||
|
if (!inst->src[i].equals(reg))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
reg = byte_offset(reg, inst->size_read(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like is_copy_payload(), but the instruction is required to source data from
|
||||||
|
* at least two disjoint VGRFs.
|
||||||
|
*
|
||||||
|
* This doesn't necessarily rule out the elimination of this instruction
|
||||||
|
* through register coalescing, but due to limitations of the register
|
||||||
|
* coalesce pass it might be impossible to do so directly until a later stage,
|
||||||
|
* when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
|
||||||
|
* instructions.
|
||||||
|
*/
|
||||||
|
inline bool
|
||||||
|
is_multi_copy_payload(const fs_inst *inst) {
|
||||||
|
if (is_copy_payload(VGRF, inst)) {
|
||||||
|
for (unsigned i = 0; i < inst->sources; i++) {
|
||||||
|
if (inst->src[i].nr != inst->src[0].nr)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like is_identity_payload(), but the instruction is required to copy the
|
||||||
|
* whole contents of a single VGRF into the destination.
|
||||||
|
*
|
||||||
|
* This means that there is a good chance that the instruction will be
|
||||||
|
* eliminated through register coalescing, but it's neither a necessary nor a
|
||||||
|
* sufficient condition for that to happen -- E.g. consider the case where
|
||||||
|
* source and destination registers diverge due to other instructions in the
|
||||||
|
* program overwriting part of their contents, which isn't something we can
|
||||||
|
* predict up front based on a cheap strictly local test of the copy
|
||||||
|
* instruction.
|
||||||
|
*/
|
||||||
|
inline bool
|
||||||
|
is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
|
||||||
|
{
|
||||||
|
return is_identity_payload(VGRF, inst) &&
|
||||||
|
inst->src[0].offset == 0 &&
|
||||||
|
alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user