i965/vec4: add opportunistic behaviour to opt_vector_float()
opt_vector_float() transforms several scalar MOV operations to a single vectorial MOV. This is done when those MOV covers all the components of the destination register. So something like: mov vgrf3.0.xy:D, 0D mov vgrf3.0.w:D, 1065353216D mov vgrf3.0.z:D, 0D is transformed in: mov vgrf3.0:F, [0F, 0F, 0F, 1F] But there are cases where not all the components are written. For example, in: mov vgrf2.0.x:D, 1073741824D mov vgrf3.0.xy:D, 0D mov vgrf3.0.w:D, 1065353216D mov vgrf4.0.xy:D, 1065353216D mov vgrf4.0.w:D, 0D mov vgrf6.0:UD, u4.xyzw:UD Nor vgrf3 nor vgrf4 .z components are written, so the optimization is not applied. But it could be applied anyway with the components covered, using a writemask to select the ones written. So we could transform it in: mov vgrf2.0.x:D, 1073741824D mov vgrf3.0.xyw:F, [0F, 0F, 0F, 1F] mov vgrf4.0.xyw:F, [1F, 1F, 0F, 0F] mov vgrf6.0:UD, u4.xyzw:UD This commit does precisely that: opportunistically apply opt_vector_float() when possible. total instructions in shared programs: 7124660 -> 7114784 (-0.14%) instructions in affected programs: 443078 -> 433202 (-2.23%) helped: 4998 HURT: 0 total cycles in shared programs: 64757760 -> 64728016 (-0.05%) cycles in affected programs: 1401686 -> 1371942 (-2.12%) helped: 3243 HURT: 38 v2: change vectorize_mov() signature (Matt). v3: take in account predicates (Juan). v4 [mattst88]: Update shader-db numbers. Fix some whitespace issues. Reviewed-by: Matt Turner <mattst88@gmail.com> Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
This commit is contained in:

committed by
Matt Turner

parent
feb71117ae
commit
2f76a9924e
@@ -320,6 +320,28 @@ src_reg::equals(const src_reg &r) const
|
||||
!reladdr && !r.reladdr);
|
||||
}
|
||||
|
||||
bool
|
||||
vec4_visitor::vectorize_mov(bblock_t *block, vec4_instruction *inst,
|
||||
uint8_t imm[4], vec4_instruction *imm_inst[4],
|
||||
int inst_count, unsigned writemask)
|
||||
{
|
||||
if (inst_count < 2)
|
||||
return false;
|
||||
|
||||
unsigned vf;
|
||||
memcpy(&vf, imm, sizeof(vf));
|
||||
vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
|
||||
mov->dst.type = BRW_REGISTER_TYPE_F;
|
||||
mov->dst.writemask = writemask;
|
||||
inst->insert_before(block, mov);
|
||||
|
||||
for (int i = 0; i < inst_count; i++) {
|
||||
imm_inst[i]->remove(block);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
vec4_visitor::opt_vector_float()
|
||||
{
|
||||
@@ -328,27 +350,38 @@ vec4_visitor::opt_vector_float()
|
||||
int last_reg = -1, last_reg_offset = -1;
|
||||
enum brw_reg_file last_reg_file = BAD_FILE;
|
||||
|
||||
int remaining_channels = 0;
|
||||
uint8_t imm[4];
|
||||
uint8_t imm[4] = { 0 };
|
||||
int inst_count = 0;
|
||||
vec4_instruction *imm_inst[4];
|
||||
unsigned writemask = 0;
|
||||
|
||||
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
|
||||
if (last_reg != inst->dst.nr ||
|
||||
last_reg_offset != inst->dst.reg_offset ||
|
||||
last_reg_file != inst->dst.file) {
|
||||
progress |= vectorize_mov(block, inst, imm, imm_inst, inst_count,
|
||||
writemask);
|
||||
inst_count = 0;
|
||||
writemask = 0;
|
||||
last_reg = inst->dst.nr;
|
||||
last_reg_offset = inst->dst.reg_offset;
|
||||
last_reg_file = inst->dst.file;
|
||||
remaining_channels = WRITEMASK_XYZW;
|
||||
|
||||
inst_count = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
imm[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->opcode != BRW_OPCODE_MOV ||
|
||||
inst->dst.writemask == WRITEMASK_XYZW ||
|
||||
inst->src[0].file != IMM)
|
||||
inst->src[0].file != IMM ||
|
||||
inst->predicate != BRW_PREDICATE_NONE) {
|
||||
progress |= vectorize_mov(block, inst, imm, imm_inst, inst_count,
|
||||
writemask);
|
||||
inst_count = 0;
|
||||
last_reg = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
int vf = brw_float_to_vf(inst->src[0].f);
|
||||
if (vf == -1)
|
||||
@@ -363,23 +396,8 @@ vec4_visitor::opt_vector_float()
|
||||
if ((inst->dst.writemask & WRITEMASK_W) != 0)
|
||||
imm[3] = vf;
|
||||
|
||||
writemask |= inst->dst.writemask;
|
||||
imm_inst[inst_count++] = inst;
|
||||
|
||||
remaining_channels &= ~inst->dst.writemask;
|
||||
if (remaining_channels == 0) {
|
||||
unsigned vf;
|
||||
memcpy(&vf, imm, sizeof(vf));
|
||||
vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf));
|
||||
mov->dst.type = BRW_REGISTER_TYPE_F;
|
||||
mov->dst.writemask = WRITEMASK_XYZW;
|
||||
inst->insert_after(block, mov);
|
||||
last_reg = -1;
|
||||
|
||||
for (int i = 0; i < inst_count; i++) {
|
||||
imm_inst[i]->remove(block);
|
||||
}
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
|
@@ -369,6 +369,10 @@ protected:
|
||||
virtual void gs_end_primitive();
|
||||
|
||||
private:
|
||||
bool vectorize_mov(bblock_t *block, vec4_instruction *inst,
|
||||
uint8_t imm[4], vec4_instruction *imm_inst[4],
|
||||
int inst_count, unsigned writemask);
|
||||
|
||||
/**
|
||||
* If true, then register allocation should fail instead of spilling.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user