diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a5c7e9346fd..6e3429f72e1 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2549,6 +2549,9 @@ fs_visitor::opt_algebraic() assert(!inst->src[0].negate); const brw::fs_builder ibld(this, block, inst); + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 1), subscript(inst->src[0], BRW_REGISTER_TYPE_F, 1)); ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_F, 0), @@ -2567,6 +2570,9 @@ fs_visitor::opt_algebraic() assert(!inst->src[0].negate); const brw::fs_builder ibld(this, block, inst); + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 1)); ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), @@ -2697,6 +2703,9 @@ fs_visitor::opt_algebraic() assert(!inst->src[1].abs && !inst->src[1].negate); const brw::fs_builder ibld(this, block, inst); + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + set_predicate(inst->predicate, ibld.SEL(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), subscript(inst->src[0], BRW_REGISTER_TYPE_UD, 0), @@ -4107,6 +4116,7 @@ fs_visitor::lower_mul_qword_inst(fs_inst *inst, bblock_t *block) subscript(inst->src[1], BRW_REGISTER_TYPE_UD, 0)); ibld.MOV(bd_low, acc); + ibld.UNDEF(bd); ibld.MOV(subscript(bd, BRW_REGISTER_TYPE_UD, 0), bd_low); ibld.MOV(subscript(bd, BRW_REGISTER_TYPE_UD, 1), bd_high); } @@ -4123,6 +4133,8 @@ fs_visitor::lower_mul_qword_inst(fs_inst *inst, bblock_t *block) if (devinfo->has_64bit_int) { ibld.MOV(inst->dst, bd); } else { + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 0), subscript(bd, BRW_REGISTER_TYPE_UD, 0)); ibld.MOV(subscript(inst->dst, BRW_REGISTER_TYPE_UD, 1), @@ -5564,6 +5576,10 @@ fs_visitor::lower_find_live_channel() */ fs_reg 
exec_mask(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + const fs_builder ibld(this, block, inst); + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); + const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0); /* ce0 doesn't consider the thread dispatch mask (DMask or VMask), diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 7b4cdd726a2..d2d8d5e7ef0 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -565,6 +565,17 @@ namespace brw { } } + // Emit a SHADER_OPCODE_UNDEF for the destination of old_inst. The
// destination is retyped to BRW_REGISTER_TYPE_UD and the new instruction
// is given the same size_written as old_inst. The destination must be in
// the VGRF file (asserted). Returns the newly emitted instruction.
// The callers visible here invoke it only when old_inst is not a partial
// write, just before a lowering that rewrites the destination with several
// narrower writes, so that the split writes are not mistaken for partial
// writes of a register that is still live.
instruction * + emit_undef_for_dst(const instruction *old_inst) const + { + assert(old_inst->dst.file == VGRF); + instruction *inst = emit(SHADER_OPCODE_UNDEF, + retype(old_inst->dst, BRW_REGISTER_TYPE_UD)); + inst->size_written = old_inst->size_written; + + return inst; + } + /** * Assorted arithmetic ops. * @{ @@ -785,7 +796,7 @@ namespace brw { assert(dst.offset % REG_SIZE == 0); instruction *inst = emit(SHADER_OPCODE_UNDEF, retype(dst, BRW_REGISTER_TYPE_UD)); - inst->size_written = shader->alloc.sizes[dst.nr] * REG_SIZE; + inst->size_written = shader->alloc.sizes[dst.nr] * REG_SIZE - dst.offset; return inst; } diff --git a/src/intel/compiler/brw_fs_lower_pack.cpp b/src/intel/compiler/brw_fs_lower_pack.cpp index ac7b61de6f0..0b0f9417513 100644 --- a/src/intel/compiler/brw_fs_lower_pack.cpp +++ b/src/intel/compiler/brw_fs_lower_pack.cpp @@ -41,6 +41,13 @@ fs_visitor::lower_pack() fs_reg dst = inst->dst; const fs_builder ibld(this, block, inst); + /* The lowering generates 2 instructions for what was previously 1. This + * can trick the IR to believe we're doing partial writes, but the + * register is actually fully written. Mark it as undef to help the IR + * reduce the liveness of the register. + */ + if (!inst->is_partial_write()) + ibld.emit_undef_for_dst(inst); for (unsigned i = 0; i < inst->sources; i++) ibld.MOV(subscript(dst, inst->src[i].type, i), inst->src[i]);