intel/fs: Combine constants for integer instructions too

v2: Remove type change for SHR with negation.  This was a leftover from
a previous attempt to deal with SHR and negation.  Now all right-shifts
with unsigned parameters are marked as not being able to have source
modifiers.

v3: Disallow negations on right shifts of unsigned sources by setting
the no_negations flag in add_candidate_immediate.  This eliminates the
need to exclude SHR in can_do_source_mods.

Tiger Lake
total instructions in shared programs: 21102817 -> 21099443 (-0.02%)
instructions in affected programs: 296796 -> 293422 (-1.14%)
helped: 92 / HURT: 356

total cycles in shared programs: 790564691 -> 790393358 (-0.02%)
cycles in affected programs: 36456886 -> 36285553 (-0.47%)
helped: 171 / HURT: 286

total spills in shared programs: 3951 -> 3959 (0.20%)
spills in affected programs: 176 -> 184 (4.55%)
helped: 0 / HURT: 2

total fills in shared programs: 2631 -> 2639 (0.30%)
fills in affected programs: 176 -> 184 (4.55%)
helped: 0 / HURT: 2

LOST:   0
GAINED: 4

Ice Lake
total instructions in shared programs: 19954204 -> 19949122 (-0.03%)
instructions in affected programs: 40301 -> 35219 (-12.61%)
helped: 23 / HURT: 2

total cycles in shared programs: 858377735 -> 858462082 (<.01%)
cycles in affected programs: 75537286 -> 75621633 (0.11%)
helped: 124 / HURT: 319

total spills in shared programs: 6255 -> 6190 (-1.04%)
spills in affected programs: 392 -> 327 (-16.58%)
helped: 1 / HURT: 2

total fills in shared programs: 7813 -> 7382 (-5.52%)
fills in affected programs: 942 -> 511 (-45.75%)
helped: 1 / HURT: 2

LOST:   0
GAINED: 3

Skylake
total instructions in shared programs: 18049362 -> 18044440 (-0.03%)
instructions in affected programs: 48317 -> 43395 (-10.19%)
helped: 26 / HURT: 2

total cycles in shared programs: 844884806 -> 844915655 (<.01%)
cycles in affected programs: 76137133 -> 76167982 (0.04%)
helped: 171 / HURT: 293

total spills in shared programs: 6148 -> 6149 (0.02%)
spills in affected programs: 595 -> 596 (0.17%)
helped: 4 / HURT: 2

total fills in shared programs: 7484 -> 7067 (-5.57%)
fills in affected programs: 1226 -> 809 (-34.01%)
helped: 4 / HURT: 2

LOST:   0
GAINED: 8

Broadwell
total instructions in shared programs: 17826844 -> 17821805 (-0.03%)
instructions in affected programs: 60687 -> 55648 (-8.30%)
helped: 28 / HURT: 8

total cycles in shared programs: 905332682 -> 904369499 (-0.11%)
cycles in affected programs: 76743509 -> 75780326 (-1.26%)
helped: 179 / HURT: 225

total spills in shared programs: 17922 -> 17908 (-0.08%)
spills in affected programs: 2495 -> 2481 (-0.56%)
helped: 6 / HURT: 8

total fills in shared programs: 26290 -> 25397 (-3.40%)
fills in affected programs: 2606 -> 1713 (-34.27%)
helped: 8 / HURT: 6

LOST:   1
GAINED: 1

Haswell
total instructions in shared programs: 16678878 -> 16674444 (-0.03%)
instructions in affected programs: 78458 -> 74024 (-5.65%)
helped: 87 / HURT: 6

total cycles in shared programs: 880189381 -> 880301043 (0.01%)
cycles in affected programs: 29956463 -> 30068125 (0.37%)
helped: 169 / HURT: 163

total spills in shared programs: 14428 -> 14378 (-0.35%)
spills in affected programs: 2384 -> 2334 (-2.10%)
helped: 8 / HURT: 6

total fills in shared programs: 16975 -> 16881 (-0.55%)
fills in affected programs: 1334 -> 1240 (-7.05%)
helped: 10 / HURT: 4

Ivy Bridge
total instructions in shared programs: 15706048 -> 15706035 (<.01%)
instructions in affected programs: 9941 -> 9928 (-0.13%)
helped: 13 / HURT: 0

total cycles in shared programs: 433618834 -> 433624637 (<.01%)
cycles in affected programs: 12926714 -> 12932517 (0.04%)
helped: 52 / HURT: 41

Sandy Bridge
total cycles in shared programs: 741223552 -> 741223443 (<.01%)
cycles in affected programs: 19814 -> 19705 (-0.55%)
helped: 14 / HURT: 0

No changes on Iron Lake or GM45

fossil-db changes:

Tiger Lake
Instructions in all programs: 156858030 -> 156905532 (+0.0%)
Instructions helped: 3915
Instructions hurt: 15411

Cycles in all programs: 7529667771 -> 7532117340 (+0.0%)
Cycles helped: 10260
Cycles hurt: 9990

Spills in all programs: 5610 -> 5457 (-2.7%)
Spills helped: 18

Fills in all programs: 6274 -> 6091 (-2.9%)
Fills helped: 18

Gained: 2
Lost: 16

Ice Lake
Instructions in all programs: 141308082 -> 141303083 (-0.0%)
Instructions helped: 574
Instructions hurt: 172

Cycles in all programs: 9091361325 -> 9094622766 (+0.0%)
Cycles helped: 8764
Cycles hurt: 11702

Spills in all programs: 7531 -> 7385 (-1.9%)
Spills helped: 19

Fills in all programs: 8462 -> 8294 (-2.0%)
Fills helped: 19

Gained: 22
Lost: 15

Skylake
Instructions in all programs: 131872162 -> 131867263 (-0.0%)
Instructions helped: 566
Instructions hurt: 172

Cycles in all programs: 8795095440 -> 8799676943 (+0.1%)
Cycles helped: 8333
Cycles hurt: 12182

Spills in all programs: 7006 -> 6884 (-1.7%)
Spills helped: 13

Fills in all programs: 7696 -> 7552 (-1.9%)
Fills helped: 13

Gained: 24
Lost: 1

Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7698>
This commit is contained in:
Ian Romanick
2020-11-12 14:50:23 -08:00
committed by Marge Bot
parent 64c251bb3a
commit c506d7e511
2 changed files with 60 additions and 23 deletions

View File

@@ -1111,7 +1111,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
bool must_promote,
bool allow_one_constant,
bblock_t *block,
ASSERTED const struct intel_device_info *devinfo,
const struct intel_device_info *devinfo,
void *const_ctx)
{
struct value *v = new_value(table, const_ctx);
@@ -1119,15 +1119,20 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
unsigned box_idx = box_instruction(table, const_ctx, inst, ip, block,
must_promote);
/* Just for now... */
assert(inst->can_do_source_mods(devinfo));
v->value.u64 = inst->src[i].d64;
v->bit_size = 8 * type_sz(inst->src[i].type);
v->instr_index = box_idx;
v->src = i;
v->allow_one_constant = allow_one_constant;
v->no_negations = false;
/* Right-shift instructions are special. They can have source modifiers,
* but changing the type can change the semantics of the instruction. Only
* allow negations on a right shift if the source type is already signed.
*/
v->no_negations = !inst->can_do_source_mods(devinfo) ||
((inst->opcode == BRW_OPCODE_SHR ||
inst->opcode == BRW_OPCODE_ASR) &&
brw_reg_type_is_unsigned_integer(inst->src[i].type));
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_DF:
@@ -1200,8 +1205,15 @@ fs_visitor::opt_combine_constants()
ip++;
switch (inst->opcode) {
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_POW:
assert(inst->src[0].file != IMM);
if (inst->src[0].file == IMM) {
assert(inst->opcode != SHADER_OPCODE_POW);
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
devinfo, const_ctx);
}
if (inst->src[1].file == IMM && devinfo->ver < 8) {
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
@@ -1226,6 +1238,8 @@ fs_visitor::opt_combine_constants()
break;
}
case BRW_OPCODE_BFE:
case BRW_OPCODE_BFI2:
case BRW_OPCODE_LRP:
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != IMM)
@@ -1260,6 +1274,18 @@ fs_visitor::opt_combine_constants()
}
break;
case BRW_OPCODE_ASR:
case BRW_OPCODE_BFI1:
case BRW_OPCODE_ROL:
case BRW_OPCODE_ROR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_SHR:
if (inst->src[0].file == IMM) {
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
devinfo, const_ctx);
}
break;
case BRW_OPCODE_MOV:
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
@@ -1464,6 +1490,10 @@ fs_visitor::opt_combine_constants()
unreachable("Bad type size");
}
}
} else if ((link->inst->opcode == BRW_OPCODE_SHL ||
link->inst->opcode == BRW_OPCODE_ASR) &&
link->negate) {
reg->type = brw_int_type(type_sz(reg->type), true);
}
#ifdef DEBUG
@@ -1508,6 +1538,8 @@ fs_visitor::opt_combine_constants()
}
#endif
assert(link->inst->can_do_source_mods(devinfo) || !link->negate);
reg->file = VGRF;
reg->offset = table.imm[i].subreg_offset;
reg->stride = 0;

View File

@@ -896,12 +896,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
progress = true;
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
/* FINISHME: Promote non-float constants and remove this. */
if (devinfo->ver < 8)
break;
FALLTHROUGH;
case SHADER_OPCODE_POW:
/* Allow constant propagation into src1 (except on Gen 6 which
* doesn't support scalar source math), and let constant combining
@@ -909,23 +903,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
*/
if (devinfo->ver == 6)
break;
FALLTHROUGH;
case BRW_OPCODE_BFI1:
case BRW_OPCODE_ASR:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SUBB:
if (i == 1) {
inst->src[i] = val;
progress = true;
}
break;
case BRW_OPCODE_SHL:
/* Only constant propagate into src0 if src1 is also constant. In that
* specific case, constant folding will eliminate the instruction.
*/
if ((i == 0 && inst->src[1].file == IMM) ||
i == 1) {
case BRW_OPCODE_SUBB:
if (i == 1) {
inst->src[i] = val;
progress = true;
}
@@ -1075,7 +1061,26 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
/* Allow constant propagation into either source (except on Gen 6
* which doesn't support scalar source math). Constant combining
* will promote the src1 constant on Gen < 8, and it will promote the src0
* constant on all platforms.
*/
if (devinfo->ver == 6)
break;
FALLTHROUGH;
case BRW_OPCODE_AND:
case BRW_OPCODE_ASR:
case BRW_OPCODE_BFE:
case BRW_OPCODE_BFI1:
case BRW_OPCODE_BFI2:
case BRW_OPCODE_ROL:
case BRW_OPCODE_ROR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_SHR:
case BRW_OPCODE_OR:
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_TXD_LOGICAL: