intel/fs: Combine constants for SEL instructions too

It is very common to have bcsel where the second and third sources are
both constants.  This results in a situation where we would want to emit
a SEL with two constant sources, but that's not allowed.

Previously, we would load both constants into registers, then let
constant propagation copy the last constant into the SEL instruction.
This results in the constant using an entire SIMD register instead of a
single channel.

Instead, copy propagate both sources, then let the combine-constants
pass do its thing.  In the worst case, this stores the constant in a
single channel of the SIMD register.  In the best case, it reuses a
value that was loaded into a register to satisfy another instruction.

shader-db results:

Tiger Lake, Ice Lake, and Skylake had similar results. (Ice Lake shown)
total instructions in shared programs: 19951549 -> 19948709 (-0.01%)
instructions in affected programs: 482795 -> 479955 (-0.59%)
helped: 1184 / HURT: 3

total cycles in shared programs: 858584724 -> 858205341 (-0.04%)
cycles in affected programs: 356168375 -> 355788992 (-0.11%)
helped: 1448 / HURT: 1195

total spills in shared programs: 6569 -> 6255 (-4.78%)
spills in affected programs: 912 -> 598 (-34.43%)
helped: 58 / HURT: 0

total fills in shared programs: 8218 -> 7813 (-4.93%)
fills in affected programs: 1570 -> 1165 (-25.80%)
helped: 58 / HURT: 0

LOST:   6
GAINED: 16

Broadwell
total instructions in shared programs: 17819660 -> 17819389 (<.01%)
instructions in affected programs: 1078129 -> 1077858 (-0.03%)
helped: 1067 / HURT: 304

total cycles in shared programs: 904722624 -> 905035016 (0.03%)
cycles in affected programs: 362583117 -> 362895509 (0.09%)
helped: 1381 / HURT: 1123

total spills in shared programs: 17884 -> 17922 (0.21%)
spills in affected programs: 5088 -> 5126 (0.75%)
helped: 55 / HURT: 152

total fills in shared programs: 25533 -> 26290 (2.96%)
fills in affected programs: 12992 -> 13749 (5.83%)
helped: 61 /HURT: 295

LOST:   7
GAINED: 24

Haswell
total instructions in shared programs: 16678080 -> 16673976 (-0.02%)
instructions in affected programs: 1162893 -> 1158789 (-0.35%)
helped: 1584 / HURT: 7

total cycles in shared programs: 880180082 -> 879932525 (-0.03%)
cycles in affected programs: 364067522 -> 363819965 (-0.07%)
helped: 1226 / HURT: 976

total spills in shared programs: 14937 -> 14428 (-3.41%)
spills in affected programs: 7866 -> 7357 (-6.47%)
helped: 351 / HURT: 5

total fills in shared programs: 17572 -> 16975 (-3.40%)
fills in affected programs: 11028 -> 10431 (-5.41%)
helped: 350 / HURT: 3

LOST:   8
GAINED: 16

Ivy Bridge
total instructions in shared programs: 15704044 -> 15703158 (<.01%)
instructions in affected programs: 304513 -> 303627 (-0.29%)
helped: 707 / HURT: 0

total cycles in shared programs: 433560149 -> 433471118 (-0.02%)
cycles in affected programs: 19299650 -> 19210619 (-0.46%)
helped: 687 / HURT: 395

LOST:   2
GAINED: 9

Sandy Bridge
total instructions in shared programs: 13913386 -> 13912884 (<.01%)
instructions in affected programs: 195687 -> 195185 (-0.26%)
helped: 455 / HURT: 0

total cycles in shared programs: 741156272 -> 741136266 (<.01%)
cycles in affected programs: 10934349 -> 10914343 (-0.18%)
helped: 578 / HURT: 289

LOST:   9
GAINED: 4

Iron Lake and GM45 had similar results. (Iron Lake shown)
total instructions in shared programs: 8364056 -> 8364042 (<.01%)
instructions in affected programs: 5178 -> 5164 (-0.27%)
helped: 10 / HURT: 0

total cycles in shared programs: 248759794 -> 248757940 (<.01%)
cycles in affected programs: 4305246 -> 4303392 (-0.04%)
helped: 183 / HURT: 24

fossil-db results:

Tiger Lake
Instructions in all programs: 156943594 -> 156802601 (-0.1%)
Instructions helped: 20595
Instructions hurt: 23248

Cycles in all programs: 7512086950 -> 7528386387 (+0.2%)
Cycles helped: 29531
Cycles hurt: 27837

Spills in all programs: 13500 -> 5643 (-58.2%)
Spills helped: 394
Spills hurt: 22

Fills in all programs: 18943 -> 6306 (-66.7%)
Fills helped: 394
Fills hurt: 11

Gained: 93
Lost: 76

Ice Lake
Instructions in all programs: 141395899 -> 141249621 (-0.1%)
Instructions helped: 30067
Instructions hurt: 3

Cycles in all programs: 9097127057 -> 9089668235 (-0.1%)
Cycles helped: 32268
Cycles hurt: 24315

Spills in all programs: 13695 -> 7564 (-44.8%)
Spills helped: 403

Fills in all programs: 18400 -> 8494 (-53.8%)
Fills helped: 403

Gained: 114
Lost: 137

Skylake
Instructions in all programs: 131948328 -> 131826063 (-0.1%)
Instructions helped: 29968
Instructions hurt: 3

Cycles in all programs: 8794778440 -> 8793934844 (-0.0%)
Cycles helped: 32705
Cycles hurt: 23575

Spills in all programs: 10526 -> 7039 (-33.1%)
Spills helped: 403

Fills in all programs: 11025 -> 7728 (-29.9%)
Fills helped: 403

Gained: 102
Lost: 250

Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7698>
This commit is contained in:
Ian Romanick
2020-08-04 16:39:06 -07:00
committed by Marge Bot
parent 44d62a5224
commit 64c251bb3a
2 changed files with 140 additions and 24 deletions

View File

@@ -803,19 +803,21 @@ struct fs_inst_box {
struct reg_link {
DECLARE_RALLOC_CXX_OPERATORS(reg_link)
reg_link(fs_inst *inst, unsigned src, bool negate)
: inst(inst), src(src), negate(negate) {}
reg_link(fs_inst *inst, unsigned src, bool negate, enum interpreted_type type)
: inst(inst), src(src), negate(negate), type(type) {}
struct exec_node link;
fs_inst *inst;
uint8_t src;
bool negate;
enum interpreted_type type;
};
static struct exec_node *
link(void *mem_ctx, fs_inst *inst, unsigned src, bool negate)
link(void *mem_ctx, fs_inst *inst, unsigned src, bool negate,
enum interpreted_type type)
{
reg_link *l = new(mem_ctx) reg_link(inst, src, negate);
reg_link *l = new(mem_ctx) reg_link(inst, src, negate, type);
return &l->link;
}
@@ -1107,6 +1109,7 @@ static void
add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
unsigned i,
bool must_promote,
bool allow_one_constant,
bblock_t *block,
ASSERTED const struct intel_device_info *devinfo,
void *const_ctx)
@@ -1123,7 +1126,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
v->bit_size = 8 * type_sz(inst->src[i].type);
v->instr_index = box_idx;
v->src = i;
v->allow_one_constant = false;
v->allow_one_constant = allow_one_constant;
v->no_negations = false;
switch (inst->src[i].type) {
@@ -1151,6 +1154,18 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
default:
unreachable("not reached");
}
/* It is safe to change the type of the operands of a select instruction
* that has no conditional modifier, no source modifiers, and no saturate
* modifer.
*/
if (inst->opcode == BRW_OPCODE_SEL &&
inst->conditional_mod == BRW_CONDITIONAL_NONE &&
!inst->src[0].negate && !inst->src[0].abs &&
!inst->src[1].negate && !inst->src[1].abs &&
!inst->saturate) {
v->type = either_type;
}
}
bool
@@ -1189,7 +1204,7 @@ fs_visitor::opt_combine_constants()
assert(inst->src[0].file != IMM);
if (inst->src[1].file == IMM && devinfo->ver < 8) {
add_candidate_immediate(&table, inst, ip, 1, true, block,
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
devinfo, const_ctx);
}
@@ -1204,7 +1219,7 @@ fs_visitor::opt_combine_constants()
if (can_promote_src_as_imm(devinfo, inst, i))
continue;
add_candidate_immediate(&table, inst, ip, i, true, block,
add_candidate_immediate(&table, inst, ip, i, true, false, block,
devinfo, const_ctx);
}
@@ -1216,15 +1231,38 @@ fs_visitor::opt_combine_constants()
if (inst->src[i].file != IMM)
continue;
add_candidate_immediate(&table, inst, ip, i, true, block,
add_candidate_immediate(&table, inst, ip, i, true, false, block,
devinfo, const_ctx);
}
break;
case BRW_OPCODE_SEL:
if (inst->src[0].file == IMM) {
/* It is possible to have src0 be immediate but src1 not be
* immediate for the non-commutative conditional modifiers (e.g.,
* G).
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE ||
/* Only GE and L are commutative. */
inst->conditional_mod == BRW_CONDITIONAL_GE ||
inst->conditional_mod == BRW_CONDITIONAL_L) {
assert(inst->src[1].file == IMM);
add_candidate_immediate(&table, inst, ip, 0, true, true, block,
devinfo, const_ctx);
add_candidate_immediate(&table, inst, ip, 1, true, true, block,
devinfo, const_ctx);
} else {
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
devinfo, const_ctx);
}
}
break;
case BRW_OPCODE_MOV:
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
add_candidate_immediate(&table, inst, ip, 0, false, block,
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
devinfo, const_ctx);
}
break;
@@ -1235,7 +1273,7 @@ fs_visitor::opt_combine_constants()
assert(inst->src[0].file != IMM);
if (could_coissue(devinfo, inst) && inst->src[1].file == IMM) {
add_candidate_immediate(&table, inst, ip, 1, false, block,
add_candidate_immediate(&table, inst, ip, 1, false, false, block,
devinfo, const_ctx);
}
break;
@@ -1284,7 +1322,8 @@ fs_visitor::opt_combine_constants()
const unsigned src = table.values[result->user_map[j].index].src;
imm->uses->push_tail(link(const_ctx, ib->inst, src,
result->user_map[j].negate));
result->user_map[j].negate,
result->user_map[j].type));
if (ib->must_promote)
imm->must_promote = true;
@@ -1402,6 +1441,31 @@ fs_visitor::opt_combine_constants()
for (int i = 0; i < table.len; i++) {
foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
fs_reg *reg = &link->inst->src[link->src];
if (link->inst->opcode == BRW_OPCODE_SEL) {
if (link->type == either_type) {
/* Do not change the register type. */
} else if (link->type == integer_only) {
reg->type = brw_int_type(type_sz(reg->type), true);
} else {
assert(link->type == float_only);
switch (type_sz(reg->type)) {
case 2:
reg->type = BRW_REGISTER_TYPE_HF;
break;
case 4:
reg->type = BRW_REGISTER_TYPE_F;
break;
case 8:
reg->type = BRW_REGISTER_TYPE_DF;
break;
default:
unreachable("Bad type size");
}
}
}
#ifdef DEBUG
switch (reg->type) {
case BRW_REGISTER_TYPE_DF:
@@ -1452,6 +1516,53 @@ fs_visitor::opt_combine_constants()
}
}
/* Fixup any SEL instructions that have src0 still as an immediate. Fixup
* the types of any SEL instruction that have a negation on one of the
* sources. Adding the negation may have changed the type of that source,
* so the other source (and destination) must be changed to match.
*/
for (unsigned i = 0; i < table.num_boxes; i++) {
fs_inst *inst = table.boxes[i].inst;
if (inst->opcode != BRW_OPCODE_SEL)
continue;
/* If both sources have negation, the types had better be the same! */
assert(!inst->src[0].negate || !inst->src[1].negate ||
inst->src[0].type == inst->src[1].type);
/* If either source has a negation, force the type of the other source
* and the type of the result to be the same.
*/
if (inst->src[0].negate) {
inst->src[1].type = inst->src[0].type;
inst->dst.type = inst->src[0].type;
}
if (inst->src[1].negate) {
inst->src[0].type = inst->src[1].type;
inst->dst.type = inst->src[1].type;
}
if (inst->src[0].file != IMM)
continue;
assert(inst->src[1].file != IMM);
assert(inst->conditional_mod == BRW_CONDITIONAL_NONE ||
inst->conditional_mod == BRW_CONDITIONAL_GE ||
inst->conditional_mod == BRW_CONDITIONAL_L);
fs_reg temp = inst->src[0];
inst->src[0] = inst->src[1];
inst->src[1] = temp;
/* If this was predicated, flipping operands means we also need to flip
* the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE)
inst->predicate_inverse = !inst->predicate_inverse;
}
if (debug) {
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];

View File

@@ -1040,21 +1040,26 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
if (i == 1) {
inst->src[i] = val;
progress = true;
} else if (i == 0 && inst->src[1].file != IMM &&
(inst->conditional_mod == BRW_CONDITIONAL_NONE ||
/* Only GE and L are commutative. */
inst->conditional_mod == BRW_CONDITIONAL_GE ||
inst->conditional_mod == BRW_CONDITIONAL_L)) {
inst->src[0] = inst->src[1];
inst->src[1] = val;
} else if (i == 0) {
if (inst->src[1].file != IMM &&
(inst->conditional_mod == BRW_CONDITIONAL_NONE ||
/* Only GE and L are commutative. */
inst->conditional_mod == BRW_CONDITIONAL_GE ||
inst->conditional_mod == BRW_CONDITIONAL_L)) {
inst->src[0] = inst->src[1];
inst->src[1] = val;
/* If this was predicated, flipping operands means
* we also need to flip the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
inst->predicate_inverse =
!inst->predicate_inverse;
/* If this was predicated, flipping operands means
* we also need to flip the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
inst->predicate_inverse =
!inst->predicate_inverse;
}
} else {
inst->src[0] = val;
}
progress = true;
}
break;