intel/fs: Combine constants for integer instructions too
v2: Remove type change for SHR with negation. This was a leftover from a previous attempt to deal with SHR and negation. Now all right-shifts with unsigned parameters are marked as not being able to have source modifiers. v3: Disallow negations on right shifts of unsigned sources by setting the no_negations flag in add_candidate_immediate. This eliminates the need to exclude SHR in can_do_source_mods. Tiger Lake total instructions in shared programs: 21102817 -> 21099443 (-0.02%) instructions in affected programs: 296796 -> 293422 (-1.14%) helped: 92 / HURT: 356 total cycles in shared programs: 790564691 -> 790393358 (-0.02%) cycles in affected programs: 36456886 -> 36285553 (-0.47%) helped: 171 / HURT: 286 total spills in shared programs: 3951 -> 3959 (0.20%) spills in affected programs: 176 -> 184 (4.55%) helped: 0 / HURT: 2 total fills in shared programs: 2631 -> 2639 (0.30%) fills in affected programs: 176 -> 184 (4.55%) helped: 0 / HURT: 2 LOST: 0 GAINED: 4 Ice Lake total instructions in shared programs: 19954204 -> 19949122 (-0.03%) instructions in affected programs: 40301 -> 35219 (-12.61%) helped: 23 / HURT: 2 total cycles in shared programs: 858377735 -> 858462082 (<.01%) cycles in affected programs: 75537286 -> 75621633 (0.11%) helped: 124 / HURT: 319 total spills in shared programs: 6255 -> 6190 (-1.04%) spills in affected programs: 392 -> 327 (-16.58%) helped: 1 / HURT: 2 total fills in shared programs: 7813 -> 7382 (-5.52%) fills in affected programs: 942 -> 511 (-45.75%) helped: 1 / HURT: 2 LOST: 0 GAINED: 3 Skylake total instructions in shared programs: 18049362 -> 18044440 (-0.03%) instructions in affected programs: 48317 -> 43395 (-10.19%) helped: 26 / HURT: 2 total cycles in shared programs: 844884806 -> 844915655 (<.01%) cycles in affected programs: 76137133 -> 76167982 (0.04%) helped: 171 / HURT: 293 total spills in shared programs: 6148 -> 6149 (0.02%) spills in affected programs: 595 -> 596 (0.17%) helped: 4 / HURT: 2 total fills in shared programs: 7484 -> 7067 (-5.57%) fills in affected programs: 1226 -> 809 (-34.01%) helped: 4 / HURT: 2 LOST: 0 GAINED: 8 Broadwell total instructions in shared programs: 17826844 -> 17821805 (-0.03%) instructions in affected programs: 60687 -> 55648 (-8.30%) helped: 28 / HURT: 8 total cycles in shared programs: 905332682 -> 904369499 (-0.11%) cycles in affected programs: 76743509 -> 75780326 (-1.26%) helped: 179 / HURT: 225 total spills in shared programs: 17922 -> 17908 (-0.08%) spills in affected programs: 2495 -> 2481 (-0.56%) helped: 6 / HURT: 8 total fills in shared programs: 26290 -> 25397 (-3.40%) fills in affected programs: 2606 -> 1713 (-34.27%) helped: 8 / HURT: 6 LOST: 1 GAINED: 1 Haswell total instructions in shared programs: 16678878 -> 16674444 (-0.03%) instructions in affected programs: 78458 -> 74024 (-5.65%) helped: 87 / HURT: 6 total cycles in shared programs: 880189381 -> 880301043 (0.01%) cycles in affected programs: 29956463 -> 30068125 (0.37%) helped: 169 / HURT: 163 total spills in shared programs: 14428 -> 14378 (-0.35%) spills in affected programs: 2384 -> 2334 (-2.10%) helped: 8 / HURT: 6 total fills in shared programs: 16975 -> 16881 (-0.55%) fills in affected programs: 1334 -> 1240 (-7.05%) helped: 10 / HURT: 4 Ivy Bridge total instructions in shared programs: 15706048 -> 15706035 (<.01%) instructions in affected programs: 9941 -> 9928 (-0.13%) helped: 13 / HURT: 0 total cycles in shared programs: 433618834 -> 433624637 (<.01%) cycles in affected programs: 12926714 -> 12932517 (0.04%) helped: 52 / HURT: 41 Sandy Bridge total cycles in shared programs: 741223552 -> 741223443 (<.01%) cycles in affected programs: 19814 -> 19705 (-0.55%) helped: 14 / HURT: 0 No changes on Iron Lake or GM45 fossil-db changes: Tiger Lake Instructions in all programs: 156858030 -> 156905532 (+0.0%) Instructions helped: 3915 Instructions hurt: 15411 Cycles in all programs: 7529667771 -> 7532117340 (+0.0%) Cycles helped: 10260 Cycles hurt: 9990 Spills in all programs: 5610 -> 5457 (-2.7%) Spills helped: 18 Fills in all programs: 6274 -> 6091 (-2.9%) Fills helped: 18 Gained: 2 Lost: 16 Ice Lake Instructions in all programs: 141308082 -> 141303083 (-0.0%) Instructions helped: 574 Instructions hurt: 172 Cycles in all programs: 9091361325 -> 9094622766 (+0.0%) Cycles helped: 8764 Cycles hurt: 11702 Spills in all programs: 7531 -> 7385 (-1.9%) Spills helped: 19 Fills in all programs: 8462 -> 8294 (-2.0%) Fills helped: 19 Gained: 22 Lost: 15 Skylake Instructions in all programs: 131872162 -> 131867263 (-0.0%) Instructions helped: 566 Instructions hurt: 172 Cycles in all programs: 8795095440 -> 8799676943 (+0.1%) Cycles helped: 8333 Cycles hurt: 12182 Spills in all programs: 7006 -> 6884 (-1.7%) Spills helped: 13 Fills in all programs: 7696 -> 7552 (-1.9%) Fills helped: 13 Gained: 24 Lost: 1 Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7698>
This commit is contained in:
@@ -1111,7 +1111,7 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
|
||||
bool must_promote,
|
||||
bool allow_one_constant,
|
||||
bblock_t *block,
|
||||
ASSERTED const struct intel_device_info *devinfo,
|
||||
const struct intel_device_info *devinfo,
|
||||
void *const_ctx)
|
||||
{
|
||||
struct value *v = new_value(table, const_ctx);
|
||||
@@ -1119,15 +1119,20 @@ add_candidate_immediate(struct table *table, fs_inst *inst, unsigned ip,
|
||||
unsigned box_idx = box_instruction(table, const_ctx, inst, ip, block,
|
||||
must_promote);
|
||||
|
||||
/* Just for now... */
|
||||
assert(inst->can_do_source_mods(devinfo));
|
||||
|
||||
v->value.u64 = inst->src[i].d64;
|
||||
v->bit_size = 8 * type_sz(inst->src[i].type);
|
||||
v->instr_index = box_idx;
|
||||
v->src = i;
|
||||
v->allow_one_constant = allow_one_constant;
|
||||
v->no_negations = false;
|
||||
|
||||
/* Right-shift instructions are special. They can have source modifiers,
|
||||
* but changing the type can change the semantic of the instruction. Only
|
||||
* allow negations on a right shift if the source type is already signed.
|
||||
*/
|
||||
v->no_negations = !inst->can_do_source_mods(devinfo) ||
|
||||
((inst->opcode == BRW_OPCODE_SHR ||
|
||||
inst->opcode == BRW_OPCODE_ASR) &&
|
||||
brw_reg_type_is_unsigned_integer(inst->src[i].type));
|
||||
|
||||
switch (inst->src[i].type) {
|
||||
case BRW_REGISTER_TYPE_DF:
|
||||
@@ -1200,8 +1205,15 @@ fs_visitor::opt_combine_constants()
|
||||
ip++;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
case SHADER_OPCODE_POW:
|
||||
assert(inst->src[0].file != IMM);
|
||||
if (inst->src[0].file == IMM) {
|
||||
assert(inst->opcode != SHADER_OPCODE_POW);
|
||||
|
||||
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
|
||||
if (inst->src[1].file == IMM && devinfo->ver < 8) {
|
||||
add_candidate_immediate(&table, inst, ip, 1, true, false, block,
|
||||
@@ -1226,6 +1238,8 @@ fs_visitor::opt_combine_constants()
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_OPCODE_BFE:
|
||||
case BRW_OPCODE_BFI2:
|
||||
case BRW_OPCODE_LRP:
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file != IMM)
|
||||
@@ -1260,6 +1274,18 @@ fs_visitor::opt_combine_constants()
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_ROL:
|
||||
case BRW_OPCODE_ROR:
|
||||
case BRW_OPCODE_SHL:
|
||||
case BRW_OPCODE_SHR:
|
||||
if (inst->src[0].file == IMM) {
|
||||
add_candidate_immediate(&table, inst, ip, 0, true, false, block,
|
||||
devinfo, const_ctx);
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MOV:
|
||||
if (could_coissue(devinfo, inst) && inst->src[0].file == IMM) {
|
||||
add_candidate_immediate(&table, inst, ip, 0, false, false, block,
|
||||
@@ -1464,6 +1490,10 @@ fs_visitor::opt_combine_constants()
|
||||
unreachable("Bad type size");
|
||||
}
|
||||
}
|
||||
} else if ((link->inst->opcode == BRW_OPCODE_SHL ||
|
||||
link->inst->opcode == BRW_OPCODE_ASR) &&
|
||||
link->negate) {
|
||||
reg->type = brw_int_type(type_sz(reg->type), true);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -1508,6 +1538,8 @@ fs_visitor::opt_combine_constants()
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(link->inst->can_do_source_mods(devinfo) || !link->negate);
|
||||
|
||||
reg->file = VGRF;
|
||||
reg->offset = table.imm[i].subreg_offset;
|
||||
reg->stride = 0;
|
||||
|
@@ -896,12 +896,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
/* FINISHME: Promote non-float constants and remove this. */
|
||||
if (devinfo->ver < 8)
|
||||
break;
|
||||
FALLTHROUGH;
|
||||
case SHADER_OPCODE_POW:
|
||||
/* Allow constant propagation into src1 (except on Gen 6 which
|
||||
* doesn't support scalar source math), and let constant combining
|
||||
@@ -909,23 +903,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
*/
|
||||
if (devinfo->ver == 6)
|
||||
break;
|
||||
FALLTHROUGH;
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_SHR:
|
||||
case BRW_OPCODE_SUBB:
|
||||
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SHL:
|
||||
/* Only constant propagate into src0 if src1 is also constant. In that
|
||||
* specific case, constant folding will eliminate the instruction.
|
||||
*/
|
||||
if ((i == 0 && inst->src[1].file == IMM) ||
|
||||
i == 1) {
|
||||
case BRW_OPCODE_SUBB:
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
progress = true;
|
||||
}
|
||||
@@ -1075,7 +1061,26 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
/* Allow constant propagation into either source (except on Gen 6
|
||||
* which doesn't support scalar source math). Constant combining
|
||||
* promote the src1 constant on Gen < 8, and it will promote the src0
|
||||
* constant on all platforms.
|
||||
*/
|
||||
if (devinfo->ver == 6)
|
||||
break;
|
||||
|
||||
FALLTHROUGH;
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_BFE:
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_BFI2:
|
||||
case BRW_OPCODE_ROL:
|
||||
case BRW_OPCODE_ROR:
|
||||
case BRW_OPCODE_SHL:
|
||||
case BRW_OPCODE_SHR:
|
||||
case BRW_OPCODE_OR:
|
||||
case SHADER_OPCODE_TEX_LOGICAL:
|
||||
case SHADER_OPCODE_TXD_LOGICAL:
|
||||
|
Reference in New Issue
Block a user