intel/fs: Don't loop in try_constant_propagate
The caller already loops over the sources. This means that the caller must loop over the sources in reverse because constant propagation prefers to propagate into the last sources first. The shader-db and fossil-db changes (below) are all due to SEL instructions. Changing the order in which sources are visited changes whether a SEL with two immediate sources is "(+f0.0) sel g12 IMM_A IMM_B" or "(-f0.0) sel g12 IMM_B IMM_A". The ordering of the sources affects the order in which constant combining encounters the values, and this determines which value is "combined" and which value remains an immediate. This affects the results by luck. If there are two instructions, "(+f0.0) sel g12 IMM_A IMM_B" and "(+f0.0) sel g13 IMM_A IMM_C", picking IMM_A is advantageous over picking IMM_B and IMM_C. Since the selection algorithm in constant combining is greedy, this case requires that the algorithm see the values in just the right order for the right thing to happen. v2: Rebase on many, many changes. Move instruction source fixup reordering out of try_constant_propagate. v3: Rebase on !7698. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25091>
This commit is contained in:
@@ -818,7 +818,7 @@ try_copy_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
|
||||
static bool
|
||||
try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
acp_entry *entry)
|
||||
acp_entry *entry, int arg)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
bool progress = false;
|
||||
@@ -826,27 +826,26 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
if (type_sz(entry->src.type) > 4)
|
||||
return false;
|
||||
|
||||
for (int i = inst->sources - 1; i >= 0; i--) {
|
||||
if (inst->src[i].file != VGRF)
|
||||
continue;
|
||||
if (inst->src[arg].file != VGRF)
|
||||
return false;
|
||||
|
||||
assert(entry->dst.file == VGRF);
|
||||
if (inst->src[i].nr != entry->dst.nr)
|
||||
continue;
|
||||
if (inst->src[arg].nr != entry->dst.nr)
|
||||
return false;
|
||||
|
||||
/* Bail if inst is reading a range that isn't contained in the range
|
||||
* that entry is writing.
|
||||
*/
|
||||
if (!region_contained_in(inst->src[i], inst->size_read(i),
|
||||
if (!region_contained_in(inst->src[arg], inst->size_read(arg),
|
||||
entry->dst, entry->size_written))
|
||||
continue;
|
||||
return false;
|
||||
|
||||
/* If the size of the use type is larger than the size of the entry
|
||||
* type, the entry doesn't contain all of the data that the user is
|
||||
* trying to use.
|
||||
*/
|
||||
if (type_sz(inst->src[i].type) > type_sz(entry->dst.type))
|
||||
continue;
|
||||
if (type_sz(inst->src[arg].type) > type_sz(entry->dst.type))
|
||||
return false;
|
||||
|
||||
fs_reg val = entry->src;
|
||||
|
||||
@@ -859,34 +858,34 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
* ...
|
||||
* mul(8) g47<1>D g86<8,8,1>D g12<16,8,2>W
|
||||
*/
|
||||
if (type_sz(inst->src[i].type) < type_sz(entry->dst.type)) {
|
||||
if (type_sz(inst->src[i].type) != 2 || type_sz(entry->dst.type) != 4)
|
||||
continue;
|
||||
if (type_sz(inst->src[arg].type) < type_sz(entry->dst.type)) {
|
||||
if (type_sz(inst->src[arg].type) != 2 || type_sz(entry->dst.type) != 4)
|
||||
return false;
|
||||
|
||||
assert(inst->src[i].subnr == 0 || inst->src[i].subnr == 2);
|
||||
assert(inst->src[arg].subnr == 0 || inst->src[arg].subnr == 2);
|
||||
|
||||
/* When subnr is 0, we want the lower 16-bits, and when it's 2, we
|
||||
* want the upper 16-bits. No other values of subnr are valid for a
|
||||
* UD source.
|
||||
*/
|
||||
const uint16_t v = inst->src[i].subnr == 2 ? val.ud >> 16 : val.ud;
|
||||
const uint16_t v = inst->src[arg].subnr == 2 ? val.ud >> 16 : val.ud;
|
||||
|
||||
val.ud = v | (uint32_t(v) << 16);
|
||||
}
|
||||
|
||||
val.type = inst->src[i].type;
|
||||
val.type = inst->src[arg].type;
|
||||
|
||||
if (inst->src[i].abs) {
|
||||
if (inst->src[arg].abs) {
|
||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
||||
!brw_abs_immediate(val.type, &val.as_brw_reg())) {
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->src[i].negate) {
|
||||
if (inst->src[arg].negate) {
|
||||
if ((devinfo->ver >= 8 && is_logic_op(inst->opcode)) ||
|
||||
!brw_negate_immediate(val.type, &val.as_brw_reg())) {
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -894,7 +893,7 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
case BRW_OPCODE_MOV:
|
||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||
case FS_OPCODE_PACK:
|
||||
inst->src[i] = val;
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
@@ -906,15 +905,15 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
if (devinfo->ver == 6)
|
||||
break;
|
||||
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SUBB:
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
@@ -925,10 +924,10 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
case BRW_OPCODE_ADD:
|
||||
case BRW_OPCODE_XOR:
|
||||
case BRW_OPCODE_ADDC:
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (i == 0 && inst->src[1].file != IMM) {
|
||||
} else if (arg == 0 && inst->src[1].file != IMM) {
|
||||
/* Don't copy propagate the constant in situations like
|
||||
*
|
||||
* mov(8) g8<1>D 0x7fffffffD
|
||||
@@ -992,11 +991,11 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
else
|
||||
break;
|
||||
|
||||
if (i == 2) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 2) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (inst->src[2].file != IMM) {
|
||||
inst->src[i] = inst->src[2];
|
||||
inst->src[arg] = inst->src[2];
|
||||
inst->src[2] = val;
|
||||
progress = true;
|
||||
}
|
||||
@@ -1005,10 +1004,10 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_IF:
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (i == 0 && inst->src[1].file != IMM) {
|
||||
} else if (arg == 0 && inst->src[1].file != IMM) {
|
||||
enum brw_conditional_mod new_cmod;
|
||||
|
||||
new_cmod = brw_swap_cmod(inst->conditional_mod);
|
||||
@@ -1025,10 +1024,10 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
if (i == 1) {
|
||||
inst->src[i] = val;
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (i == 0) {
|
||||
} else if (arg == 0) {
|
||||
if (inst->src[1].file != IMM &&
|
||||
(inst->conditional_mod == BRW_CONDITIONAL_NONE ||
|
||||
/* Only GE and L are commutative. */
|
||||
@@ -1056,9 +1055,9 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
/* The stencil and omask sources of FS_OPCODE_FB_WRITE_LOGICAL are
|
||||
* bit-cast using a strided region so they cannot be immediates.
|
||||
*/
|
||||
if (i != FB_WRITE_LOGICAL_SRC_SRC_STENCIL &&
|
||||
i != FB_WRITE_LOGICAL_SRC_OMASK) {
|
||||
inst->src[i] = val;
|
||||
if (arg != FB_WRITE_LOGICAL_SRC_SRC_STENCIL &&
|
||||
arg != FB_WRITE_LOGICAL_SRC_OMASK) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
@@ -1114,34 +1113,13 @@ try_constant_propagate(const brw_compiler *compiler, fs_inst *inst,
|
||||
case BRW_OPCODE_LRP:
|
||||
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
||||
case SHADER_OPCODE_SHUFFLE:
|
||||
inst->src[i] = val;
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* ADD3 can only have the immediate as src0. */
|
||||
if (progress && inst->opcode == BRW_OPCODE_ADD3) {
|
||||
if (inst->src[2].file == IMM) {
|
||||
const auto src0 = inst->src[0];
|
||||
inst->src[0] = inst->src[2];
|
||||
inst->src[2] = src0;
|
||||
}
|
||||
}
|
||||
|
||||
/* If only one of the sources of a 2-source, commutative instruction (e.g.,
|
||||
* AND) is immediate, it must be src1. If both are immediate, opt_algebraic
|
||||
* should fold it away.
|
||||
*/
|
||||
if (progress && inst->sources == 2 && inst->is_commutative() &&
|
||||
inst->src[0].file == IMM && inst->src[1].file != IMM) {
|
||||
const auto src1 = inst->src[1];
|
||||
inst->src[1] = inst->src[0];
|
||||
inst->src[0] = src1;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
@@ -1178,25 +1156,50 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
|
||||
|
||||
foreach_inst_in_block(fs_inst, inst, block) {
|
||||
/* Try propagating into this instruction. */
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
bool instruction_progress = false;
|
||||
for (int i = inst->sources - 1; i >= 0; i--) {
|
||||
if (inst->src[i].file != VGRF)
|
||||
continue;
|
||||
|
||||
foreach_in_list(acp_entry, entry, &acp[inst->src[i].nr % ACP_HASH_SIZE]) {
|
||||
if (entry->src.file == IMM) {
|
||||
if (try_constant_propagate(compiler, inst, entry)) {
|
||||
progress = true;
|
||||
if (try_constant_propagate(compiler, inst, entry, i)) {
|
||||
instruction_progress = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (try_copy_propagate(compiler, inst, entry, i, alloc)) {
|
||||
progress = true;
|
||||
instruction_progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (instruction_progress) {
|
||||
progress = true;
|
||||
|
||||
/* ADD3 can only have the immediate as src0. */
|
||||
if (inst->opcode == BRW_OPCODE_ADD3) {
|
||||
if (inst->src[2].file == IMM) {
|
||||
const auto src0 = inst->src[0];
|
||||
inst->src[0] = inst->src[2];
|
||||
inst->src[2] = src0;
|
||||
}
|
||||
}
|
||||
|
||||
/* If only one of the sources of a 2-source, commutative instruction (e.g.,
|
||||
* AND) is immediate, it must be src1. If both are immediate, opt_algebraic
|
||||
* should fold it away.
|
||||
*/
|
||||
if (inst->sources == 2 && inst->is_commutative() &&
|
||||
inst->src[0].file == IMM && inst->src[1].file != IMM) {
|
||||
const auto src1 = inst->src[1];
|
||||
inst->src[1] = inst->src[0];
|
||||
inst->src[0] = src1;
|
||||
}
|
||||
}
|
||||
|
||||
/* kill the destination from the ACP */
|
||||
if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) {
|
||||
foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
|
||||
|
Reference in New Issue
Block a user