diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 9819b5dcbca..3f223c92d5c 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -766,15 +766,39 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) entry->dst, entry->size_written)) continue; - /* If the type sizes don't match each channel of the instruction is - * either extracting a portion of the constant (which could be handled - * with some effort but the code below doesn't) or reading multiple - * channels of the source at once. + /* If the size of the use type is larger than the size of the entry + * type, the entry doesn't contain all of the data that the user is + * trying to use. */ - if (type_sz(inst->src[i].type) != type_sz(entry->dst.type)) + if (type_sz(inst->src[i].type) > type_sz(entry->dst.type)) continue; fs_reg val = entry->src; + + /* If the size of the use type is smaller than the size of the entry, + * clamp the value to the range of the use type. This enables constant + * copy propagation in cases like + * + * + * mov(8) g12<1>UD 0x0000000cUD + * ... + * mul(8) g47<1>D g86<8,8,1>D g12<16,8,2>W + */ + if (type_sz(inst->src[i].type) < type_sz(entry->dst.type)) { + if (type_sz(inst->src[i].type) != 2 || type_sz(entry->dst.type) != 4) + continue; + + assert(inst->src[i].subnr == 0 || inst->src[i].subnr == 2); + + /* When subnr is 0, we want the lower 16-bits, and when it's 2, we + * want the upper 16-bits. No other values of subnr are valid for a + * UD source. + */ + const uint16_t v = inst->src[i].subnr == 2 ? val.ud >> 16 : val.ud; + + val.ud = v | (uint32_t(v) << 16); + } + val.type = inst->src[i].type; if (inst->src[i].abs) {