From 90d267b2d147cb7acef711da5f17a75c014f23eb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 3 Feb 2022 15:48:28 -0800 Subject: [PATCH] intel/fs: Fix bounds checking for integer multiplication lowering The previous bounds checking would cause mul(8) g121<1>D g120<8,8,1>D 0xec4dD to be lowered to mul(8) g121<1>D g120<8,8,1>D 0xec4dUW mul(8) g41<1>D g120<8,8,1>D 0x0000UW add(8) g121.1<2>UW g121.1<16,8,2>UW g41<16,8,2>UW Instead of picking the bounds (and the new type) based on the old type, pick the new type based on the value only. This helps a few fossil-db shaders in Witcher 3 and Geekbench5. No changes on any other Intel platforms. Tiger Lake Instructions in all programs: 157581069 -> 157580768 (-0.0%) Instructions helped: 24 Cycles in all programs: 7566979620 -> 7566977172 (-0.0%) Cycles helped: 22 Cycles hurt: 4 Ice Lake Instructions in all programs: 141998965 -> 141998667 (-0.0%) Instructions helped: 26 Cycles in all programs: 9162568666 -> 9162565297 (-0.0%) Cycles helped: 24 Cycles hurt: 2 Skylake No changes. Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a979e594439..11e5d052ec2 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3938,10 +3938,12 @@ fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block) { const fs_builder ibld(this, block, inst); - const bool ud = (inst->src[1].type == BRW_REGISTER_TYPE_UD); + /* It is correct to use inst->src[1].d in both end of the comparison. + * Using .ud in the UINT16_MAX comparison would cause any negative value to + * fail the check. + */ if (inst->src[1].file == IMM && - (( ud && inst->src[1].ud <= UINT16_MAX) || - (!ud && inst->src[1].d <= INT16_MAX && inst->src[1].d >= INT16_MIN))) { + (inst->src[1].d >= INT16_MIN && inst->src[1].d <= UINT16_MAX)) { /* The MUL instruction isn't commutative. On Gen <= 6, only the low * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of * src1 are used. @@ -3949,6 +3951,7 @@ fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block) * If multiplying by an immediate value that fits in 16-bits, do a * single MUL instruction with that value in the proper location. */ + const bool ud = (inst->src[1].d >= 0); if (devinfo->ver < 7) { fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type); ibld.MOV(imm, inst->src[1]);