From a43e6addca7934f1f3aa60c029f80d20dabb1a3a Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 10 Oct 2023 13:20:44 -0500 Subject: [PATCH] nak: Fix fneg to do fadd(-0, x) Thanks to floating-point silliness, fadd(0, x) isn't a no-op but fadd(-0, x) is. Part-of: --- src/nouveau/compiler/nak_from_nir.rs | 2 +- src/nouveau/compiler/nak_ir.rs | 18 +++++++++++++++++- src/nouveau/compiler/nak_opt_copy_prop.rs | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs index 8648ee7e79e..084905bf058 100644 --- a/src/nouveau/compiler/nak_from_nir.rs +++ b/src/nouveau/compiler/nak_from_nir.rs @@ -360,7 +360,7 @@ impl<'a> ShaderFromNir<'a> { let (x, y) = match alu.op { nir_op_fabs => (srcs[0].fabs(), Src::new_zero()), nir_op_fadd => (srcs[0], srcs[1]), - nir_op_fneg => (srcs[0].fneg(), Src::new_zero()), + nir_op_fneg => (Src::new_zero().fneg(), srcs[0].fneg()), _ => panic!("Unhandled case"), }; assert!(alu.def.bit_size() == 32); diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 76f081d48a6..c7932bc4c5a 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs @@ -1088,7 +1088,23 @@ impl Src { pub fn is_zero(&self) -> bool { match self.src_ref { - SrcRef::Zero => true, + SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod { + SrcMod::None | SrcMod::FAbs | SrcMod::INeg => true, + SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false, + }, + _ => false, + } + } + + pub fn is_fneg_zero(&self, src_type: SrcType) -> bool { + match self.src_ref { + SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod { + SrcMod::FNeg | SrcMod::FNegAbs => true, + _ => false, + } + SrcRef::Imm32(0x80000000) => { + src_type == SrcType::F32 && self.src_mod.is_none() + } _ => false, } } diff --git a/src/nouveau/compiler/nak_opt_copy_prop.rs b/src/nouveau/compiler/nak_opt_copy_prop.rs index f0e7f17e54f..5b68914f879 100644 --- 
a/src/nouveau/compiler/nak_opt_copy_prop.rs +++ b/src/nouveau/compiler/nak_opt_copy_prop.rs @@ -313,9 +313,9 @@ impl CopyPropPass { let dst = dst[0]; if !add.saturate { - if add.srcs[0].is_zero() { + if add.srcs[0].is_fneg_zero(SrcType::F32) { self.add_copy(dst, SrcType::F32, add.srcs[1]); - } else if add.srcs[1].is_zero() { + } else if add.srcs[1].is_fneg_zero(SrcType::F32) { self.add_copy(dst, SrcType::F32, add.srcs[0]); } }