From 55901dc287e11a31f1ff72711a44e17ef725341b Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Sat, 20 Jul 2024 16:46:39 -0500
Subject: [PATCH] nak: Add 64-bit shift helpers

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30275>
---
 src/nouveau/compiler/nak/builder.rs  | 72 +++++++++++++++++++++
 src/nouveau/compiler/nak/from_nir.rs | 93 +++-------------------------
 2 files changed, 81 insertions(+), 84 deletions(-)

diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index d0d3e9cdd7d..e4982b378ec 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -143,6 +143,42 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    fn shl64(&mut self, x: Src, shift: Src) -> SSARef {
+        let x = x.as_ssa().unwrap(); // panics if as_ssa() is None
+
+        // For 64-bit shifts, we have to use clamp mode, so we need to mask
+        // the shift to its low 6 bits in order to satisfy NIR semantics.
+        debug_assert!(shift.src_mod.is_none()); // no modifier on shift
+        let shift = if let SrcRef::Imm32(imm) = shift.src_ref {
+            (imm & 0x3f).into()
+        } else {
+            self.lop2(LogicOp2::And, shift, 0x3f.into()).into()
+        };
+
+        let dst = self.alloc_ssa(RegFile::GPR, 2);
+        self.push_op(OpShf {
+            dst: dst[0].into(),
+            low: 0.into(),
+            high: x[0].into(),
+            shift,
+            right: false,
+            wrap: false,
+            data_type: IntType::U32,
+            dst_high: true,
+        });
+        self.push_op(OpShf {
+            dst: dst[1].into(),
+            low: x[0].into(),
+            high: x[1].into(),
+            shift,
+            right: false,
+            wrap: false,
+            data_type: IntType::U64,
+            dst_high: true,
+        });
+        dst
+    }
+
     fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
         if self.sm() >= 70 {
@@ -168,6 +204,42 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    fn shr64(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
+        let x = x.as_ssa().unwrap(); // panics if as_ssa() is None
+
+        // For 64-bit shifts, we have to use clamp mode, so we need to mask
+        // the shift to its low 6 bits in order to satisfy NIR semantics.
+        debug_assert!(shift.src_mod.is_none()); // no modifier on shift
+        let shift = if let SrcRef::Imm32(imm) = shift.src_ref {
+            (imm & 0x3f).into()
+        } else {
+            self.lop2(LogicOp2::And, shift, 0x3f.into()).into()
+        };
+
+        let dst = self.alloc_ssa(RegFile::GPR, 2);
+        self.push_op(OpShf {
+            dst: dst[0].into(),
+            low: x[0].into(),
+            high: x[1].into(),
+            shift,
+            right: true,
+            wrap: false,
+            data_type: if signed { IntType::I64 } else { IntType::U64 },
+            dst_high: false,
+        });
+        self.push_op(OpShf {
+            dst: dst[1].into(),
+            low: x[0].into(),
+            high: x[1].into(),
+            shift,
+            right: true,
+            wrap: false,
+            data_type: if signed { IntType::I32 } else { IntType::U32 },
+            dst_high: true,
+        });
+        dst
+    }
+
     fn fadd(&mut self, x: Src, y: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
         self.push_op(OpFAdd {
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 6acc49bdb4d..bb59a9369ad 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -1356,76 +1356,26 @@ impl<'a> ShaderFromNir<'a> {
             }
             nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
             nir_op_ishl => {
-                let x = *srcs[0].as_ssa().unwrap();
-                let shift = srcs[1];
                 if alu.def.bit_size() == 64 {
-                    // For 64-bit shifts, we have to use clamp mode so we need
-                    // to mask the shift in order satisfy NIR semantics.
                     let shift = if let Some(s) = nir_srcs[1].comp_as_uint(0) {
-                        ((s & 0x3f) as u32).into()
+                        (s as u32).into()
                     } else {
-                        b.lop2(LogicOp2::And, shift, 0x3f.into()).into()
+                        srcs[1]
                     };
-                    let dst = b.alloc_ssa(RegFile::GPR, 2);
-                    b.push_op(OpShf {
-                        dst: dst[0].into(),
-                        low: 0.into(),
-                        high: x[0].into(),
-                        shift,
-                        right: false,
-                        wrap: false,
-                        data_type: IntType::U32,
-                        dst_high: true,
-                    });
-                    b.push_op(OpShf {
-                        dst: dst[1].into(),
-                        low: x[0].into(),
-                        high: x[1].into(),
-                        shift,
-                        right: false,
-                        wrap: false,
-                        data_type: IntType::U64,
-                        dst_high: true,
-                    });
-                    dst
+                    b.shl64(srcs[0], shift)
                 } else {
                     assert!(alu.def.bit_size() == 32);
                     b.shl(srcs[0], srcs[1])
                 }
             }
             nir_op_ishr => {
-                let x = *srcs[0].as_ssa().unwrap();
-                let shift = srcs[1];
                 if alu.def.bit_size() == 64 {
-                    // For 64-bit shifts, we have to use clamp mode so we need
-                    // to mask the shift in order satisfy NIR semantics.
                     let shift = if let Some(s) = nir_srcs[1].comp_as_uint(0) {
-                        ((s & 0x3f) as u32).into()
+                        (s as u32).into()
                     } else {
-                        b.lop2(LogicOp2::And, shift, 0x3f.into()).into()
+                        srcs[1]
                     };
-                    let dst = b.alloc_ssa(RegFile::GPR, 2);
-                    b.push_op(OpShf {
-                        dst: dst[0].into(),
-                        low: x[0].into(),
-                        high: x[1].into(),
-                        shift,
-                        right: true,
-                        wrap: false,
-                        data_type: IntType::I64,
-                        dst_high: false,
-                    });
-                    b.push_op(OpShf {
-                        dst: dst[1].into(),
-                        low: x[0].into(),
-                        high: x[1].into(),
-                        shift,
-                        right: true,
-                        wrap: false,
-                        data_type: IntType::I32,
-                        dst_high: true,
-                    });
-                    dst
+                    b.shr64(srcs[0], shift, true)
                 } else {
                     assert!(alu.def.bit_size() == 32);
                     b.shr(srcs[0], srcs[1], true)
@@ -1616,38 +1566,13 @@ impl<'a> ShaderFromNir<'a> {
                 dst
             }
             nir_op_ushr => {
-                let x = *srcs[0].as_ssa().unwrap();
-                let shift = srcs[1];
                 if alu.def.bit_size() == 64 {
-                    // For 64-bit shifts, we have to use clamp mode so we need
-                    // to mask the shift in order satisfy NIR semantics.
                     let shift = if let Some(s) = nir_srcs[1].comp_as_uint(0) {
-                        ((s & 0x3f) as u32).into()
+                        (s as u32).into()
                     } else {
-                        b.lop2(LogicOp2::And, shift, 0x3f.into()).into()
+                        srcs[1]
                     };
-                    let dst = b.alloc_ssa(RegFile::GPR, 2);
-                    b.push_op(OpShf {
-                        dst: dst[0].into(),
-                        low: x[0].into(),
-                        high: x[1].into(),
-                        shift,
-                        right: true,
-                        wrap: false,
-                        data_type: IntType::U64,
-                        dst_high: false,
-                    });
-                    b.push_op(OpShf {
-                        dst: dst[1].into(),
-                        low: x[0].into(),
-                        high: x[1].into(),
-                        shift,
-                        right: true,
-                        wrap: false,
-                        data_type: IntType::U32,
-                        dst_high: true,
-                    });
-                    dst
+                    b.shr64(srcs[0], shift, false)
                 } else {
                     assert!(alu.def.bit_size() == 32);
                     b.shr(srcs[0], srcs[1], false)