nak/builder: Allow source modifiers in ineg64()

This lets us implement ineg64(x) as iadd64(0, -x, 0) and also reduces
the number of cases in iadd64() itself.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30402>
This commit is contained in:
Faith Ekstrand
2024-07-26 20:32:08 -05:00
committed by Marge Bot
parent 7f0b8a82d0
commit 8646ae7e23
2 changed files with 85 additions and 89 deletions

View File

@@ -372,52 +372,59 @@ pub trait SSABuilder: Builder {
}
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
let x = x.as_ssa().unwrap();
let y = y.as_ssa().unwrap();
fn split_iadd64_src(src: Src) -> [Src; 2] {
match src.src_ref {
SrcRef::Zero => [0.into(), 0.into()],
SrcRef::SSA(ssa) => {
if src.src_mod.is_ineg() {
[Src::from(ssa[0]).ineg(), Src::from(ssa[1]).bnot()]
} else {
[Src::from(ssa[0]), Src::from(ssa[1])]
}
}
_ => panic!("Unsupported iadd64 source"),
}
}
let is_3src = !x.is_zero() && !y.is_zero() && !z.is_zero();
let x = split_iadd64_src(x);
let y = split_iadd64_src(y);
let dst = self.alloc_ssa(RegFile::GPR, 2);
if self.sm() >= 70 {
if let Some(z) = z.as_ssa() {
let carry = [
self.alloc_ssa(RegFile::Pred, 1),
self.alloc_ssa(RegFile::Pred, 1),
];
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry[0].into(), carry[1].into()],
srcs: [x[0].into(), y[0].into(), z[0].into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), z[1].into()],
carry: [carry[0].into(), carry[1].into()],
});
let carry1 = self.alloc_ssa(RegFile::Pred, 1);
let (carry2_dst, carry2_src) = if is_3src {
let carry2 = self.alloc_ssa(RegFile::Pred, 1);
(carry2.into(), carry2.into())
} else {
assert!(z.is_zero());
let carry = self.alloc_ssa(RegFile::Pred, 1);
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [x[0].into(), y[0].into(), 0.into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1].into(), y[1].into(), 0.into()],
carry: [carry.into(), false.into()],
});
}
// If one of the sources is known to be zero, we only need one
// carry predicate.
(Dst::None, false.into())
};
let z = split_iadd64_src(z);
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry1.into(), carry2_dst],
srcs: [x[0], y[0], z[0]],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [x[1], y[1], z[1]],
carry: [carry1.into(), carry2_src],
});
} else {
assert!(z.is_zero());
let carry = self.alloc_ssa(RegFile::Carry, 1);
self.push_op(OpIAdd2 {
dst: dst[0].into(),
srcs: [x[0].into(), y[0].into()],
srcs: [x[0], y[0]],
carry_out: carry.into(),
});
self.push_op(OpIAdd2X {
dst: dst[1].into(),
srcs: [x[1].into(), y[1].into()],
srcs: [x[1], y[1]],
carry_out: Dst::None,
carry_in: carry.into(),
});
@@ -499,36 +506,7 @@ pub trait SSABuilder: Builder {
}
fn ineg64(&mut self, x: Src) -> SSARef {
let x = x.as_ssa().unwrap();
let dst = self.alloc_ssa(RegFile::GPR, 2);
if self.sm() >= 70 {
let carry = self.alloc_ssa(RegFile::Pred, 1);
self.push_op(OpIAdd3 {
dst: dst[0].into(),
overflow: [carry.into(), Dst::None],
srcs: [0.into(), Src::from(x[0]).ineg(), 0.into()],
});
self.push_op(OpIAdd3X {
dst: dst[1].into(),
overflow: [Dst::None, Dst::None],
srcs: [0.into(), Src::from(x[1]).bnot(), 0.into()],
carry: [carry.into(), SrcRef::False.into()],
});
} else {
let carry = self.alloc_ssa(RegFile::Carry, 1);
self.push_op(OpIAdd2 {
dst: dst[0].into(),
srcs: [0.into(), Src::from(x[0]).ineg()],
carry_out: carry.into(),
});
self.push_op(OpIAdd2X {
dst: dst[1].into(),
srcs: [0.into(), Src::from(x[1]).bnot()],
carry_out: Dst::None,
carry_in: carry.into(),
});
}
dst
self.iadd64(0.into(), x.ineg(), 0.into())
}
fn isetp(

View File

@@ -707,36 +707,54 @@ fn test_iadd64() {
let run = RunSingleton::get();
let invocations = 100;
let mut b = TestShaderBuilder::new(run.sm.as_ref());
let cases = [
(SrcMod::None, SrcMod::None),
(SrcMod::INeg, SrcMod::None),
(SrcMod::None, SrcMod::INeg),
];
let x = SSARef::from([
b.ld_test_data(0, MemType::B32)[0],
b.ld_test_data(4, MemType::B32)[0],
]);
let y = SSARef::from([
b.ld_test_data(8, MemType::B32)[0],
b.ld_test_data(12, MemType::B32)[0],
]);
let dst = b.iadd64(x.into(), y.into(), 0.into());
b.st_test_data(16, MemType::B32, dst[0].into());
b.st_test_data(20, MemType::B32, dst[1].into());
for (x_mod, y_mod) in cases {
let mut b = TestShaderBuilder::new(run.sm.as_ref());
let bin = b.compile();
let mut x = Src::from([
b.ld_test_data(0, MemType::B32)[0],
b.ld_test_data(4, MemType::B32)[0],
]);
x.src_mod = x_mod;
let mut a = Acorn::new();
let mut data = Vec::new();
for _ in 0..invocations {
data.push([a.get_u32(), a.get_u32(), a.get_u32(), a.get_u32(), 0, 0]);
}
let mut y = Src::from([
b.ld_test_data(8, MemType::B32)[0],
b.ld_test_data(12, MemType::B32)[0],
]);
y.src_mod = y_mod;
run.run.run(&bin, &mut data).unwrap();
let dst = b.iadd64(x, y, 0.into());
b.st_test_data(16, MemType::B32, dst[0].into());
b.st_test_data(20, MemType::B32, dst[1].into());
for d in &data {
let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
let dst = x.wrapping_add(y);
assert_eq!(d[4], dst as u32);
assert_eq!(d[5], (dst >> 32) as u32);
let bin = b.compile();
let mut a = Acorn::new();
let mut data = Vec::new();
for _ in 0..invocations {
data.push([a.get_u32(), a.get_u32(), a.get_u32(), a.get_u32(), 0, 0]);
}
run.run.run(&bin, &mut data).unwrap();
for d in &data {
let mut x = u64::from(d[0]) | (u64::from(d[1]) << 32);
let mut y = u64::from(d[2]) | (u64::from(d[3]) << 32);
if x_mod.is_ineg() {
x = -(x as i64) as u64;
}
if y_mod.is_ineg() {
y = -(y as i64) as u64;
}
let dst = x.wrapping_add(y);
assert_eq!(d[4], dst as u32);
assert_eq!(d[5], (dst >> 32) as u32);
}
}
}