nak/builder: Allow source modifiers in ineg64()
This lets us implement ineg64(x) as iadd64(0, -x, 0) and also reduces the number of cases in iadd64() itself.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30402>
This commit is contained in:

committed by
Marge Bot

parent
7f0b8a82d0
commit
8646ae7e23
@@ -372,52 +372,59 @@ pub trait SSABuilder: Builder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
|
||||||
let x = x.as_ssa().unwrap();
|
fn split_iadd64_src(src: Src) -> [Src; 2] {
|
||||||
let y = y.as_ssa().unwrap();
|
match src.src_ref {
|
||||||
|
SrcRef::Zero => [0.into(), 0.into()],
|
||||||
|
SrcRef::SSA(ssa) => {
|
||||||
|
if src.src_mod.is_ineg() {
|
||||||
|
[Src::from(ssa[0]).ineg(), Src::from(ssa[1]).bnot()]
|
||||||
|
} else {
|
||||||
|
[Src::from(ssa[0]), Src::from(ssa[1])]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => panic!("Unsupported iadd64 source"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let is_3src = !x.is_zero() && !y.is_zero() && !z.is_zero();
|
||||||
|
|
||||||
|
let x = split_iadd64_src(x);
|
||||||
|
let y = split_iadd64_src(y);
|
||||||
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
||||||
if self.sm() >= 70 {
|
if self.sm() >= 70 {
|
||||||
if let Some(z) = z.as_ssa() {
|
let carry1 = self.alloc_ssa(RegFile::Pred, 1);
|
||||||
let carry = [
|
let (carry2_dst, carry2_src) = if is_3src {
|
||||||
self.alloc_ssa(RegFile::Pred, 1),
|
let carry2 = self.alloc_ssa(RegFile::Pred, 1);
|
||||||
self.alloc_ssa(RegFile::Pred, 1),
|
(carry2.into(), carry2.into())
|
||||||
];
|
|
||||||
self.push_op(OpIAdd3 {
|
|
||||||
dst: dst[0].into(),
|
|
||||||
overflow: [carry[0].into(), carry[1].into()],
|
|
||||||
srcs: [x[0].into(), y[0].into(), z[0].into()],
|
|
||||||
});
|
|
||||||
self.push_op(OpIAdd3X {
|
|
||||||
dst: dst[1].into(),
|
|
||||||
overflow: [Dst::None, Dst::None],
|
|
||||||
srcs: [x[1].into(), y[1].into(), z[1].into()],
|
|
||||||
carry: [carry[0].into(), carry[1].into()],
|
|
||||||
});
|
|
||||||
} else {
|
} else {
|
||||||
assert!(z.is_zero());
|
// If one of the sources is known to be zero, we only need one
|
||||||
let carry = self.alloc_ssa(RegFile::Pred, 1);
|
// carry predicate.
|
||||||
|
(Dst::None, false.into())
|
||||||
|
};
|
||||||
|
|
||||||
|
let z = split_iadd64_src(z);
|
||||||
self.push_op(OpIAdd3 {
|
self.push_op(OpIAdd3 {
|
||||||
dst: dst[0].into(),
|
dst: dst[0].into(),
|
||||||
overflow: [carry.into(), Dst::None],
|
overflow: [carry1.into(), carry2_dst],
|
||||||
srcs: [x[0].into(), y[0].into(), 0.into()],
|
srcs: [x[0], y[0], z[0]],
|
||||||
});
|
});
|
||||||
self.push_op(OpIAdd3X {
|
self.push_op(OpIAdd3X {
|
||||||
dst: dst[1].into(),
|
dst: dst[1].into(),
|
||||||
overflow: [Dst::None, Dst::None],
|
overflow: [Dst::None, Dst::None],
|
||||||
srcs: [x[1].into(), y[1].into(), 0.into()],
|
srcs: [x[1], y[1], z[1]],
|
||||||
carry: [carry.into(), false.into()],
|
carry: [carry1.into(), carry2_src],
|
||||||
});
|
});
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
assert!(z.is_zero());
|
assert!(z.is_zero());
|
||||||
let carry = self.alloc_ssa(RegFile::Carry, 1);
|
let carry = self.alloc_ssa(RegFile::Carry, 1);
|
||||||
self.push_op(OpIAdd2 {
|
self.push_op(OpIAdd2 {
|
||||||
dst: dst[0].into(),
|
dst: dst[0].into(),
|
||||||
srcs: [x[0].into(), y[0].into()],
|
srcs: [x[0], y[0]],
|
||||||
carry_out: carry.into(),
|
carry_out: carry.into(),
|
||||||
});
|
});
|
||||||
self.push_op(OpIAdd2X {
|
self.push_op(OpIAdd2X {
|
||||||
dst: dst[1].into(),
|
dst: dst[1].into(),
|
||||||
srcs: [x[1].into(), y[1].into()],
|
srcs: [x[1], y[1]],
|
||||||
carry_out: Dst::None,
|
carry_out: Dst::None,
|
||||||
carry_in: carry.into(),
|
carry_in: carry.into(),
|
||||||
});
|
});
|
||||||
@@ -499,36 +506,7 @@ pub trait SSABuilder: Builder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn ineg64(&mut self, x: Src) -> SSARef {
|
fn ineg64(&mut self, x: Src) -> SSARef {
|
||||||
let x = x.as_ssa().unwrap();
|
self.iadd64(0.into(), x.ineg(), 0.into())
|
||||||
let dst = self.alloc_ssa(RegFile::GPR, 2);
|
|
||||||
if self.sm() >= 70 {
|
|
||||||
let carry = self.alloc_ssa(RegFile::Pred, 1);
|
|
||||||
self.push_op(OpIAdd3 {
|
|
||||||
dst: dst[0].into(),
|
|
||||||
overflow: [carry.into(), Dst::None],
|
|
||||||
srcs: [0.into(), Src::from(x[0]).ineg(), 0.into()],
|
|
||||||
});
|
|
||||||
self.push_op(OpIAdd3X {
|
|
||||||
dst: dst[1].into(),
|
|
||||||
overflow: [Dst::None, Dst::None],
|
|
||||||
srcs: [0.into(), Src::from(x[1]).bnot(), 0.into()],
|
|
||||||
carry: [carry.into(), SrcRef::False.into()],
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let carry = self.alloc_ssa(RegFile::Carry, 1);
|
|
||||||
self.push_op(OpIAdd2 {
|
|
||||||
dst: dst[0].into(),
|
|
||||||
srcs: [0.into(), Src::from(x[0]).ineg()],
|
|
||||||
carry_out: carry.into(),
|
|
||||||
});
|
|
||||||
self.push_op(OpIAdd2X {
|
|
||||||
dst: dst[1].into(),
|
|
||||||
srcs: [0.into(), Src::from(x[1]).bnot()],
|
|
||||||
carry_out: Dst::None,
|
|
||||||
carry_in: carry.into(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
dst
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn isetp(
|
fn isetp(
|
||||||
|
@@ -707,17 +707,28 @@ fn test_iadd64() {
|
|||||||
let run = RunSingleton::get();
|
let run = RunSingleton::get();
|
||||||
let invocations = 100;
|
let invocations = 100;
|
||||||
|
|
||||||
|
let cases = [
|
||||||
|
(SrcMod::None, SrcMod::None),
|
||||||
|
(SrcMod::INeg, SrcMod::None),
|
||||||
|
(SrcMod::None, SrcMod::INeg),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (x_mod, y_mod) in cases {
|
||||||
let mut b = TestShaderBuilder::new(run.sm.as_ref());
|
let mut b = TestShaderBuilder::new(run.sm.as_ref());
|
||||||
|
|
||||||
let x = SSARef::from([
|
let mut x = Src::from([
|
||||||
b.ld_test_data(0, MemType::B32)[0],
|
b.ld_test_data(0, MemType::B32)[0],
|
||||||
b.ld_test_data(4, MemType::B32)[0],
|
b.ld_test_data(4, MemType::B32)[0],
|
||||||
]);
|
]);
|
||||||
let y = SSARef::from([
|
x.src_mod = x_mod;
|
||||||
|
|
||||||
|
let mut y = Src::from([
|
||||||
b.ld_test_data(8, MemType::B32)[0],
|
b.ld_test_data(8, MemType::B32)[0],
|
||||||
b.ld_test_data(12, MemType::B32)[0],
|
b.ld_test_data(12, MemType::B32)[0],
|
||||||
]);
|
]);
|
||||||
let dst = b.iadd64(x.into(), y.into(), 0.into());
|
y.src_mod = y_mod;
|
||||||
|
|
||||||
|
let dst = b.iadd64(x, y, 0.into());
|
||||||
b.st_test_data(16, MemType::B32, dst[0].into());
|
b.st_test_data(16, MemType::B32, dst[0].into());
|
||||||
b.st_test_data(20, MemType::B32, dst[1].into());
|
b.st_test_data(20, MemType::B32, dst[1].into());
|
||||||
|
|
||||||
@@ -732,13 +743,20 @@ fn test_iadd64() {
|
|||||||
run.run.run(&bin, &mut data).unwrap();
|
run.run.run(&bin, &mut data).unwrap();
|
||||||
|
|
||||||
for d in &data {
|
for d in &data {
|
||||||
let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
|
let mut x = u64::from(d[0]) | (u64::from(d[1]) << 32);
|
||||||
let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
|
let mut y = u64::from(d[2]) | (u64::from(d[3]) << 32);
|
||||||
|
if x_mod.is_ineg() {
|
||||||
|
x = -(x as i64) as u64;
|
||||||
|
}
|
||||||
|
if y_mod.is_ineg() {
|
||||||
|
y = -(y as i64) as u64;
|
||||||
|
}
|
||||||
let dst = x.wrapping_add(y);
|
let dst = x.wrapping_add(y);
|
||||||
assert_eq!(d[4], dst as u32);
|
assert_eq!(d[4], dst as u32);
|
||||||
assert_eq!(d[5], (dst >> 32) as u32);
|
assert_eq!(d[5], (dst >> 32) as u32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_ineg64() {
|
fn test_ineg64() {
|
||||||
|
Reference in New Issue
Block a user