From d9a9bb651ca9ddf3e14a17f88336ab6ba7c09cf8 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 9 Jul 2024 11:45:07 -0500 Subject: [PATCH] nak/legalize: Move a bunch of helpers to a trait Part-of: --- src/nouveau/compiler/nak/legalize.rs | 812 ++++++++++++++------------- 1 file changed, 407 insertions(+), 405 deletions(-) diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index e751bb590bf..837aff33469 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -7,19 +7,9 @@ use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness}; use std::collections::{HashMap, HashSet}; -fn copy_ssa(b: &mut impl SSABuilder, ssa: &mut SSAValue, reg_file: RegFile) { - let tmp = b.alloc_ssa(reg_file, 1)[0]; - b.copy_to(tmp.into(), (*ssa).into()); - *ssa = tmp; -} +pub type LegalizeBuilder<'a> = SSAInstrBuilder<'a>; -fn copy_ssa_ref(b: &mut impl SSABuilder, vec: &mut SSARef, reg_file: RegFile) { - for ssa in &mut vec[..] { - copy_ssa(b, ssa, reg_file); - } -} - -fn src_is_upred_reg(src: &Src) -> bool { +pub fn src_is_upred_reg(src: &Src) -> bool { match &src.src_ref { SrcRef::True | SrcRef::False => false, SrcRef::SSA(ssa) => { @@ -35,61 +25,7 @@ fn src_is_upred_reg(src: &Src) -> bool { } } -fn copy_pred_ssa_if_uniform(b: &mut impl SSABuilder, ssa: &mut SSAValue) { - match ssa.file() { - RegFile::Pred => (), - RegFile::UPred => copy_ssa(b, ssa, RegFile::Pred), - _ => panic!("Not a predicate value"), - } -} - -fn copy_pred_if_upred(b: &mut impl SSABuilder, pred: &mut Pred) { - match &mut pred.pred_ref { - PredRef::None => (), - PredRef::SSA(ssa) => { - copy_pred_ssa_if_uniform(b, ssa); - } - PredRef::Reg(_) => panic!("Not in SSA form"), - } -} - -fn copy_src_if_upred(b: &mut impl SSABuilder, src: &mut Src) { - match &mut src.src_ref { - SrcRef::True | SrcRef::False => (), - SrcRef::SSA(ssa) => { - assert!(ssa.comps() == 1); - copy_pred_ssa_if_uniform(b, &mut ssa[0]); - } - SrcRef::Reg(_) => panic!("Not in SSA form"), - _ => panic!("Not a predicate source"), - } -} - -fn copy_src_if_not_same_file(b: &mut impl SSABuilder, src: &mut Src) { - let SrcRef::SSA(vec) = &mut src.src_ref else { - return; - }; - - if vec.comps() == 1 { - return; - } - - let mut all_same = true; - let file = vec[0].file(); - for i in 1..vec.comps() { - let c_file = vec[usize::from(i)].file(); - if c_file != file { - debug_assert!(c_file.to_warp() == file.to_warp()); - all_same = false; - } - } - - if !all_same { - copy_ssa_ref(b, vec, file.to_warp()); - } -} - -fn src_is_reg(src: &Src, reg_file: RegFile) -> bool { +pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool { match src.src_ref { SrcRef::Zero | SrcRef::True | SrcRef::False => true, SrcRef::SSA(ssa) => ssa.file() == Some(reg_file), @@ -98,6 +34,19 @@ fn src_is_reg(src: &Src, reg_file: RegFile) -> bool { } } +pub fn swap_srcs_if_not_reg( + x: &mut Src, + y: &mut Src, + reg_file: RegFile, +) -> bool { + if !src_is_reg(x, reg_file) && src_is_reg(y, reg_file) { + std::mem::swap(x, y); + true + } else { + false + } +} + fn src_as_lop_imm(src: &Src) -> Option { let x = match src.src_ref { SrcRef::Zero => false, @@ -126,211 +75,270 @@ fn fold_lop_src(src: &Src, x: &mut u8) { } } -fn copy_alu_src( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - let val = match src_type { - SrcType::GPR - | SrcType::ALU - | SrcType::F32 - | SrcType::F16 - | SrcType::F16v2 - | SrcType::I32 - | SrcType::B32 => b.alloc_ssa(reg_file, 1), - SrcType::F64 => b.alloc_ssa(reg_file, 2), - SrcType::Pred => b.alloc_ssa(reg_file, 1), - _ => panic!("Unknown source type"), - }; - - if DEBUG.annotate() { - b.push_instr(Instr::new_boxed(OpAnnotate { - annotation: "copy generated by legalizer".into(), - })); - } - - if val.comps() == 1 { - b.copy_to(val.into(), src.src_ref.into()); - } else { - match src.src_ref { - SrcRef::Imm32(u) => { - // Immediates go in the top bits - b.copy_to(val[0].into(), 0.into()); - b.copy_to(val[1].into(), u.into()); - } - SrcRef::CBuf(cb) => { - // CBufs load 8B - b.copy_to(val[0].into(), cb.into()); - b.copy_to(val[1].into(), cb.offset(4).into()); - } - SrcRef::SSA(vec) => { - assert!(vec.comps() == 2); - b.copy_to(val[0].into(), vec[0].into()); - b.copy_to(val[1].into(), vec[1].into()); - } - _ => panic!("Invalid 64-bit SrcRef"), - } - } - - src.src_ref = val.into(); -} - -fn copy_alu_src_if_cbuf( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if matches!(src.src_ref, SrcRef::CBuf(_)) { - copy_alu_src(b, src, reg_file, src_type); - } -} - -fn copy_alu_src_if_not_reg( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if !src_is_reg(src, reg_file) { - copy_alu_src(b, src, reg_file, src_type); - } -} - -fn copy_alu_src_if_not_reg_or_imm( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if !src_is_reg(src, reg_file) && !matches!(&src.src_ref, SrcRef::Imm32(_)) { - copy_alu_src(b, src, reg_file, src_type); - } -} - fn src_is_imm(src: &Src) -> bool { matches!(src.src_ref, SrcRef::Imm32(_)) } -fn copy_alu_src_if_imm( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if src_is_imm(src) { - copy_alu_src(b, src, reg_file, src_type); +pub trait LegalizeBuildHelpers: SSABuilder { + fn copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile) { + let tmp = self.alloc_ssa(reg_file, 1)[0]; + self.copy_to(tmp.into(), (*ssa).into()); + *ssa = tmp; } -} -fn copy_alu_src_if_both_not_reg( - b: &mut impl SSABuilder, - src1: &Src, - src2: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if !src_is_reg(src1, reg_file) && !src_is_reg(src2, reg_file) { - copy_alu_src(b, src2, reg_file, src_type); - } -} - -fn swap_srcs_if_not_reg(x: &mut Src, y: &mut Src, reg_file: RegFile) -> bool { - if !src_is_reg(x, reg_file) && src_is_reg(y, reg_file) { - std::mem::swap(x, y); - true - } else { - false - } -} - -fn copy_alu_src_if_i20_overflow( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if src.as_imm_not_i20().is_some() { - copy_alu_src(b, src, reg_file, src_type); - } -} - -fn copy_alu_src_if_f20_overflow( - b: &mut impl SSABuilder, - src: &mut Src, - reg_file: RegFile, - src_type: SrcType, -) { - if src.as_imm_not_f20().is_some() { - copy_alu_src(b, src, reg_file, src_type); - } -} - -fn copy_alu_src_and_lower_fmod( - b: &mut impl SSABuilder, - src: &mut Src, - src_type: SrcType, -) { - match src_type { - SrcType::F16 | SrcType::F16v2 => { - let val = b.alloc_ssa(RegFile::GPR, 1); - b.push_op(OpHAdd2 { - dst: val.into(), - srcs: [Src::new_zero().fneg(), *src], - saturate: false, - ftz: false, - f32: false, - }); - *src = val.into(); + fn copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile) { + for ssa in &mut vec[..] { + self.copy_ssa(ssa, reg_file); } - SrcType::F32 => { - let val = b.alloc_ssa(RegFile::GPR, 1); - b.push_op(OpFAdd { - dst: val.into(), - srcs: [Src::new_zero().fneg(), *src], - saturate: false, - rnd_mode: FRndMode::NearestEven, - ftz: false, - }); - *src = val.into(); - } - SrcType::F64 => { - let val = b.alloc_ssa(RegFile::GPR, 2); - b.push_op(OpDAdd { - dst: val.into(), - srcs: [Src::new_zero().fneg(), *src], - rnd_mode: FRndMode::NearestEven, - }); - *src = val.into(); - } - _ => panic!("Invalid ffabs srouce type"), } -} -fn copy_alu_src_if_fabs( - b: &mut impl SSABuilder, - src: &mut Src, - src_type: SrcType, -) { - if src.src_mod.has_fabs() { - copy_alu_src_and_lower_fmod(b, src, src_type); + fn copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue) { + match ssa.file() { + RegFile::Pred => (), + RegFile::UPred => self.copy_ssa(ssa, RegFile::Pred), + _ => panic!("Not a predicate value"), + } } -} -fn copy_ssa_ref_if_uniform(b: &mut impl SSABuilder, ssa_ref: &mut SSARef) { - for ssa in &mut ssa_ref[..] { - if ssa.is_uniform() { - let warp = b.alloc_ssa(ssa.file().to_warp(), 1)[0]; - b.copy_to(warp.into(), (*ssa).into()); - *ssa = warp; + fn copy_pred_if_upred(&mut self, pred: &mut Pred) { + match &mut pred.pred_ref { + PredRef::None => (), + PredRef::SSA(ssa) => { + self.copy_pred_ssa_if_uniform(ssa); + } + PredRef::Reg(_) => panic!("Not in SSA form"), + } + } + + fn copy_src_if_upred(&mut self, src: &mut Src) { + match &mut src.src_ref { + SrcRef::True | SrcRef::False => (), + SrcRef::SSA(ssa) => { + assert!(ssa.comps() == 1); + self.copy_pred_ssa_if_uniform(&mut ssa[0]); + } + SrcRef::Reg(_) => panic!("Not in SSA form"), + _ => panic!("Not a predicate source"), + } + } + + fn copy_src_if_not_same_file(&mut self, src: &mut Src) { + let SrcRef::SSA(vec) = &mut src.src_ref else { + return; + }; + + if vec.comps() == 1 { + return; + } + + let mut all_same = true; + let file = vec[0].file(); + for i in 1..vec.comps() { + let c_file = vec[usize::from(i)].file(); + if c_file != file { + debug_assert!(c_file.to_warp() == file.to_warp()); + all_same = false; + } + } + + if !all_same { + self.copy_ssa_ref(vec, file.to_warp()); + } + } + + fn copy_alu_src( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + let val = match src_type { + SrcType::GPR + | SrcType::ALU + | SrcType::F32 + | SrcType::F16 + | SrcType::F16v2 + | SrcType::I32 + | SrcType::B32 => self.alloc_ssa(reg_file, 1), + SrcType::F64 => self.alloc_ssa(reg_file, 2), + SrcType::Pred => self.alloc_ssa(reg_file, 1), + _ => panic!("Unknown source type"), + }; + + if DEBUG.annotate() { + self.push_instr(Instr::new_boxed(OpAnnotate { + annotation: "copy generated by legalizer".into(), + })); + } + + if val.comps() == 1 { + self.copy_to(val.into(), src.src_ref.into()); + } else { + match src.src_ref { + SrcRef::Imm32(u) => { + // Immediates go in the top bits + self.copy_to(val[0].into(), 0.into()); + self.copy_to(val[1].into(), u.into()); + } + SrcRef::CBuf(cb) => { + // CBufs load 8B + self.copy_to(val[0].into(), cb.into()); + self.copy_to(val[1].into(), cb.offset(4).into()); + } + SrcRef::SSA(vec) => { + assert!(vec.comps() == 2); + self.copy_to(val[0].into(), vec[0].into()); + self.copy_to(val[1].into(), vec[1].into()); + } + _ => panic!("Invalid 64-bit SrcRef"), + } + } + + src.src_ref = val.into(); + } + + fn copy_alu_src_if_cbuf( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if matches!(src.src_ref, SrcRef::CBuf(_)) { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_not_reg( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if !src_is_reg(src, reg_file) { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_not_reg_or_imm( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if !src_is_reg(src, reg_file) + && !matches!(&src.src_ref, SrcRef::Imm32(_)) + { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_imm( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if src_is_imm(src) { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_both_not_reg( + &mut self, + src1: &Src, + src2: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if !src_is_reg(src1, reg_file) && !src_is_reg(src2, reg_file) { + self.copy_alu_src(src2, reg_file, src_type); + } + } + + fn copy_alu_src_and_lower_fmod( + &mut self, + src: &mut Src, + src_type: SrcType, + ) { + match src_type { + SrcType::F16 | SrcType::F16v2 => { + let val = self.alloc_ssa(RegFile::GPR, 1); + self.push_op(OpHAdd2 { + dst: val.into(), + srcs: [Src::new_zero().fneg(), *src], + saturate: false, + ftz: false, + f32: false, + }); + *src = val.into(); + } + SrcType::F32 => { + let val = self.alloc_ssa(RegFile::GPR, 1); + self.push_op(OpFAdd { + dst: val.into(), + srcs: [Src::new_zero().fneg(), *src], + saturate: false, + rnd_mode: FRndMode::NearestEven, + ftz: false, + }); + *src = val.into(); + } + SrcType::F64 => { + let val = self.alloc_ssa(RegFile::GPR, 2); + self.push_op(OpDAdd { + dst: val.into(), + srcs: [Src::new_zero().fneg(), *src], + rnd_mode: FRndMode::NearestEven, + }); + *src = val.into(); + } + _ => panic!("Invalid ffabs srouce type"), + } + } + + fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) { + if src.src_mod.has_fabs() { + self.copy_alu_src_and_lower_fmod(src, src_type); + } + } + + fn copy_alu_src_if_i20_overflow( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if src.as_imm_not_i20().is_some() { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_alu_src_if_f20_overflow( + &mut self, + src: &mut Src, + reg_file: RegFile, + src_type: SrcType, + ) { + if src.as_imm_not_f20().is_some() { + self.copy_alu_src(src, reg_file, src_type); + } + } + + fn copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef) { + for ssa in &mut ssa_ref[..] { + if ssa.is_uniform() { + let warp = self.alloc_ssa(ssa.file().to_warp(), 1)[0]; + self.copy_to(warp.into(), (*ssa).into()); + *ssa = warp; + } } } } +impl LegalizeBuildHelpers for LegalizeBuilder<'_> {} + fn legalize_sm50_instr( - b: &mut impl SSABuilder, + b: &mut LegalizeBuilder, _bl: &impl BlockLiveness, _ip: usize, instr: &mut Instr, @@ -341,148 +349,148 @@ fn legalize_sm50_instr( match &mut instr.op { Op::Shf(op) => { - copy_alu_src_if_not_reg(b, &mut op.shift, GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.high, GPR, SrcType::ALU); - copy_alu_src_if_not_reg(b, &mut op.low, GPR, SrcType::GPR); - copy_alu_src_if_i20_overflow(b, &mut op.shift, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.shift, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.high, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.low, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::GPR); } Op::Shl(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, GPR, SrcType::GPR); - copy_alu_src_if_i20_overflow(b, &mut op.shift, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::ALU); } Op::Shr(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, GPR, SrcType::GPR); - copy_alu_src_if_i20_overflow(b, &mut op.shift, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut op.shift, GPR, SrcType::ALU); } Op::FAdd(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); } Op::FMul(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, &mut op.srcs[0], GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::F32); } Op::FSet(op) => { let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, GPR) { op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F32); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); } Op::FSetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, GPR) { op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F32); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); } Op::FSwzAdd(op) => { - copy_alu_src_if_not_reg(b, &mut op.srcs[0], GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.srcs[1], GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.srcs[1], GPR, SrcType::GPR); } Op::ISetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, GPR) { op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::ALU); - copy_alu_src_if_i20_overflow(b, src1, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); } Op::Lop2(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, &mut op.srcs[0], GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::ALU); } Op::Rro(op) => { - copy_alu_src_if_f20_overflow(b, &mut op.src, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::F32); } Op::PSetP(_) => {} Op::MuFu(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); } Op::DAdd(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F64); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); } Op::DFma(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; - copy_alu_src_if_fabs(b, src0, SrcType::F64); - copy_alu_src_if_fabs(b, src1, SrcType::F64); - copy_alu_src_if_fabs(b, src2, SrcType::F64); + b.copy_alu_src_if_fabs(src0, SrcType::F64); + b.copy_alu_src_if_fabs(src1, SrcType::F64); + b.copy_alu_src_if_fabs(src2, SrcType::F64); swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F64); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); if src_is_reg(src1, GPR) { - copy_alu_src_if_imm(b, src2, GPR, SrcType::F64); + b.copy_alu_src_if_imm(src2, GPR, SrcType::F64); } else { - copy_alu_src_if_not_reg(b, src2, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64); } } Op::DMnMx(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F64); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); } Op::DMul(op) => { let [ref mut src0, ref mut src1] = op.srcs; - copy_alu_src_if_fabs(b, src0, SrcType::F64); - copy_alu_src_if_fabs(b, src1, SrcType::F64); + b.copy_alu_src_if_fabs(src0, SrcType::F64); + b.copy_alu_src_if_fabs(src1, SrcType::F64); swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F64); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); } Op::DSetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, GPR) { op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F64); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64); } Op::Sel(op) => { let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, GPR) { op.cond = op.cond.bnot(); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::ALU); - copy_alu_src_if_i20_overflow(b, src1, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); } Op::Shfl(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, GPR, SrcType::GPR); - copy_alu_src_if_not_reg_or_imm(b, &mut op.lane, GPR, SrcType::ALU); - copy_alu_src_if_not_reg_or_imm(b, &mut op.c, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg_or_imm(&mut op.lane, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg_or_imm(&mut op.c, GPR, SrcType::ALU); } Op::Vote(_) => {} Op::IAdd2(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::I32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32); } Op::I2F(op) => { - copy_alu_src_if_i20_overflow(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); } Op::F2F(op) => { - copy_alu_src_if_f20_overflow(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::ALU); } Op::I2I(op) => { - copy_alu_src_if_i20_overflow(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); } Op::IMad(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); if src_is_reg(src1, GPR) { - copy_alu_src_if_imm(b, src2, GPR, SrcType::ALU); + b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU); } else { - copy_alu_src_if_i20_overflow(b, src1, GPR, SrcType::ALU); - copy_alu_src_if_not_reg(b, src2, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU); } } Op::IMul(op) => { @@ -490,68 +498,68 @@ fn legalize_sm50_instr( if swap_srcs_if_not_reg(src0, src1, GPR) { op.signed.swap(0, 1); } - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); } Op::F2I(op) => { - copy_alu_src_if_f20_overflow(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_f20_overflow(&mut op.src, GPR, SrcType::ALU); } Op::IMnMx(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU); } Op::Ipa(op) => { - copy_alu_src_if_not_reg(b, &mut op.offset, GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.inv_w, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.offset, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.inv_w, GPR, SrcType::GPR); } Op::PopC(_) => {} Op::BRev(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.src, GPR, SrcType::ALU); } Op::Flo(op) => { - copy_alu_src_if_i20_overflow(b, &mut op.src, GPR, SrcType::ALU); + b.copy_alu_src_if_i20_overflow(&mut op.src, GPR, SrcType::ALU); } Op::FMnMx(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F32); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); } Op::Prmt(op) => { - copy_alu_src_if_not_reg(b, &mut op.srcs[0], GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.srcs[1], GPR, SrcType::GPR); - copy_alu_src_if_i20_overflow(b, &mut op.sel, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.srcs[0], GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.srcs[1], GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut op.sel, GPR, SrcType::ALU); } Op::FFma(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; - copy_alu_src_if_fabs(b, src0, SrcType::F32); - copy_alu_src_if_fabs(b, src1, SrcType::F32); - copy_alu_src_if_fabs(b, src2, SrcType::F32); + b.copy_alu_src_if_fabs(src0, SrcType::F32); + b.copy_alu_src_if_fabs(src1, SrcType::F32); + b.copy_alu_src_if_fabs(src2, SrcType::F32); swap_srcs_if_not_reg(src0, src1, GPR); - copy_alu_src_if_not_reg(b, src0, GPR, SrcType::F32); - copy_alu_src_if_not_reg(b, src2, GPR, SrcType::F32); - copy_alu_src_if_f20_overflow(b, src1, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32); + b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32); + b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32); } Op::Ldc(op) => { // TODO: cb must be a bound constant buffer - copy_alu_src_if_not_reg(b, &mut op.offset, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.offset, GPR, SrcType::GPR); } Op::Copy(_) => (), // Nothing to do Op::SuLd(op) => { - copy_alu_src_if_not_reg(b, &mut op.handle, GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.coord, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.coord, GPR, SrcType::GPR); } Op::SuAtom(op) => { - copy_alu_src_if_not_reg(b, &mut op.coord, GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.handle, GPR, SrcType::GPR); - copy_alu_src_if_not_reg(b, &mut op.data, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.coord, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.data, GPR, SrcType::GPR); } Op::Out(op) => { - copy_alu_src_if_not_reg(b, &mut op.handle, GPR, SrcType::GPR); - copy_alu_src_if_i20_overflow(b, &mut op.stream, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.handle, GPR, SrcType::GPR); + b.copy_alu_src_if_i20_overflow(&mut op.stream, GPR, SrcType::ALU); } Op::Bfe(op) => { - copy_alu_src_if_not_reg(b, &mut op.base, GPR, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.base, GPR, SrcType::ALU); } _ => { let src_types = instr.src_types(); @@ -589,7 +597,7 @@ fn legalize_sm50_instr( } fn legalize_sm70_instr( - b: &mut impl SSABuilder, + b: &mut LegalizeBuilder, bl: &impl BlockLiveness, ip: usize, instr: &mut Instr, @@ -604,7 +612,7 @@ fn legalize_sm70_instr( // Uniform instructions can't support cbufs let src_types = instr.src_types(); for (i, src) in instr.srcs_mut().iter_mut().enumerate() { - copy_alu_src_if_cbuf(b, src, gpr, src_types[i]); + b.copy_alu_src_if_cbuf(src, gpr, src_types[i]); } } @@ -612,23 +620,23 @@ fn legalize_sm70_instr( Op::FAdd(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); } Op::FFma(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F32); } Op::FMnMx(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); } Op::FMul(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); } Op::FSet(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -636,7 +644,7 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); } Op::FSetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -644,29 +652,29 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); } Op::HAdd2(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); } Op::HFma2(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); - copy_alu_src_if_not_reg(b, src1, gpr, SrcType::F16v2); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F16v2); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F16v2); // HFMA2 doesn't have fabs or fneg on SRC2. if !src2.src_mod.is_none() { - copy_alu_src_and_lower_fmod(b, src2, SrcType::F16v2); + b.copy_alu_src_and_lower_fmod(src2, SrcType::F16v2); } } Op::HMul2(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); } Op::HSet2(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -674,7 +682,7 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); } Op::HSetP2(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -682,29 +690,29 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); } Op::HMnMx2(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F16v2); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); } Op::MuFu(_) => (), // Nothing to do Op::DAdd(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); } Op::DFma(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F64); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F64); } Op::DMul(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); } Op::DSetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -712,10 +720,10 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F64); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); } Op::BMsk(op) => { - copy_alu_src_if_not_reg(b, &mut op.pos, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.pos, gpr, SrcType::ALU); } Op::BRev(_) | Op::Flo(_) => (), Op::IAbs(_) => (), @@ -732,8 +740,8 @@ fn legalize_sm70_instr( }); *src0 = val.into(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::I32); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::I32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::I32); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::I32); } Op::IAdd3X(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; @@ -749,11 +757,11 @@ fn legalize_sm70_instr( }); *src0 = val.into(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::B32); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::B32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::B32); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::B32); if !op.is_uniform() { - copy_src_if_upred(b, &mut op.carry[0]); - copy_src_if_upred(b, &mut op.carry[1]); + b.copy_src_if_upred(&mut op.carry[0]); + b.copy_src_if_upred(&mut op.carry[1]); } } Op::IDp4(op) => { @@ -762,25 +770,25 @@ fn legalize_sm70_instr( if swap_srcs_if_not_reg(src0, src1, gpr) { std::mem::swap(src_type0, src_type1); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); - copy_alu_src_if_not_reg(b, src2, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src2, gpr, SrcType::ALU); } Op::IMad(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::ALU); } Op::IMad64(op) => { let [ref mut src0, ref mut src1, ref mut src2] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); - copy_alu_src_if_both_not_reg(b, src1, src2, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); + b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::ALU); } Op::IMnMx(op) => { let [ref mut src0, ref mut src1] = op.srcs; swap_srcs_if_not_reg(src0, src1, gpr); - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); } Op::ISetP(op) => { let [ref mut src0, ref mut src1] = op.srcs; @@ -788,10 +796,10 @@ fn legalize_sm70_instr( std::mem::swap(src0, src1); op.cmp_op = op.cmp_op.flip(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); if !op.is_uniform() { - copy_src_if_upred(b, &mut op.low_cmp); - copy_src_if_upred(b, &mut op.accum); + b.copy_src_if_upred(&mut op.low_cmp); + b.copy_src_if_upred(&mut op.accum); } } Op::Lop3(op) => { @@ -819,14 +827,13 @@ fn legalize_sm70_instr( op.op = LogicOp3::new_lut(&|x, y, z| op.op.eval(x, z, y)) } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); - copy_alu_src_if_not_reg(b, src2, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src2, gpr, SrcType::ALU); } Op::PopC(_) | Op::R2UR(_) => (), Op::Shf(op) => { - copy_alu_src_if_not_reg(b, &mut op.low, gpr, SrcType::ALU); - copy_alu_src_if_both_not_reg( - b, + b.copy_alu_src_if_not_reg(&mut op.low, gpr, SrcType::ALU); + b.copy_alu_src_if_both_not_reg( &op.shift, &mut op.high, gpr, @@ -835,18 +842,18 @@ fn legalize_sm70_instr( } Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) | Op::FRnd(_) => (), Op::Prmt(op) => { - copy_alu_src_if_not_reg(b, &mut op.srcs[0], gpr, SrcType::ALU); - copy_alu_src_if_not_reg(b, &mut op.srcs[1], gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.srcs[0], gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.srcs[1], gpr, SrcType::ALU); } Op::Sel(op) => { if !op.is_uniform() { - copy_src_if_upred(b, &mut op.cond); + b.copy_src_if_upred(&mut op.cond); } let [ref mut src0, ref mut src1] = op.srcs; if swap_srcs_if_not_reg(src0, src1, gpr) { op.cond = op.cond.bnot(); } - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); } Op::PLop3(op) => { // Fold constants and modifiers if we can @@ -882,28 +889,23 @@ fn legalize_sm70_instr( *lop = LogicOp3::new_lut(&|x, y, z| lop.eval(x, z, y)) } } - copy_src_if_upred(b, src0); - copy_src_if_upred(b, src1); + b.copy_src_if_upred(src0); + b.copy_src_if_upred(src1); } } Op::FSwzAdd(op) => { let [ref mut src0, ref mut src1] = op.srcs; - copy_alu_src_if_not_reg(b, src0, gpr, SrcType::F32); - copy_alu_src_if_not_reg(b, src1, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); + b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F32); } Op::Shfl(op) => { - copy_alu_src_if_not_reg(b, &mut op.src, gpr, SrcType::GPR); - copy_alu_src_if_not_reg_or_imm(b, &mut op.lane, gpr, SrcType::ALU); - copy_alu_src_if_not_reg_or_imm(b, &mut op.c, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg(&mut op.src, gpr, SrcType::GPR); + b.copy_alu_src_if_not_reg_or_imm(&mut op.lane, gpr, SrcType::ALU); + b.copy_alu_src_if_not_reg_or_imm(&mut op.c, gpr, SrcType::ALU); } Op::Out(op) => { - copy_alu_src_if_not_reg(b, &mut op.handle, gpr, SrcType::GPR); - copy_alu_src_if_not_reg_or_imm( - b, - &mut op.stream, - gpr, - SrcType::ALU, - ); + b.copy_alu_src_if_not_reg(&mut op.handle, gpr, SrcType::GPR); + b.copy_alu_src_if_not_reg_or_imm(&mut op.stream, gpr, SrcType::ALU); } Op::Break(op) => { let bar_in = op.bar_in.src_ref.as_ssa().unwrap(); @@ -922,14 +924,14 @@ fn legalize_sm70_instr( } } Op::OutFinal(op) => { - copy_alu_src_if_not_reg(b, &mut op.handle, gpr, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.handle, gpr, SrcType::GPR); } Op::Ldc(op) => { - copy_alu_src_if_not_reg(b, &mut op.offset, gpr, SrcType::GPR); + b.copy_alu_src_if_not_reg(&mut op.offset, gpr, SrcType::GPR); } Op::BSync(_) => (), Op::Vote(op) => { - copy_src_if_upred(b, &mut op.pred); + b.copy_src_if_upred(&mut op.pred); } Op::Copy(_) => (), // Nothing to do Op::Pin(_) | Op::Unpin(_) => (), // Nothing to do @@ -943,7 +945,7 @@ fn legalize_sm70_instr( assert!(src_types[i] != SrcType::SSA); } SrcRef::SSA(ssa) => { - copy_ssa_ref_if_uniform(b, ssa); + b.copy_ssa_ref_if_uniform(ssa); } _ => panic!("Unsupported source reference"), }, @@ -967,7 +969,7 @@ fn legalize_sm70_instr( } fn legalize_instr( - b: &mut impl SSABuilder, + b: &mut LegalizeBuilder, bl: &impl BlockLiveness, block_uniform: bool, pinned: &HashSet, @@ -977,13 +979,13 @@ fn legalize_instr( if matches!(&instr.op, Op::PhiDsts(_)) { debug_assert!(instr.pred.is_true()); } else if !instr.is_uniform() { - copy_pred_if_upred(b, &mut instr.pred); + b.copy_pred_if_upred(&mut instr.pred); } let src_types = instr.src_types(); for (i, src) in instr.srcs_mut().iter_mut().enumerate() { *src = src.fold_imm(src_types[i]); - copy_src_if_not_same_file(b, src); + b.copy_src_if_not_same_file(src); if !block_uniform { // In non-uniform control-flow, we can't collect uniform vectors so @@ -994,7 +996,7 @@ fn legalize_instr( && vec.comps() > 1 && !pinned.contains(vec) { - copy_ssa_ref(b, vec, vec.file().unwrap().to_warp()); + b.copy_ssa_ref(vec, vec.file().unwrap().to_warp()); } } SrcRef::CBuf(CBufRef {