From d6de61cb7e9768eba477782f7afc22381e130ca9 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 23 Jul 2024 09:20:29 -0500 Subject: [PATCH] nak: Drop the old encode_sm*.rs files I don't know how these didn't get dropped earlier. Part-of: --- src/nouveau/compiler/nak/encode_sm50.rs | 2254 -------------------- src/nouveau/compiler/nak/encode_sm70.rs | 2599 ----------------------- 2 files changed, 4853 deletions(-) delete mode 100644 src/nouveau/compiler/nak/encode_sm50.rs delete mode 100644 src/nouveau/compiler/nak/encode_sm70.rs diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs deleted file mode 100644 index cce335dcd35..00000000000 --- a/src/nouveau/compiler/nak/encode_sm50.rs +++ /dev/null @@ -1,2254 +0,0 @@ -// Copyright © 2023 Collabora, Ltd. -// SPDX-License-Identifier: MIT - -use crate::ir::*; -use bitview::*; - -use std::collections::HashMap; -use std::ops::Range; - -impl Src { - fn is_reg_or_zero(&self) -> bool { - matches!(self.src_ref, SrcRef::Zero | SrcRef::Reg(_)) - } -} - -fn align_down(value: usize, align: usize) -> usize { - value / align * align -} - -fn align_up(value: usize, align: usize) -> usize { - align_down(value + (align - 1), align) -} - -struct SM50Instr { - inst: [u32; 2], - sched: u32, - sm: u8, -} - -impl BitViewable for SM50Instr { - fn bits(&self) -> usize { - BitView::new(&self.inst).bits() - } - - fn get_bit_range_u64(&self, range: Range) -> u64 { - BitView::new(&self.inst).get_bit_range_u64(range) - } -} - -impl BitMutViewable for SM50Instr { - fn set_bit_range_u64(&mut self, range: Range, val: u64) { - BitMutView::new(&mut self.inst).set_bit_range_u64(range, val); - } -} - -impl SetFieldU64 for SM50Instr { - fn set_field_u64(&mut self, range: Range, val: u64) { - BitMutView::new(&mut self.inst).set_field_u64(range, val); - } -} - -impl SM50Instr { - fn new(sm: u8) -> Self { - Self { - inst: [0x0; 2], - sched: 0x7e0, - sm, - } - } - - fn nop(sm: u8) -> Self { - let mut res = Self::new(sm); - - res.encode_nop(); - - res.set_instr_deps(&InstrDeps::new()); - - res - } - - fn set_bit(&mut self, bit: usize, val: bool) { - BitMutView::new(&mut self.inst).set_bit(bit, val); - } - - fn set_opcode(&mut self, opcode: u16) { - self.set_field(48..64, opcode); - } - - fn set_pred_reg(&mut self, range: Range, reg: RegRef) { - assert!(range.len() == 3); - assert!(reg.file() == RegFile::Pred); - assert!(reg.base_idx() <= 7); - assert!(reg.comps() == 1); - self.set_field(range, reg.base_idx()); - } - - fn set_pred(&mut self, pred: &Pred) { - assert!(!pred.is_false()); - self.set_pred_reg( - 16..19, - match pred.pred_ref { - PredRef::None => RegRef::zero(RegFile::Pred, 1), - PredRef::Reg(reg) => reg, - PredRef::SSA(_) => panic!("SSA values must be lowered"), - }, - ); - self.set_bit(19, pred.pred_inv); - } - - fn set_instr_deps(&mut self, deps: &InstrDeps) { - let mut sched = BitMutView::new(&mut self.sched); - - sched.set_field(0..4, deps.delay); - sched.set_bit(4, deps.yld); - sched.set_field(5..8, deps.wr_bar().unwrap_or(7)); - sched.set_field(8..11, deps.rd_bar().unwrap_or(7)); - sched.set_field(11..17, deps.wt_bar_mask); - sched.set_field(17..21, deps.reuse_mask); - } - - fn set_reg(&mut self, range: Range, reg: RegRef) { - assert!(range.len() == 8); - assert!(reg.file() == RegFile::GPR); - self.set_field(range, reg.base_idx()); - } - - fn set_reg_src_ref(&mut self, range: Range, src_ref: SrcRef) { - match src_ref { - SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)), - SrcRef::Reg(reg) => self.set_reg(range, reg), - _ => panic!("Not a register"), - } - } - - fn set_reg_src(&mut self, range: Range, src: Src) { - assert!(src.src_mod.is_none()); - self.set_reg_src_ref(range, src.src_ref); - } - - fn set_reg_fmod_src( - &mut self, - range: Range, - abs_bit: usize, - neg_bit: usize, - src: Src, - ) { - self.set_reg_src_ref(range, src.src_ref); - self.set_bit(abs_bit, src.src_mod.has_fabs()); - self.set_bit(neg_bit, src.src_mod.has_fneg()); - } - - fn set_reg_ineg_src( - &mut self, - range: Range, - neg_bit: usize, - src: Src, - ) { - self.set_reg_src_ref(range, src.src_ref); - self.set_bit(neg_bit, src.src_mod.is_ineg()); - } - - fn set_pred_dst(&mut self, range: Range, dst: Dst) { - match dst { - Dst::None => { - self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1)); - } - Dst::Reg(reg) => self.set_pred_reg(range, reg), - _ => panic!("Not a register"), - } - } - - fn set_pred_src(&mut self, range: Range, not_bit: usize, src: Src) { - // The default for predicates is true - let true_reg = RegRef::new(RegFile::Pred, 7, 1); - - let (not, reg) = match src.src_ref { - SrcRef::True => (false, true_reg), - SrcRef::False => (true, true_reg), - SrcRef::Reg(reg) => (false, reg), - _ => panic!("Not a register"), - }; - self.set_pred_reg(range, reg); - self.set_bit(not_bit, not ^ src.src_mod.is_bnot()); - } - - fn set_dst(&mut self, dst: Dst) { - let reg = match dst { - Dst::None => RegRef::zero(RegFile::GPR, 1), - Dst::Reg(reg) => reg, - _ => panic!("invalid dst {dst}"), - }; - self.set_reg(0..8, reg); - } - - fn set_src_imm32(&mut self, range: Range, u: u32) { - assert!(range.len() == 32); - self.set_field(range, u); - } - - fn set_src_imm_i20( - &mut self, - range: Range, - sign_bit: usize, - i: u32, - ) { - assert!(range.len() == 19); - assert!((i & 0xfff80000) == 0 || (i & 0xfff80000) == 0xfff80000); - - self.set_field(range, i & 0x7ffff); - self.set_field(sign_bit..sign_bit + 1, (i & 0x80000) >> 19); - } - - fn set_src_imm_f20( - &mut self, - range: Range, - sign_bit: usize, - f: u32, - ) { - assert!(range.len() == 19); - assert!((f & 0x00000fff) == 0); - - self.set_field(range, (f >> 12) & 0x7ffff); - self.set_field(sign_bit..sign_bit + 1, f >> 31); - } - - fn set_src_cb(&mut self, range: Range, cb: &CBufRef) { - let mut v = BitMutView::new_subset(self, range); - - assert!(cb.offset % 4 == 0); - - v.set_field(0..14, cb.offset >> 2); - if let CBuf::Binding(idx) = cb.buf { - v.set_field(14..19, idx); - } else { - panic!("Must be a bound constant buffer"); - } - } - - fn set_cb_fmod_src( - &mut self, - range: Range, - abs_bit: usize, - neg_bit: usize, - src: Src, - ) { - if let SrcRef::CBuf(cb) = &src.src_ref { - self.set_src_cb(range, cb); - } else { - panic!("Not a CBuf source"); - } - - self.set_bit(abs_bit, src.src_mod.has_fabs()); - self.set_bit(neg_bit, src.src_mod.has_fneg()); - } - - fn set_cb_ineg_src( - &mut self, - range: Range, - neg_bit: usize, - src: Src, - ) { - if let SrcRef::CBuf(cb) = &src.src_ref { - self.set_src_cb(range, cb); - } else { - panic!("Not a CBuf source"); - } - - self.set_bit(neg_bit, src.src_mod.is_ineg()); - } - - fn encode_mov(&mut self, op: &OpMov) { - match &op.src.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c98); - self.set_reg_src(20..28, op.src); - self.set_field(39..43, op.quad_lanes); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x0100); - self.set_src_imm32(20..52, *i); - self.set_field(12..16, op.quad_lanes); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c98); - self.set_src_cb(20..39, cb); - self.set_field(39..43, op.quad_lanes); - } - src => panic!("Unsupported src type for MOV: {src}"), - } - - self.set_dst(op.dst); - } - - fn encode_sel(&mut self, op: &OpSel) { - match &op.srcs[1].src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x38a0); - self.set_src_imm_i20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5ca0); - self.set_reg_src_ref(20..28, op.srcs[1].src_ref); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4ca0); - self.set_src_cb(20..39, cbuf); - } - src => panic!("Unsupported src type for SEL: {src}"), - } - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.srcs[0]); - self.set_pred_src(39..42, 42, op.cond); - } - - fn encode_shfl(&mut self, op: &OpShfl) { - self.set_opcode(0xef10); - - self.set_dst(op.dst); - self.set_pred_dst(48..51, op.in_bounds); - self.set_reg_src(8..16, op.src); - - match op.lane.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_bit(28, false); - self.set_reg_src(20..28, op.lane); - } - SrcRef::Imm32(imm) => { - self.set_bit(28, true); - self.set_field(20..25, imm & 0x1f); - } - lane => panic!("unsupported lane src type for SHFL: {lane}"), - } - match op.c.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_bit(29, false); - self.set_reg_src(39..47, op.c); - } - SrcRef::Imm32(imm) => { - self.set_bit(29, true); - self.set_field(34..47, imm & 0x1f1f); - } - c => panic!("unsupported c src type for SHFL: {c}"), - } - - self.set_field( - 30..32, - match op.op { - ShflOp::Idx => 0u8, - ShflOp::Up => 1u8, - ShflOp::Down => 2u8, - ShflOp::Bfly => 3u8, - }, - ); - } - - fn encode_vote(&mut self, op: &OpVote) { - self.set_opcode(0x50d8); - - self.set_dst(op.ballot); - self.set_pred_dst(45..48, op.vote); - self.set_pred_src(39..42, 42, op.pred); - - self.set_field( - 48..50, - match op.op { - VoteOp::All => 0u8, - VoteOp::Any => 1u8, - VoteOp::Eq => 2u8, - }, - ); - } - - fn encode_psetp(&mut self, op: &OpPSetP) { - self.set_opcode(0x5090); - - self.set_pred_dst(3..6, op.dsts[0]); - self.set_pred_dst(0..3, op.dsts[1]); - - self.set_pred_src(12..15, 15, op.srcs[0]); - self.set_pred_src(29..32, 32, op.srcs[1]); - self.set_pred_src(39..42, 42, op.srcs[2]); - - self.set_pred_set_op(24..26, op.ops[0]); - self.set_pred_set_op(45..47, op.ops[1]); - } - - fn set_mem_type(&mut self, range: Range, mem_type: MemType) { - assert!(range.len() == 3); - self.set_field( - range, - match mem_type { - MemType::U8 => 0_u8, - MemType::I8 => 1_u8, - MemType::U16 => 2_u8, - MemType::I16 => 3_u8, - MemType::B32 => 4_u8, - MemType::B64 => 5_u8, - MemType::B128 => 6_u8, - }, - ); - } - - fn set_mem_order(&mut self, _order: &MemOrder) { - // TODO: order and scope aren't present before SM70, what should we do? - } - - fn set_mem_access(&mut self, access: &MemAccess) { - self.set_field( - 45..46, - match access.space.addr_type() { - MemAddrType::A32 => 0_u8, - MemAddrType::A64 => 1_u8, - }, - ); - self.set_mem_type(48..51, access.mem_type); - self.set_mem_order(&access.order); - } - - fn set_image_dim(&mut self, range: Range, dim: ImageDim) { - assert!(range.len() == 3); - self.set_field( - range, - match dim { - ImageDim::_1D => 0_u8, - ImageDim::_1DBuffer => 1_u8, - ImageDim::_1DArray => 2_u8, - ImageDim::_2D => 3_u8, - ImageDim::_2DArray => 4_u8, - ImageDim::_3D => 5_u8, - }, - ); - } - - fn set_rnd_mode(&mut self, range: Range, rnd_mode: FRndMode) { - assert!(range.len() == 2); - self.set_field( - range, - match rnd_mode { - FRndMode::NearestEven => 0_u8, - FRndMode::NegInf => 1_u8, - FRndMode::PosInf => 2_u8, - FRndMode::Zero => 3_u8, - }, - ); - } - - fn encode_ldg(&mut self, op: &OpLd) { - self.set_opcode(0xeed0); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - - self.set_mem_access(&op.access); - } - - fn encode_ldl(&mut self, op: &OpLd) { - self.set_opcode(0xef40); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - - self.set_mem_access(&op.access); - } - - fn encode_lds(&mut self, op: &OpLd) { - self.set_opcode(0xef48); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - - self.set_mem_access(&op.access); - } - - fn encode_ld(&mut self, op: &OpLd) { - match op.access.space { - MemSpace::Global(_) => self.encode_ldg(op), - MemSpace::Local => self.encode_ldl(op), - MemSpace::Shared => self.encode_lds(op), - } - } - - fn encode_ldc(&mut self, op: &OpLdc) { - assert!(op.cb.src_mod.is_none()); - let SrcRef::CBuf(cb) = &op.cb.src_ref else { - panic!("Not a CBuf source"); - }; - let CBuf::Binding(cb_idx) = cb.buf else { - panic!("Must be a bound constant buffer"); - }; - - self.set_opcode(0xef90); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.offset); - self.set_field(20..36, cb.offset); - self.set_field(36..41, cb_idx); - self.set_field( - 44..46, - match op.mode { - LdcMode::Indexed => 0_u8, - LdcMode::IndexedLinear => 1_u8, - LdcMode::IndexedSegmented => 2_u8, - LdcMode::IndexedSegmentedLinear => 3_u8, - }, - ); - self.set_mem_type(48..51, op.mem_type); - } - - fn encode_stg(&mut self, op: &OpSt) { - self.set_opcode(0xeed8); - - self.set_reg_src(0..8, op.data); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - self.set_mem_access(&op.access); - } - - fn encode_stl(&mut self, op: &OpSt) { - self.set_opcode(0xef50); - - self.set_reg_src(0..8, op.data); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - self.set_mem_access(&op.access); - } - - fn encode_sts(&mut self, op: &OpSt) { - self.set_opcode(0xef58); - - self.set_reg_src(0..8, op.data); - self.set_reg_src(8..16, op.addr); - self.set_field(20..44, op.offset); - self.set_mem_access(&op.access); - } - - fn encode_st(&mut self, op: &OpSt) { - match op.access.space { - MemSpace::Global(_) => self.encode_stg(op), - MemSpace::Local => self.encode_stl(op), - MemSpace::Shared => self.encode_sts(op), - } - } - - fn encode_lop2(&mut self, op: &OpLop2) { - if let Some(imm32) = op.srcs[1].as_imm_not_i20() { - self.set_opcode(0x0400); - - self.set_dst(op.dst); - self.set_reg_src_ref(8..16, op.srcs[0].src_ref); - self.set_bit(55, op.srcs[0].src_mod.is_bnot()); - self.set_src_imm32(20..52, imm32); - - self.set_field( - 53..55, - match op.op { - LogicOp2::And => 0_u8, - LogicOp2::Or => 1_u8, - LogicOp2::Xor => 2_u8, - LogicOp2::PassB => { - panic!("PASS_B is not supported for LOP32I"); - } - }, - ); - } else { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c40); - self.set_reg_src_ref(20..28, op.srcs[1].src_ref); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3840); - self.set_src_imm_i20(20..39, 56, *i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c40); - self.set_src_cb(20..39, cb); - } - src1 => panic!("unsupported src1 type for IMUL: {src1}"), - } - - self.set_dst(op.dst); - self.set_reg_src_ref(8..16, op.srcs[0].src_ref); - - self.set_bit(39, op.srcs[0].src_mod.is_bnot()); - self.set_bit(40, op.srcs[1].src_mod.is_bnot()); - - self.set_field( - 41..43, - match op.op { - LogicOp2::And => 0_u8, - LogicOp2::Or => 1_u8, - LogicOp2::Xor => 2_u8, - LogicOp2::PassB => 3_u8, - }, - ); - - self.set_pred_dst(48..51, Dst::None); - } - } - - fn encode_shf(&mut self, op: &OpShf) { - match &op.shift.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5cf8); - self.set_reg_src(20..28, op.shift); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x38f8); - assert!(op.shift.src_mod.is_none()); - self.set_src_imm_i20(20..39, 56, *i); - } - src1 => panic!("unsupported src1 type for SHF: {src1}"), - } - - self.set_field( - 37..39, - match op.data_type { - IntType::I32 => 0_u8, - IntType::U32 => 0_u8, - IntType::U64 => 2_u8, - IntType::I64 => 3_u8, - _ => panic!("Invalid shift data type"), - }, - ); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.low); - self.set_reg_src(39..47, op.high); - - self.set_bit(47, false); // .CC - self.set_bit(48, op.dst_high); - self.set_bit(49, false); // .X - self.set_bit(50, op.wrap); - } - - fn encode_shl(&mut self, op: &OpShl) { - self.set_dst(op.dst); - self.set_reg_src(8..16, op.src); - match op.shift.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c48); - self.set_reg_src(20..28, op.shift); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3848); - self.set_src_imm_i20(20..39, 56, i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c48); - self.set_src_cb(20..39, &cb); - } - src1 => panic!("unsupported src1 type for SHL: {src1}"), - } - - self.set_bit(39, op.wrap); - } - - fn encode_shr(&mut self, op: &OpShr) { - self.set_dst(op.dst); - self.set_reg_src(8..16, op.src); - match op.shift.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c28); - self.set_reg_src(20..28, op.shift); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3828); - self.set_src_imm_i20(20..39, 56, i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c28); - self.set_src_cb(20..39, &cb); - } - src1 => panic!("unsupported src1 type for SHL: {src1}"), - } - - self.set_bit(39, op.wrap); - self.set_bit(48, op.signed); - } - - fn encode_i2f(&mut self, op: &OpI2F) { - let abs_bit = 49; - let neg_bit = 45; - - match &op.src.src_ref { - SrcRef::Imm32(imm) => { - self.set_opcode(0x38b8); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5cb8); - self.set_reg_fmod_src(20..28, abs_bit, neg_bit, op.src); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4cb8); - self.set_cb_fmod_src(20..39, abs_bit, neg_bit, op.src); - } - src => panic!("Unsupported src type for I2F: {src}"), - } - - self.set_field(41..43, 0_u8); // TODO: subop - self.set_bit(13, op.src_type.is_signed()); - self.set_field(8..10, (op.dst_type.bits() / 8).ilog2()); - self.set_rnd_mode(39..41, op.rnd_mode); - self.set_field(10..12, (op.src_type.bits() / 8).ilog2()); - - self.set_dst(op.dst); - } - - fn encode_f2f(&mut self, op: &OpF2F) { - assert!(op.src.is_reg_or_zero()); - - let abs_bit = 49; - let neg_bit = 45; - - match &op.src.src_ref { - SrcRef::Imm32(imm) => { - self.set_opcode(0x38a8); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5ca8); - self.set_reg_fmod_src(20..28, abs_bit, neg_bit, op.src); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4ca8); - self.set_cb_fmod_src(20..39, abs_bit, neg_bit, op.src); - } - src => panic!("Unsupported src type for F2F: {src}"), - } - - // no saturation in the IR, would be bit 50 - self.set_field(8..10, (op.dst_type.bits() / 8).ilog2()); - self.set_field(10..12, (op.src_type.bits() / 8).ilog2()); - self.set_rnd_mode(39..41, op.rnd_mode); - self.set_bit(42, op.integer_rnd); - self.set_bit(44, op.ftz); - - self.set_dst(op.dst); - } - - fn encode_i2i(&mut self, op: &OpI2I) { - match &op.src.src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x38e0); - self.set_src_imm_i20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5ce0); - self.set_reg_src(20..28, op.src); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4ce0); - self.set_src_cb(20..39, cbuf); - } - src => panic!("Unsupported src type for I2I: {src}"), - } - - self.set_bit(45, op.neg); - self.set_bit(49, op.abs); - self.set_bit(50, op.saturate); - self.set_bit(12, op.dst_type.is_signed()); - self.set_bit(13, op.src_type.is_signed()); - self.set_field(8..10, (op.dst_type.bits() / 8).ilog2()); - self.set_field(10..12, (op.src_type.bits() / 8).ilog2()); - self.set_field(41..43, 0u8); // src.B1-3 - self.set_bit(47, false); // dst.CC - - self.set_dst(op.dst); - } - - fn encode_imad(&mut self, op: &OpIMad) { - let neg_1_bit = 51; - let neg_2_bit = 52; - - match &op.srcs[2].src_ref { - SrcRef::Imm32(imm) => { - panic!("Invalid immediate src2 for IMAD {}", *imm) - } - SrcRef::Reg(_) => match &op.srcs[1].src_ref { - SrcRef::Imm32(imm) => { - self.set_opcode(0x3400); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5a00); - self.set_reg_ineg_src(20..28, neg_1_bit, op.srcs[1]); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4a00); - self.set_cb_ineg_src(20..39, neg_1_bit, op.srcs[1]); - } - - src => panic!("Invalid src1 for IMAD {src}"), - }, - SrcRef::CBuf(_) => { - self.set_opcode(0x5200); - self.set_reg_ineg_src(39..47, neg_1_bit, op.srcs[1]); - self.set_cb_ineg_src(20..39, neg_2_bit, op.srcs[2]); - } - src => panic!("Unsupported src2 type for F2F: {src}"), - } - - self.set_bit(48, op.signed); // src0 signed - self.set_bit( - 51, - op.srcs[0].src_mod.is_ineg() ^ op.srcs[1].src_mod.is_ineg(), - ); - self.set_bit(53, op.signed); // src1 signed - - self.set_reg_src(8..16, op.srcs[0]); - self.set_dst(op.dst); - } - - fn encode_imul(&mut self, op: &OpIMul) { - assert!(op.srcs[0].src_mod.is_none()); - assert!(op.srcs[1].src_mod.is_none()); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.srcs[0]); - - if let Some(i) = op.srcs[1].as_imm_not_i20() { - self.set_opcode(0x1fc0); - self.set_src_imm32(20..52, i); - - self.set_bit(53, op.high); - self.set_bit(54, op.signed[0]); - self.set_bit(55, op.signed[1]); - } else { - match op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c38); - self.set_reg_src(20..28, op.srcs[1]); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3838); - self.set_src_imm_i20(20..39, 56, i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c38); - self.set_src_cb(20..39, &cb); - } - src1 => panic!("unsupported src1 type for IMUL: {src1}"), - }; - - self.set_bit(39, op.high); - self.set_bit(40, op.signed[0]); - self.set_bit(41, op.signed[1]); - } - } - - fn encode_f2i(&mut self, op: &OpF2I) { - match &op.src.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5cb0); - self.set_reg_fmod_src(20..28, 49, 45, op.src); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x38b0); - self.set_src_imm_f20(20..39, 56, *i); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4cb0); - self.set_cb_fmod_src(20..39, 49, 45, op.src); - } - src => panic!("Unsupported src type for F2I: {src}"), - } - - self.set_dst(op.dst); - - self.set_field(8..10, (op.dst_type.bits() / 8).ilog2()); - self.set_field(10..12, (op.src_type.bits() / 8).ilog2()); - self.set_bit(12, op.dst_type.is_signed()); - self.set_rnd_mode(39..41, op.rnd_mode); - self.set_bit(44, op.ftz); - self.set_bit(47, false); // .CC - } - - fn set_pred_set_op(&mut self, range: Range, op: PredSetOp) { - assert!(range.len() == 2); - self.set_field( - range, - match op { - PredSetOp::And => 0_u8, - PredSetOp::Or => 1_u8, - PredSetOp::Xor => 2_u8, - }, - ); - } - - fn encode_imnmx(&mut self, op: &OpIMnMx) { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c20); - self.set_reg_src(20..28, op.srcs[1]); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3820); - self.set_src_imm_f20(20..39, 56, *i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c20); - self.set_src_cb(20..39, cb); - } - src1 => panic!("unsupported src1 type for IMNMX: {src1}"), - } - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.srcs[0]); - self.set_pred_src(39..42, 42, op.min); - self.set_bit(47, false); // .CC - self.set_bit( - 48, - match op.cmp_type { - IntCmpType::U32 => false, - IntCmpType::I32 => true, - }, - ); - } - - fn set_int_cmp_op(&mut self, range: Range, op: IntCmpOp) { - assert!(range.len() == 3); - self.set_field( - range, - match op { - IntCmpOp::Eq => 2_u8, - IntCmpOp::Ne => 5_u8, - IntCmpOp::Lt => 1_u8, - IntCmpOp::Le => 3_u8, - IntCmpOp::Gt => 4_u8, - IntCmpOp::Ge => 6_u8, - }, - ); - } - - fn encode_isetp(&mut self, op: &OpISetP) { - assert!(op.srcs[0].src_mod.is_none()); - assert!(op.srcs[1].src_mod.is_none()); - - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5b60); - self.set_reg_src(20..28, op.srcs[1]); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3660); - self.set_src_imm_i20(20..39, 56, *i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4b60); - self.set_src_cb(20..39, cb); - } - _ => panic!("Unsupported src type"), - } - - self.set_pred_dst(0..3, Dst::None); // dst1 - self.set_pred_dst(3..6, op.dst); - self.set_reg_src(8..16, op.srcs[0]); - self.set_pred_src(39..42, 42, op.accum); - - self.set_bit(43, false); // .X - self.set_pred_set_op(45..47, op.set_op); - - self.set_field( - 48..49, - match op.cmp_type { - IntCmpType::U32 => 0_u32, - IntCmpType::I32 => 1_u32, - }, - ); - self.set_int_cmp_op(49..52, op.cmp_op); - } - - fn encode_sust(&mut self, op: &OpSuSt) { - self.set_opcode(0xeb20); - - self.set_reg_src(8..16, op.coord); - self.set_reg_src(0..8, op.data); - self.set_reg_src(39..47, op.handle); - - self.set_image_dim(33..36, op.image_dim); - self.set_mem_order(&op.mem_order); - - assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); - self.set_field(20..24, op.mask); - } - - fn set_atom_op(&mut self, range: Range, atom_op: AtomOp) { - assert!(range.len() == 4); - self.set_field( - range, - match atom_op { - AtomOp::Add => 0_u8, - AtomOp::Min => 1_u8, - AtomOp::Max => 2_u8, - AtomOp::Inc => 3_u8, - AtomOp::Dec => 4_u8, - AtomOp::And => 5_u8, - AtomOp::Or => 6_u8, - AtomOp::Xor => 7_u8, - AtomOp::Exch => 8_u8, - AtomOp::CmpExch => panic!("CmpXchg not yet supported"), - }, - ); - } - - fn encode_atomg(&mut self, op: &OpAtom) { - self.set_opcode(0xed00); - self.set_mem_order(&op.mem_order); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.addr); - self.set_reg_src(20..28, op.data); - self.set_field(28..48, op.addr_offset); - self.set_field( - 48..49, - match op.mem_space.addr_type() { - MemAddrType::A32 => 0_u8, - MemAddrType::A64 => 1_u8, - }, - ); - self.set_field( - 49..52, - match op.atom_type { - AtomType::U32 => 0_u8, - AtomType::I32 => 1_u8, - AtomType::U64 => 2_u8, - AtomType::F32 => 3_u8, - // NOTE: U128 => 4_u8, - AtomType::I64 => 5_u8, - // TODO: do something about ATOMG.F64 - other => panic!("ATOMG.{other} not supported on SM50"), - }, - ); - self.set_atom_op(52..56, op.atom_op); - } - - fn encode_atoms(&mut self, op: &OpAtom) { - self.set_opcode(0xec00); - self.set_mem_order(&op.mem_order); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.addr); - self.set_reg_src(20..28, op.data); - self.set_field( - 28..30, - match op.atom_type { - AtomType::U32 => 0_u8, - AtomType::I32 => 1_u8, - AtomType::U64 => 2_u8, - AtomType::I64 => 3_u8, - // TODO: do something about ATOMS.F{32,64} - other => panic!("ATOMS.{other} not supported on SM50"), - }, - ); - assert_eq!(op.addr_offset % 4, 0); - self.set_field(30..52, op.addr_offset / 4); - self.set_atom_op(52..56, op.atom_op); - } - - fn encode_atom(&mut self, op: &OpAtom) { - match op.mem_space { - MemSpace::Global(_) => self.encode_atomg(op), - MemSpace::Local => panic!("Atomics do not support local"), - MemSpace::Shared => self.encode_atoms(op), - } - } - - fn set_tex_dim(&mut self, range: Range, dim: TexDim) { - assert!(range.len() == 3); - self.set_field( - range, - match dim { - TexDim::_1D => 0_u8, - TexDim::Array1D => 1_u8, - TexDim::_2D => 2_u8, - TexDim::Array2D => 3_u8, - TexDim::_3D => 4_u8, - TexDim::Cube => 6_u8, - TexDim::ArrayCube => 7_u8, - }, - ); - } - - fn set_tex_lod_mode(&mut self, range: Range, lod_mode: TexLodMode) { - assert!(range.len() == 2); - self.set_field( - range, - match lod_mode { - TexLodMode::Auto => 0_u8, - TexLodMode::Zero => 1_u8, - TexLodMode::Bias => 2_u8, - TexLodMode::Lod => 3_u8, - _ => panic!("Unknown LOD mode"), - }, - ); - } - - fn encode_tex(&mut self, op: &OpTex) { - self.set_opcode(0xdeb8); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - assert!(op.fault.is_none()); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_tex_dim(28..31, op.dim); - self.set_field(31..35, op.mask); - self.set_bit(35, false); // ToDo: NDV - self.set_bit(36, op.offset); - self.set_tex_lod_mode(37..39, op.lod_mode); - self.set_bit(49, false); // TODO: .NODEP - self.set_bit(50, op.z_cmpr); - } - - fn encode_tld(&mut self, op: &OpTld) { - self.set_opcode(0xdd38); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - assert!(op.fault.is_none()); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_tex_dim(28..31, op.dim); - self.set_field(31..35, op.mask); - self.set_bit(35, op.offset); - self.set_bit(49, false); // TODO: .NODEP - self.set_bit(50, op.is_ms); - - assert!( - op.lod_mode == TexLodMode::Zero || op.lod_mode == TexLodMode::Lod - ); - self.set_bit(55, op.lod_mode == TexLodMode::Lod); - } - - fn encode_tld4(&mut self, op: &OpTld4) { - self.set_opcode(0xdef8); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - assert!(op.fault.is_none()); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_tex_dim(28..31, op.dim); - self.set_field(31..35, op.mask); - self.set_bit(35, false); // ToDo: NDV - self.set_field( - 36..38, - match op.offset_mode { - Tld4OffsetMode::None => 0_u8, - Tld4OffsetMode::AddOffI => 1_u8, - Tld4OffsetMode::PerPx => 2_u8, - }, - ); - self.set_field(38..40, op.comp); - self.set_bit(49, false); // TODO: .NODEP - self.set_bit(50, op.z_cmpr); - } - - fn encode_tmml(&mut self, op: &OpTmml) { - self.set_opcode(0xdf60); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_tex_dim(28..31, op.dim); - self.set_field(31..35, op.mask); - self.set_bit(35, false); // ToDo: NDV - self.set_bit(49, false); // TODO: .NODEP - } - - fn encode_txd(&mut self, op: &OpTxd) { - self.set_opcode(0xde78); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - assert!(op.fault.is_none()); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_tex_dim(28..31, op.dim); - self.set_field(31..35, op.mask); - self.set_bit(35, op.offset); - self.set_bit(49, false); // TODO: .NODEP - } - - fn encode_txq(&mut self, op: &OpTxq) { - self.set_opcode(0xdf50); - - self.set_dst(op.dsts[0]); - assert!(op.dsts[1].is_none()); - self.set_reg_src(8..16, op.src); - - self.set_field( - 22..28, - match op.query { - TexQuery::Dimension => 1_u8, - TexQuery::TextureType => 2_u8, - TexQuery::SamplerPos => 5_u8, - // TexQuery::Filter => 0x10_u8, - // TexQuery::Lod => 0x12_u8, - // TexQuery::Wrap => 0x14_u8, - // TexQuery::BorderColour => 0x16, - }, - ); - self.set_field(31..35, op.mask); - self.set_bit(49, false); // TODO: .NODEP - } - - fn encode_ipa(&mut self, op: &OpIpa) { - self.set_opcode(0xe000); - - self.set_dst(op.dst); - self.set_reg_src(8..16, 0.into()); // addr - self.set_reg_src(20..28, op.inv_w); - self.set_reg_src(39..47, op.offset); - - assert!(op.addr % 4 == 0); - self.set_field(28..38, op.addr); - self.set_bit(38, false); // .IDX - self.set_pred_dst(47..50, Dst::None); // TODO: What is this for? - self.set_bit(51, false); // .SAT - self.set_field( - 52..54, - match op.loc { - InterpLoc::Default => 0_u8, - InterpLoc::Centroid => 1_u8, - InterpLoc::Offset => 2_u8, - }, - ); - self.set_field( - 54..56, - match op.freq { - InterpFreq::Pass => 0_u8, - InterpFreq::PassMulW => 1_u8, - InterpFreq::Constant => 2_u8, - InterpFreq::State => 3_u8, - }, - ); - } - - fn encode_ald(&mut self, op: &OpALd) { - self.set_opcode(0xefd8); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.offset); - self.set_reg_src(39..47, op.vtx); - - assert!(!op.access.phys); - self.set_field(20..30, op.access.addr); - self.set_bit(31, op.access.patch); - self.set_bit(32, op.access.output); - self.set_field(47..49, op.access.comps - 1); - } - - fn encode_ast(&mut self, op: &OpASt) { - self.set_opcode(0xeff0); - - self.set_reg_src(0..8, op.data); - self.set_reg_src(8..16, op.offset); - self.set_reg_src(39..47, op.vtx); - - assert!(!op.access.phys); - assert!(op.access.output); - self.set_field(20..30, op.access.addr); - self.set_bit(31, op.access.patch); - self.set_bit(32, op.access.output); - self.set_field(47..49, op.access.comps - 1); - } - - fn encode_membar(&mut self, op: &OpMemBar) { - self.set_opcode(0xef98); - - self.set_field( - 8..10, - match op.scope { - MemScope::CTA => 0_u8, - MemScope::GPU => 1_u8, - MemScope::System => 2_u8, - }, - ); - } - - fn set_rel_offset( - &mut self, - range: Range, - label: &Label, - ip: usize, - labels: &HashMap, - ) { - let ip = u32::try_from(ip).unwrap(); - let ip = i32::try_from(ip).unwrap(); - - let target_ip = *labels.get(label).unwrap(); - let target_ip = u32::try_from(target_ip).unwrap(); - let target_ip = i32::try_from(target_ip).unwrap(); - - let rel_offset = target_ip - ip - 8; - - self.set_field(range, rel_offset); - } - - fn encode_bra( - &mut self, - op: &OpBra, - ip: usize, - labels: &HashMap, - ) { - self.set_opcode(0xe240); - self.set_rel_offset(20..44, &op.target, ip, labels); - self.set_field(0..5, 0xF_u8); // TODO: Pred? - } - - fn encode_exit(&mut self, _op: &OpExit) { - self.set_opcode(0xe300); - - // TODO: pred - self.set_pred(&Pred { - pred_ref: PredRef::None, - pred_inv: false, - }); - - // TODO: CC flags - self.set_field(0..4, 0xf_u8); // CC.T - } - - fn encode_bar(&mut self, _op: &OpBar) { - self.set_opcode(0xf0a8); - - self.set_reg_src(8..16, SrcRef::Zero.into()); - - // 00: RED.POPC - // 01: RED.AND - // 02: RED.OR - self.set_field(35..37, 0_u8); - - // 00: SYNC - // 01: ARV - // 02: RED - // 03: SCAN - self.set_field(32..35, 0_u8); - - self.set_pred_src(39..42, 42, SrcRef::True.into()); - } - - fn encode_cs2r(&mut self, op: &OpCS2R) { - self.set_opcode(0x50c8); - self.set_dst(op.dst); - self.set_field(20..28, op.idx); - } - - fn encode_kill(&mut self, _op: &OpKill) { - self.set_opcode(0xe330); - self.set_field(0..5, 0x0f_u8); - } - - fn encode_nop(&mut self) { - self.set_opcode(0x50b0); - - // TODO: pred - self.set_pred(&Pred { - pred_ref: PredRef::None, - pred_inv: false, - }); - - // TODO: CC flags - self.set_field(8..12, 0xf_u8); // CC.T - } - - fn encode_s2r(&mut self, op: &OpS2R) { - self.set_opcode(0xf0c8); - self.set_dst(op.dst); - self.set_field(20..28, op.idx); - } - - fn encode_popc(&mut self, op: &OpPopC) { - assert!(op.src.is_reg_or_zero()); - - match &op.src.src_ref { - SrcRef::Imm32(imm) => { - self.set_opcode(0x3808); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::Reg(_) => { - self.set_opcode(0x5c08); - self.set_reg_src(20..28, op.src); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4c08); - self.set_src_cb(20..39, cbuf); - } - src => panic!("Invalid source for POPC: {src}"), - } - - let not_mod = matches!(op.src.src_mod, SrcMod::BNot); - self.set_bit(40, not_mod); - self.set_dst(op.dst); - } - - fn encode_fadd(&mut self, op: &OpFAdd) { - if let Some(imm32) = op.srcs[1].as_imm_not_f20() { - self.set_opcode(0x0800); - self.set_dst(op.dst); - self.set_reg_fmod_src(8..16, 54, 56, op.srcs[0]); - self.set_src_imm32(20..52, imm32); - self.set_bit(55, op.ftz); - } else { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c58); - self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3858); - self.set_src_imm_f20(20..39, 56, *imm); - assert!(op.srcs[1].src_mod.is_none()); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c58); - self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]); - } - _ => panic!("Unsupported src type"), - } - - self.set_dst(op.dst); - self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); - - self.set_rnd_mode(39..41, op.rnd_mode); - self.set_bit(44, op.ftz); - self.set_bit(50, op.saturate); - } - } - - fn encode_fmnmx(&mut self, op: &OpFMnMx) { - match &op.srcs[1].src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3860); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c60); - self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c60); - self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]); - } - src => panic!("Unsupported src type for FMNMX: {src}"), - } - - self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); - self.set_dst(op.dst); - self.set_pred_src(39..42, 42, op.min); - self.set_bit(44, op.ftz); - } - - fn encode_fmul(&mut self, op: &OpFMul) { - if let Some(imm32) = op.srcs[1].as_imm_not_f20() { - self.set_opcode(0x1e00); - - self.set_bit(53, op.ftz); - self.set_bit(54, op.dnz); - self.set_bit(55, op.saturate); - - self.set_src_imm32(20..52, imm32); - self.set_bit( - 19, - op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(), - ); - } else { - match &op.srcs[1].src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3868); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c68); - self.set_reg_src(20..28, op.srcs[1]); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4c68); - self.set_src_cb(20..39, cbuf); - } - src => panic!("Unsupported src type for FMUL: {src}"), - } - - self.set_rnd_mode(39..41, op.rnd_mode); - self.set_field(41..44, 0x0_u8); // TODO: PDIV - self.set_bit(44, op.ftz); - self.set_bit(45, op.dnz); - self.set_bit( - 48, - op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(), - ); - self.set_bit(50, op.saturate); - } - - self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); - self.set_dst(op.dst); - } - - fn encode_ffma(&mut self, op: &OpFFma) { - // FFMA doesn't have any abs flags. - assert!(!op.srcs[0].src_mod.has_fabs()); - assert!(!op.srcs[1].src_mod.has_fabs()); - assert!(!op.srcs[2].src_mod.has_fabs()); - - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5980); - self.set_reg_src_ref(20..28, op.srcs[1].src_ref); - } - SrcRef::Imm32(i) => { - self.set_opcode(0x3280); - self.set_src_imm_f20(20..39, 56, *i); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4980); - self.set_src_cb(20..39, cb); - } - src1 => panic!("unsupported src1 type for IMUL: {src1}"), - } - - self.set_dst(op.dst); - self.set_reg_src_ref(8..16, op.srcs[0].src_ref); - self.set_reg_src_ref(39..47, op.srcs[2].src_ref); - - self.set_bit( - 48, - op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(), - ); - self.set_bit(49, op.srcs[2].src_mod.has_fneg()); - self.set_bit(50, op.saturate); - self.set_rnd_mode(51..53, op.rnd_mode); - - self.set_bit(53, op.ftz); - self.set_bit(54, op.dnz); - } - - fn set_float_cmp_op(&mut self, range: Range, op: FloatCmpOp) { - assert!(range.len() == 4); - self.set_field( - range, - match op { - FloatCmpOp::OrdLt => 0x01_u8, - FloatCmpOp::OrdEq => 0x02_u8, - FloatCmpOp::OrdLe => 0x03_u8, - FloatCmpOp::OrdGt => 0x04_u8, - FloatCmpOp::OrdNe => 0x05_u8, - FloatCmpOp::OrdGe => 0x06_u8, - FloatCmpOp::UnordLt => 0x09_u8, - FloatCmpOp::UnordEq => 0x0a_u8, - FloatCmpOp::UnordLe => 0x0b_u8, - FloatCmpOp::UnordGt => 0x0c_u8, - FloatCmpOp::UnordNe => 0x0d_u8, - FloatCmpOp::UnordGe => 0x0e_u8, - FloatCmpOp::IsNum => 0x07_u8, - FloatCmpOp::IsNan => 0x08_u8, - }, - ); - } - - fn encode_fset(&mut self, op: &OpFSet) { - match &op.srcs[1].src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3000); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5800); - self.set_reg_fmod_src(20..28, 44, 53, op.srcs[1]); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4800); - self.set_cb_fmod_src(20..39, 44, 6, op.srcs[1]); - } - src => panic!("Unsupported src type for FSET: {src}"), - } - - self.set_reg_fmod_src(8..16, 54, 43, op.srcs[0]); - self.set_pred_src(39..42, 42, SrcRef::True.into()); - self.set_float_cmp_op(48..52, op.cmp_op); - self.set_bit(52, true); // bool float - self.set_bit(55, op.ftz); - self.set_dst(op.dst); - } - - fn encode_fsetp(&mut self, op: &OpFSetP) { - match &op.srcs[1].src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x36b0); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5bb0); - self.set_reg_fmod_src(20..28, 44, 6, op.srcs[1]); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4bb0); - self.set_cb_fmod_src(20..39, 44, 6, op.srcs[1]); - } - src => panic!("Unsupported src type for FSETP: {src}"), - } - - self.set_pred_dst(3..6, op.dst); - self.set_pred_dst(0..3, Dst::None); // dst1 - self.set_pred_src(39..42, 42, op.accum); - self.set_pred_set_op(45..47, op.set_op); - self.set_bit(47, op.ftz); - self.set_float_cmp_op(48..52, op.cmp_op); - self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]); - } - - fn encode_fswzadd(&mut self, op: &OpFSwzAdd) { - self.set_opcode(0x50f8); - - self.set_dst(op.dst); - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(20..28, op.srcs[1]); - - self.set_field( - 39..41, - match op.rnd_mode { - FRndMode::NearestEven => 0u8, - FRndMode::NegInf => 1u8, - FRndMode::PosInf => 2u8, - FRndMode::Zero => 3u8, - }, - ); - - for (i, op) in op.ops.iter().enumerate() { - self.set_field( - 28 + i * 2..28 + (i + 1) * 2, - match op { - FSwzAddOp::Add => 0u8, - FSwzAddOp::SubLeft => 1u8, - FSwzAddOp::SubRight => 2u8, - FSwzAddOp::MoveLeft => 3u8, - }, - ); - } - - self.set_bit(38, false); /* .NDV */ - self.set_bit(44, op.ftz); - self.set_bit(47, false); /* dst.CC */ - } - - fn encode_rro(&mut self, op: &OpRro) { - match &op.src.src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3890); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c90); - self.set_reg_fmod_src(20..28, 49, 45, op.src); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c90); - self.set_cb_fmod_src(20..39, 49, 45, op.src); - } - src => panic!("Unsupported src type for RRO: {src}"), - } - - self.set_dst(op.dst); - self.set_field( - 39..40, - match op.op { - RroOp::SinCos => 0u8, - RroOp::Exp2 => 1u8, - }, - ); - } - - fn encode_mufu(&mut self, op: &OpMuFu) { - assert!(op.src.is_reg_or_zero()); - - // TODO: This is following ALU encoding, figure out the correct form of this. - self.set_opcode(0x5080); - - self.set_dst(op.dst); - self.set_reg_fmod_src(8..16, 46, 48, op.src); - - self.set_field( - 20..24, - match op.op { - MuFuOp::Cos => 0_u8, - MuFuOp::Sin => 1_u8, - MuFuOp::Exp2 => 2_u8, - MuFuOp::Log2 => 3_u8, - MuFuOp::Rcp => 4_u8, - MuFuOp::Rsq => 5_u8, - MuFuOp::Rcp64H => 6_u8, - MuFuOp::Rsq64H => 7_u8, - // SQRT is only on SM52 and later - MuFuOp::Sqrt if self.sm >= 52 => 8_u8, - MuFuOp::Sqrt => panic!("MUFU.SQRT not supported on SM50"), - MuFuOp::Tanh => panic!("MUFU.TANH not supported on SM50"), - }, - ); - } - - fn encode_flo(&mut self, op: &OpFlo) { - match op.src.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c30); - self.set_reg_src_ref(20..28, op.src.src_ref); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3830); - self.set_src_imm_i20(20..39, 56, imm); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c30); - self.set_src_cb(20..39, &cb); - } - src => panic!("Unsupported src type for FLO: {src}"), - } - - self.set_dst(op.dst); - self.set_bit(40, op.src.src_mod.is_bnot()); - self.set_bit(48, op.signed); - self.set_bit(41, op.return_shift_amount); - self.set_bit(47, false); /* dst.CC */ - } - - fn encode_dadd(&mut self, op: &OpDAdd) { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c70); - self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3870); - self.set_src_imm_f20(20..39, 56, *imm); - assert!(op.srcs[1].src_mod.is_none()); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c70); - self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]); - } - _ => panic!("Unsupported src type"), - } - - self.set_dst(op.dst); - self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); - self.set_rnd_mode(39..41, op.rnd_mode); - } - - fn encode_dfma(&mut self, op: &OpDFma) { - match &op.srcs[2].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5b70); - self.set_reg_src_ref(20..28, op.srcs[1].src_ref); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3670); - self.set_src_imm_f20(20..39, 56, *imm); - assert!(op.srcs[1].src_mod.is_none()); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4b70); - self.set_src_cb(20..39, cb); - } - _ => panic!("Invalid dfma src1: {}", op.srcs[1]), - } - self.set_reg_src_ref(39..47, op.srcs[2].src_ref); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x5370); - self.set_reg_src_ref(39..47, op.srcs[1].src_ref); - self.set_src_cb(20..39, cb); - } - _ => panic!("Invalid dfma src2: {}", op.srcs[2]), - } - - self.set_dst(op.dst); - self.set_reg_src_ref(8..16, op.srcs[0].src_ref); - - assert!(!op.srcs[0].src_mod.has_fabs()); - assert!(!op.srcs[1].src_mod.has_fabs()); - assert!(!op.srcs[2].src_mod.has_fabs()); - self.set_bit( - 48, - op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(), - ); - self.set_bit(49, op.srcs[2].src_mod.has_fneg()); - - self.set_rnd_mode(50..52, op.rnd_mode); - } - - fn encode_dmnmx(&mut self, op: &OpDMnMx) { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c50); - self.set_reg_fmod_src(20..28, 49, 45, op.srcs[1]); - } - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3850); - self.set_src_imm_f20(20..39, 56, *imm32); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c50); - self.set_cb_fmod_src(20..39, 49, 45, op.srcs[1]); - } - src => panic!("Unsupported src type for FMNMX: {src}"), - } - - self.set_reg_fmod_src(8..16, 46, 48, op.srcs[0]); - self.set_dst(op.dst); - self.set_pred_src(39..42, 42, op.min); - } - - fn encode_dmul(&mut self, op: &OpDMul) { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c80); - self.set_reg_src_ref(20..28, op.srcs[1].src_ref); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3880); - self.set_src_imm_f20(20..39, 56, *imm); - assert!(op.srcs[1].src_mod.is_none()); - } - SrcRef::CBuf(cb) => { - self.set_opcode(0x4c80); - self.set_src_cb(20..39, cb); - } - _ => panic!("Invalid dmul src1: {}", op.srcs[1]), - } - - self.set_dst(op.dst); - self.set_reg_src_ref(8..16, op.srcs[0].src_ref); - - self.set_rnd_mode(39..41, op.rnd_mode); - - assert!(!op.srcs[0].src_mod.has_fabs()); - assert!(!op.srcs[1].src_mod.has_fabs()); - self.set_bit( - 48, - op.srcs[0].src_mod.has_fneg() ^ op.srcs[1].src_mod.has_fneg(), - ); - } - - fn encode_dsetp(&mut self, op: &OpDSetP) { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5b80); - self.set_reg_fmod_src(20..28, 44, 6, op.srcs[1]); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3680); - self.set_src_imm_f20(20..39, 56, *imm); - assert!(op.srcs[1].src_mod.is_none()); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4b80); - self.set_reg_fmod_src(20..39, 44, 6, op.srcs[1]); - } - _ => panic!("Invalid dmul src1: {}", op.srcs[1]), - } - - self.set_pred_dst(3..6, op.dst); - self.set_pred_dst(0..3, Dst::None); // dst1 - self.set_pred_src(39..42, 42, op.accum); - self.set_pred_set_op(45..47, op.set_op); - self.set_float_cmp_op(48..52, op.cmp_op); - self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]); - } - - fn encode_iadd2(&mut self, op: &OpIAdd2) { - let carry_in = match op.carry_in.src_ref { - SrcRef::Reg(reg) if reg.file() == RegFile::Carry => true, - SrcRef::Zero => false, - other => panic!("invalid carry_in src for IADD2 {other}"), - }; - let carry_out = match op.carry_out { - Dst::Reg(reg) if reg.file() == RegFile::Carry => true, - Dst::None => false, - other => panic!("invalid carry_out dst for IADD2 {other}"), - }; - - if let Some(imm32) = op.srcs[1].as_imm_not_i20() { - self.set_opcode(0x1c00); - - self.set_dst(op.dst); - self.set_reg_ineg_src(8..16, 56, op.srcs[0]); - self.set_src_imm32(20..52, imm32); - - self.set_bit(53, carry_in); - self.set_bit(52, carry_out); - } else { - match &op.srcs[1].src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c10); - self.set_reg_ineg_src(20..28, 48, op.srcs[1]); - } - SrcRef::Imm32(imm) => { - self.set_opcode(0x3810); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::CBuf(_) => { - self.set_opcode(0x4c10); - self.set_cb_ineg_src(20..39, 48, op.srcs[1]); - } - src => panic!("Unsupported src type for IADD: {src}"), - } - - self.set_dst(op.dst); - self.set_reg_ineg_src(8..16, 49, op.srcs[0]); - - self.set_bit(43, carry_in); - self.set_bit(47, carry_out); - } - } - - fn encode_prmt(&mut self, op: &OpPrmt) { - match &op.sel.src_ref { - SrcRef::Imm32(imm) => { - self.set_opcode(0x36c0); - self.set_src_imm_i20(20..39, 56, *imm); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5bc0); - self.set_reg_src(20..28, op.sel); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4bc0); - self.set_src_cb(20..39, cbuf); - } - src => panic!("Unsupported src type for PRMT: {src}"), - } - - self.set_reg_src(8..16, op.srcs[0]); - self.set_reg_src(39..47, op.srcs[1]); - self.set_dst(op.dst); - // TODO: subop? - } - - fn encode_suld(&mut self, op: &OpSuLd) { - self.set_opcode(0xeb00); - - assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); - self.set_field(20..24, op.mask); - self.set_image_dim(33..36, op.image_dim); - - // mem_eviction_policy not a thing for sm < 70 - - let scope = match op.mem_order { - MemOrder::Constant => MemScope::System, - MemOrder::Weak => MemScope::CTA, - MemOrder::Strong(s) => s, - }; - - self.set_field( - 24..26, - match scope { - MemScope::CTA => 0_u8, - /* SM => 1_u8, */ - MemScope::GPU => 2_u8, - MemScope::System => 3_u8, - }, - ); - - self.set_dst(op.dst); - - self.set_reg_src(8..16, op.coord); - self.set_reg_src(39..47, op.handle); - } - - fn encode_suatom(&mut self, op: &OpSuAtom) { - if matches!(op.atom_op, AtomOp::CmpExch) { - self.set_opcode(0xeac0); - } else { - self.set_opcode(0xea60); - } - - let atom_type: u8 = match op.atom_type { - AtomType::U32 => 0, - AtomType::I32 => 1, - AtomType::F32 => 3, - AtomType::U64 => 2, - AtomType::I64 => 5, - _ => panic!("Unsupported atom type {}", op.atom_type), - }; - - let atom_op: u8 = match op.atom_op { - AtomOp::Add => 0, - AtomOp::Min => 1, - AtomOp::Max => 2, - AtomOp::Inc => 3, - AtomOp::Dec => 4, - AtomOp::And => 5, - AtomOp::Or => 6, - AtomOp::Xor => 7, - AtomOp::Exch => 8, - AtomOp::CmpExch => 0, - }; - - self.set_image_dim(33..36, op.image_dim); - self.set_field(36..39, atom_type); - self.set_field(29..33, atom_op); - - // The hardware requires that we set .D on atomics. This is safe to do - // in in the emit code because it only affects format conversion, not - // surface coordinates and atomics are required to be performed with - // image formats that that exactly match the shader data type. So, for - // instance, a uint32_t atomic has to happen on an R32_UINT or R32_SINT - // image. - self.set_bit(52, true); // .D - - self.set_dst(op.dst); - - self.set_reg_src(20..28, op.data); - self.set_reg_src(8..16, op.coord); - self.set_reg_src(39..47, op.handle); - } - - fn encode_isberd(&mut self, op: &OpIsberd) { - self.set_opcode(0xefd0); - self.set_dst(op.dst); - self.set_reg_src(8..16, op.idx); - } - - fn encode_out(&mut self, op: &OpOut) { - match &op.stream.src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0xf6e0); - self.set_src_imm_i20(20..39, 56, *imm32); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0xebe0); - self.set_src_cb(20..39, cbuf); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0xfbe0); - self.set_reg_src(20..28, op.stream); - } - src => panic!("Unsupported src type for OUT: {src}"), - } - - self.set_field( - 39..41, - match op.out_type { - OutType::Emit => 1_u8, - OutType::Cut => 2_u8, - OutType::EmitThenCut => 3_u8, - }, - ); - - self.set_reg_src(8..16, op.handle); - self.set_dst(op.dst); - } - - fn encode_bfe(&mut self, op: &OpBfe) { - match &op.range.src_ref { - SrcRef::Imm32(imm32) => { - self.set_opcode(0x3800); - // We guarantee that imm32 is 16bits, as it's a result of a PRMT - // instruction that only fills the bottom two bytes. - self.set_src_imm_i20(20..39, 56, *imm32 & 0xffff); - } - SrcRef::CBuf(cbuf) => { - self.set_opcode(0x4c00); - self.set_src_cb(20..39, cbuf); - } - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x5c00); - self.set_reg_src(20..28, op.range); - } - src => panic!("Unsupported src type for BFE: {src}"), - } - - if op.signed { - self.set_bit(48, true); - } - - if op.reverse { - self.set_bit(40, true); - } - - self.set_reg_src(8..16, op.base); - self.set_dst(op.dst); - } - - pub fn encode( - instr: &Instr, - sm: u8, - ip: usize, - labels: &HashMap, - ) -> Self { - assert!(sm >= 50); - - let mut si = SM50Instr::new(sm); - - match &instr.op { - Op::FAdd(op) => si.encode_fadd(op), - Op::FMnMx(op) => si.encode_fmnmx(op), - Op::FMul(op) => si.encode_fmul(op), - Op::FFma(op) => si.encode_ffma(op), - Op::FSet(op) => si.encode_fset(op), - Op::FSetP(op) => si.encode_fsetp(op), - Op::FSwzAdd(op) => si.encode_fswzadd(op), - Op::Rro(op) => si.encode_rro(op), - Op::MuFu(op) => si.encode_mufu(op), - Op::Flo(op) => si.encode_flo(op), - Op::DAdd(op) => si.encode_dadd(op), - Op::DFma(op) => si.encode_dfma(op), - Op::DMnMx(op) => si.encode_dmnmx(op), - Op::DMul(op) => si.encode_dmul(op), - Op::DSetP(op) => si.encode_dsetp(op), - Op::IAdd2(op) => si.encode_iadd2(op), - Op::Mov(op) => si.encode_mov(op), - Op::Sel(op) => si.encode_sel(op), - Op::Shfl(op) => si.encode_shfl(op), - Op::Vote(op) => si.encode_vote(op), - Op::PSetP(op) => si.encode_psetp(op), - Op::SuSt(op) => si.encode_sust(op), - Op::S2R(op) => si.encode_s2r(op), - Op::PopC(op) => si.encode_popc(op), - Op::Prmt(op) => si.encode_prmt(op), - Op::Ld(op) => si.encode_ld(op), - Op::Ldc(op) => si.encode_ldc(op), - Op::St(op) => si.encode_st(op), - Op::Lop2(op) => si.encode_lop2(op), - Op::Shf(op) => si.encode_shf(op), - Op::Shl(op) => si.encode_shl(op), - Op::Shr(op) => si.encode_shr(op), - Op::F2F(op) => si.encode_f2f(op), - Op::F2I(op) => si.encode_f2i(op), - Op::I2F(op) => si.encode_i2f(op), - Op::I2I(op) => si.encode_i2i(op), - Op::IMad(op) => si.encode_imad(op), - Op::IMul(op) => si.encode_imul(op), - Op::IMnMx(op) => si.encode_imnmx(op), - Op::ISetP(op) => si.encode_isetp(op), - Op::Tex(op) => si.encode_tex(op), - Op::Tld(op) => si.encode_tld(op), - Op::Tld4(op) => si.encode_tld4(op), - Op::Tmml(op) => si.encode_tmml(op), - Op::Txd(op) => si.encode_txd(op), - Op::Txq(op) => si.encode_txq(op), - Op::Ipa(op) => si.encode_ipa(op), - Op::ALd(op) => si.encode_ald(op), - Op::ASt(op) => si.encode_ast(op), - Op::MemBar(op) => si.encode_membar(op), - Op::Atom(op) => si.encode_atom(op), - Op::Bra(op) => si.encode_bra(op, ip, labels), - Op::Exit(op) => si.encode_exit(op), - Op::Bar(op) => si.encode_bar(op), - Op::SuLd(op) => si.encode_suld(op), - Op::SuAtom(op) => si.encode_suatom(op), - Op::Kill(op) => si.encode_kill(op), - Op::CS2R(op) => si.encode_cs2r(op), - Op::Nop(_) => si.encode_nop(), - Op::Isberd(op) => si.encode_isberd(&op), - Op::Out(op) => si.encode_out(&op), - Op::Bfe(op) => si.encode_bfe(&op), - _ => panic!("Unhandled instruction {}", instr.op), - } - - si.set_pred(&instr.pred); - si.set_instr_deps(&instr.deps); - - si - } -} - -fn encode_instr( - instr_index: usize, - instr: Option<&Box>, - sm: u8, - labels: &HashMap, - ip: &mut usize, - sched_instr: &mut [u32; 2], -) -> [u32; 2] { - let res = instr - .map(|x| SM50Instr::encode(x, sm, *ip, labels)) - .unwrap_or_else(|| SM50Instr::nop(sm)); - - *ip += 8; - - BitMutView::new(sched_instr) - .set_field(21 * instr_index..21 * (instr_index + 1), res.sched); - - res.inst -} - -pub fn encode_sm50_shader(sm: &dyn ShaderModel, s: &Shader<'_>) -> Vec { - assert!(s.functions.len() == 1); - let func = &s.functions[0]; - - let mut num_instrs = 0_usize; - let mut labels = HashMap::new(); - for b in &func.blocks { - // We ensure blocks will have groups of 3 instructions with a - // schedule instruction before each groups. As we should never jump - // to a schedule instruction, we account for that here. - labels.insert(b.label, num_instrs + 8); - - let block_num_instrs = align_up(b.instrs.len(), 3); - - // Every 3 instructions, we have a new schedule instruction so we - // need to account for that. - num_instrs += (block_num_instrs + (block_num_instrs / 3)) * 8; - } - - let mut encoded = Vec::new(); - for b in &func.blocks { - // A block is composed of groups of 3 instructions. - let block_num_instrs = align_up(b.instrs.len(), 3); - - let mut instrs_iter = b.instrs.iter(); - - for _ in 0..(block_num_instrs / 3) { - let mut ip = ((encoded.len() / 2) + 1) * 8; - - let mut sched_instr = [0x0; 2]; - - let instr0 = encode_instr( - 0, - instrs_iter.next(), - sm.sm(), - &labels, - &mut ip, - &mut sched_instr, - ); - let instr1 = encode_instr( - 1, - instrs_iter.next(), - sm.sm(), - &labels, - &mut ip, - &mut sched_instr, - ); - let instr2 = encode_instr( - 2, - instrs_iter.next(), - sm.sm(), - &labels, - &mut ip, - &mut sched_instr, - ); - - encoded.extend_from_slice(&sched_instr[..]); - encoded.extend_from_slice(&instr0[..]); - encoded.extend_from_slice(&instr1[..]); - encoded.extend_from_slice(&instr2[..]); - } - } - - encoded -} diff --git a/src/nouveau/compiler/nak/encode_sm70.rs b/src/nouveau/compiler/nak/encode_sm70.rs deleted file mode 100644 index a830a3ca7a0..00000000000 --- a/src/nouveau/compiler/nak/encode_sm70.rs +++ /dev/null @@ -1,2599 +0,0 @@ -// Copyright © 2022 Collabora, Ltd. -// SPDX-License-Identifier: MIT - -use crate::ir::*; -use bitview::*; - -use std::collections::HashMap; -use std::ops::Range; - -struct ALURegRef { - pub reg: RegRef, - pub abs: bool, - pub neg: bool, - pub swizzle: SrcSwizzle, -} - -struct ALUCBufRef { - pub cb: CBufRef, - pub abs: bool, - pub neg: bool, - pub swizzle: SrcSwizzle, -} - -enum ALUSrc { - None, - Imm32(u32), - Reg(ALURegRef), - UReg(ALURegRef), - CBuf(ALUCBufRef), -} - -fn src_is_zero_or_gpr(src: &Src) -> bool { - match src.src_ref { - SrcRef::Zero => true, - SrcRef::Reg(reg) => reg.file() == RegFile::GPR, - _ => false, - } -} - -fn src_mod_has_abs(src_mod: SrcMod) -> bool { - match src_mod { - SrcMod::None | SrcMod::FNeg | SrcMod::INeg | SrcMod::BNot => false, - SrcMod::FAbs | SrcMod::FNegAbs => true, - } -} - -fn src_mod_has_neg(src_mod: SrcMod) -> bool { - match src_mod { - SrcMod::None | SrcMod::FAbs => false, - SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg | SrcMod::BNot => true, - } -} - -fn src_mod_is_bnot(src_mod: SrcMod) -> bool { - match src_mod { - SrcMod::None => false, - SrcMod::BNot => true, - _ => panic!("Not an predicate source modifier"), - } -} - -fn dst_is_bar(dst: Dst) -> bool { - match dst { - Dst::None => false, - Dst::SSA(ssa) => ssa.file().unwrap() == RegFile::Bar, - Dst::Reg(reg) => reg.file() == RegFile::Bar, - } -} - -impl ALUSrc { - fn from_src(src: Option<&Src>, op_is_uniform: bool) -> ALUSrc { - let Some(src) = src else { - return ALUSrc::None; - }; - - match src.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - let reg = match src.src_ref { - SrcRef::Zero => { - let file = if op_is_uniform { - RegFile::UGPR - } else { - RegFile::GPR - }; - RegRef::zero(file, 1) - } - SrcRef::Reg(reg) => reg, - _ => panic!("Invalid source ref"), - }; - assert!(reg.comps() <= 2); - let alu_ref = ALURegRef { - reg: reg, - abs: src_mod_has_abs(src.src_mod), - neg: src_mod_has_neg(src.src_mod), - swizzle: src.src_swizzle, - }; - if op_is_uniform { - assert!(reg.file() == RegFile::UGPR); - ALUSrc::Reg(alu_ref) - } else { - match reg.file() { - RegFile::GPR => ALUSrc::Reg(alu_ref), - RegFile::UGPR => ALUSrc::UReg(alu_ref), - _ => panic!("Invalid ALU register file"), - } - } - } - SrcRef::Imm32(i) => { - assert!(src.src_mod.is_none()); - assert!(src.src_swizzle.is_none()); - ALUSrc::Imm32(i) - } - SrcRef::CBuf(cb) => { - let alu_ref = ALUCBufRef { - cb: cb, - abs: src_mod_has_abs(src.src_mod), - neg: src_mod_has_neg(src.src_mod), - swizzle: src.src_swizzle, - }; - ALUSrc::CBuf(alu_ref) - } - _ => panic!("Invalid ALU source"), - } - } - - pub fn has_src_mod(&self) -> bool { - match self { - ALUSrc::Reg(reg) | ALUSrc::UReg(reg) => reg.abs || reg.neg, - ALUSrc::CBuf(cb) => cb.abs || cb.neg, - _ => false, - } - } -} - -struct SM70Instr { - inst: [u32; 4], - sm: u8, -} - -impl BitViewable for SM70Instr { - fn bits(&self) -> usize { - BitView::new(&self.inst).bits() - } - - fn get_bit_range_u64(&self, range: Range) -> u64 { - BitView::new(&self.inst).get_bit_range_u64(range) - } -} - -impl BitMutViewable for SM70Instr { - fn set_bit_range_u64(&mut self, range: Range, val: u64) { - BitMutView::new(&mut self.inst).set_bit_range_u64(range, val); - } -} - -impl SetFieldU64 for SM70Instr { - fn set_field_u64(&mut self, range: Range, val: u64) { - BitMutView::new(&mut self.inst).set_field_u64(range, val); - } -} - -impl SM70Instr { - fn set_bit(&mut self, bit: usize, val: bool) { - BitMutView::new(&mut self.inst).set_bit(bit, val); - } - - fn set_reg(&mut self, range: Range, reg: RegRef) { - assert!(range.len() == 8); - assert!(reg.file() == RegFile::GPR); - self.set_field(range, reg.base_idx()); - } - - fn set_ureg(&mut self, range: Range, reg: RegRef) { - assert!(self.sm >= 75); - assert!(range.len() == 8); - assert!(reg.file() == RegFile::UGPR); - assert!(reg.base_idx() <= 63); - self.set_field(range, reg.base_idx()); - } - - fn set_pred_reg(&mut self, range: Range, reg: RegRef) { - assert!(range.len() == 3); - assert!(reg.base_idx() <= 7); - assert!(reg.comps() == 1); - self.set_field(range, reg.base_idx()); - } - - fn set_reg_src(&mut self, range: Range, src: Src) { - assert!(src.src_mod.is_none()); - match src.src_ref { - SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)), - SrcRef::Reg(reg) => self.set_reg(range, reg), - _ => panic!("Not a register"), - } - } - - fn set_pred_dst(&mut self, range: Range, dst: Dst) { - match dst { - Dst::None => { - self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1)); - } - Dst::Reg(reg) => self.set_pred_reg(range, reg), - _ => panic!("Not a register"), - } - } - - fn set_pred_src_file( - &mut self, - range: Range, - not_bit: usize, - src: Src, - file: RegFile, - ) { - // The default for predicates is true - let true_reg = RegRef::new(file, 7, 1); - - let (not, reg) = match src.src_ref { - SrcRef::True => (false, true_reg), - SrcRef::False => (true, true_reg), - SrcRef::Reg(reg) => { - assert!(reg.file() == file); - (false, reg) - } - _ => panic!("Not a register"), - }; - self.set_pred_reg(range, reg); - self.set_bit(not_bit, not ^ src_mod_is_bnot(src.src_mod)); - } - - fn set_pred_src(&mut self, range: Range, not_bit: usize, src: Src) { - self.set_pred_src_file(range, not_bit, src, RegFile::Pred); - } - - fn set_upred_src(&mut self, range: Range, not_bit: usize, src: Src) { - self.set_pred_src_file(range, not_bit, src, RegFile::UPred); - } - - fn set_src_cb(&mut self, range: Range, cx_bit: usize, cb: &CBufRef) { - let mut v = BitMutView::new_subset(self, range); - v.set_field(6..22, cb.offset); - match cb.buf { - CBuf::Binding(idx) => { - v.set_field(22..27, idx); - self.set_bit(cx_bit, false); - } - CBuf::BindlessUGPR(reg) => { - assert!(reg.base_idx() <= 63); - assert!(reg.file() == RegFile::UGPR); - v.set_field(0..6, reg.base_idx()); - self.set_bit(cx_bit, true); - } - CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"), - } - } - - fn set_opcode(&mut self, opcode: u16) { - self.set_field(0..12, opcode); - } - - fn set_pred(&mut self, pred: &Pred) { - assert!(!pred.is_false()); - self.set_pred_reg( - 12..15, - match pred.pred_ref { - PredRef::None => RegRef::zero(RegFile::Pred, 1), - PredRef::Reg(reg) => reg, - PredRef::SSA(_) => panic!("SSA values must be lowered"), - }, - ); - self.set_bit(15, pred.pred_inv); - } - - fn set_dst(&mut self, dst: Dst) { - match dst { - Dst::None => self.set_reg(16..24, RegRef::zero(RegFile::GPR, 1)), - Dst::Reg(reg) => self.set_reg(16..24, reg), - _ => panic!("Not a register"), - } - } - - fn set_udst(&mut self, dst: Dst) { - match dst { - Dst::None => self.set_ureg(16..24, RegRef::zero(RegFile::UGPR, 1)), - Dst::Reg(reg) => self.set_ureg(16..24, reg), - _ => panic!("Not a register"), - } - } - - fn set_bar_reg(&mut self, range: Range, reg: RegRef) { - assert!(range.len() == 4); - assert!(reg.file() == RegFile::Bar); - assert!(reg.comps() == 1); - self.set_field(range, reg.base_idx()); - } - - fn set_bar_dst(&mut self, range: Range, dst: Dst) { - self.set_bar_reg(range, *dst.as_reg().unwrap()); - } - - fn set_bar_src(&mut self, range: Range, src: Src) { - assert!(src.src_mod.is_none()); - self.set_bar_reg(range, *src.src_ref.as_reg().unwrap()); - } - - fn set_swizzle(&mut self, range: Range, swizzle: SrcSwizzle) { - assert!(range.len() == 2); - - self.set_field( - range, - match swizzle { - SrcSwizzle::None => 0x00_u8, - SrcSwizzle::Xx => 0x02_u8, - SrcSwizzle::Yy => 0x03_u8, - }, - ); - } - - fn set_alu_reg( - &mut self, - range: Range, - abs_bit: usize, - neg_bit: usize, - swizzle_range: Range, - file: RegFile, - is_fp16_alu: bool, - has_mod: bool, - reg: &ALURegRef, - ) { - match file { - RegFile::GPR => self.set_reg(range, reg.reg), - RegFile::UGPR => self.set_ureg(range, reg.reg), - _ => panic!("Invalid ALU src register file"), - } - - if has_mod { - self.set_bit(abs_bit, reg.abs); - self.set_bit(neg_bit, reg.neg); - } else { - assert!(!reg.abs && !reg.neg); - } - - if is_fp16_alu { - self.set_swizzle(swizzle_range, reg.swizzle); - } else { - assert!(reg.swizzle == SrcSwizzle::None); - } - } - - fn encode_alu_src0( - &mut self, - src: &ALUSrc, - file: RegFile, - is_fp16_alu: bool, - ) { - let reg = match src { - ALUSrc::None => return, - ALUSrc::Reg(reg) => reg, - _ => panic!("Invalid ALU src"), - }; - self.set_alu_reg(24..32, 73, 72, 74..76, file, is_fp16_alu, true, reg); - } - - fn encode_alu_src2( - &mut self, - src: &ALUSrc, - file: RegFile, - is_fp16_alu: bool, - bit74_75_are_mod: bool, - ) { - let reg = match src { - ALUSrc::None => return, - ALUSrc::Reg(reg) => reg, - _ => panic!("Invalid ALU src"), - }; - self.set_alu_reg( - 64..72, - 74, - 75, - 81..83, - file, - is_fp16_alu, - bit74_75_are_mod, - reg, - ); - } - - fn encode_alu_reg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) { - self.set_alu_reg( - 32..40, - 62, - 63, - 60..62, - RegFile::GPR, - is_fp16_alu, - true, - reg, - ); - } - - fn encode_alu_ureg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) { - self.set_ureg(32..40, reg.reg); - self.set_bit(62, reg.abs); - self.set_bit(63, reg.neg); - - if is_fp16_alu { - self.set_swizzle(60..62, reg.swizzle); - } else { - assert!(reg.swizzle == SrcSwizzle::None); - } - - self.set_bit(91, true); - } - - fn encode_alu_imm(&mut self, imm: &u32) { - self.set_field(32..64, *imm); - } - - fn encode_alu_cb(&mut self, cb: &ALUCBufRef, is_fp16_alu: bool) { - self.set_src_cb(32..59, 91, &cb.cb); - self.set_bit(62, cb.abs); - self.set_bit(63, cb.neg); - - if is_fp16_alu { - self.set_swizzle(60..62, cb.swizzle); - } else { - assert!(cb.swizzle == SrcSwizzle::None); - } - } - - fn encode_alu_base( - &mut self, - opcode: u16, - dst: Option<&Dst>, - src0: Option<&Src>, - src1: Option<&Src>, - src2: Option<&Src>, - is_fp16_alu: bool, - ) { - if let Some(dst) = dst { - self.set_dst(*dst); - } - - let src0 = ALUSrc::from_src(src0, false); - let src1 = ALUSrc::from_src(src1, false); - let src2 = ALUSrc::from_src(src2, false); - - // Bits 74..76 are used both for the swizzle on src0 and for the source - // modifier for the register source of src1 and src2. When both are - // registers, it's used for src2. The hardware elects to always support - // a swizzle and not support source modifiers in that case. - let bit74_75_are_mod = !is_fp16_alu - || matches!(src1, ALUSrc::None) - || matches!(src2, ALUSrc::None); - debug_assert!(bit74_75_are_mod || !src0.has_src_mod()); - - self.encode_alu_src0(&src0, RegFile::GPR, is_fp16_alu); - - let form = match &src2 { - ALUSrc::None | ALUSrc::Reg(_) => { - self.encode_alu_src2( - &src2, - RegFile::GPR, - is_fp16_alu, - bit74_75_are_mod, - ); - match &src1 { - ALUSrc::None => 1_u8, // form - ALUSrc::Reg(reg1) => { - self.encode_alu_reg(reg1, is_fp16_alu); - 1_u8 // form - } - ALUSrc::UReg(reg1) => { - self.encode_alu_ureg(reg1, is_fp16_alu); - 6_u8 // form - } - ALUSrc::Imm32(imm1) => { - self.encode_alu_imm(imm1); - 4_u8 // form - } - ALUSrc::CBuf(cb1) => { - self.encode_alu_cb(cb1, is_fp16_alu); - 5_u8 // form - } - } - } - ALUSrc::UReg(reg2) => { - self.encode_alu_ureg(reg2, is_fp16_alu); - self.encode_alu_src2( - &src1, - RegFile::GPR, - is_fp16_alu, - bit74_75_are_mod, - ); - 7_u8 // form - } - ALUSrc::Imm32(imm2) => { - self.encode_alu_imm(imm2); - self.encode_alu_src2( - &src1, - RegFile::GPR, - is_fp16_alu, - bit74_75_are_mod, - ); - 2_u8 // form - } - ALUSrc::CBuf(cb2) => { - // TODO set_src_cx - self.encode_alu_cb(cb2, is_fp16_alu); - self.encode_alu_src2( - &src1, - RegFile::GPR, - is_fp16_alu, - bit74_75_are_mod, - ); - 3_u8 // form - } - }; - - self.set_field(0..9, opcode); - self.set_field(9..12, form); - } - - fn encode_alu( - &mut self, - opcode: u16, - dst: Option<&Dst>, - src0: Option<&Src>, - src1: Option<&Src>, - src2: Option<&Src>, - ) { - self.encode_alu_base(opcode, dst, src0, src1, src2, false); - } - - fn encode_fp16_alu( - &mut self, - opcode: u16, - dst: Option<&Dst>, - src0: Option<&Src>, - src1: Option<&Src>, - src2: Option<&Src>, - ) { - self.encode_alu_base(opcode, dst, src0, src1, src2, true); - } - - fn encode_ualu( - &mut self, - opcode: u16, - dst: Option<&Dst>, - src0: Option<&Src>, - src1: Option<&Src>, - src2: Option<&Src>, - ) { - if let Some(dst) = dst { - self.set_udst(*dst); - } - - let src0 = ALUSrc::from_src(src0, true); - let src1 = ALUSrc::from_src(src1, true); - let src2 = ALUSrc::from_src(src2, true); - - // All uniform ALU requires bit 91 set - self.set_bit(91, true); - - self.encode_alu_src0(&src0, RegFile::UGPR, false); - let form = match &src2 { - ALUSrc::None | ALUSrc::Reg(_) => { - self.encode_alu_src2(&src2, RegFile::UGPR, false, true); - match &src1 { - ALUSrc::None => 1_u8, // form - ALUSrc::Reg(reg1) => { - self.encode_alu_ureg(reg1, false); - 1_u8 // form - } - ALUSrc::UReg(_) => panic!("UALU never has UReg"), - ALUSrc::Imm32(imm1) => { - self.encode_alu_imm(imm1); - 4_u8 // form - } - ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"), - } - } - ALUSrc::UReg(_) => panic!("UALU never has UReg"), - ALUSrc::Imm32(imm2) => { - self.encode_alu_imm(imm2); - self.encode_alu_src2(&src1, RegFile::UGPR, false, true); - 2_u8 // form - } - ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"), - }; - - self.set_field(0..9, opcode); - self.set_field(9..12, form); - } - - fn set_instr_deps(&mut self, deps: &InstrDeps) { - self.set_field(105..109, deps.delay); - self.set_bit(109, deps.yld); - self.set_field(110..113, deps.wr_bar().unwrap_or(7)); - self.set_field(113..116, deps.rd_bar().unwrap_or(7)); - self.set_field(116..122, deps.wt_bar_mask); - self.set_field(122..126, deps.reuse_mask); - } - - fn set_rnd_mode(&mut self, range: Range, rnd_mode: FRndMode) { - assert!(range.len() == 2); - self.set_field( - range, - match rnd_mode { - FRndMode::NearestEven => 0_u8, - FRndMode::NegInf => 1_u8, - FRndMode::PosInf => 2_u8, - FRndMode::Zero => 3_u8, - }, - ); - } - - fn encode_fadd(&mut self, op: &OpFAdd) { - if src_is_zero_or_gpr(&op.srcs[1]) { - self.encode_alu( - 0x021, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - } else { - self.encode_alu( - 0x021, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&Src::new_zero()), - Some(&op.srcs[1]), - ); - } - self.set_bit(77, op.saturate); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - } - - fn encode_ffma(&mut self, op: &OpFFma) { - self.encode_alu( - 0x023, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - self.set_bit(76, op.dnz); - self.set_bit(77, op.saturate); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - } - - fn encode_fmnmx(&mut self, op: &OpFMnMx) { - self.encode_alu( - 0x009, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&Src::new_zero()), - ); - self.set_pred_src(87..90, 90, op.min); - self.set_bit(80, op.ftz); - } - - fn encode_fmul(&mut self, op: &OpFMul) { - self.encode_alu( - 0x020, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&Src::new_zero()), - ); - self.set_bit(76, op.dnz); - self.set_bit(77, op.saturate); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - self.set_field(84..87, 0x4_u8) // TODO: PDIV - } - - fn set_float_cmp_op(&mut self, range: Range, op: FloatCmpOp) { - assert!(range.len() == 4); - self.set_field( - range, - match op { - FloatCmpOp::OrdLt => 0x01_u8, - FloatCmpOp::OrdEq => 0x02_u8, - FloatCmpOp::OrdLe => 0x03_u8, - FloatCmpOp::OrdGt => 0x04_u8, - FloatCmpOp::OrdNe => 0x05_u8, - FloatCmpOp::OrdGe => 0x06_u8, - FloatCmpOp::UnordLt => 0x09_u8, - FloatCmpOp::UnordEq => 0x0a_u8, - FloatCmpOp::UnordLe => 0x0b_u8, - FloatCmpOp::UnordGt => 0x0c_u8, - FloatCmpOp::UnordNe => 0x0d_u8, - FloatCmpOp::UnordGe => 0x0e_u8, - FloatCmpOp::IsNum => 0x07_u8, - FloatCmpOp::IsNan => 0x08_u8, - }, - ); - } - - fn encode_fset(&mut self, op: &OpFSet) { - self.encode_alu( - 0x00a, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, op.ftz); - self.set_field(87..90, 0x7_u8); // TODO: src predicate - } - - fn set_pred_set_op(&mut self, range: Range, op: PredSetOp) { - assert!(range.len() == 2); - self.set_field( - range, - match op { - PredSetOp::And => 0_u8, - PredSetOp::Or => 1_u8, - PredSetOp::Xor => 2_u8, - }, - ); - } - - fn encode_fsetp(&mut self, op: &OpFSetP) { - self.encode_alu( - 0x00b, - None, - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_pred_set_op(74..76, op.set_op); - self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, op.ftz); - - self.set_pred_dst(81..84, op.dst); - self.set_pred_dst(84..87, Dst::None); // dst1 - - self.set_pred_src(87..90, 90, op.accum); - } - - fn encode_fswzadd(&mut self, op: &OpFSwzAdd) { - self.set_opcode(0x822); - self.set_dst(op.dst); - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(64..72, op.srcs[1]); - - let mut subop = 0x0_u8; - - for (i, swz_op) in op.ops.iter().enumerate() { - let swz_op = match swz_op { - FSwzAddOp::Add => 0, - FSwzAddOp::SubRight => 2, - FSwzAddOp::SubLeft => 1, - FSwzAddOp::MoveLeft => 3, - }; - - subop |= swz_op << ((op.ops.len() - i - 1) * 2); - } - - self.set_field(32..40, subop); - - self.set_bit(77, false); // NDV - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - } - - fn encode_mufu(&mut self, op: &OpMuFu) { - self.encode_alu(0x108, Some(&op.dst), None, Some(&op.src), None); - self.set_field( - 74..80, - match op.op { - MuFuOp::Cos => 0_u8, - MuFuOp::Sin => 1_u8, - MuFuOp::Exp2 => 2_u8, - MuFuOp::Log2 => 3_u8, - MuFuOp::Rcp => 4_u8, - MuFuOp::Rsq => 5_u8, - MuFuOp::Rcp64H => 6_u8, - MuFuOp::Rsq64H => 7_u8, - MuFuOp::Sqrt => 8_u8, - MuFuOp::Tanh => 9_u8, - }, - ); - } - - fn encode_dadd(&mut self, op: &OpDAdd) { - self.encode_alu( - 0x029, - Some(&op.dst), - Some(&op.srcs[0]), - None, - Some(&op.srcs[1]), - ); - self.set_rnd_mode(78..80, op.rnd_mode); - } - - fn encode_dfma(&mut self, op: &OpDFma) { - self.encode_alu( - 0x02b, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - self.set_rnd_mode(78..80, op.rnd_mode); - } - - fn encode_dmul(&mut self, op: &OpDMul) { - self.encode_alu( - 0x028, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - self.set_rnd_mode(78..80, op.rnd_mode); - } - - fn encode_dsetp(&mut self, op: &OpDSetP) { - if src_is_zero_or_gpr(&op.srcs[1]) { - self.encode_alu( - 0x02a, - None, - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - } else { - self.encode_alu( - 0x02a, - None, - Some(&op.srcs[0]), - None, - Some(&op.srcs[1]), - ); - } - - self.set_pred_set_op(74..76, op.set_op); - self.set_float_cmp_op(76..80, op.cmp_op); - - self.set_pred_dst(81..84, op.dst); - self.set_pred_dst(84..87, Dst::None); /* dst1 */ - - self.set_pred_src(87..90, 90, op.accum); - } - - fn encode_hadd2(&mut self, op: &OpHAdd2) { - if src_is_zero_or_gpr(&op.srcs[1]) { - self.encode_fp16_alu( - 0x030, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - } else { - self.encode_fp16_alu( - 0x030, - Some(&op.dst), - Some(&op.srcs[0]), - None, - Some(&op.srcs[1]), - ); - } - - self.set_bit(77, op.saturate); - self.set_bit(78, op.f32); - self.set_bit(80, op.ftz); - self.set_bit(85, false); // .BF16_V2 (SM90+) - } - - fn encode_hfma2(&mut self, op: &OpHFma2) { - // HFMA2 doesn't have fneg and fabs on SRC2. - assert!(op.srcs[2].src_mod.is_none()); - - self.encode_fp16_alu( - 0x031, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_bit(76, op.dnz); - self.set_bit(77, op.saturate); - self.set_bit(78, op.f32); - self.set_bit(79, false); // .RELU (SM86+) - self.set_bit(80, op.ftz); - self.set_bit(85, false); // .BF16_V2 (SM86+) - } - - fn encode_hmul2(&mut self, op: &OpHMul2) { - self.encode_fp16_alu( - 0x032, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_bit(76, op.dnz); - self.set_bit(77, op.saturate); - self.set_bit(78, false); // .F32 (SM70-SM75) - self.set_bit(79, false); // .RELU (SM86+) - self.set_bit(80, op.ftz); - self.set_bit(85, false); // .BF16_V2 (SM90+) - } - - fn encode_hset2(&mut self, op: &OpHSet2) { - if src_is_zero_or_gpr(&op.srcs[1]) { - self.encode_fp16_alu( - 0x033, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - } else { - self.encode_fp16_alu( - 0x033, - Some(&op.dst), - Some(&op.srcs[0]), - None, - Some(&op.srcs[1]), - ); - } - - self.set_bit(65, false); // .BF16_V2 (SM90+) - self.set_pred_set_op(69..71, op.set_op); - - // This differentiate between integer and fp16 output - self.set_bit(71, true); // .BF - self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, op.ftz); - - self.set_pred_src(87..90, 90, op.accum); - } - - fn encode_hsetp2(&mut self, op: &OpHSetP2) { - if src_is_zero_or_gpr(&op.srcs[1]) { - self.encode_fp16_alu( - 0x034, - None, - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - } else { - self.encode_fp16_alu( - 0x034, - None, - Some(&op.srcs[0]), - None, - Some(&op.srcs[1]), - ); - } - - self.set_bit(65, false); // .BF16_V2 (SM90+) - self.set_pred_set_op(69..71, op.set_op); - self.set_bit(71, op.horizontal); // .H_AND - self.set_float_cmp_op(76..80, op.cmp_op); - self.set_bit(80, op.ftz); - - self.set_pred_dst(81..84, op.dsts[0]); - self.set_pred_dst(84..87, op.dsts[1]); - - self.set_pred_src(87..90, 90, op.accum); - } - - fn encode_hmnmx2(&mut self, op: &OpHMnMx2) { - assert!(self.sm >= 80); - - self.encode_fp16_alu( - 0x040, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - // This differentiate between integer and fp16 output - self.set_bit(78, false); // .F32 (SM86) - self.set_bit(80, op.ftz); - self.set_bit(81, false); // .NAN - self.set_bit(82, false); // .XORSIGN - self.set_bit(85, false); // .BF16_V2 - - self.set_pred_src(87..90, 90, op.min); - } - - fn encode_bmsk(&mut self, op: &OpBMsk) { - if op.is_uniform() { - self.encode_ualu( - 0x09b, - Some(&op.dst), - Some(&op.pos), - Some(&op.width), - None, - ); - } else { - self.encode_alu( - 0x01b, - Some(&op.dst), - Some(&op.pos), - Some(&op.width), - None, - ); - } - - self.set_bit(75, op.wrap); - } - - fn encode_brev(&mut self, op: &OpBRev) { - if op.is_uniform() { - self.encode_ualu(0x0be, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x101, Some(&op.dst), None, Some(&op.src), None); - } - } - - fn encode_flo(&mut self, op: &OpFlo) { - if op.is_uniform() { - self.encode_ualu(0x0bd, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x100, Some(&op.dst), None, Some(&op.src), None); - } - self.set_pred_dst(81..84, Dst::None); - self.set_field(74..75, op.return_shift_amount as u8); - self.set_field(73..74, op.signed as u8); - let not_mod = matches!(op.src.src_mod, SrcMod::BNot); - self.set_field(63..64, not_mod) - } - - fn encode_iabs(&mut self, op: &OpIAbs) { - self.encode_alu(0x013, Some(&op.dst), None, Some(&op.src), None); - } - - fn encode_iadd3(&mut self, op: &OpIAdd3) { - // Hardware requires at least one of these be unmodified - assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none()); - - if op.is_uniform() { - self.encode_ualu( - 0x090, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } else { - self.encode_alu( - 0x010, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } - - self.set_pred_src(87..90, 90, false.into()); - self.set_pred_src(77..80, 80, false.into()); - - self.set_pred_dst(81..84, op.overflow[0]); - self.set_pred_dst(84..87, op.overflow[1]); - } - - fn encode_iadd3x(&mut self, op: &OpIAdd3X) { - // Hardware requires at least one of these be unmodified - assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none()); - - if op.is_uniform() { - self.encode_ualu( - 0x090, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_upred_src(87..90, 90, op.carry[0]); - self.set_upred_src(77..80, 80, op.carry[1]); - } else { - self.encode_alu( - 0x010, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_pred_src(87..90, 90, op.carry[0]); - self.set_pred_src(77..80, 80, op.carry[1]); - } - - self.set_bit(74, true); // .X - - self.set_pred_dst(81..84, op.overflow[0]); - self.set_pred_dst(84..87, op.overflow[1]); - } - - fn encode_idp4(&mut self, op: &OpIDp4) { - self.encode_alu( - 0x026, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_bit( - 73, - match op.src_types[0] { - IntType::U8 => false, - IntType::I8 => true, - _ => panic!("Invalid DP4 source type"), - }, - ); - self.set_bit( - 74, - match op.src_types[1] { - IntType::U8 => false, - IntType::I8 => true, - _ => panic!("Invalid DP4 source type"), - }, - ); - } - - fn encode_imad(&mut self, op: &OpIMad) { - if op.is_uniform() { - self.encode_ualu( - 0x0a4, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } else { - self.encode_alu( - 0x024, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } - self.set_pred_dst(81..84, Dst::None); - self.set_bit(73, op.signed); - } - - fn encode_imad64(&mut self, op: &OpIMad64) { - if op.is_uniform() { - self.encode_ualu( - 0x0a5, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } else { - self.encode_alu( - 0x025, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - } - self.set_pred_dst(81..84, Dst::None); - self.set_bit(73, op.signed); - } - - fn encode_imnmx(&mut self, op: &OpIMnMx) { - self.encode_alu( - 0x017, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - self.set_pred_src(87..90, 90, op.min); - self.set_bit( - 73, - match op.cmp_type { - IntCmpType::U32 => false, - IntCmpType::I32 => true, - }, - ); - } - - fn set_int_cmp_op(&mut self, range: Range, op: IntCmpOp) { - assert!(range.len() == 3); - self.set_field( - range, - match op { - IntCmpOp::Eq => 2_u8, - IntCmpOp::Ne => 5_u8, - IntCmpOp::Lt => 1_u8, - IntCmpOp::Le => 3_u8, - IntCmpOp::Gt => 4_u8, - IntCmpOp::Ge => 6_u8, - }, - ); - } - - fn encode_isetp(&mut self, op: &OpISetP) { - if op.is_uniform() { - self.encode_ualu( - 0x08c, - None, - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_upred_src(68..71, 71, op.low_cmp); - self.set_upred_src(87..90, 90, op.accum); - } else { - self.encode_alu( - 0x00c, - None, - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_pred_src(68..71, 71, op.low_cmp); - self.set_pred_src(87..90, 90, op.accum); - } - - self.set_bit(72, op.ex); - - self.set_field( - 73..74, - match op.cmp_type { - IntCmpType::U32 => 0_u32, - IntCmpType::I32 => 1_u32, - }, - ); - self.set_pred_set_op(74..76, op.set_op); - self.set_int_cmp_op(76..79, op.cmp_op); - - self.set_pred_dst(81..84, op.dst); - self.set_pred_dst(84..87, Dst::None); // dst1 - } - - fn encode_lop3(&mut self, op: &OpLop3) { - if op.is_uniform() { - self.encode_ualu( - 0x092, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_upred_src(87..90, 90, SrcRef::False.into()); - } else { - self.encode_alu( - 0x012, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - Some(&op.srcs[2]), - ); - - self.set_pred_src(87..90, 90, SrcRef::False.into()); - } - - self.set_field(72..80, op.op.lut); - self.set_bit(80, false); // .PAND - self.set_field(81..84, 7_u32); // pred - } - - fn encode_popc(&mut self, op: &OpPopC) { - if op.is_uniform() { - self.encode_ualu(0x0bf, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x109, Some(&op.dst), None, Some(&op.src), None); - } - - let not_mod = matches!(op.src.src_mod, SrcMod::BNot); - self.set_field(63..64, not_mod) - } - - fn encode_shf(&mut self, op: &OpShf) { - if op.is_uniform() { - self.encode_ualu( - 0x099, - Some(&op.dst), - Some(&op.low), - Some(&op.shift), - Some(&op.high), - ); - } else { - self.encode_alu( - 0x019, - Some(&op.dst), - Some(&op.low), - Some(&op.shift), - Some(&op.high), - ); - } - - self.set_field( - 73..75, - match op.data_type { - IntType::I64 => 0_u8, - IntType::U64 => 1_u8, - IntType::I32 => 2_u8, - IntType::U32 => 3_u8, - _ => panic!("Invalid shift data type"), - }, - ); - self.set_bit(75, op.wrap); - self.set_bit(76, op.right); - self.set_bit(80, op.dst_high); - } - - fn encode_f2f(&mut self, op: &OpF2F) { - assert!(!op.integer_rnd); - if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 { - self.encode_alu(0x104, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x110, Some(&op.dst), None, Some(&op.src), None); - } - - if op.high { - self.set_field(60..62, 1_u8); // .H1 - } - - self.set_field(75..77, (op.dst_type.bits() / 8).ilog2()); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); - } - - fn encode_f2i(&mut self, op: &OpF2I) { - if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 { - self.encode_alu(0x105, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x111, Some(&op.dst), None, Some(&op.src), None); - } - - self.set_bit(72, op.dst_type.is_signed()); - self.set_field(75..77, (op.dst_type.bits() / 8).ilog2()); - self.set_bit(77, false); // NTZ - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_bit(80, op.ftz); - self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); - } - - fn encode_i2f(&mut self, op: &OpI2F) { - if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 { - self.encode_alu(0x106, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x112, Some(&op.dst), None, Some(&op.src), None); - } - - self.set_field(60..62, 0_u8); // TODO: subop - self.set_bit(74, op.src_type.is_signed()); - self.set_field(75..77, (op.dst_type.bits() / 8).ilog2()); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); - } - - fn encode_frnd(&mut self, op: &OpFRnd) { - if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 { - self.encode_alu(0x107, Some(&op.dst), None, Some(&op.src), None); - } else { - self.encode_alu(0x113, Some(&op.dst), None, Some(&op.src), None); - } - - self.set_field(84..86, (op.src_type.bits() / 8).ilog2()); - self.set_bit(80, op.ftz); - self.set_rnd_mode(78..80, op.rnd_mode); - self.set_field(75..77, (op.dst_type.bits() / 8).ilog2()); - } - - fn encode_mov(&mut self, op: &OpMov) { - if op.is_uniform() { - self.set_opcode(0xc82); - self.set_udst(op.dst); - - // umov is encoded like a non-uniform ALU op - let src = ALUSrc::from_src(Some(&op.src), true); - let form: u8 = match &src { - ALUSrc::Reg(reg) => { - self.encode_alu_ureg(reg, false); - 0x6 // form - } - ALUSrc::Imm32(imm) => { - self.encode_alu_imm(imm); - 0x4 // form - } - _ => panic!("Invalid umov src"), - }; - self.set_field(9..12, form); - } else { - self.encode_alu(0x002, Some(&op.dst), None, Some(&op.src), None); - self.set_field(72..76, op.quad_lanes); - } - } - - fn encode_prmt(&mut self, op: &OpPrmt) { - if op.is_uniform() { - self.encode_ualu( - 0x96, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.sel), - Some(&op.srcs[1]), - ); - } else { - self.encode_alu( - 0x16, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.sel), - Some(&op.srcs[1]), - ); - } - - self.set_field( - 72..75, - match op.mode { - PrmtMode::Index => 0_u8, - PrmtMode::Forward4Extract => 1_u8, - PrmtMode::Backward4Extract => 2_u8, - PrmtMode::Replicate8 => 3_u8, - PrmtMode::EdgeClampLeft => 4_u8, - PrmtMode::EdgeClampRight => 5_u8, - PrmtMode::Replicate16 => 6_u8, - }, - ) - } - - fn encode_sel(&mut self, op: &OpSel) { - if op.is_uniform() { - self.encode_ualu( - 0x087, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_upred_src(87..90, 90, op.cond); - } else { - self.encode_alu( - 0x007, - Some(&op.dst), - Some(&op.srcs[0]), - Some(&op.srcs[1]), - None, - ); - - self.set_pred_src(87..90, 90, op.cond); - } - } - - fn encode_shfl(&mut self, op: &OpShfl) { - assert!(op.lane.src_mod.is_none()); - assert!(op.c.src_mod.is_none()); - - match &op.lane.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => match &op.c.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x389); - self.set_reg_src(32..40, op.lane); - self.set_reg_src(64..72, op.c); - } - SrcRef::Imm32(imm_c) => { - self.set_opcode(0x589); - self.set_reg_src(32..40, op.lane); - self.set_field(40..53, *imm_c & 0x1f1f); - } - _ => panic!("Invalid instruction form"), - }, - SrcRef::Imm32(imm_lane) => match &op.c.src_ref { - SrcRef::Zero | SrcRef::Reg(_) => { - self.set_opcode(0x989); - self.set_field(53..58, *imm_lane & 0x1f); - self.set_reg_src(64..72, op.c); - } - SrcRef::Imm32(imm_c) => { - self.set_opcode(0xf89); - self.set_field(40..53, *imm_c & 0x1f1f); - self.set_field(53..58, *imm_lane & 0x1f); - } - _ => panic!("Invalid instruction form"), - }, - _ => panic!("Invalid instruction form"), - }; - - self.set_dst(op.dst); - self.set_pred_dst(81..84, op.in_bounds); - self.set_reg_src(24..32, op.src); - self.set_field( - 58..60, - match op.op { - ShflOp::Idx => 0_u8, - ShflOp::Up => 1_u8, - ShflOp::Down => 2_u8, - ShflOp::Bfly => 3_u8, - }, - ); - } - - fn encode_plop3(&mut self, op: &OpPLop3) { - if op.is_uniform() { - self.set_opcode(0x89c); - - self.set_upred_src(68..71, 71, op.srcs[2]); - self.set_upred_src(77..80, 80, op.srcs[1]); - self.set_upred_src(87..90, 90, op.srcs[0]); - } else { - self.set_opcode(0x81c); - - if op.srcs[2].src_ref.as_reg().is_some_and(|r| r.is_uniform()) { - self.set_upred_src(68..71, 71, op.srcs[2]); - self.set_bit(67, true); - } else { - self.set_pred_src(68..71, 71, op.srcs[2]); - } - self.set_pred_src(77..80, 80, op.srcs[1]); - self.set_pred_src(87..90, 90, op.srcs[0]); - } - self.set_field(16..24, op.ops[1].lut); - self.set_field(64..67, op.ops[0].lut & 0x7); - self.set_field(72..77, op.ops[0].lut >> 3); - - self.set_pred_dst(81..84, op.dsts[0]); - self.set_pred_dst(84..87, op.dsts[1]); - } - - fn set_tex_dim(&mut self, range: Range, dim: TexDim) { - assert!(range.len() == 3); - self.set_field( - range, - match dim { - TexDim::_1D => 0_u8, - TexDim::Array1D => 4_u8, - TexDim::_2D => 1_u8, - TexDim::Array2D => 5_u8, - TexDim::_3D => 2_u8, - TexDim::Cube => 3_u8, - TexDim::ArrayCube => 7_u8, - }, - ); - } - - fn set_tex_lod_mode(&mut self, range: Range, lod_mode: TexLodMode) { - assert!(range.len() == 3); - self.set_field( - range, - match lod_mode { - TexLodMode::Auto => 0_u8, - TexLodMode::Zero => 1_u8, - TexLodMode::Bias => 2_u8, - TexLodMode::Lod => 3_u8, - TexLodMode::Clamp => 4_u8, - TexLodMode::BiasClamp => 5_u8, - }, - ); - } - - fn encode_r2ur(&mut self, op: &OpR2UR) { - self.set_opcode(0x3c2); - self.set_udst(op.dst); - self.set_reg_src(24..32, op.src); - self.set_pred_dst(81..84, Dst::None); - } - - fn encode_tex(&mut self, op: &OpTex) { - self.set_opcode(0x361); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - self.set_pred_dst(81..84, op.fault); - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(32..40, op.srcs[1]); - - self.set_tex_dim(61..64, op.dim); - self.set_field(72..76, op.mask); - self.set_bit(76, op.offset); - self.set_bit(77, false); // ToDo: NDV - self.set_bit(78, op.z_cmpr); - self.set_field(84..87, 1); - self.set_tex_lod_mode(87..90, op.lod_mode); - self.set_bit(90, false); // TODO: .NODEP - } - - fn encode_tld(&mut self, op: &OpTld) { - self.set_opcode(0x367); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - self.set_pred_dst(81..84, op.fault); - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(32..40, op.srcs[1]); - - self.set_tex_dim(61..64, op.dim); - self.set_field(72..76, op.mask); - self.set_bit(76, op.offset); - // bit 77: .CL - self.set_bit(78, op.is_ms); - // bits 79..81: .F16 - assert!( - op.lod_mode == TexLodMode::Zero || op.lod_mode == TexLodMode::Lod - ); - self.set_tex_lod_mode(87..90, op.lod_mode); - self.set_bit(90, false); // TODO: .NODEP - } - - fn encode_tld4(&mut self, op: &OpTld4) { - self.set_opcode(0x364); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - self.set_pred_dst(81..84, op.fault); - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(32..40, op.srcs[1]); - - self.set_tex_dim(61..64, op.dim); - self.set_field(72..76, op.mask); - self.set_field( - 76..78, - match op.offset_mode { - Tld4OffsetMode::None => 0_u8, - Tld4OffsetMode::AddOffI => 1_u8, - Tld4OffsetMode::PerPx => 2_u8, - }, - ); - // bit 77: .CL - self.set_bit(78, op.z_cmpr); - self.set_bit(84, true); // !.EF - self.set_field(87..89, op.comp); - self.set_bit(90, false); // TODO: .NODEP - } - - fn encode_tmml(&mut self, op: &OpTmml) { - self.set_opcode(0x36a); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(32..40, op.srcs[1]); - - self.set_tex_dim(61..64, op.dim); - self.set_field(72..76, op.mask); - self.set_bit(77, false); // ToDo: NDV - self.set_bit(90, false); // TODO: .NODEP - } - - fn encode_txd(&mut self, op: &OpTxd) { - self.set_opcode(0x36d); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - self.set_pred_dst(81..84, op.fault); - - self.set_reg_src(24..32, op.srcs[0]); - self.set_reg_src(32..40, op.srcs[1]); - - self.set_tex_dim(61..64, op.dim); - self.set_field(72..76, op.mask); - self.set_bit(76, op.offset); - self.set_bit(77, false); // ToDo: NDV - self.set_bit(90, false); // TODO: .NODEP - } - - fn encode_txq(&mut self, op: &OpTxq) { - self.set_opcode(0x370); - self.set_bit(59, true); // .B - - self.set_dst(op.dsts[0]); - if let Dst::Reg(reg) = op.dsts[1] { - self.set_reg(64..72, reg); - } else { - self.set_field(64..72, 255_u8); - } - - self.set_reg_src(24..32, op.src); - self.set_field( - 62..64, - match op.query { - TexQuery::Dimension => 0_u8, - TexQuery::TextureType => 1_u8, - TexQuery::SamplerPos => 2_u8, - }, - ); - self.set_field(72..76, op.mask); - } - - fn set_image_dim(&mut self, range: Range, dim: ImageDim) { - assert!(range.len() == 3); - self.set_field( - range, - match dim { - ImageDim::_1D => 0_u8, - ImageDim::_1DBuffer => 1_u8, - ImageDim::_1DArray => 2_u8, - ImageDim::_2D => 3_u8, - ImageDim::_2DArray => 4_u8, - ImageDim::_3D => 5_u8, - }, - ); - } - - fn set_mem_order(&mut self, order: &MemOrder) { - if self.sm < 80 { - let scope = match order { - MemOrder::Constant => MemScope::System, - MemOrder::Weak => MemScope::CTA, - MemOrder::Strong(s) => *s, - }; - self.set_field( - 77..79, - match scope { - MemScope::CTA => 0_u8, - // SM => 1_u8, - MemScope::GPU => 2_u8, - MemScope::System => 3_u8, - }, - ); - self.set_field( - 79..81, - match order { - MemOrder::Constant => 0_u8, - MemOrder::Weak => 1_u8, - MemOrder::Strong(_) => 2_u8, - // MMIO => 3_u8, - }, - ); - } else { - self.set_field( - 77..81, - match order { - MemOrder::Constant => 0x4_u8, - MemOrder::Weak => 0x0_u8, - MemOrder::Strong(MemScope::CTA) => 0x5_u8, - MemOrder::Strong(MemScope::GPU) => 0x7_u8, - MemOrder::Strong(MemScope::System) => 0xa_u8, - }, - ); - } - } - - fn set_eviction_priority(&mut self, pri: &MemEvictionPriority) { - self.set_field( - 84..86, - match pri { - MemEvictionPriority::First => 0_u8, - MemEvictionPriority::Normal => 1_u8, - MemEvictionPriority::Last => 2_u8, - MemEvictionPriority::Unchanged => 3_u8, - }, - ); - } - - fn encode_suld(&mut self, op: &OpSuLd) { - self.set_opcode(0x998); - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.coord); - self.set_reg_src(64..72, op.handle); - self.set_pred_dst(81..84, op.fault); - - self.set_image_dim(61..64, op.image_dim); - self.set_mem_order(&op.mem_order); - self.set_eviction_priority(&op.mem_eviction_priority); - - assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); - self.set_field(72..76, op.mask); - } - - fn encode_sust(&mut self, op: &OpSuSt) { - self.set_opcode(0x99c); - - self.set_reg_src(24..32, op.coord); - self.set_reg_src(32..40, op.data); - self.set_reg_src(64..72, op.handle); - - self.set_image_dim(61..64, op.image_dim); - self.set_mem_order(&op.mem_order); - self.set_eviction_priority(&op.mem_eviction_priority); - - assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf); - self.set_field(72..76, op.mask); - } - - fn encode_suatom(&mut self, op: &OpSuAtom) { - if matches!(op.atom_op, AtomOp::CmpExch) { - self.set_opcode(0x396); - } else { - self.set_opcode(0x394); - } - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.coord); - self.set_reg_src(32..40, op.data); - self.set_reg_src(64..72, op.handle); - self.set_pred_dst(81..84, op.fault); - - self.set_image_dim(61..64, op.image_dim); - self.set_mem_order(&op.mem_order); - self.set_eviction_priority(&op.mem_eviction_priority); - - self.set_bit(72, false); // .BA - self.set_atom_type(73..76, op.atom_type); - self.set_atom_op(87..91, op.atom_op); - } - - fn set_mem_type(&mut self, range: Range, mem_type: MemType) { - assert!(range.len() == 3); - self.set_field( - range, - match mem_type { - MemType::U8 => 0_u8, - MemType::I8 => 1_u8, - MemType::U16 => 2_u8, - MemType::I16 => 3_u8, - MemType::B32 => 4_u8, - MemType::B64 => 5_u8, - MemType::B128 => 6_u8, - }, - ); - } - - fn set_mem_access(&mut self, access: &MemAccess) { - self.set_field( - 72..73, - match access.space.addr_type() { - MemAddrType::A32 => 0_u8, - MemAddrType::A64 => 1_u8, - }, - ); - self.set_mem_type(73..76, access.mem_type); - self.set_mem_order(&access.order); - self.set_eviction_priority(&access.eviction_priority); - } - - fn encode_ldg(&mut self, op: &OpLd) { - self.set_opcode(0x381); - - self.set_dst(op.dst); - self.set_pred_dst(81..84, Dst::None); - - self.set_reg_src(24..32, op.addr); - self.set_field(40..64, op.offset); - - self.set_mem_access(&op.access); - } - - fn encode_ldl(&mut self, op: &OpLd) { - self.set_opcode(0x983); - self.set_field(84..87, 1_u8); - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.addr); - self.set_field(40..64, op.offset); - - self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); - assert!(op.access.eviction_priority == MemEvictionPriority::Normal); - } - - fn encode_lds(&mut self, op: &OpLd) { - self.set_opcode(0x984); - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.addr); - self.set_field(40..64, op.offset); - - self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); - assert!(op.access.eviction_priority == MemEvictionPriority::Normal); - - self.set_bit(87, false); // !.ZD - Returns a predicate? - } - - fn encode_ld(&mut self, op: &OpLd) { - match op.access.space { - MemSpace::Global(_) => self.encode_ldg(op), - MemSpace::Local => self.encode_ldl(op), - MemSpace::Shared => self.encode_lds(op), - } - } - - fn encode_ldc(&mut self, op: &OpLdc) { - let SrcRef::CBuf(cb) = &op.cb.src_ref else { - panic!("LDC must take a cbuf source"); - }; - - match cb.buf { - CBuf::Binding(idx) => { - if op.is_uniform() { - self.set_opcode(0xab9); - self.set_udst(op.dst); - - assert!(op.offset.is_zero()); - assert!(op.mode == LdcMode::Indexed); - } else { - self.set_opcode(0xb82); - self.set_dst(op.dst); - - self.set_reg_src(24..32, op.offset); - self.set_field( - 78..80, - match op.mode { - LdcMode::Indexed => 0_u8, - LdcMode::IndexedLinear => 1_u8, - LdcMode::IndexedSegmented => 2_u8, - LdcMode::IndexedSegmentedLinear => 3_u8, - }, - ); - } - self.set_field(54..59, idx); - self.set_bit(91, false); // Bound - } - CBuf::BindlessUGPR(handle) => { - if op.is_uniform() { - self.set_opcode(0xab9); - self.set_udst(op.dst); - - assert!(op.offset.is_zero()); - } else { - self.set_opcode(0x582); - self.set_dst(op.dst); - - self.set_reg_src(64..72, op.offset); - } - - self.set_ureg(24..32, handle); - self.set_reg_src(64..72, op.offset); - assert!(op.mode == LdcMode::Indexed); - self.set_bit(91, true); // Bindless - } - CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"), - } - - self.set_field(38..54, cb.offset); - self.set_mem_type(73..76, op.mem_type); - } - - fn encode_stg(&mut self, op: &OpSt) { - self.set_opcode(0x386); - - self.set_reg_src(24..32, op.addr); - self.set_reg_src(32..40, op.data); - self.set_field(40..64, op.offset); - - self.set_mem_access(&op.access); - } - - fn encode_stl(&mut self, op: &OpSt) { - self.set_opcode(0x387); - self.set_field(84..87, 1_u8); - - self.set_reg_src(24..32, op.addr); - self.set_reg_src(32..40, op.data); - self.set_field(40..64, op.offset); - - self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); - assert!(op.access.eviction_priority == MemEvictionPriority::Normal); - } - - fn encode_sts(&mut self, op: &OpSt) { - self.set_opcode(0x388); - - self.set_reg_src(24..32, op.addr); - self.set_reg_src(32..40, op.data); - self.set_field(40..64, op.offset); - - self.set_mem_type(73..76, op.access.mem_type); - assert!(op.access.order == MemOrder::Strong(MemScope::CTA)); - assert!(op.access.eviction_priority == MemEvictionPriority::Normal); - } - - fn encode_st(&mut self, op: &OpSt) { - match op.access.space { - MemSpace::Global(_) => self.encode_stg(op), - MemSpace::Local => self.encode_stl(op), - MemSpace::Shared => self.encode_sts(op), - } - } - - fn set_atom_op(&mut self, range: Range, atom_op: AtomOp) { - assert!(range.len() == 4); - self.set_field( - range, - match atom_op { - AtomOp::Add | AtomOp::CmpExch => 0_u8, - AtomOp::Min => 1_u8, - AtomOp::Max => 2_u8, - AtomOp::Inc => 3_u8, - AtomOp::Dec => 4_u8, - AtomOp::And => 5_u8, - AtomOp::Or => 6_u8, - AtomOp::Xor => 7_u8, - AtomOp::Exch => 8_u8, - }, - ); - } - - fn set_atom_type(&mut self, range: Range, atom_type: AtomType) { - assert!(range.len() == 3); - self.set_field( - range, - match atom_type { - AtomType::U32 => 0_u8, - AtomType::I32 => 1_u8, - AtomType::U64 => 2_u8, - AtomType::F32 => 3_u8, - AtomType::F16x2 => 4_u8, - AtomType::I64 => 5_u8, - AtomType::F64 => 6_u8, - }, - ); - } - - fn encode_atomg(&mut self, op: &OpAtom) { - if op.atom_op == AtomOp::CmpExch { - self.set_opcode(0x3a9); - - self.set_reg_src(32..40, op.cmpr); - self.set_reg_src(64..72, op.data); - } else { - self.set_opcode(0x3a8); - - self.set_reg_src(32..40, op.data); - - self.set_atom_op(87..91, op.atom_op); - } - - self.set_dst(op.dst); - self.set_pred_dst(81..84, Dst::None); - - self.set_reg_src(24..32, op.addr); - self.set_field(40..64, op.addr_offset); - - self.set_field( - 72..73, - match op.mem_space.addr_type() { - MemAddrType::A32 => 0_u8, - MemAddrType::A64 => 1_u8, - }, - ); - - self.set_atom_type(73..76, op.atom_type); - self.set_mem_order(&op.mem_order); - self.set_eviction_priority(&op.mem_eviction_priority); - } - - fn encode_atoms(&mut self, op: &OpAtom) { - if op.atom_op == AtomOp::CmpExch { - self.set_opcode(0x38d); - - self.set_reg_src(32..40, op.cmpr); - self.set_reg_src(64..72, op.data); - } else { - self.set_opcode(0x38c); - - self.set_reg_src(32..40, op.data); - - self.set_atom_op(87..91, op.atom_op); - } - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.addr); - self.set_field(40..64, op.addr_offset); - - assert!(op.mem_order == MemOrder::Strong(MemScope::CTA)); - assert!(op.mem_eviction_priority == MemEvictionPriority::Normal); - - self.set_atom_type(73..76, op.atom_type); - } - - fn encode_atom(&mut self, op: &OpAtom) { - match op.mem_space { - MemSpace::Global(_) => self.encode_atomg(op), - MemSpace::Local => panic!("Atomics do not support local"), - MemSpace::Shared => self.encode_atoms(op), - } - } - - fn encode_al2p(&mut self, op: &OpAL2P) { - self.set_opcode(0x920); - - self.set_dst(op.dst); - self.set_reg_src(24..32, op.offset); - - self.set_field(40..50, op.access.addr); - self.set_field(74..76, 0_u8); // comps - assert!(!op.access.patch); - self.set_bit(79, op.access.output); - } - - fn encode_ald(&mut self, op: &OpALd) { - self.set_opcode(0x321); - - self.set_dst(op.dst); - self.set_reg_src(32..40, op.vtx); - self.set_reg_src(24..32, op.offset); - - self.set_field(40..50, op.access.addr); - self.set_field(74..76, op.access.comps - 1); - self.set_field(76..77, op.access.patch); - self.set_field(77..78, op.access.phys); - self.set_field(79..80, op.access.output); - } - - fn encode_ast(&mut self, op: &OpASt) { - self.set_opcode(0x322); - - self.set_reg_src(32..40, op.data); - self.set_reg_src(64..72, op.vtx); - self.set_reg_src(24..32, op.offset); - - self.set_field(40..50, op.access.addr); - self.set_field(74..76, op.access.comps - 1); - self.set_field(76..77, op.access.patch); - self.set_field(77..78, op.access.phys); - assert!(op.access.output); - } - - fn encode_ipa(&mut self, op: &OpIpa) { - self.set_opcode(0x326); - - self.set_dst(op.dst); - - assert!(op.addr % 4 == 0); - self.set_field(64..72, op.addr >> 2); - - self.set_field( - 76..78, - match op.loc { - InterpLoc::Default => 0_u8, - InterpLoc::Centroid => 1_u8, - InterpLoc::Offset => 2_u8, - }, - ); - self.set_field( - 78..80, - match op.freq { - InterpFreq::Pass => 0_u8, - InterpFreq::Constant => 1_u8, - InterpFreq::State => 2_u8, - InterpFreq::PassMulW => { - panic!("InterpFreq::PassMulW is invalid on SM70+"); - } - }, - ); - - assert!(op.inv_w.is_zero()); - self.set_reg_src(32..40, op.offset); - - // TODO: What is this for? - self.set_pred_dst(81..84, Dst::None); - } - - fn encode_ldtram(&mut self, op: &OpLdTram) { - self.set_opcode(0x3ad); - self.set_dst(op.dst); - self.set_ureg(24..32, RegRef::zero(RegFile::UGPR, 1)); - - assert!(op.addr % 4 == 0); - self.set_field(64..72, op.addr >> 2); - - self.set_bit(72, op.use_c); - - // Unknown but required - self.set_bit(91, true); - } - - fn encode_cctl(&mut self, op: &OpCCtl) { - assert!(matches!(op.mem_space, MemSpace::Global(_))); - self.set_opcode(0x98f); - - self.set_reg_src(24..32, op.addr); - self.set_field(32..64, op.addr_offset); - - self.set_field( - 87..91, - match op.op { - CCtlOp::PF1 => 0_u8, - CCtlOp::PF2 => 1_u8, - CCtlOp::WB => 2_u8, - CCtlOp::IV => 3_u8, - CCtlOp::IVAll => 4_u8, - CCtlOp::RS => 5_u8, - CCtlOp::IVAllP => 6_u8, - CCtlOp::WBAll => 7_u8, - CCtlOp::WBAllP => 8_u8, - }, - ); - } - - fn encode_membar(&mut self, op: &OpMemBar) { - self.set_opcode(0x992); - - self.set_bit(72, false); // !.MMIO - self.set_field( - 76..79, - match op.scope { - MemScope::CTA => 0_u8, - // SM => 1_u8, - MemScope::GPU => 2_u8, - MemScope::System => 3_u8, - }, - ); - self.set_bit(80, false); // .SC - } - - fn set_rel_offset( - &mut self, - range: Range, - label: &Label, - ip: usize, - labels: &HashMap, - ) { - let ip = u64::try_from(ip).unwrap(); - let ip = i64::try_from(ip).unwrap(); - - let target_ip = *labels.get(label).unwrap(); - let target_ip = u64::try_from(target_ip).unwrap(); - let target_ip = i64::try_from(target_ip).unwrap(); - - let rel_offset = target_ip - ip - 4; - - self.set_field(range, rel_offset); - } - - fn encode_bclear(&mut self, op: &OpBClear) { - self.set_opcode(0x355); - - self.set_dst(Dst::None); - self.set_bar_dst(24..28, op.dst); - - self.set_bit(84, true); // .CLEAR - } - - fn encode_bmov(&mut self, op: &OpBMov) { - if dst_is_bar(op.dst) { - self.set_opcode(0x356); - - self.set_bar_dst(24..28, op.dst); - self.set_reg_src(32..40, op.src); - - self.set_bit(84, op.clear); - } else { - self.set_opcode(0x355); - - self.set_dst(op.dst); - self.set_bar_src(24..28, op.src); - - self.set_bit(84, op.clear); - } - } - - fn encode_break(&mut self, op: &OpBreak) { - self.set_opcode(0x942); - assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg()); - self.set_bar_dst(16..20, op.bar_out); - self.set_pred_src(87..90, 90, op.cond); - } - - fn encode_bssy( - &mut self, - op: &OpBSSy, - ip: usize, - labels: &HashMap, - ) { - self.set_opcode(0x945); - assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg()); - self.set_bar_dst(16..20, op.bar_out); - self.set_rel_offset(34..64, &op.target, ip, labels); - self.set_pred_src(87..90, 90, op.cond); - } - - fn encode_bsync(&mut self, op: &OpBSync) { - self.set_opcode(0x941); - self.set_bar_src(16..20, op.bar); - self.set_pred_src(87..90, 90, op.cond); - } - - fn encode_bra( - &mut self, - op: &OpBra, - ip: usize, - labels: &HashMap, - ) { - self.set_opcode(0x947); - self.set_rel_offset(34..82, &op.target, ip, labels); - self.set_field(87..90, 0x7_u8); // TODO: Pred? - } - - fn encode_exit(&mut self, _op: &OpExit) { - self.set_opcode(0x94d); - - // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3 - self.set_field(84..85, false); - self.set_field(85..86, false); // .NO_ATEXIT - self.set_field(87..90, 0x7_u8); // TODO: Predicate - self.set_field(90..91, false); // NOT - } - - fn encode_warpsync(&mut self, op: &OpWarpSync) { - self.encode_alu(0x148, None, None, Some(&Src::from(op.mask)), None); - self.set_pred_src(87..90, 90, SrcRef::True.into()); - } - - fn encode_bar(&mut self, _op: &OpBar) { - self.set_opcode(0xb1d); - - // self.set_opcode(0x31d); - - // // src0 == src1 - // self.set_reg_src(32..40, SrcRef::Zero.into()); - - // // 00: RED.POPC - // // 01: RED.AND - // // 02: RED.OR - // self.set_field(74..76, 0_u8); - - // // 00: SYNC - // // 01: ARV - // // 02: RED - // // 03: SCAN - // self.set_field(77..79, 0_u8); - - // self.set_pred_src(87..90, 90, SrcRef::True.into()); - } - - fn encode_cs2r(&mut self, op: &OpCS2R) { - self.set_opcode(0x805); - self.set_dst(op.dst); - self.set_field(72..80, op.idx); - self.set_bit(80, op.dst.as_reg().unwrap().comps() == 2); // .64 - } - - fn encode_isberd(&mut self, op: &OpIsberd) { - self.set_opcode(0x923); - self.set_dst(op.dst); - self.set_reg_src(24..32, op.idx); - } - - fn encode_kill(&mut self, _op: &OpKill) { - self.set_opcode(0x95b); - self.set_pred_src(87..90, 90, SrcRef::True.into()); - } - - fn encode_nop(&mut self, _op: &OpNop) { - self.set_opcode(0x918); - } - - fn encode_pixld(&mut self, op: &OpPixLd) { - self.set_opcode(0x925); - self.set_dst(op.dst); - self.set_field( - 78..81, - match op.val { - PixVal::MsCount => 0_u8, - PixVal::CovMask => 1_u8, - PixVal::CentroidOffset => 2_u8, - PixVal::MyIndex => 3_u8, - PixVal::InnerCoverage => 4_u8, - }, - ); - self.set_pred_dst(81..84, Dst::None); - } - - fn encode_s2r(&mut self, op: &OpS2R) { - assert!(!op.is_uniform()); - self.set_opcode(if op.is_uniform() { 0x9c3 } else { 0x919 }); - self.set_dst(op.dst); - self.set_field(72..80, op.idx); - } - - fn encode_out(&mut self, op: &OpOut) { - self.encode_alu( - 0x124, - Some(&op.dst), - Some(&op.handle), - Some(&op.stream), - None, - ); - - self.set_field( - 78..80, - match op.out_type { - OutType::Emit => 1_u8, - OutType::Cut => 2_u8, - OutType::EmitThenCut => 3_u8, - }, - ); - } - - fn encode_out_final(&mut self, op: &OpOutFinal) { - self.encode_alu( - 0x124, - Some(&Dst::None), - Some(&op.handle), - Some(&Src::new_zero()), - None, - ); - } - - fn encode_vote(&mut self, op: &OpVote) { - if op.is_uniform() { - self.set_opcode(0x886); - self.set_udst(op.ballot); - } else { - self.set_opcode(0x806); - self.set_dst(op.ballot); - } - - self.set_field( - 72..74, - match op.op { - VoteOp::All => 0_u8, - VoteOp::Any => 1_u8, - VoteOp::Eq => 2_u8, - }, - ); - - self.set_pred_dst(81..84, op.vote); - self.set_pred_src(87..90, 90, op.pred); - } - - pub fn encode( - instr: &Instr, - sm: u8, - ip: usize, - labels: &HashMap, - ) -> [u32; 4] { - assert!(sm >= 70); - - let mut si = SM70Instr { - inst: [0; 4], - sm: sm, - }; - - match &instr.op { - Op::FAdd(op) => si.encode_fadd(op), - Op::FFma(op) => si.encode_ffma(op), - Op::FMnMx(op) => si.encode_fmnmx(op), - Op::FMul(op) => si.encode_fmul(op), - Op::FSet(op) => si.encode_fset(op), - Op::FSetP(op) => si.encode_fsetp(op), - Op::FSwzAdd(op) => si.encode_fswzadd(op), - Op::DAdd(op) => si.encode_dadd(op), - Op::DFma(op) => si.encode_dfma(op), - Op::DMul(op) => si.encode_dmul(op), - Op::DSetP(op) => si.encode_dsetp(op), - Op::HAdd2(op) => si.encode_hadd2(op), - Op::HFma2(op) => si.encode_hfma2(op), - Op::HMul2(op) => si.encode_hmul2(op), - Op::HSet2(op) => si.encode_hset2(op), - Op::HSetP2(op) => si.encode_hsetp2(op), - Op::HMnMx2(op) => si.encode_hmnmx2(op), - Op::MuFu(op) => si.encode_mufu(op), - Op::BMsk(op) => si.encode_bmsk(op), - Op::BRev(op) => si.encode_brev(op), - Op::Flo(op) => si.encode_flo(op), - Op::IAbs(op) => si.encode_iabs(op), - Op::IAdd3(op) => si.encode_iadd3(op), - Op::IAdd3X(op) => si.encode_iadd3x(op), - Op::IDp4(op) => si.encode_idp4(op), - Op::IMad(op) => si.encode_imad(op), - Op::IMad64(op) => si.encode_imad64(op), - Op::IMnMx(op) => si.encode_imnmx(op), - Op::ISetP(op) => si.encode_isetp(op), - Op::Lop3(op) => si.encode_lop3(op), - Op::PopC(op) => si.encode_popc(op), - Op::Shf(op) => si.encode_shf(op), - Op::F2F(op) => si.encode_f2f(op), - Op::F2I(op) => si.encode_f2i(op), - Op::I2F(op) => si.encode_i2f(op), - Op::FRnd(op) => si.encode_frnd(op), - Op::Mov(op) => si.encode_mov(op), - Op::Prmt(op) => si.encode_prmt(op), - Op::Sel(op) => si.encode_sel(op), - Op::Shfl(op) => si.encode_shfl(op), - Op::PLop3(op) => si.encode_plop3(op), - Op::R2UR(op) => si.encode_r2ur(op), - Op::Tex(op) => si.encode_tex(op), - Op::Tld(op) => si.encode_tld(op), - Op::Tld4(op) => si.encode_tld4(op), - Op::Tmml(op) => si.encode_tmml(op), - Op::Txd(op) => si.encode_txd(op), - Op::Txq(op) => si.encode_txq(op), - Op::SuLd(op) => si.encode_suld(op), - Op::SuSt(op) => si.encode_sust(op), - Op::SuAtom(op) => si.encode_suatom(op), - Op::Ld(op) => si.encode_ld(op), - Op::Ldc(op) => si.encode_ldc(op), - Op::St(op) => si.encode_st(op), - Op::Atom(op) => si.encode_atom(op), - Op::AL2P(op) => si.encode_al2p(op), - Op::ALd(op) => si.encode_ald(op), - Op::ASt(op) => si.encode_ast(op), - Op::Ipa(op) => si.encode_ipa(op), - Op::LdTram(op) => si.encode_ldtram(op), - Op::CCtl(op) => si.encode_cctl(op), - Op::MemBar(op) => si.encode_membar(op), - Op::BClear(op) => si.encode_bclear(op), - Op::BMov(op) => si.encode_bmov(op), - Op::Break(op) => si.encode_break(op), - Op::BSSy(op) => si.encode_bssy(op, ip, labels), - Op::BSync(op) => si.encode_bsync(op), - Op::Bra(op) => si.encode_bra(op, ip, labels), - Op::Exit(op) => si.encode_exit(op), - Op::WarpSync(op) => si.encode_warpsync(op), - Op::Bar(op) => si.encode_bar(op), - Op::CS2R(op) => si.encode_cs2r(op), - Op::Isberd(op) => si.encode_isberd(op), - Op::Kill(op) => si.encode_kill(op), - Op::Nop(op) => si.encode_nop(op), - Op::PixLd(op) => si.encode_pixld(op), - Op::S2R(op) => si.encode_s2r(op), - Op::Out(op) => si.encode_out(op), - Op::OutFinal(op) => si.encode_out_final(op), - Op::Vote(op) => si.encode_vote(op), - _ => panic!("Unhandled instruction"), - } - - si.set_pred(&instr.pred); - si.set_instr_deps(&instr.deps); - - si.inst - } -} - -pub fn encode_sm70_shader(sm: &dyn ShaderModel, s: &Shader<'_>) -> Vec { - assert!(s.functions.len() == 1); - let func = &s.functions[0]; - - let mut ip = 0_usize; - let mut labels = HashMap::new(); - for b in &func.blocks { - labels.insert(b.label, ip); - for instr in &b.instrs { - if let Op::Nop(op) = &instr.op { - if let Some(label) = op.label { - labels.insert(label, ip); - } - } - ip += 4; - } - } - - let mut encoded = Vec::new(); - for b in &func.blocks { - for instr in &b.instrs { - let e = SM70Instr::encode(instr, sm.sm(), encoded.len(), &labels); - encoded.extend_from_slice(&e[..]); - } - } - encoded -}