third_party_mesa3d/src/nouveau/compiler/nak/assign_regs.rs
Mel Henning 4c1447e89b nak: Check that swizzles are none wherever we check that src_mod is none.

This commit simply does:
s/src_mod.is_none()/is_unmodified()/
across all of nak except the definition of is_unmodified() itself.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Fixes: bad23ddb48 ("nak: Add F16 and F16v2 sources")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34794>
(cherry picked from commit 9d1c38ddf11202805ac8e281ddd34940f8ad68a6)
2025-05-06 23:40:53 +02:00


// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT
use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
use crate::union_find::UnionFind;
use compiler::bitset::BitSet;
use std::cmp::{max, min, Ordering};
use std::collections::{HashMap, HashSet};
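// A KillSet is an insertion-ordered set of SSA values that die at a given
// instruction.  The HashSet provides O(1) de-duplication while the Vec
// preserves insertion order so that iteration (and therefore register
// freeing) stays deterministic from one run to the next.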
struct KillSet {
set: HashSet<SSAValue>,
vec: Vec<SSAValue>,
}
impl KillSet {
pub fn new() -> KillSet {
KillSet {
set: HashSet::new(),
vec: Vec::new(),
}
}
pub fn len(&self) -> usize {
self.vec.len()
}
pub fn clear(&mut self) {
self.set.clear();
self.vec.clear();
}
pub fn insert(&mut self, ssa: SSAValue) {
if self.set.insert(ssa) {
self.vec.push(ssa);
}
}
pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
self.vec.iter()
}
pub fn is_empty(&self) -> bool {
self.vec.is_empty()
}
}
// These two helpers are carefully paired for the purposes of RA.
// src_ssa_ref() returns whatever SSARef is present in the source, if any.
// src_set_reg() overwrites that SSARef with a RegRef.
#[inline]
fn src_ssa_ref(src: &Src) -> Option<&SSARef> {
match &src.src_ref {
SrcRef::SSA(ssa) => Some(ssa),
SrcRef::CBuf(CBufRef {
buf: CBuf::BindlessSSA(ssa),
..
}) => Some(ssa),
_ => None,
}
}
#[inline]
fn src_set_reg(src: &mut Src, reg: RegRef) {
match &mut src.src_ref {
SrcRef::SSA(_) => {
src.src_ref = reg.into();
}
SrcRef::CBuf(cb) => {
debug_assert!(matches!(&cb.buf, CBuf::BindlessSSA(_)));
debug_assert!(reg.file() == RegFile::UGPR && reg.comps() == 2);
cb.buf = CBuf::BindlessUGPR(reg);
}
_ => (),
}
}
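// Example use of the src_ssa_ref()/src_set_reg() pair above (illustrative
// sketch, not real IR): RA looks up or allocates a register for whatever
// SSARef the source carries and then rewrites the source in place, e.g.
//
//     if let Some(ssa) = src_ssa_ref(src) {
//         let reg = ra.collect_vector(ssa);
//         src_set_reg(src, reg);
//     }
//
// The bindless-cbuf case is why both helpers exist: the SSARef is buried
// inside the CBufRef, so a plain match on SrcRef::SSA would miss it.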
enum SSAUse {
FixedReg(u32),
Vec(SSARef),
}
struct SSAUseMap {
ssa_map: HashMap<SSAValue, Vec<(usize, SSAUse)>>,
}
impl SSAUseMap {
fn add_fixed_reg_use(&mut self, ip: usize, ssa: SSAValue, reg: u32) {
let v = self.ssa_map.entry(ssa).or_default();
v.push((ip, SSAUse::FixedReg(reg)));
}
fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
if vec.comps() == 1 {
return;
}
for ssa in vec.iter() {
let v = self.ssa_map.entry(*ssa).or_default();
v.push((ip, SSAUse::Vec(vec)));
}
}
fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
if let Some(v) = self.ssa_map.get(&ssa) {
let p = v.partition_point(|(uip, _)| *uip <= ip);
if p == v.len() {
None
} else {
let (_, u) = &v[p];
Some(u)
}
} else {
None
}
}
pub fn add_block(&mut self, b: &BasicBlock) {
for (ip, instr) in b.instrs.iter().enumerate() {
match &instr.op {
Op::RegOut(op) => {
for (i, src) in op.srcs.iter().enumerate() {
let out_reg = u32::try_from(i).unwrap();
if let Some(ssa) = src_ssa_ref(src) {
assert!(ssa.comps() == 1);
self.add_fixed_reg_use(ip, ssa[0], out_reg);
}
}
}
_ => {
// We don't care about predicates because they're scalar
for src in instr.srcs() {
if let Some(ssa) = src_ssa_ref(src) {
self.add_vec_use(ip, *ssa);
}
}
}
}
}
}
pub fn for_block(b: &BasicBlock) -> SSAUseMap {
let mut am = SSAUseMap {
ssa_map: HashMap::new(),
};
am.add_block(b);
am
}
}
/// Tracks the most recent register assigned to a given phi web
///
/// During register assignment, we then try to assign this register
/// to the next SSAValue in the same web.
///
/// This heuristic is inspired by the "Aggressive pre-coalescing" described in
/// section 4 of Colombet et al 2011.
///
/// Q. Colombet, B. Boissinot, P. Brisk, S. Hack and F. Rastello,
/// "Graph-coloring and treescan register allocation using repairing," 2011
/// Proceedings of the 14th International Conference on Compilers,
/// Architectures and Synthesis for Embedded Systems (CASES), Taipei,
/// Taiwan, 2011, pp. 45-54, doi: 10.1145/2038698.2038708.
struct PhiWebs {
uf: UnionFind<SSAValue>,
assignments: HashMap<SSAValue, u32>,
}
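// alloc_scalar() consults get() to bias a new definition toward the register
// most recently used by its phi web, and first_pass() records every
// assignment made in a block via set() so later blocks see the same bias.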
impl PhiWebs {
pub fn new(f: &Function) -> Self {
let mut uf = UnionFind::new();
// Populate uf with phi equivalence classes
//
// Note that we intentionally don't pay attention to move instructions
// below - the assumption is that any move instructions at this point
// were inserted by CSSA conversion, and unioning across them would hurt
// the coalescing.
for b_idx in 0..f.blocks.len() {
let Some(phi_dsts) = f.blocks[b_idx].phi_dsts() else {
continue;
};
let dsts: HashMap<u32, &SSARef> = phi_dsts
.dsts
.iter()
.map(|(idx, dst)| {
let ssa_ref = dst.as_ssa().expect("Expected ssa form");
(*idx, ssa_ref)
})
.collect();
for pred_idx in f.blocks.pred_indices(b_idx) {
let phi_srcs =
f.blocks[*pred_idx].phi_srcs().expect("Missing phi_srcs");
for (src_idx, src) in phi_srcs.srcs.iter() {
let a = src.as_ssa().expect("Expected ssa form");
let b = dsts[src_idx];
assert_eq!(a.comps(), 1);
assert_eq!(b.comps(), 1);
uf.union(a[0], b[0]);
}
}
}
PhiWebs {
uf,
assignments: HashMap::new(),
}
}
pub fn get(&mut self, ssa: SSAValue) -> Option<u32> {
let phi_web_id = self.uf.find(ssa);
self.assignments.get(&phi_web_id).copied()
}
pub fn set(&mut self, ssa: SSAValue, reg: u32) {
let phi_web_id = self.uf.find(ssa);
self.assignments.insert(phi_web_id, reg);
}
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
enum LiveRef {
SSA(SSAValue),
Phi(u32),
}
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct LiveValue {
pub live_ref: LiveRef,
pub reg_ref: RegRef,
}
// We need a stable ordering of live values so that RA is deterministic
impl Ord for LiveValue {
fn cmp(&self, other: &Self) -> Ordering {
let s_file = u8::from(self.reg_ref.file());
let o_file = u8::from(other.reg_ref.file());
match s_file.cmp(&o_file) {
Ordering::Equal => {
let s_idx = self.reg_ref.base_idx();
let o_idx = other.reg_ref.base_idx();
s_idx.cmp(&o_idx)
}
ord => ord,
}
}
}
impl PartialOrd for LiveValue {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
#[derive(Clone)]
struct RegAllocator {
file: RegFile,
num_regs: u32,
used: BitSet,
pinned: BitSet,
reg_ssa: Vec<SSAValue>,
ssa_reg: HashMap<SSAValue, u32>,
}
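// Invariants: ssa_reg and reg_ssa form a bidirectional map between live SSA
// values and their registers, `used` mirrors which registers currently hold
// a value, and `pinned` marks registers that a VecRegAllocator must not
// evict while building a vector.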
impl RegAllocator {
pub fn new(file: RegFile, num_regs: u32) -> Self {
Self {
file,
num_regs,
used: BitSet::new(),
pinned: BitSet::new(),
reg_ssa: Vec::new(),
ssa_reg: HashMap::new(),
}
}
fn file(&self) -> RegFile {
self.file
}
pub fn num_regs_used(&self) -> u32 {
self.ssa_reg.len().try_into().unwrap()
}
pub fn reg_is_used(&self, reg: u32) -> bool {
self.used.get(reg.try_into().unwrap())
}
pub fn reg_is_pinned(&self, reg: u32) -> bool {
self.pinned.get(reg.try_into().unwrap())
}
pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u32> {
self.ssa_reg.get(&ssa).cloned()
}
pub fn try_get_ssa(&self, reg: u32) -> Option<SSAValue> {
if self.reg_is_used(reg) {
Some(self.reg_ssa[usize::try_from(reg).unwrap()])
} else {
None
}
}
pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
let Some(reg) = self.try_get_reg(vec[0]) else {
return None;
};
let align = u32::from(vec.comps()).next_power_of_two();
if reg % align != 0 {
return None;
}
for c in 1..vec.comps() {
let ssa = vec[usize::from(c)];
if self.try_get_reg(ssa) != Some(reg + u32::from(c)) {
return None;
}
}
Some(reg)
}
pub fn free_ssa(&mut self, ssa: SSAValue) -> u32 {
assert!(ssa.file() == self.file);
let reg = self.ssa_reg.remove(&ssa).unwrap();
assert!(self.reg_is_used(reg));
let reg_usize = usize::try_from(reg).unwrap();
assert!(self.reg_ssa[reg_usize] == ssa);
self.used.remove(reg_usize);
self.pinned.remove(reg_usize);
reg
}
pub fn assign_reg(&mut self, ssa: SSAValue, reg: u32) {
assert!(ssa.file() == self.file);
assert!(reg < self.num_regs);
assert!(!self.reg_is_used(reg));
let reg_usize = usize::try_from(reg).unwrap();
if reg_usize >= self.reg_ssa.len() {
self.reg_ssa.resize(reg_usize + 1, SSAValue::NONE);
}
self.reg_ssa[reg_usize] = ssa;
let old = self.ssa_reg.insert(ssa, reg);
assert!(old.is_none());
self.used.insert(reg_usize);
}
pub fn pin_reg(&mut self, reg: u32) {
assert!(self.reg_is_used(reg));
self.pinned.insert(reg.try_into().unwrap());
}
fn reg_range_is_unset(set: &BitSet, reg: u32, comps: u8) -> bool {
for c in 0..u32::from(comps) {
if set.get((reg + c).try_into().unwrap()) {
return false;
}
}
true
}
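/// Scans upward from start_reg for `comps` consecutive unset bits at the
/// given alignment, returning the base register if one is found.
///
/// For example (hypothetical bit set): with `set = {0, 1, 2, 5}`,
/// `align = 2`, and `comps = 2`, the first unset bit is 3, which rounds up
/// to 4, but r5 is taken, so the search resumes at 6 and returns Some(6).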
fn try_find_unset_reg_range(
&self,
set: &BitSet,
start_reg: u32,
align: u32,
comps: u8,
) -> Option<u32> {
assert!(comps > 0 && u32::from(comps) <= self.num_regs);
let mut next_reg = start_reg;
loop {
let reg: u32 = set
.next_unset(usize::try_from(next_reg).unwrap())
.try_into()
.unwrap();
// Ensure we're properly aligned
let reg = reg.next_multiple_of(align);
// Ensure we're in-bounds. This also serves as a check to ensure
// that u8::try_from(reg + i) will succeed.
if reg > self.num_regs - u32::from(comps) {
return None;
}
if Self::reg_range_is_unset(set, reg, comps) {
return Some(reg);
}
next_reg = reg + align;
}
}
pub fn try_find_unused_reg_range(
&self,
start_reg: u32,
align: u32,
comps: u8,
) -> Option<u32> {
self.try_find_unset_reg_range(&self.used, start_reg, align, comps)
}
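/// Allocates a single register for `ssa`, trying heuristics in order: a
/// register previously used by the same phi web, then a register a later
/// OpRegOut wants this value in, then a slot that lines up with an
/// already-placed component of a vector this value will be part of, then
/// any suitably aligned free register, and finally any free register.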
pub fn alloc_scalar(
&mut self,
ip: usize,
sum: &SSAUseMap,
phi_webs: &mut PhiWebs,
ssa: SSAValue,
) -> u32 {
// Bias register assignment using the phi coalescing
if let Some(reg) = phi_webs.get(ssa) {
if !self.reg_is_used(reg) {
self.assign_reg(ssa, reg);
return reg;
}
}
// Otherwise, use SSAUseMap heuristics
if let Some(u) = sum.find_vec_use_after(ssa, ip) {
match u {
SSAUse::FixedReg(reg) => {
if !self.reg_is_used(*reg) {
self.assign_reg(ssa, *reg);
return *reg;
}
}
SSAUse::Vec(vec) => {
let mut comp = u8::MAX;
for c in 0..vec.comps() {
if vec[usize::from(c)] == ssa {
comp = c;
break;
}
}
assert!(comp < vec.comps());
let align = u32::from(vec.comps()).next_power_of_two();
for c in 0..vec.comps() {
if c == comp {
continue;
}
let other = vec[usize::from(c)];
let Some(other_reg) = self.try_get_reg(other) else {
continue;
};
let vec_reg = other_reg & !(align - 1);
if other_reg != vec_reg + u32::from(c) {
continue;
}
let reg = vec_reg + u32::from(comp);
if reg < self.num_regs && !self.reg_is_used(reg) {
self.assign_reg(ssa, reg);
return reg;
}
}
// We weren't able to pair it with an already allocated
// register but maybe we can at least find an aligned one.
if let Some(reg) =
self.try_find_unused_reg_range(0, align, 1)
{
self.assign_reg(ssa, reg);
return reg;
}
}
}
}
let reg = self
.try_find_unused_reg_range(0, 1, 1)
.expect("Failed to find free register");
self.assign_reg(ssa, reg);
reg
}
}
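// VecRegAllocator wraps a RegAllocator for the duration of one instruction
// so that vector sources and destinations can be given contiguous, aligned
// register ranges.  Scalars that are in the way get evicted, and the moves
// needed to repair the damage accumulate in an OpParCopy; finish() must be
// called to flush that copy and re-home any still-evicted values (the Drop
// impl asserts nothing was left evicted).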
struct VecRegAllocator<'a> {
ra: &'a mut RegAllocator,
pcopy: OpParCopy,
pinned: BitSet,
evicted: HashMap<SSAValue, u32>,
}
impl<'a> VecRegAllocator<'a> {
fn new(ra: &'a mut RegAllocator) -> Self {
let pinned = ra.pinned.clone();
VecRegAllocator {
ra,
pcopy: OpParCopy::new(),
pinned,
evicted: HashMap::new(),
}
}
fn file(&self) -> RegFile {
self.ra.file()
}
fn pin_reg(&mut self, reg: u32) {
self.pinned.insert(reg.try_into().unwrap());
}
fn pin_reg_range(&mut self, reg: u32, comps: u8) {
for c in 0..u32::from(comps) {
self.pin_reg(reg + c);
}
}
fn reg_is_pinned(&self, reg: u32) -> bool {
self.pinned.get(reg.try_into().unwrap())
}
fn reg_range_is_unpinned(&self, reg: u32, comps: u8) -> bool {
RegAllocator::reg_range_is_unset(&self.pinned, reg, comps)
}
fn assign_pin_reg(&mut self, ssa: SSAValue, reg: u32) -> RegRef {
self.pin_reg(reg);
self.ra.assign_reg(ssa, reg);
RegRef::new(self.file(), reg, 1)
}
pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
for c in 0..vec.comps() {
let ssa = vec[usize::from(c)];
self.assign_pin_reg(ssa, reg + u32::from(c));
}
RegRef::new(self.file(), reg, vec.comps())
}
fn try_find_unpinned_reg_range(
&self,
start_reg: u32,
align: u32,
comps: u8,
) -> Option<u32> {
self.ra
.try_find_unset_reg_range(&self.pinned, start_reg, align, comps)
}
pub fn evict_ssa(&mut self, ssa: SSAValue, old_reg: u32) {
assert!(ssa.file() == self.file());
assert!(!self.reg_is_pinned(old_reg));
self.evicted.insert(ssa, old_reg);
}
pub fn evict_reg_if_used(&mut self, reg: u32) {
assert!(!self.reg_is_pinned(reg));
if let Some(ssa) = self.ra.try_get_ssa(reg) {
self.ra.free_ssa(ssa);
self.evict_ssa(ssa, reg);
}
}
fn move_ssa_to_reg(&mut self, ssa: SSAValue, new_reg: u32) {
if let Some(old_reg) = self.ra.try_get_reg(ssa) {
assert!(self.evicted.get(&ssa).is_none());
assert!(!self.reg_is_pinned(old_reg));
if new_reg == old_reg {
self.pin_reg(new_reg);
} else {
self.ra.free_ssa(ssa);
self.evict_reg_if_used(new_reg);
self.pcopy.push(
RegRef::new(self.file(), new_reg, 1).into(),
RegRef::new(self.file(), old_reg, 1).into(),
);
self.assign_pin_reg(ssa, new_reg);
}
} else if let Some(old_reg) = self.evicted.remove(&ssa) {
self.evict_reg_if_used(new_reg);
self.pcopy.push(
RegRef::new(self.file(), new_reg, 1).into(),
RegRef::new(self.file(), old_reg, 1).into(),
);
self.assign_pin_reg(ssa, new_reg);
} else {
panic!("Unknown SSA value");
}
}
fn finish(mut self, pcopy: &mut OpParCopy) {
pcopy.dsts_srcs.append(&mut self.pcopy.dsts_srcs);
if !self.evicted.is_empty() {
// Sort so we get determinism, even if the hash map order changes
// from one run to another or due to rust compiler updates.
let mut evicted: Vec<_> = self.evicted.drain().collect();
evicted.sort_by_key(|(_, reg)| *reg);
for (ssa, old_reg) in evicted {
let mut next_reg = 0;
let new_reg = loop {
let reg = self
.ra
.try_find_unused_reg_range(next_reg, 1, 1)
.expect("Failed to find free register");
if !self.reg_is_pinned(reg) {
break reg;
}
next_reg = reg + 1;
};
pcopy.push(
RegRef::new(self.file(), new_reg, 1).into(),
RegRef::new(self.file(), old_reg, 1).into(),
);
self.assign_pin_reg(ssa, new_reg);
}
}
}
pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
self.ra.try_get_vec_reg(vec)
}
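// Gathers the (already live) components of `vec` into one contiguous,
// aligned range for use as a vector source: reuse the existing range if the
// components already line up, otherwise pick a range (preferring one where
// some component is already in its correct slot) and emit the moves via
// move_ssa_to_reg().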
pub fn collect_vector(&mut self, vec: &SSARef) -> RegRef {
if let Some(reg) = self.try_get_vec_reg(vec) {
self.pin_reg_range(reg, vec.comps());
return RegRef::new(self.file(), reg, vec.comps());
}
let comps = vec.comps();
let align = u32::from(comps).next_power_of_two();
let reg = self
.ra
.try_find_unused_reg_range(0, align, comps)
.or_else(|| {
for c in 0..comps {
let ssa = vec[usize::from(c)];
let Some(comp_reg) = self.ra.try_get_reg(ssa) else {
continue;
};
let vec_reg = comp_reg & !(align - 1);
if comp_reg != vec_reg + u32::from(c) {
continue;
}
if vec_reg + u32::from(comps) > self.ra.num_regs {
continue;
}
if self.reg_range_is_unpinned(vec_reg, comps) {
return Some(vec_reg);
}
}
None
})
.or_else(|| self.try_find_unpinned_reg_range(0, align, comps))
.expect("Failed to find an unpinned register range");
for c in 0..comps {
let ssa = vec[usize::from(c)];
self.move_ssa_to_reg(ssa, reg + u32::from(c));
}
RegRef::new(self.file(), reg, comps)
}
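// Allocates a fresh contiguous, aligned range for a vector destination,
// evicting unpinned scalars if no fully unused range exists.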
pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
let comps = vec.comps();
let align = u32::from(comps).next_power_of_two();
if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
return self.assign_pin_vec_reg(vec, reg);
}
let reg = self
.try_find_unpinned_reg_range(0, align, comps)
.expect("Failed to find an unpinned register range");
for c in 0..comps {
self.evict_reg_if_used(reg + u32::from(c));
}
self.assign_pin_vec_reg(vec, reg)
}
pub fn free_killed(&mut self, killed: &KillSet) {
for ssa in killed.iter() {
if ssa.file() == self.file() {
self.ra.free_ssa(*ssa);
}
}
}
}
impl Drop for VecRegAllocator<'_> {
fn drop(&mut self) {
assert!(self.evicted.is_empty());
}
}
fn instr_remap_srcs_file(instr: &mut Instr, ra: &mut VecRegAllocator) {
// Collect vector sources first since those may silently pin some of our
// scalar sources.
for src in instr.srcs_mut() {
if let Some(ssa) = src_ssa_ref(src) {
if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
let reg = ra.collect_vector(ssa);
src_set_reg(src, reg);
}
}
}
if let PredRef::SSA(pred) = instr.pred.pred_ref {
if pred.file() == ra.file() {
instr.pred.pred_ref = ra.collect_vector(&pred.into()).into();
}
}
for src in instr.srcs_mut() {
if let Some(ssa) = src_ssa_ref(src) {
if ssa.file().unwrap() == ra.file() && ssa.comps() == 1 {
let reg = ra.collect_vector(ssa);
src_set_reg(src, reg);
}
}
}
}
fn instr_alloc_scalar_dsts_file(
instr: &mut Instr,
ip: usize,
sum: &SSAUseMap,
phi_webs: &mut PhiWebs,
ra: &mut RegAllocator,
) {
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
if ssa.file().unwrap() == ra.file() {
assert!(ssa.comps() == 1);
let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa[0]);
*dst = RegRef::new(ra.file(), reg, 1).into();
}
}
}
}
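// Assigns registers for one instruction within a single register file.
// Vector destinations are what make this interesting; three strategies are
// tried:
//   1. every vector destination reuses the range of a killed vector source
//      of at least the same size,
//   2. every vector destination fits in a currently unused range, which is
//      allocated before the sources are collected, or
//   3. the general path: collect sources first, then let alloc_vector()
//      evict whatever it needs for the destinations.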
fn instr_assign_regs_file(
instr: &mut Instr,
ip: usize,
sum: &SSAUseMap,
phi_webs: &mut PhiWebs,
killed: &KillSet,
pcopy: &mut OpParCopy,
ra: &mut RegAllocator,
) {
struct VecDst {
dst_idx: usize,
comps: u8,
killed: Option<SSARef>,
reg: u32,
}
let mut vec_dsts = Vec::new();
let mut vec_dst_comps = 0;
for (i, dst) in instr.dsts().iter().enumerate() {
if let Dst::SSA(ssa) = dst {
if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
vec_dsts.push(VecDst {
dst_idx: i,
comps: ssa.comps(),
killed: None,
reg: u32::MAX,
});
vec_dst_comps += ssa.comps();
}
}
}
// Having no vector destinations is the easy case
if vec_dst_comps == 0 {
let mut vra = VecRegAllocator::new(ra);
instr_remap_srcs_file(instr, &mut vra);
vra.free_killed(killed);
vra.finish(pcopy);
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
return;
}
// Predicates can't be vectors. This lets us ignore instr.pred in our
// analysis for the cases below. Only the easy case above needs to care
// about them.
assert!(!ra.file().is_predicate());
let mut avail = killed.set.clone();
let mut killed_vecs = Vec::new();
for src in instr.srcs() {
if let Some(vec) = src_ssa_ref(src) {
if vec.comps() > 1 {
let mut vec_killed = true;
for ssa in vec.iter() {
if ssa.file() != ra.file() || !avail.contains(ssa) {
vec_killed = false;
break;
}
}
if vec_killed {
for ssa in vec.iter() {
avail.remove(ssa);
}
killed_vecs.push(*vec);
}
}
}
}
vec_dsts.sort_by_key(|v| v.comps);
killed_vecs.sort_by_key(|v| v.comps());
let mut next_dst_reg = 0;
let mut vec_dsts_map_to_killed_srcs = true;
let mut could_trivially_allocate = true;
for vec_dst in vec_dsts.iter_mut().rev() {
while let Some(src) = killed_vecs.pop() {
if src.comps() >= vec_dst.comps {
vec_dst.killed = Some(src);
break;
}
}
if vec_dst.killed.is_none() {
vec_dsts_map_to_killed_srcs = false;
}
let align = u32::from(vec_dst.comps).next_power_of_two();
if let Some(reg) =
ra.try_find_unused_reg_range(next_dst_reg, align, vec_dst.comps)
{
vec_dst.reg = reg;
next_dst_reg = reg + u32::from(vec_dst.comps);
} else {
could_trivially_allocate = false;
}
}
if vec_dsts_map_to_killed_srcs {
let mut vra = VecRegAllocator::new(ra);
instr_remap_srcs_file(instr, &mut vra);
for vec_dst in &mut vec_dsts {
let src_vec = vec_dst.killed.as_ref().unwrap();
vec_dst.reg = vra.try_get_vec_reg(src_vec).unwrap();
}
vra.free_killed(killed);
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = vra
.assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
vra.finish(pcopy);
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
} else if could_trivially_allocate {
let mut vra = VecRegAllocator::new(ra);
for vec_dst in vec_dsts {
let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
*dst = vra
.assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
.into();
}
instr_remap_srcs_file(instr, &mut vra);
vra.free_killed(killed);
vra.finish(pcopy);
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
} else {
let mut vra = VecRegAllocator::new(ra);
instr_remap_srcs_file(instr, &mut vra);
// Allocate vector destinations first so we have the most freedom.
// Scalar destinations can fill in holes.
for dst in instr.dsts_mut() {
if let Dst::SSA(ssa) = dst {
if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
*dst = vra.alloc_vector(*ssa).into();
}
}
}
vra.free_killed(killed);
vra.finish(pcopy);
instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
}
}
impl PerRegFile<RegAllocator> {
pub fn assign_reg(&mut self, ssa: SSAValue, reg: RegRef) {
assert!(reg.file() == ssa.file());
assert!(reg.comps() == 1);
self[ssa.file()].assign_reg(ssa, reg.base_idx());
}
pub fn free_killed(&mut self, killed: &KillSet) {
for ssa in killed.iter() {
self[ssa.file()].free_ssa(*ssa);
}
}
}
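// Per-block register allocation state.  `ra` is the register state at the
// current point in the block, `live_in` records which register each live-in
// value (or phi destination) was given so predecessors can copy into it
// during the second pass, and `phi_out` maps phi indices to the source each
// OpPhiSrcs resolved to in this block.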
struct AssignRegsBlock {
ra: PerRegFile<RegAllocator>,
pcopy_tmp_gprs: u8,
live_in: Vec<LiveValue>,
phi_out: HashMap<u32, SrcRef>,
}
impl AssignRegsBlock {
fn new(num_regs: &PerRegFile<u32>, pcopy_tmp_gprs: u8) -> AssignRegsBlock {
AssignRegsBlock {
ra: PerRegFile::new_with(|file| {
RegAllocator::new(file, num_regs[file])
}),
pcopy_tmp_gprs,
live_in: Vec::new(),
phi_out: HashMap::new(),
}
}
fn get_scalar(&self, ssa: SSAValue) -> RegRef {
let ra = &self.ra[ssa.file()];
let reg = ra.try_get_reg(ssa).expect("Unknown SSA value");
RegRef::new(ssa.file(), reg, 1)
}
fn alloc_scalar(
&mut self,
ip: usize,
sum: &SSAUseMap,
phi_webs: &mut PhiWebs,
ssa: SSAValue,
) -> RegRef {
let ra = &mut self.ra[ssa.file()];
let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa);
RegRef::new(ssa.file(), reg, 1)
}
fn pin_vector(&mut self, reg: RegRef) {
let ra = &mut self.ra[reg.file()];
for c in 0..reg.comps() {
ra.pin_reg(reg.comp(c).base_idx());
}
}
fn try_coalesce(&mut self, ssa: SSAValue, src: &Src) -> bool {
debug_assert!(src.is_unmodified());
let SrcRef::Reg(src_reg) = src.src_ref else {
return false;
};
debug_assert!(src_reg.comps() == 1);
if src_reg.file() != ssa.file() {
return false;
}
let ra = &mut self.ra[src_reg.file()];
if ra.reg_is_used(src_reg.base_idx()) {
return false;
}
ra.assign_reg(ssa, src_reg.base_idx());
true
}
fn pcopy_tmp(&self) -> Option<RegRef> {
if self.pcopy_tmp_gprs > 0 {
Some(RegRef::new(
RegFile::GPR,
self.ra[RegFile::GPR].num_regs,
self.pcopy_tmp_gprs,
))
} else {
None
}
}
fn assign_regs_instr(
&mut self,
mut instr: Box<Instr>,
ip: usize,
sum: &SSAUseMap,
phi_webs: &mut PhiWebs,
srcs_killed: &KillSet,
dsts_killed: &KillSet,
pcopy: &mut OpParCopy,
) -> Option<Box<Instr>> {
match &mut instr.op {
Op::Undef(undef) => {
if let Dst::SSA(ssa) = undef.dst {
assert!(ssa.comps() == 1);
self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
}
assert!(srcs_killed.is_empty());
self.ra.free_killed(dsts_killed);
None
}
Op::PhiSrcs(phi) => {
for (id, src) in phi.srcs.iter() {
assert!(src.is_unmodified());
if let Some(ssa) = src_ssa_ref(src) {
assert!(ssa.comps() == 1);
let reg = self.get_scalar(ssa[0]);
self.phi_out.insert(*id, reg.into());
} else {
self.phi_out.insert(*id, src.src_ref);
}
}
assert!(dsts_killed.is_empty());
None
}
Op::PhiDsts(phi) => {
assert!(instr.pred.is_true());
for (id, dst) in phi.dsts.iter() {
if let Dst::SSA(ssa) = dst {
assert!(ssa.comps() == 1);
let reg = self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
self.live_in.push(LiveValue {
live_ref: LiveRef::Phi(*id),
reg_ref: reg,
});
}
}
assert!(srcs_killed.is_empty());
self.ra.free_killed(dsts_killed);
None
}
Op::Break(op) => {
for src in op.srcs_as_mut_slice() {
if let Some(ssa) = src_ssa_ref(src) {
assert!(ssa.comps() == 1);
let reg = self.get_scalar(ssa[0]);
src_set_reg(src, reg);
}
}
self.ra.free_killed(srcs_killed);
if let Dst::SSA(ssa) = &op.bar_out {
let reg = *op.bar_in.src_ref.as_reg().unwrap();
self.ra.assign_reg(ssa[0], reg);
op.bar_out = reg.into();
}
self.ra.free_killed(dsts_killed);
Some(instr)
}
Op::BSSy(op) => {
for src in op.srcs_as_mut_slice() {
if let Some(ssa) = src_ssa_ref(src) {
assert!(ssa.comps() == 1);
let reg = self.get_scalar(ssa[0]);
src_set_reg(src, reg);
}
}
self.ra.free_killed(srcs_killed);
if let Dst::SSA(ssa) = &op.bar_out {
let reg = *op.bar_in.src_ref.as_reg().unwrap();
self.ra.assign_reg(ssa[0], reg);
op.bar_out = reg.into();
}
self.ra.free_killed(dsts_killed);
Some(instr)
}
Op::Copy(copy) => {
if let Some(ssa) = src_ssa_ref(&copy.src) {
// This may be a CBuf::BindlessSSA source so we need to
// support vectors because cbuf handles are vec2s. However,
// since we only have a single scalar destination, we can
// just allocate and free killed up-front.
let ra = &mut self.ra[ssa.file().unwrap()];
let mut vra = VecRegAllocator::new(ra);
let reg = vra.collect_vector(ssa);
vra.free_killed(srcs_killed);
vra.finish(pcopy);
src_set_reg(&mut copy.src, reg);
}
let mut del_copy = false;
if let Dst::SSA(dst_vec) = &mut copy.dst {
debug_assert!(dst_vec.comps() == 1);
let dst_ssa = &dst_vec[0];
if self.try_coalesce(*dst_ssa, &copy.src) {
del_copy = true;
} else {
copy.dst = self
.alloc_scalar(ip, sum, phi_webs, *dst_ssa)
.into();
}
}
self.ra.free_killed(dsts_killed);
if del_copy {
None
} else {
Some(instr)
}
}
Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
assert!(instr.pred.is_true());
// These basically act as a vector version of OpCopy except that
// they only work on SSA values and we pin the destination if
// it's OpPin.
let src_vec = src.as_ssa().unwrap();
let dst_vec = dst.as_ssa().unwrap();
assert!(src_vec.comps() == dst_vec.comps());
if srcs_killed.len() == src_vec.comps().into()
&& src_vec.file() == dst_vec.file()
{
let ra = &mut self.ra[src_vec.file().unwrap()];
let mut vra = VecRegAllocator::new(ra);
let reg = vra.collect_vector(src_vec);
vra.finish(pcopy);
for c in 0..src_vec.comps() {
let c_reg = ra.free_ssa(src_vec[usize::from(c)]);
debug_assert!(c_reg == reg.comp(c).base_idx());
ra.assign_reg(dst_vec[usize::from(c)], c_reg);
}
if matches!(&instr.op, Op::Pin(_)) {
self.pin_vector(reg);
}
self.ra.free_killed(dsts_killed);
None
} else {
// Otherwise, turn into a parallel copy
//
// We can always allocate the destination first in this
// case.
assert!(dst_vec.comps() > 1 || srcs_killed.is_empty());
let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
let mut vra = VecRegAllocator::new(dst_ra);
let dst_reg = vra.alloc_vector(*dst_vec);
vra.finish(pcopy);
let mut pin_copy = OpParCopy::new();
for c in 0..dst_reg.comps() {
let src_reg = self.get_scalar(src_vec[usize::from(c)]);
pin_copy.push(dst_reg.comp(c).into(), src_reg.into());
}
if matches!(&instr.op, Op::Pin(_)) {
self.pin_vector(dst_reg);
}
self.ra.free_killed(srcs_killed);
self.ra.free_killed(dsts_killed);
Some(Instr::new_boxed(pin_copy))
}
}
Op::ParCopy(pcopy) => {
for (_, src) in pcopy.dsts_srcs.iter_mut() {
if let Some(src_vec) = src_ssa_ref(src) {
debug_assert!(src_vec.comps() == 1);
let reg = self.get_scalar(src_vec[0]).into();
src_set_reg(src, reg);
}
}
self.ra.free_killed(srcs_killed);
// Try to coalesce destinations into sources, if possible
pcopy.dsts_srcs.retain(|dst, src| match dst {
Dst::None => false,
Dst::SSA(dst_vec) => {
debug_assert!(dst_vec.comps() == 1);
!self.try_coalesce(dst_vec[0], src)
}
Dst::Reg(_) => true,
});
for (dst, _) in pcopy.dsts_srcs.iter_mut() {
if let Dst::SSA(dst_vec) = dst {
debug_assert!(dst_vec.comps() == 1);
*dst = self
.alloc_scalar(ip, sum, phi_webs, dst_vec[0])
.into();
}
}
self.ra.free_killed(dsts_killed);
pcopy.tmp = self.pcopy_tmp();
if pcopy.is_empty() {
None
} else {
Some(instr)
}
}
Op::RegOut(out) => {
for src in out.srcs.iter_mut() {
if let Some(src_vec) = src_ssa_ref(src) {
debug_assert!(src_vec.comps() == 1);
let reg = self.get_scalar(src_vec[0]).into();
src_set_reg(src, reg);
}
}
self.ra.free_killed(srcs_killed);
assert!(dsts_killed.is_empty());
// This should be the last instruction and its sources should
// be the last free GPRs.
debug_assert!(self.ra[RegFile::GPR].num_regs_used() == 0);
for (i, src) in out.srcs.iter().enumerate() {
let reg = u32::try_from(i).unwrap();
let dst = RegRef::new(RegFile::GPR, reg, 1);
pcopy.push(dst.into(), *src);
}
None
}
_ => {
for file in self.ra.values_mut() {
instr_assign_regs_file(
&mut instr,
ip,
sum,
phi_webs,
srcs_killed,
pcopy,
file,
);
}
self.ra.free_killed(dsts_killed);
Some(instr)
}
}
}
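// First pass: walk the block in order, seeding the allocator from the first
// predecessor's end-of-block state, assigning registers instruction by
// instruction, and recording live-in values and phi outputs for the
// edge-fixup pass below.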
fn first_pass<BL: BlockLiveness>(
&mut self,
b: &mut BasicBlock,
bl: &BL,
pred_ra: Option<&PerRegFile<RegAllocator>>,
phi_webs: &mut PhiWebs,
) {
// Populate live in from the register file we're handed. We'll add more
// live in when we process the OpPhiDst, if any.
if let Some(pred_ra) = pred_ra {
for (raf, pred_raf) in self.ra.values_mut().zip(pred_ra.values()) {
for (ssa, reg) in &pred_raf.ssa_reg {
if bl.is_live_in(ssa) {
raf.assign_reg(*ssa, *reg);
if pred_raf.reg_is_pinned(*reg) {
raf.pin_reg(*reg);
}
self.live_in.push(LiveValue {
live_ref: LiveRef::SSA(*ssa),
reg_ref: RegRef::new(raf.file(), *reg, 1),
});
}
}
}
}
let sum = SSAUseMap::for_block(b);
let mut instrs = Vec::new();
let mut srcs_killed = KillSet::new();
let mut dsts_killed = KillSet::new();
for (ip, instr) in b.instrs.drain(..).enumerate() {
// Build up the kill set
srcs_killed.clear();
if let PredRef::SSA(ssa) = &instr.pred.pred_ref {
if !bl.is_live_after_ip(ssa, ip) {
srcs_killed.insert(*ssa);
}
}
for src in instr.srcs() {
for ssa in src.iter_ssa() {
if !bl.is_live_after_ip(ssa, ip) {
srcs_killed.insert(*ssa);
}
}
}
dsts_killed.clear();
for dst in instr.dsts() {
if let Dst::SSA(vec) = dst {
for ssa in vec.iter() {
if !bl.is_live_after_ip(ssa, ip) {
dsts_killed.insert(*ssa);
}
}
}
}
let mut pcopy = OpParCopy::new();
pcopy.tmp = self.pcopy_tmp();
let instr = self.assign_regs_instr(
instr,
ip,
&sum,
phi_webs,
&srcs_killed,
&dsts_killed,
&mut pcopy,
);
if !pcopy.is_empty() {
if DEBUG.annotate() {
instrs.push(Instr::new_boxed(OpAnnotate {
annotation: "generated by assign_regs".into(),
}));
}
if !b.uniform {
for dst in pcopy.dsts_as_slice() {
if let Dst::Reg(reg) = dst {
assert!(!reg.is_uniform());
}
}
}
instrs.push(Instr::new_boxed(pcopy));
}
if let Some(instr) = instr {
instrs.push(instr);
}
}
// Update phi_webs with the registers assigned in this block
for ra in self.ra.values() {
for (ssa, reg) in &ra.ssa_reg {
phi_webs.set(*ssa, *reg);
}
}
// Sort live-in to maintain determinism
self.live_in.sort();
b.instrs = instrs;
}
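// Second pass: for each successor edge, emit a parallel copy that moves
// values from wherever this block left them into the registers the
// successor's first pass chose for its live-in set.  The copy goes before
// the final branch, if there is one.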
fn second_pass(&self, target: &AssignRegsBlock, b: &mut BasicBlock) {
let mut pcopy = OpParCopy::new();
pcopy.tmp = self.pcopy_tmp();
for lv in &target.live_in {
let src = match lv.live_ref {
LiveRef::SSA(ssa) => SrcRef::from(self.get_scalar(ssa)),
LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
};
let dst = lv.reg_ref;
if let SrcRef::Reg(src_reg) = src {
if dst == src_reg {
continue;
}
}
pcopy.push(dst.into(), src.into());
}
if !pcopy.is_empty() {
let ann = OpAnnotate {
annotation: "generated by assign_regs".into(),
};
if b.branch().is_some() {
b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(ann));
b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(pcopy));
} else {
b.instrs.push(Instr::new_boxed(ann));
b.instrs.push(Instr::new_boxed(pcopy));
}
}
}
}
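// Top-level driver: convert to CSSA, compute liveness, spill any register
// file whose max_live exceeds the hardware limit, pick a GPR budget
// (gpr_limit for allocation plus tmp_gprs reserved for parallel-copy
// lowering; e.g. a hypothetical max_live of 40 GPRs with tmp_gprs = 1 and
// no spilling gives num_gprs = 41), and then run the two per-block passes
// in block order, relying on each block's first predecessor having already
// been processed.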
impl Shader<'_> {
pub fn assign_regs(&mut self) {
assert!(self.functions.len() == 1);
let f = &mut self.functions[0];
// Convert to CSSA before we spill or assign registers
f.to_cssa();
let mut live = SimpleLiveness::for_function(f);
let mut max_live = live.calc_max_live(f);
// We want at least one temporary GPR reserved for parallel copies.
let mut tmp_gprs = 1_u8;
let spill_files =
[RegFile::UPred, RegFile::Pred, RegFile::UGPR, RegFile::Bar];
for file in spill_files {
let num_regs = self.sm.num_regs(file);
if max_live[file] > num_regs {
f.spill_values(file, num_regs, &mut self.info);
// Re-calculate liveness after we spill
live = SimpleLiveness::for_function(f);
max_live = live.calc_max_live(f);
if file == RegFile::Bar {
tmp_gprs = max(tmp_gprs, 2);
}
}
}
// An instruction can have at most 4 vector sources/destinations. In
// order to ensure we always succeed at allocation, regardless of
// arbitrary choices, we need at least 16 GPRs.
let mut gpr_limit = max(max_live[RegFile::GPR], 16);
let mut total_gprs = gpr_limit + u32::from(tmp_gprs);
let mut max_gprs = self.sm.num_regs(RegFile::GPR);
if DEBUG.spill() {
// To test spilling, reduce the number of registers to the minimum
// practical for RA. We need at least 16 registers to satisfy RA
// constraints for texture ops.
max_gprs = 16;
// OpRegOut can use arbitrarily many GPRs
for b in &f.blocks {
for instr in b.instrs.iter().rev() {
match &instr.op {
Op::Exit(_) => (),
Op::RegOut(op) => {
let out_gprs =
u32::try_from(op.srcs.len()).unwrap();
max_gprs = max(max_gprs, out_gprs);
}
_ => break,
}
}
}
// and another 2 for parallel copy
max_gprs += 2;
}
if let ShaderStageInfo::Compute(cs_info) = &self.info.stage {
max_gprs = min(
max_gprs,
gpr_limit_from_local_size(&cs_info.local_size)
- self.sm.hw_reserved_gprs(),
);
}
if total_gprs > max_gprs {
// If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
// lowering because it needs to be able to lower Mem copies, which
// require a temporary
tmp_gprs = max(tmp_gprs, 2);
total_gprs = max_gprs;
gpr_limit = total_gprs - u32::from(tmp_gprs);
f.spill_values(RegFile::GPR, gpr_limit, &mut self.info);
// Re-calculate liveness one last time
live = SimpleLiveness::for_function(f);
}
self.info.num_gprs = total_gprs.try_into().unwrap();
let limit = PerRegFile::new_with(|file| {
if file == RegFile::GPR {
gpr_limit
} else {
self.sm.num_regs(file)
}
});
let mut phi_webs = PhiWebs::new(f);
let mut blocks: Vec<AssignRegsBlock> = Vec::new();
for b_idx in 0..f.blocks.len() {
let pred = f.blocks.pred_indices(b_idx);
let pred_ra = if pred.is_empty() {
None
} else {
// Start with the first predecessor block's allocator state.
Some(&blocks[pred[0]].ra)
};
let bl = live.block_live(b_idx);
let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra, &mut phi_webs);
assert!(blocks.len() == b_idx);
blocks.push(arb);
}
for b_idx in 0..f.blocks.len() {
let arb = &blocks[b_idx];
for sb_idx in f.blocks.succ_indices(b_idx).to_vec() {
arb.second_pass(&blocks[sb_idx], &mut f.blocks[b_idx]);
}
}
}
}