diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index a714a576953..f828b3df829 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -230,10 +230,10 @@ impl ShaderBin { ShaderStageInfo::Tessellation(_) => MESA_SHADER_TESS_EVAL, }, sm: sm.sm(), - num_gprs: if sm.sm() >= 70 { - max(4, info.num_gprs + 2) - } else { - max(4, info.num_gprs) + num_gprs: { + max(4, info.num_gprs as u32 + sm.hw_reserved_gprs()) + .try_into() + .unwrap() }, num_control_barriers: info.num_control_barriers, _pad0: Default::default(), diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 13c9a616e54..43873e9107f 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -7251,6 +7251,7 @@ pub struct ShaderInfo { pub trait ShaderModel { fn sm(&self) -> u8; fn num_regs(&self, file: RegFile) -> u32; + fn hw_reserved_gprs(&self) -> u32; fn crs_size(&self, max_crs_depth: u32) -> u32; fn op_can_be_uniform(&self, op: &Op) -> bool; diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index 74a59d72755..684d5f565e6 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -38,6 +38,10 @@ impl ShaderModel for ShaderModel50 { } } + fn hw_reserved_gprs(&self) -> u32 { + 0 + } + fn crs_size(&self, max_crs_depth: u32) -> u32 { if max_crs_depth <= 16 { 0 diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index cc155485286..e1a8d788ba0 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -33,12 +33,7 @@ impl ShaderModel for ShaderModel70 { fn num_regs(&self, file: RegFile) -> u32 { match file { - RegFile::GPR => { - // Volta+ has a maximum of 253 registers. Presumably - // because two registers get burned for UGPRs? Unclear - // on why we need it on Volta though. - 253 - } + RegFile::GPR => 255 - self.hw_reserved_gprs(), RegFile::UGPR => { if self.has_uniform_alu() { 63 @@ -60,6 +55,13 @@ impl ShaderModel for ShaderModel70 { } } + fn hw_reserved_gprs(&self) -> u32 { + // On Volta+, 2 GPRs get burned for the program counter - see the + // footnote on table 2 of the volta whitepaper + // https://images.nvidia.com/content/volta-architecture/pdf/volta-architecture-whitepaper.pdf + 2 + } + fn crs_size(&self, max_crs_depth: u32) -> u32 { assert!(max_crs_depth == 0); 0