diff --git a/.pick_status.json b/.pick_status.json index b85528f8ed9..d378b4d0e3b 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1674,7 +1674,7 @@ "description": "nak: Add gpr_limit_from_local_size", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 761e60aba2c..785dd6bd9d9 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -205,7 +205,7 @@ pub extern "C" fn nak_nir_options( #[repr(C)] pub struct ShaderBin { - bin: nak_shader_bin, + pub bin: nak_shader_bin, code: Vec, asm: CString, } diff --git a/src/nouveau/compiler/nak/assign_regs.rs b/src/nouveau/compiler/nak/assign_regs.rs index b15e7d7dcda..149ce0a1778 100644 --- a/src/nouveau/compiler/nak/assign_regs.rs +++ b/src/nouveau/compiler/nak/assign_regs.rs @@ -7,7 +7,7 @@ use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness}; use crate::union_find::UnionFind; use compiler::bitset::BitSet; -use std::cmp::{max, Ordering}; +use std::cmp::{max, min, Ordering}; use std::collections::{HashMap, HashSet}; struct KillSet { @@ -1437,13 +1437,22 @@ impl Shader<'_> { let mut gpr_limit = max(max_live[RegFile::GPR], 16); let mut total_gprs = gpr_limit + u32::from(tmp_gprs); - let max_gprs = if DEBUG.spill() { + let mut max_gprs = if DEBUG.spill() { // We need at least 16 registers to satisfy RA constraints for // texture ops and another 2 for parallel copy lowering 18 } else { self.sm.num_regs(RegFile::GPR) }; + + if let ShaderStageInfo::Compute(cs_info) = &self.info.stage { + max_gprs = min( + max_gprs, + gpr_limit_from_local_size(&cs_info.local_size) + - self.sm.hw_reserved_gprs(), + ); + } + if total_gprs > max_gprs { // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy // lowering because it needs to be able lower Mem copies which diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index 71d70b61dc5..de670e62d91 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -8,6 +8,7 @@ use crate::sm50::ShaderModel50; use crate::sm70::ShaderModel70; use acorn::Acorn; +use compiler::bindings::MESA_SHADER_COMPUTE; use compiler::cfg::CFGBuilder; use nak_bindings::*; use std::str::FromStr; @@ -1188,3 +1189,25 @@ fn test_f2fp_pack_ab() { // { 1.455fp16, 0.0fp16 } assert_eq!(data[2][3], 0x3dd24000); } + +#[test] +pub fn test_gpr_limit_from_local_size() { + let run = RunSingleton::get(); + let b = TestShaderBuilder::new(run.sm.as_ref()); + let mut bin = b.compile(); + + for local_size in 1..=1024 { + let info = &mut bin.bin.info; + let cs_info = unsafe { + assert_eq!(info.stage, MESA_SHADER_COMPUTE); + &mut info.__bindgen_anon_1.cs + }; + cs_info.local_size = [local_size, 1, 1]; + let num_gprs = gpr_limit_from_local_size(&cs_info.local_size); + info.num_gprs = num_gprs.try_into().unwrap(); + + run.run.run::(&bin, &mut [0; 4096]).unwrap_or_else(|_| { + panic!("Failed with local_size {local_size}, num_gprs {num_gprs}") + }); + } +} diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index f101ca05df0..f9aafa59ca6 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -7259,6 +7259,25 @@ pub trait ShaderModel { fn encode_shader(&self, s: &Shader<'_>) -> Vec; } +/// For compute shaders, large values of local_size impose an additional limit +/// on the number of GPRs per thread +pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 { + fn prev_multiple_of(x: u32, y: u32) -> u32 { + (x / y) * y + } + + let local_size = local_size[0] * local_size[1] * local_size[2]; + // Warps are allocated in multiples of 4 + // Multiply that by 32 threads/warp + let local_size = local_size.next_multiple_of(4 * 32) as u32; + let total_regs: u32 = 65536; + + let out = total_regs / local_size; + // GPRs are allocated in multiples of 8 + let out = prev_multiple_of(out, 8); + min(out, 255) +} + pub struct Shader<'a> { pub sm: &'a dyn ShaderModel, pub info: ShaderInfo,