nak: Add gpr_limit_from_local_size
I stumbled on this limit - it turns out that large local_sizes apply an additional limit on gprs per thread. If we violate this limit, then dmesg just gives us a rather unhelpful message that the channel is killed: nouveau 0000:01:00.0: gsp: rc engn:00000001 chid:64 type:13 scope:1 part:233 nouveau 0000:01:00.0: fifo:c00000:0008:0040:[hw_tests::test_[14761]] errored - disabling channel Cc: mesa-stable Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32952>
This commit is contained in:
@@ -204,7 +204,7 @@ pub extern "C" fn nak_nir_options(
|
||||
|
||||
/// Compiled shader container with a C-compatible layout (`#[repr(C)]`).
///
/// NOTE(review): this excerpt is diff residue. Both the old private `bin`
/// field and its `pub` replacement appear below; presumably only the `pub`
/// declaration is meant to survive. The hardware test shown in this change
/// reads `bin.bin.info`, which requires the field to be reachable from
/// outside this type.
#[repr(C)]
|
||||
pub struct ShaderBin {
|
||||
// Previous declaration of the field (private).
bin: nak_shader_bin,
|
||||
// Replacement declaration of the same field, now `pub`.
pub bin: nak_shader_bin,
|
||||
// Presumably the encoded shader words; confirm against the constructor.
code: Vec<u32>,
|
||||
// Presumably the textual disassembly of the shader; confirm.
asm: CString,
|
||||
}
|
||||
|
@@ -7,7 +7,7 @@ use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
|
||||
use crate::union_find::UnionFind;
|
||||
|
||||
use compiler::bitset::BitSet;
|
||||
use std::cmp::{max, Ordering};
|
||||
use std::cmp::{max, min, Ordering};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
struct KillSet {
|
||||
@@ -1437,13 +1437,22 @@ impl Shader<'_> {
|
||||
let mut gpr_limit = max(max_live[RegFile::GPR], 16);
|
||||
let mut total_gprs = gpr_limit + u32::from(tmp_gprs);
|
||||
|
||||
let max_gprs = if DEBUG.spill() {
|
||||
let mut max_gprs = if DEBUG.spill() {
|
||||
// We need at least 16 registers to satisfy RA constraints for
|
||||
// texture ops and another 2 for parallel copy lowering
|
||||
18
|
||||
} else {
|
||||
self.sm.num_regs(RegFile::GPR)
|
||||
};
|
||||
|
||||
if let ShaderStageInfo::Compute(cs_info) = &self.info.stage {
|
||||
max_gprs = min(
|
||||
max_gprs,
|
||||
gpr_limit_from_local_size(&cs_info.local_size)
|
||||
- self.sm.hw_reserved_gprs(),
|
||||
);
|
||||
}
|
||||
|
||||
if total_gprs > max_gprs {
|
||||
// If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
|
||||
// lowering because it needs to be able lower Mem copies which
|
||||
|
@@ -8,6 +8,7 @@ use crate::sm50::ShaderModel50;
|
||||
use crate::sm70::ShaderModel70;
|
||||
|
||||
use acorn::Acorn;
|
||||
use compiler::bindings::MESA_SHADER_COMPUTE;
|
||||
use compiler::cfg::CFGBuilder;
|
||||
use nak_bindings::*;
|
||||
use std::mem::offset_of;
|
||||
@@ -1167,3 +1168,25 @@ fn test_f2fp_pack_ab() {
|
||||
// { 1.455fp16, 0.0fp16 }
|
||||
assert_eq!(data[2][3], 0x3dd24000);
|
||||
}
|
||||
|
||||
/// Hardware test for `gpr_limit_from_local_size`.
///
/// Compiles one test shader, then for every one-dimensional workgroup size
/// from 1 to 1024 overwrites the shader info's `local_size` and `num_gprs`
/// with the computed limit and runs the shader. The test panics, naming the
/// offending size and GPR count, as soon as a run reports an error.
#[test]
pub fn test_gpr_limit_from_local_size() {
    let runner = RunSingleton::get();
    let mut shader = TestShaderBuilder::new(runner.sm.as_ref()).compile();

    for local_size in 1..=1024 {
        let shader_info = &mut shader.bin.info;

        // Only compute shaders carry a workgroup size to patch.
        assert_eq!(shader_info.stage, MESA_SHADER_COMPUTE);
        // SAFETY: the stage was just checked to be compute, so `cs` is
        // presumably the active member of the bindgen-generated union.
        let compute = unsafe { &mut shader_info.__bindgen_anon_1.cs };

        compute.local_size = [local_size, 1, 1];
        let num_gprs = gpr_limit_from_local_size(&compute.local_size);
        shader_info.num_gprs = num_gprs.try_into().unwrap();

        let outcome = runner.run.run::<u8>(&shader, &mut [0; 4096]);
        if outcome.is_err() {
            panic!("Failed with local_size {local_size}, num_gprs {num_gprs}");
        }
    }
}
|
||||
|
@@ -7260,6 +7260,25 @@ pub trait ShaderModel {
|
||||
fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
|
||||
}
|
||||
|
||||
/// For compute shaders, large values of local_size impose an additional limit
/// on the number of GPRs per thread
///
/// `local_size` holds the three workgroup dimensions. The thread count (their
/// product) is rounded up to the warp allocation granule, the register total
/// used by this calculation (65536) is divided by that, and the quotient is
/// rounded down to the GPR allocation granule and capped at 255.
///
/// The arithmetic is done in `u64`, so no combination of `u16` dimensions can
/// overflow. A workgroup with a zero dimension is treated as the smallest
/// possible workgroup (yielding the 255 cap) instead of dividing by zero.
/// Workgroups too large to fit even one GPR granule per thread yield 0.
pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 {
    // Warps are allocated in multiples of 4
    // Multiply that by 32 threads/warp
    const THREAD_GRANULE: u64 = 4 * 32;
    // GPRs are allocated in multiples of 8
    const GPR_GRANULE: u64 = 8;
    const TOTAL_REGS: u64 = 65536;
    const MAX_GPRS: u64 = 255;

    // Widen before multiplying: the product of three u16 values can exceed
    // both u16 and u32.
    let threads: u64 = local_size.iter().map(|&dim| u64::from(dim)).product();

    // Clamp to at least one thread so the rounded size is never zero and the
    // division below cannot trap.
    let threads = threads.max(1).next_multiple_of(THREAD_GRANULE);

    let per_thread = TOTAL_REGS / threads;
    // Round down to the GPR allocation granule.
    let per_thread = per_thread - per_thread % GPR_GRANULE;

    u32::try_from(per_thread.min(MAX_GPRS)).expect("value is capped at 255")
}
|
||||
|
||||
pub struct Shader<'a> {
|
||||
pub sm: &'a dyn ShaderModel,
|
||||
pub info: ShaderInfo,
|
||||
|
Reference in New Issue
Block a user