nak: Add ShaderModel::hw_reserved_gprs()

Cc: mesa-stable
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
(cherry picked from commit 914c722eb0)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33113>
This commit is contained in:
Mel Henning
2025-01-08 17:44:52 -05:00
committed by Dylan Baker
parent ea2f4877ab
commit d1c63709cf
5 changed files with 18 additions and 11 deletions

View File

@@ -1684,7 +1684,7 @@
"description": "nak: Add ShaderModel::hw_reserved_gprs()",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View File

@@ -231,10 +231,10 @@ impl ShaderBin {
ShaderStageInfo::Tessellation(_) => MESA_SHADER_TESS_EVAL,
},
sm: sm.sm(),
num_gprs: if sm.sm() >= 70 {
max(4, info.num_gprs + 2)
} else {
max(4, info.num_gprs)
num_gprs: {
max(4, info.num_gprs as u32 + sm.hw_reserved_gprs())
.try_into()
.unwrap()
},
num_control_barriers: info.num_control_barriers,
_pad0: Default::default(),

View File

@@ -7250,6 +7250,7 @@ pub struct ShaderInfo {
pub trait ShaderModel {
fn sm(&self) -> u8;
fn num_regs(&self, file: RegFile) -> u32;
fn hw_reserved_gprs(&self) -> u32;
fn crs_size(&self, max_crs_depth: u32) -> u32;
fn op_can_be_uniform(&self, op: &Op) -> bool;

View File

@@ -38,6 +38,10 @@ impl ShaderModel for ShaderModel50 {
}
}
/// Returns the number of GPRs the hardware reserves for itself on this
/// shader model.
///
/// This implementation reports zero, so adding the result to a shader's
/// own GPR count leaves that count unchanged.
fn hw_reserved_gprs(&self) -> u32 {
0
}
fn crs_size(&self, max_crs_depth: u32) -> u32 {
if max_crs_depth <= 16 {
0

View File

@@ -33,12 +33,7 @@ impl ShaderModel for ShaderModel70 {
fn num_regs(&self, file: RegFile) -> u32 {
match file {
RegFile::GPR => {
// Volta+ has a maximum of 253 registers. Presumably
// because two registers get burned for UGPRs? Unclear
// on why we need it on Volta though.
253
}
RegFile::GPR => 255 - self.hw_reserved_gprs(),
RegFile::UGPR => {
if self.has_uniform_alu() {
63
@@ -60,6 +55,13 @@ impl ShaderModel for ShaderModel70 {
}
}
/// Returns the number of GPRs the hardware reserves for itself on this
/// shader model.
///
/// `num_regs()` subtracts this value from 255 to obtain the number of GPRs
/// available for `RegFile::GPR`.
fn hw_reserved_gprs(&self) -> u32 {
// On Volta+, 2 GPRs get burned for the program counter - see the
// footnote on table 2 of the Volta whitepaper
// https://images.nvidia.com/content/volta-architecture/pdf/volta-architecture-whitepaper.pdf
2
}
fn crs_size(&self, max_crs_depth: u32) -> u32 {
assert!(max_crs_depth == 0);
0