nak: Add Turing latency information

This adds the latency information provided by NVIDIA.  This is copied
from Excel spreadsheets provided to Red Hat.

This fully passes CTS on Turing TU104 with no regressions.

I'm sure future use of some instructions like IMAD may require some
changes to this, but it should be functionally complete.

Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33573>
This commit is contained in:
Dave Airlie
2025-02-17 07:21:41 +10:00
committed by Marge Bot
parent 3868855144
commit 6b8a4e6bb7
4 changed files with 1410 additions and 19 deletions

View File

@@ -801,6 +801,16 @@ impl SrcRef {
}
}
/// Returns `true` only when this reference is a `SrcRef::CBuf` whose `buf`
/// is `CBuf::BindlessSSA` or `CBuf::BindlessUGPR`.
///
/// Any other `SrcRef` variant, and a `CBuf` with any other `buf` variant,
/// yields `false`.
pub fn is_bindless_cbuf(&self) -> bool {
    matches!(
        self,
        SrcRef::CBuf(cb)
            if matches!(cb.buf, CBuf::BindlessSSA(_) | CBuf::BindlessUGPR(_))
    )
}
pub fn is_predicate(&self) -> bool {
match self {
SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
@@ -1288,6 +1298,10 @@ impl Src {
}
}
/// Returns whether this source's `src_ref` is a bindless constant buffer,
/// as reported by `src_ref.is_bindless_cbuf()`.
pub fn is_bindless_cbuf(&self) -> bool {
self.src_ref.is_bindless_cbuf()
}
pub fn is_predicate(&self) -> bool {
self.src_ref.is_predicate()
}

View File

@@ -29,6 +29,7 @@ mod repair_ssa;
mod sm50;
mod sm70;
mod sm70_encode;
mod sm75_instr_latencies;
mod sph;
mod spill_values;
mod to_cssa;

View File

@@ -4,6 +4,7 @@
use crate::ir::*;
use crate::legalize::LegalizeBuilder;
use crate::sm70_encode::*;
use crate::sm75_instr_latencies::SM75Latency;
pub struct ShaderModel70 {
sm: u8,
@@ -144,6 +145,18 @@ impl ShaderModel for ShaderModel70 {
}
}
/// Reports whether `op` needs a scoreboard.
///
/// Ops for which `no_scoreboard()` is true never need one. Otherwise, on
/// Turing the answer is delegated to `SM75Latency::needs_scoreboards`; on
/// every other SM handled here, an op needs one exactly when it does not
/// have a fixed latency for this SM.
fn op_needs_scoreboard(&self, op: &Op) -> bool {
    if op.no_scoreboard() {
        false
    } else if self.is_turing() {
        SM75Latency::needs_scoreboards(op)
    } else {
        !op.has_fixed_latency(self.sm())
    }
}
fn exec_latency(&self, op: &Op) -> u32 {
match op {
Op::Bar(_) | Op::MemBar(_) => {
@@ -166,39 +179,53 @@ impl ShaderModel for ShaderModel70 {
&self,
write: &Op,
dst_idx: usize,
_read: &Op,
_src_idx: usize,
read: &Op,
src_idx: usize,
) -> u32 {
self.instr_latency(write, dst_idx)
if self.is_turing() {
SM75Latency::raw(write, dst_idx, Some(read), src_idx)
} else {
self.instr_latency(write, dst_idx)
}
}
fn war_latency(
&self,
_read: &Op,
_src_idx: usize,
_write: &Op,
_dst_idx: usize,
read: &Op,
src_idx: usize,
write: &Op,
dst_idx: usize,
) -> u32 {
// We assume the source gets read in the first 4 cycles. We don't know
// how quickly the write will happen. This is all a guess.
4
if self.is_turing() {
SM75Latency::war(read, src_idx, write, dst_idx)
} else {
// We assume the source gets read in the first 4 cycles. We don't
// know how quickly the write will happen. This is all a guess.
4
}
}
fn waw_latency(
&self,
a: &Op,
a_dst_idx: usize,
_a_has_pred: bool,
_b: &Op,
_b_dst_idx: usize,
a_has_pred: bool,
b: &Op,
b_dst_idx: usize,
) -> u32 {
// We know our latencies are wrong so assume the write could happen
// anywhere between 0 and instr_latency(a) cycles
self.instr_latency(a, a_dst_idx)
if self.is_turing() {
SM75Latency::waw(a, a_dst_idx, b, b_dst_idx, a_has_pred)
} else {
// We know our latencies are wrong so assume the write could happen
// anywhere between 0 and instr_latency(a) cycles
self.instr_latency(a, a_dst_idx)
}
}
fn paw_latency(&self, write: &Op, _dst_idx: usize) -> u32 {
if self.is_volta() {
fn paw_latency(&self, write: &Op, dst_idx: usize) -> u32 {
if self.is_turing() {
SM75Latency::raw(write, dst_idx, None, 0)
} else if self.is_volta() {
match write {
Op::DSetP(_) | Op::HSetP2(_) => 15,
_ => 13,
@@ -209,7 +236,11 @@ impl ShaderModel for ShaderModel70 {
}
fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
self.instr_latency(write, dst_idx)
if self.is_turing() {
SM75Latency::raw(write, dst_idx, None, 0)
} else {
self.instr_latency(write, dst_idx)
}
}
fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op) {

File diff suppressed because it is too large Load Diff