nak: Add Ampere and Ada latency information
This adds the latency information provided by NVIDIA. The data was copied from Excel spreadsheets provided to Red Hat.

Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33573>
This commit is contained in:
@@ -30,6 +30,7 @@ mod sm50;
|
|||||||
mod sm70;
|
mod sm70;
|
||||||
mod sm70_encode;
|
mod sm70_encode;
|
||||||
mod sm75_instr_latencies;
|
mod sm75_instr_latencies;
|
||||||
|
mod sm80_instr_latencies;
|
||||||
mod sph;
|
mod sph;
|
||||||
mod spill_values;
|
mod spill_values;
|
||||||
mod to_cssa;
|
mod to_cssa;
|
||||||
|
@@ -5,6 +5,7 @@ use crate::ir::*;
|
|||||||
use crate::legalize::LegalizeBuilder;
|
use crate::legalize::LegalizeBuilder;
|
||||||
use crate::sm70_encode::*;
|
use crate::sm70_encode::*;
|
||||||
use crate::sm75_instr_latencies::SM75Latency;
|
use crate::sm75_instr_latencies::SM75Latency;
|
||||||
|
use crate::sm80_instr_latencies::SM80Latency;
|
||||||
|
|
||||||
pub struct ShaderModel70 {
|
pub struct ShaderModel70 {
|
||||||
sm: u8,
|
sm: u8,
|
||||||
@@ -150,7 +151,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::needs_scoreboards(op)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::needs_scoreboards(op)
|
SM75Latency::needs_scoreboards(op)
|
||||||
} else {
|
} else {
|
||||||
!op.has_fixed_latency(self.sm())
|
!op.has_fixed_latency(self.sm())
|
||||||
@@ -182,7 +185,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
read: &Op,
|
read: &Op,
|
||||||
src_idx: usize,
|
src_idx: usize,
|
||||||
) -> u32 {
|
) -> u32 {
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::raw(write, dst_idx, Some(read), src_idx)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::raw(write, dst_idx, Some(read), src_idx)
|
SM75Latency::raw(write, dst_idx, Some(read), src_idx)
|
||||||
} else {
|
} else {
|
||||||
self.instr_latency(write, dst_idx)
|
self.instr_latency(write, dst_idx)
|
||||||
@@ -196,7 +201,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
write: &Op,
|
write: &Op,
|
||||||
dst_idx: usize,
|
dst_idx: usize,
|
||||||
) -> u32 {
|
) -> u32 {
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::war(read, src_idx, write, dst_idx)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::war(read, src_idx, write, dst_idx)
|
SM75Latency::war(read, src_idx, write, dst_idx)
|
||||||
} else {
|
} else {
|
||||||
// We assume the source gets read in the first 4 cycles. We don't
|
// We assume the source gets read in the first 4 cycles. We don't
|
||||||
@@ -213,7 +220,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
b: &Op,
|
b: &Op,
|
||||||
b_dst_idx: usize,
|
b_dst_idx: usize,
|
||||||
) -> u32 {
|
) -> u32 {
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::waw(a, a_dst_idx, b, b_dst_idx, a_has_pred)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::waw(a, a_dst_idx, b, b_dst_idx, a_has_pred)
|
SM75Latency::waw(a, a_dst_idx, b, b_dst_idx, a_has_pred)
|
||||||
} else {
|
} else {
|
||||||
// We know our latencies are wrong so assume the write could happen
|
// We know our latencies are wrong so assume the write could happen
|
||||||
@@ -223,7 +232,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn paw_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
fn paw_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::raw(write, dst_idx, None, 0)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::raw(write, dst_idx, None, 0)
|
SM75Latency::raw(write, dst_idx, None, 0)
|
||||||
} else if self.is_volta() {
|
} else if self.is_volta() {
|
||||||
match write {
|
match write {
|
||||||
@@ -236,7 +247,9 @@ impl ShaderModel for ShaderModel70 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
fn worst_latency(&self, write: &Op, dst_idx: usize) -> u32 {
|
||||||
if self.is_turing() {
|
if self.is_ampere() || self.is_ada() {
|
||||||
|
SM80Latency::raw(write, dst_idx, None, 0)
|
||||||
|
} else if self.is_turing() {
|
||||||
SM75Latency::raw(write, dst_idx, None, 0)
|
SM75Latency::raw(write, dst_idx, None, 0)
|
||||||
} else {
|
} else {
|
||||||
self.instr_latency(write, dst_idx)
|
self.instr_latency(write, dst_idx)
|
||||||
|
1572
src/nouveau/compiler/nak/sm80_instr_latencies.rs
Normal file
1572
src/nouveau/compiler/nak/sm80_instr_latencies.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user