From c1ecf08e0d0090f25e9b7f0fea99d2cc95c7b91c Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Tue, 17 Dec 2024 15:11:28 +0100 Subject: [PATCH] nak: Fix 8-bit selection for vectors This fixes at least the permutation issues on vec16 of 8-bit values for cooperative matrices. Fixes: 9e84e9e44b1 ("nak: Add base support for 8 and 16-bit types") Suggested-by: M Henning Signed-off-by: Mary Guillemard Reviewed-by: M Henning (cherry picked from commit 979dfaf0bb34eb566acabbb04b9d84eb31039559) Part-of: --- .pick_status.json | 2 +- src/nouveau/compiler/nak/from_nir.rs | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index fcaca36af54..4a3a25de075 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2144,7 +2144,7 @@ "description": "nak: Fix 8-bit selection for vectors", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "9e84e9e44b111a6afe8a346fb0bb74f9c597af61", "notes": null diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 775d782c6af..1c6bf049d45 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -543,24 +543,41 @@ impl<'a> ShaderFromNir<'a> { } 8 => { for dc in 0..bits.div_ceil(32) { - let mut psrc = [Src::new_zero(); 4]; + let mut psrc = [None; 4]; let mut psel = [0_u8; 4]; for b in 0..4 { let sc = dc * 4 + b; if sc < srcs.len() { let (ssa, byte) = srcs[sc]; + // Deduplicate psrc entries for i in 0..4_u8 { let psrc_i = &mut psrc[usize::from(i)]; - if *psrc_i == Src::new_zero() { - *psrc_i = ssa.into(); - } else if *psrc_i != Src::from(ssa) { + if psrc_i.is_none() { + *psrc_i = Some(ssa.into()); + } else if *psrc_i + != Some(Src::from(ssa)) + { continue; } psel[b] = i * 4 + byte; + break; } } } + + let psrc = { + let mut res = [Src::new_zero(); 4]; + + for (idx, src) in psrc.iter().enumerate() { + if let Some(src) = src { + res[idx] = 
*src; + } + } + + res + }; + comps.push(b.prmt4(psrc, psel)[0]); } }