From 2f6919e6c2d898ff940e791f558d0d919fa47747 Mon Sep 17 00:00:00 2001 From: Sushma Venkatesh Reddy Date: Wed, 17 Jul 2024 18:55:46 -0700 Subject: [PATCH] intel/clflush: Utilize clflushopt in intel_invalidate_range On MTL ChromeOS boards, during AI-based video conferencing, we were observing a lot of overhead from invalidations. Upon debugging, it was found that we were using clflush in this function, which isn't efficient. With this change, while executing compute workloads like zoo models, we are getting a ~25% performance improvement in the best-case scenario. Rework: * Jordan: Call intel_clflushopt_range() rather than __builtin_ia32_clflushopt() because intel_mem.c is not compiled with -mclflushopt. Backport-to: 24.1 24.2 Signed-off-by: Sushma Venkatesh Reddy Reviewed-by: Matt Turner Reviewed-by: Jordan Justen Part-of: --- src/intel/common/intel_mem.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/intel/common/intel_mem.c b/src/intel/common/intel_mem.c index c2e09a44371..6ebd48fd46c 100644 --- a/src/intel/common/intel_mem.c +++ b/src/intel/common/intel_mem.c @@ -78,7 +78,7 @@ intel_invalidate_range(void *start, size_t size) if (size == 0) return; - intel_clflush_range(start, size); + intel_flush_range_no_fence(start, size); /* Modern Atom CPUs (Baytrail+) have issues with clflush serialization, * where mfence is not a sufficient synchronization barrier. We must @@ -90,6 +90,15 @@ intel_invalidate_range(void *start, size_t size) * ("drm: Restore double clflush on the last partial cacheline") * and https://bugs.freedesktop.org/show_bug.cgi?id=92845. */ +#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT + /* clflushopt doesn't include an mfence like clflush */ + if (util_get_cpu_caps()->has_clflushopt) { + __builtin_ia32_mfence(); + intel_clflushopt_range(start + size - 1, 1); + __builtin_ia32_mfence(); + return; + } +#endif __builtin_ia32_clflush(start + size - 1); __builtin_ia32_mfence(); }