new module 'jit/cache'

Bruno Haible Sun, 12 Nov 2023 15:17:57 -0800

Marc Nieper-Wißkirchen wrote:
> > I can provide such a thing easily. In fact, I have it already sitting
> > around on my disk since 2021 :-)
>
> This is great!


I'm adding the new module, below.

> If we want to support a wider variety of systems, the code of Chez Scheme
> may be helpful.  The `S_doflush' function ([1]) does the cache clearing.
> 
> [1]
> https://github.com/search?q=repo%3Acisco%2FChezScheme%20S_doflush&type=code

Thanks. It appears to be more optimized than what I have on Linux/arm64,
but optimizations are dangerous, you know...


2023-11-12  Bruno Haible  <br...@clisp.org>

        jit/cache: New module.
        * lib/jit/cache.h: New file.
        * m4/valgrind-helper.m4: New file.
        * modules/jit/cache: New file.

From f48984f04a9649727f1d0b0f4fd25e88924f9c42 Mon Sep 17 00:00:00 2001
From: Bruno Haible <br...@clisp.org>
Date: Sun, 12 Nov 2023 18:22:50 +0100
Subject: [PATCH] jit/cache: New module.

* lib/jit/cache.h: New file.
* m4/valgrind-helper.m4: New file.
* modules/jit/cache: New file.
---
 ChangeLog             |   7 +++
 lib/jit/cache.h       | 140 ++++++++++++++++++++++++++++++++++++++++++
 m4/valgrind-helper.m4 |  22 +++++++
 modules/jit/cache     |  24 ++++++++
 4 files changed, 193 insertions(+)
 create mode 100644 lib/jit/cache.h
 create mode 100644 m4/valgrind-helper.m4
 create mode 100644 modules/jit/cache

diff --git a/ChangeLog b/ChangeLog
index c2a59f8f88..c7fa04a173 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-11-12  Bruno Haible  <br...@clisp.org>
+
+	jit/cache: New module.
+	* lib/jit/cache.h: New file.
+	* m4/valgrind-helper.m4: New file.
+	* modules/jit/cache: New file.
+
 2023-11-11  Bruno Haible  <br...@clisp.org>
 
 	ssfmalloc: Take advantage of CHERI bounds-checking.
diff --git a/lib/jit/cache.h b/lib/jit/cache.h
new file mode 100644
index 0000000000..ce470399e5
--- /dev/null
+++ b/lib/jit/cache.h
@@ -0,0 +1,157 @@
+/* JIT compiler - Flushing the instruction cache.
+
+   Copyright (C) 1995-2023 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <br...@clisp.org>, 2020.  */
+
+/* Get size_t.  */
+#include <stddef.h>
+/* Get intptr_t, uintptr_t.  */
+#include <stdint.h>
+#if ENABLE_VALGRIND_SUPPORT
+# include <valgrind/valgrind.h>
+#endif
+#if defined _WIN32 && !defined __CYGWIN__
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+#if defined __APPLE__ && defined __MACH__
+# include <libkern/OSCacheControl.h>
+#endif
+#if defined _AIX
+# include <sys/cache.h>
+#endif
+#if defined __sgi
+# include <sys/cachectl.h>
+#endif
+
+/* Clears the instruction cache for addresses
+   start <= address < end.
+   We need this because some CPUs have separate data cache and instruction
+   cache. The freshly built trampoline is visible to the data cache, but
+   maybe not to the instruction cache. This is hairy.
+
+   START is the address of the first byte of the freshly written code, END
+   the address just past its last byte.  Call this function after the code
+   has been written and before it gets executed.
+
+   The method is chosen at compile time: an operating system provided
+   function where one is known, otherwise CPU specific inline assembly or a
+   compiler built-in.  On a platform for which no method is known,
+   compilation fails.  */
+static inline void
+clear_cache (void *start, void *end)
+{
+#if ENABLE_VALGRIND_SUPPORT
+  /* Documentation:
+     <https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq>  */
+  VALGRIND_DISCARD_TRANSLATIONS (start, (char *) end - (char *) start);
+#endif
+#if (defined __x86_64__ || defined _M_X64) || (defined __i386 || defined _M_IX86)
+  /* On this architecture, data cache and instruction cache are not separate.
+     Therefore, nothing to do.
+     For details, see
+     <https://stackoverflow.com/questions/10989403/how-is-x86-instruction-cache-synchronized>  */
+
+/* Use the operating system provided function, when available.  */
+#elif defined _WIN32 && !defined __CYGWIN__
+  /* Native Windows.
+     FlushInstructionCache
+     <https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-flushinstructioncache>  */
+  HANDLE process = GetCurrentProcess ();
+  /* Retry for as long as the call reports failure (returns zero).  */
+  while (!FlushInstructionCache (process, start, (char *) end - (char *) start))
+    ;
+#elif defined __APPLE__ && defined __MACH__
+  /* macOS  */
+  sys_icache_invalidate (start, (char *) end - (char *) start);
+#elif defined _AIX
+  /* AIX.  */
+  _sync_cache_range (start, (char *) end - (char *) start);
+#elif defined __sgi
+  /* IRIX.  */
+  cacheflush (start, (char *) end - (char *) start, ICACHE);
+#elif defined __sun
+  /* Solaris.  */
+  extern void sync_instruction_memory (char *, size_t);
+  sync_instruction_memory (start, (char *) end - (char *) start);
+
+/* No operating system provided function. Dispatch according to the CPU.
+   The inline assembly statements below carry a "memory" clobber, so that the
+   compiler does not move the stores that wrote the code past them, and are
+   spelled __asm__, so that they also compile with -std=c99 and similar.  */
+#elif (defined __GNUC__ || defined __clang__) && defined __powerpc__
+  /* XXX Is this enough, or do we also need the 'clf' instruction?  */
+  uintptr_t addr = (uintptr_t) start & ~(intptr_t)3;
+  uintptr_t end_addr = (uintptr_t) end;
+  do
+    {
+      __asm__ __volatile__ ("icbi 0,%0; dcbf 0,%0" : : "r" (addr) : "memory");
+      addr += 4;
+    }
+  while (addr < end_addr);
+  __asm__ __volatile__ ("sync; isync" : : : "memory");
+#elif (defined __GNUC__ || defined __clang__) && defined __sparc
+  /* Use inline assembly.  */
+  /* The 'flush' instruction was earlier called 'iflush'.  */
+  uintptr_t addr = (uintptr_t) start & ~(intptr_t)7;
+  uintptr_t end_addr = (uintptr_t) end;
+  do
+    {
+      __asm__ __volatile__ ("flush %0+0" : : "r" (addr) : "memory");
+      addr += 8;
+    }
+  while (addr < end_addr);
+#elif (defined __GNUC__ || defined __clang__) && defined __m68k__ && defined __linux__
+  /* Use inline assembly to call the 'cacheflush' system call.
+     sys_cacheflush (addr, scope, cache, len)
+                      d1     d2     d3    d4
+   */
+  register uintptr_t addr __asm__ ("%d1") = (uintptr_t) start;
+  register uintptr_t len __asm__ ("%d4") = (uintptr_t) end - addr;
+  __asm__ __volatile__ (
+           "move%.l %#123,%/d0" /* __NR_cacheflush */
+    "\n\t" "move%.l %#1,%/d2"   /* FLUSH_SCOPE_LINE */
+    "\n\t" "move%.l %#3,%/d3"   /* FLUSH_CACHE_BOTH */
+    "\n\t" "trap %#0"
+    :
+    : "d" (addr), "d" (len)
+    : "%d0", "%d2", "%d3", "memory"
+    );
+#elif (__GNUC__ + (__GNUC_MINOR__ >= 3) > 4) \
+      || ((__clang_major__ + (__clang_minor__ >= 4) > 3) \
+          && (defined __aarch64__ /* arm64 */ || defined __arm__))
+  /* GCC >= 4.3 has a GCC built-in.
+     <https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html>
+     But it's sometimes not correctly implemented.
+     clang >= 3.4 has it as well, at least on ARM and ARM64.  */
+  /* On ARM, cache flushing can only be done through a system call.
+     GCC implements it for Linux with EABI, through an "swi 0" with code
+     0xf0002.  For other systems, it may be an "swi 0x9f0002",
+     an "swi 0xf00000", or similar.  */
+  /* On ARM64, cache flushing is done through special instructions,
+     and the length of the cache lines must be determined at runtime.
+     See gcc/libgcc/config/aarch64/sync-cache.c.  */
+  __builtin___clear_cache (start, end);
+#elif HAVE___CLEAR_CACHE
+  /* Older versions of GCC have this libgcc function, but only on some
+     platforms.  */
+  extern void __clear_cache (char *, char *);
+  __clear_cache (start, end);
+#else
+# error "Don't know how to implement clear_cache on this platform."
+#endif
+}
diff --git a/m4/valgrind-helper.m4 b/m4/valgrind-helper.m4
new file mode 100644
index 0000000000..b3d70a7ad9
--- /dev/null
+++ b/m4/valgrind-helper.m4
@@ -0,0 +1,32 @@
+# valgrind-helper.m4 serial 1
+dnl Copyright (C) 2023 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+# Enables support for running the generated binaries under valgrind if
+# - the valgrind header files are installed, and
+# - the user desires so.
+# Defines the C macro ENABLE_VALGRIND_SUPPORT to 1 in this case, to 0 otherwise
+# (in particular when the option --with-valgrind is not given at all).
+AC_DEFUN_ONCE([gl_VALGRIND_HELPER],
+[
+  AC_ARG_WITH([valgrind],
+    [AS_HELP_STRING([[--with-valgrind]],
+       [enable support for running the binaries under valgrind])],
+    [if test "$withval" != no; then
+       support_valgrind=1
+     else
+       support_valgrind=0
+     fi
+    ],
+    [support_valgrind=0])
+  dnl Honor the request only if the valgrind header file can be included.
+  if test $support_valgrind = 1; then
+    AC_CHECK_HEADER([valgrind/valgrind.h], [], [support_valgrind=0])
+  fi
+  dnl The third argument is the template that autoheader puts into config.h.in.
+  AC_DEFINE_UNQUOTED([ENABLE_VALGRIND_SUPPORT], [$support_valgrind],
+    [Define to 1 to include support for running the binaries under valgrind,
+     or to 0 otherwise.])
+])
diff --git a/modules/jit/cache b/modules/jit/cache
new file mode 100644
index 0000000000..f7af2be314
--- /dev/null
+++ b/modules/jit/cache
@@ -0,0 +1,24 @@
+Description:
+JIT compiler - Flushing the instruction cache.
+
+Files:
+lib/jit/cache.h
+m4/valgrind-helper.m4
+
+Depends-on:
+
+configure.ac:
+gl_VALGRIND_HELPER
+AC_REQUIRE([AC_C_INLINE])
+AC_CHECK_FUNCS([__clear_cache])
+
+Makefile.am:
+
+Include:
+"jit/cache.h"
+
+License:
+LGPLv2+
+
+Maintainer:
+Bruno Haible
-- 
2.34.1

new module 'jit/cache'

Reply via email to