[PATCH] Cygwin: malloc tune-up

2020-08-23 Thread Mark Geisert
1. Replace global malloc lock with finer-grained internal locks.
2. Enable MSPACES, i.e. thread-specific memory pools.

Porting and testing of several dlmalloc-related malloc implementations
(ptmalloc, ptmalloc2, ptmalloc3, nedalloc) shows that they are no faster
than the current dlmalloc used by Cygwin, when the latter is tuned.  This
could be because the others are forks of earlier dlmalloc versions.  For
the record, I could not get jemalloc or tcmalloc running at all due to
chicken-egg issues, nor could I get a Win32 Heap-based malloc to work
across fork(), which was expected.

I think I can see a way to get Win32 Heap-based malloc to work across
fork()s, but it would depend on undocumented info and would likely be
subject to breakage in future Windows versions.  Too bad, because that
form of malloc package would be 2 to 8 times faster in practice.

---
 winsup/cygwin/cygmalloc.h   |  21 +++-
 winsup/cygwin/fork.cc   |   7 --
 winsup/cygwin/malloc_wrapper.cc | 163 +++-
 3 files changed, 113 insertions(+), 78 deletions(-)

diff --git a/winsup/cygwin/cygmalloc.h b/winsup/cygwin/cygmalloc.h
index 84bad824c..302ce59c8 100644
--- a/winsup/cygwin/cygmalloc.h
+++ b/winsup/cygwin/cygmalloc.h
@@ -33,15 +33,30 @@ void *mmap64 (void *, size_t, int, int, int, off_t);
 # define mmap mmap64
 # define MALLOC_FAILURE_ACTION __set_ENOMEM ()
 # define USE_DL_PREFIX 1
+# define USE_LOCKS 1
+# define MSPACES 1
+# define FOOTERS 1
 
 #elif defined (__INSIDE_CYGWIN__)
 
-# define __malloc_lock() mallock.acquire ()
-# define __malloc_unlock() mallock.release ()
-extern muto mallock;
+# define MSPACES 0
 
 #endif
 
+#if MSPACES
+void __reg2 *create_mspace (size_t, int);
+void __reg2 mspace_free (void *m, void *p);
+void __reg2 *mspace_malloc (void *m, size_t size);
+void __reg3 *mspace_realloc (void *m, void *p, size_t size);
+void __reg3 *mspace_calloc (void *m, size_t nmemb, size_t size);
+void __reg3 *mspace_memalign (void *m, size_t alignment, size_t bytes);
+void __reg2 *mspace_valloc (void *m, size_t bytes);
+size_t __reg1 mspace_usable_size (const void *p);
+int __reg2 mspace_malloc_trim (void *m, size_t);
+int __reg2 mspace_mallopt (int p, int v);
+void __reg1 mspace_malloc_stats (void *m);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/winsup/cygwin/fork.cc b/winsup/cygwin/fork.cc
index 38172ca1e..82f95dafe 100644
--- a/winsup/cygwin/fork.cc
+++ b/winsup/cygwin/fork.cc
@@ -296,8 +296,6 @@ frok::parent (volatile char * volatile stack_here)
   si.lpReserved2 = (LPBYTE) 
   si.cbReserved2 = sizeof (ch);
 
-  bool locked = __malloc_lock ();
-
   /* Remove impersonation */
   cygheap->user.deimpersonate ();
   fix_impersonation = true;
@@ -448,9 +446,6 @@ frok::parent (volatile char * volatile stack_here)
   "stack", stack_here, ch.stackbase,
   impure, impure_beg, impure_end,
   NULL);
-
-  __malloc_unlock ();
-  locked = false;
   if (!rc)
 {
   this_errno = get_errno ();
@@ -533,8 +528,6 @@ cleanup:
 
   if (fix_impersonation)
 cygheap->user.reimpersonate ();
-  if (locked)
-__malloc_unlock ();
 
   /* Remember to de-allocate the fd table. */
   if (hchild)
diff --git a/winsup/cygwin/malloc_wrapper.cc b/winsup/cygwin/malloc_wrapper.cc
index 3b245800a..f1c23b4a8 100644
--- a/winsup/cygwin/malloc_wrapper.cc
+++ b/winsup/cygwin/malloc_wrapper.cc
@@ -27,6 +27,33 @@ extern "C" struct mallinfo dlmallinfo ();
 static bool use_internal = true;
 static bool internal_malloc_determined;
 
+/* If MSPACES (thread-specific memory pools) are enabled, use a
+   thread-local variable to store a pointer to that thread's mspace.
+ */
+#if MSPACES
+static DWORD tls_mspace; // index into thread's TLS array
+#define MSPACE_SIZE (8 * 1024 * 1024)
+
+static void *
+get_current_mspace ()
+{
+  if (unlikely(tls_mspace == 0))
+api_fatal ("a malloc-related function was called before malloc_init");
+
+  void *m = TlsGetValue (tls_mspace);
+  if (unlikely(m == 0))
+{
+  m = create_mspace (MSPACE_SIZE, 0);
+  if (!m)
+api_fatal ("unable to create mspace");
+  TlsSetValue (tls_mspace, m);
+}
+  return m;
+}
+
+#define MSPACE get_current_mspace()
+#endif
+
 /* These routines are used by the application if it
doesn't provide its own malloc. */
 
@@ -37,11 +64,11 @@ free (void *p)
   if (!use_internal)
 user_data->free (p);
   else
-{
-  __malloc_lock ();
-  dlfree (p);
-  __malloc_unlock ();
-}
+#if MSPACES
+mspace_free (MSPACE, p);
+#else
+dlfree (p);
+#endif
 }
 
 extern "C" void *
@@ -51,11 +78,11 @@ malloc (size_t size)
   if (!use_internal)
 res = user_data->malloc (size);
   else
-{
-  __malloc_lock ();
-  res = dlmalloc (size);
-  __malloc_unlock ();
-}
+#if MSPACES
+res = mspace_malloc (MSPACE, size);
+#else
+res = dlmalloc (size);
+#endif
   malloc_printf ("(%ld) = %p, called by %p", size, res,
   

[PATCH] Cygwin: cwdstuff::get: clean up debug_printf output

2020-08-23 Thread Ken Brown via Cygwin-patches
Set errno = 0 at the beginning so that the debug_printf call at the
end doesn't report a nonzero errno left over from some other function
call.
---
 winsup/cygwin/path.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/winsup/cygwin/path.cc b/winsup/cygwin/path.cc
index f3b9913bd..95faf8ca7 100644
--- a/winsup/cygwin/path.cc
+++ b/winsup/cygwin/path.cc
@@ -5022,6 +5022,8 @@ char *
 cwdstuff::get (char *buf, int need_posix, int with_chroot, unsigned ulen)
 {
   tmp_pathbuf tp;
+
+  errno = 0;
   if (ulen)
 /* nothing */;
   else if (buf == NULL)
-- 
2.28.0