# New Ticket Created by  Leopold Toetsch 
# Please include the string:  [perl #17731]
# in the subject line of all future correspondence about this issue. 
# <URL: http://rt.perl.org/rt2/Ticket/Display.html?id=17731 >


As adviced by Dan, this patch integrates the possibility of using a 
Lea-like allocator instead of current copying GC.

There is one new Configure option, to select the allocator:

perl Configure [--gc=gc]  ... nothing or 'gc' - select GC in resources.c
perl Configure --gc=libc ... use ptmalloc in libc[1]
perl Configure --gc=malloc ... use Lea malloc provided with parrot
perl Configure --gc=malloc-trace ... same, with trace support

The used allocator shows up in Configure's output, e.g.:

Determining what allocator to use ... (gc) done.

[1] a test looks for availability of mallinfo, and assumes then, that 
the underlying libc uses ptmalloc (which is derived from Lea malloc 
2.6.4). For this case, no malloc.c is linked to parrot, saving ~80 KB.
(This test could be omitted, to use any system's malloc, with probably 
horrible benchmark timings - or not - who knows)

After changing the allocator the files headers.c and dod.c [2] should be 
touched before make - this avoids a 'make clean'.

A sample script called lea is at the end of the patch, though it's not 
in MANIFEST.

The 2 files malloc.c and malloc-trace.c are _not_ appended here for 
brevity, please whoever checks in, download them, if not already done, 
and commit them too - thanks.
http://gee.cs.oswego.edu/dl/html/malloc.html

Some TODOS:
- general doc of memory internals including DOD, GC and allocators
- integrate statistics and - if possbile - make the numbers (especially 
allocated memory) comparable. mallinfo().uordblks is the whole memory 
used, interpreter->memory_allocated is buffer mem only and is without 
overhead.
Currently setting interpreter->memory_allocated is disabled, because 
it's rather costy (e.g. accounting to 1% in life.pasm), this should be 
set on demand only, i.e. when e.g. <interpinfo> needs it, but not after 
each DOD run.
- possible optimizations: code, that now has to assume, that allocated 
buffers have moved due to GC, can turn off recalculating the buffer 
address by guarding this code with #ifdef GC_IS_MALLOC, e.g in hash.c
[2] we should have a Configure magic, that knows what to rebuild, when a 
switch like GC_IS_MALLOC changes ;-)
- there are some utilities, that provide summaries out of malloc-trace 
statistics - should we put these in dev/tools?

Remark WRT string.c changes:
This is mostly #17703, which is obsoleted by this patch. Changes are 
compatible and usable by both GC schemes.

Have fun,
leo


-- attachment  1 ------------------------------------------------------
url: http://rt.perl.org/rt2/attach/39088/31759/b748c6/lea-4.patch

--- parrot/config/gen/makefiles/root.in Thu Sep 26 09:44:01 2002
+++ parrot-leo/config/gen/makefiles/root.in     Thu Oct  3 07:32:08 2002
@@ -96,7 +96,7 @@
                                 packfile$(O) stacks$(O) string$(O) sub$(O) 
encoding$(O) \
                                 chartype$(O) runops_cores$(O) trace$(O) pmc$(O) 
key$(O) hash$(O) \
                                 core_pmcs$(O) platform$(O) ${jit_o} \
-                                resources$(O) rx$(O) rxstacks$(O) intlist$(O) \
+            ${gc_o} rx$(O) rxstacks$(O) intlist$(O) \
                                 embed$(O) warnings$(O) misc$(O) ${cg_o} \
                                 packout$(O) byteorder$(O) debug$(O) smallobject$(O) \
                                 headers$(O) dod$(O) method_util$(O)
@@ -119,7 +119,8 @@
 #
 ###############################################################################
 
-CFLAGS = ${ccflags} ${cc_warn} ${cc_inc} ${cc_hasjit} ${cg_flag}
+CFLAGS = ${ccflags} ${cc_warn} ${cc_inc} ${cc_hasjit} ${cg_flag} ${gc_flag}
+
 LINKFLAGS = ${linkflags}
 LDFLAGS = ${ldflags}
 
@@ -415,6 +416,8 @@
 
 ${cg_c}
 
+${gc_c}
+
 warnings$(O) : $(H_FILES)
 
 misc$(O) : $(H_FILES)
--- parrot/config/auto/gc.pl    Thu Oct  3 08:44:29 2002
+++ parrot-leo/config/auto/gc.pl        Thu Oct  3 08:05:01 2002
@@ -0,0 +1,67 @@
+package Configure::Step;
+
+use strict;
+use vars qw($description @args);
+use Parrot::Configure::Step ':auto';
+
+$description="Determining what allocator to use ...";
+
+# valid libc/malloc/malloc-trace/gc
+@args=qw(gc);
+
+sub runstep {
+  my ($gc) = @_;
+
+  if (!defined($gc)) {
+    # default is GC in resources.c
+    $gc = 'gc';
+  }
+  elsif ($gc eq 'libc') {
+    # tests mallinfo after allocation of 128 bytes
+    cc_gen('config/auto/gc/test_c.in');
+    eval { cc_build(); };
+    my $test = 0;
+    unless ($@) {
+      $test = cc_run();
+    }
+    cc_clean();
+    # used size should be somewhere here
+    unless ($test >= 128 && $test < 155) {
+      # if not, use own copy of malloc
+      $gc = 'malloc';
+    }
+  }
+
+  if ($gc =~ /^malloc(?:-trace)?$/) {
+    Configure::Data->set(
+      gc_c          => <<"EOF",
+$gc\$(O):      \$(GENERAL_H_FILES) $gc.c
+res_lea\$(O):  \$(GENERAL_H_FILES) res_lea.c
+EOF
+      gc_o          => "$gc\$(O) res_lea\$(O)",
+      gc_flag  => '-DGC_IS_MALLOC',
+    );
+  }
+  elsif ($gc eq 'libc') {
+    Configure::Data->set(
+      gc_c          => <<"EOF",
+res_lea\$(O):  \$(GENERAL_H_FILES) res_lea.c
+EOF
+      gc_o          => "res_lea\$(O)",
+      gc_flag  => '-DGC_IS_MALLOC',
+    );
+  }
+  else {
+      $gc = 'gc';
+    Configure::Data->set(
+      gc_c          => <<"EOF",
+resources\$(O):        \$(GENERAL_H_FILES) resources.c
+EOF
+      gc_o          => "resources\$(O)",
+      gc_flag  => '',
+    );
+  }
+  print(" ($gc) ");
+}
+
+1;
--- parrot/config/auto/gc/test_c.in     Thu Oct  3 08:44:29 2002
+++ parrot-leo/config/auto/gc/test_c.in Thu Oct  3 07:33:32 2002
@@ -0,0 +1,15 @@
+/*
+ * gc.c - figure out if we can use malloc as allocator
+ *
+ * This file is automatically generated by Configure
+ * from gc.in.
+ */
+#include <malloc.h>
+#include <stdio.h>
+int main(int argc, char **argv) {
+        void *p = malloc(128);
+       int used = mallinfo().uordblks;
+       printf("%d\n", used);
+
+       return 0;
+}
--- parrot/lib/Parrot/Configure/RunSteps.pm     Thu Sep  5 20:09:39 2002
+++ parrot-leo/lib/Parrot/Configure/RunSteps.pm Thu Oct  3 08:34:10 2002
@@ -26,6 +26,7 @@
        auto/jit.pl
        auto/funcptr.pl
        auto/cgoto.pl
+       auto/gc.pl
        gen/config_h.pl
        gen/config_pm.pl
        gen/makefiles.pl
--- parrot/Configure.pl Thu Sep 26 09:44:01 2002
+++ parrot-leo/Configure.pl     Thu Oct  3 07:34:01 2002
@@ -57,6 +57,10 @@
    --ld=(linker)        Use the given linker
    --intval=(type)      Use the given type for INTVAL
    --floatval=(type)    Use the given type for FLOATVAL
+
+   --cgoto=0           Don't build cgoto core - recommended when short of mem
+   --gc=gc|libc|malloc|malloc-trace    determine GC type, default = gc
+
 EOT
       exit;
     };
--- parrot/res_lea.c    Thu Oct  3 08:44:29 2002
+++ parrot-leo/res_lea.c        Wed Oct  2 13:28:30 2002
@@ -0,0 +1,85 @@
+/* resources */
+#include <assert.h>
+#include "parrot/parrot.h"
+void
+Parrot_go_collect(struct Parrot_Interp *interpreter)
+{
+    if (interpreter->GC_block_level) {
+        return;
+    }
+    interpreter->collect_runs++;        /* fake it */
+}
+void *
+Parrot_reallocate(struct Parrot_Interp *interpreter, void *from, size_t size)
+{
+    Buffer * buffer = from;
+    void *p;
+    size_t oldlen = buffer->buflen;
+    p =  realloc(buffer->bufstart, size);
+    if (size > buffer->buflen)
+       memset((char*)p + oldlen, 0, size - oldlen);
+    buffer->buflen = size;
+    buffer->bufstart = p;
+    return p;
+}
+
+void *
+Parrot_allocate(struct Parrot_Interp *interpreter, void *buffer, size_t size)
+{
+    Buffer * b = buffer;
+    b->bufstart = calloc(1, size);
+    b->buflen = size;
+    return b;
+}
+
+void *
+Parrot_reallocate_string(struct Parrot_Interp *interpreter, STRING *str,
+                         size_t size)
+{
+    void *p;
+    size_t pad, rsize;
+    pad = STRING_ALIGNMENT - 1;
+    /* 2 chars string tail, first seems to be clobbered */
+    size = ((size + pad + 2) & ~pad) - 2;
+    p = realloc(str->bufstart, size + 2);
+    str->strstart = str->bufstart = p;
+    ((char*)str->bufstart)[size+1] = 0;
+    str->buflen = size;
+    return p;
+}
+
+void *
+Parrot_allocate_string(struct Parrot_Interp *interpreter, STRING *str,
+                       size_t size)
+{
+    void *p = 0;
+    size_t pad;
+#if 0
+    if (size)
+#endif
+    {
+#if 0
+        pad = STRING_ALIGNMENT - 1;
+        size = ((size + pad + 2) & ~pad) - 2;
+#endif
+        p = calloc(1, size + 2);
+    }
+    str->strstart = str->bufstart = p;
+    str->buflen = size;
+    return str;
+}
+
+void
+Parrot_initialize_memory_pools(struct Parrot_Interp *interpreter)
+{
+}
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/
--- parrot/string.c     Sun Sep 15 15:30:59 2002
+++ parrot-leo/string.c Tue Oct  1 17:05:02 2002
@@ -18,32 +18,31 @@
 #define EXTRA_SIZE 4
 
 /* String COW support */
+
+/* make a copy of string's data:
+ * copy used string data from strstart to a newly
+ * allocated string
+ * the header stays the same
+ */
 static void
 unmake_COW(struct Parrot_Interp *interpreter, STRING *s)
 {
-#if 0
-    if (s->flags & BUFFER_constant_FLAG) {
-        /* this happens when we call string_to_cstring on 
-         * a constant string in order to print it
-         */
-        internal_exception(INVALID_OPERATION,
-                           "Cannot unmake COW on a constant header");
-    }
-    else
-#endif
     if (s->flags & (BUFFER_COW_FLAG|BUFFER_constant_FLAG)) {
+        void *p;
+        UINTVAL size;
         interpreter->GC_block_level++;
         interpreter->DOD_block_level++;
 
         /* Make the copy point to only the portion of the string that
          * we are actually using. */
-        s->bufstart = s->strstart;
-        s->buflen = s->bufused;
-
+        p = s->strstart;
+        size = s->bufused;
         /* Create new pool data for this header to use, 
          * independant of the original COW data */
-        Parrot_reallocate_string(interpreter, s, s->buflen);
-        s->flags &= ~(UINTVAL)(BUFFER_COW_FLAG | BUFFER_constant_FLAG);
+        s->flags &= ~BUFFER_constant_FLAG;
+        Parrot_allocate_string(interpreter, s, size);
+        mem_sys_memcopy(s->bufstart, p, size);
+        s->flags &= ~(UINTVAL)(BUFFER_COW_FLAG | BUFFER_external_FLAG);
         interpreter->GC_block_level--;
         interpreter->DOD_block_level--;
     }
@@ -399,8 +398,7 @@
 INTVAL
 string_compute_strlen(STRING *s)
 {
-    s->strlen = s->encoding->characters(s->bufstart, s->bufused) - 
-        ((UINTVAL)s->strstart - (UINTVAL)s->bufstart);
+    s->strlen = s->encoding->characters(s->strstart, s->bufused);
     return s->strlen;
 }
 
@@ -972,13 +970,6 @@
 const char *
 string_to_cstring(struct Parrot_Interp * interpreter, STRING * s)
 {
-    char *cstring;
-
-    /* We shouldn't modify a constant string, 
-     * so instead create a new copy of it */
-    if (s->flags & BUFFER_constant_FLAG) {
-        s = make_COW_reference(interpreter,s);
-    }
 
     unmake_COW(interpreter, s);
 
@@ -986,11 +977,9 @@
         string_grow(interpreter, s, 1);
     }
 
-    cstring = s->strstart;
-
-    cstring[s->bufused] = 0;
-
-    return cstring;
+    ((char *)s->strstart)[s->bufused] = 0;
+    /* don't return local vars, return the right thing */
+    return (char*)s->strstart;
 }
 
 
--- parrot/dod.c        Fri Aug 23 11:37:00 2002
+++ parrot-leo/dod.c    Thu Oct  3 08:36:09 2002
@@ -308,6 +308,57 @@
         interpreter->arena_base->pmc_pool->total_objects - total_used;
 }
 
+#ifdef GC_IS_MALLOC
+
+/* find other users of COW's bufstart */
+static void
+used_cow(struct Parrot_Interp *interpreter, struct Small_Object_Pool *pool)
+{
+    UINTVAL object_size = pool->object_size;
+    struct Small_Object_Arena *cur_arena;
+    UINTVAL i;
+    Buffer *b;
+    char *tail;
+
+#ifdef LEA_DEBUG
+    /* check/clear tail, e.g. on changes in string.c or res.c */
+    for (cur_arena = pool->last_Arena;
+            NULL != cur_arena;
+            cur_arena = cur_arena->prev) {
+        b = cur_arena->start_objects;
+        for (i = 0; i < cur_arena->used; i++) {
+            if ((b->flags & BUFFER_COW_FLAG) && b->bufstart &&
+                    !(b->flags & BUFFER_external_FLAG)) {
+                tail = (char*)b->bufstart + b->buflen + 1;
+                assert(*tail == 0);
+                *tail = 0;
+            }
+            b = (Buffer *)((char *)b + object_size);
+        }
+    }
+#endif
+
+    for (cur_arena = pool->last_Arena;
+            NULL != cur_arena;
+            cur_arena = cur_arena->prev) {
+        b = cur_arena->start_objects;
+        for (i = 0; i < cur_arena->used; i++) {
+            if ((b->flags & BUFFER_COW_FLAG) &&
+                    !(b->flags & BUFFER_external_FLAG)) {
+                tail = (char*)b->bufstart + b->buflen + 1;
+                /* mark living and dead users of this bufstart
+                 * tail is cleared in *allocate_string */
+                if (b->flags & BUFFER_live_FLAG)
+                    *tail |= 0x2;
+                else
+                    *tail |= 0x1;
+            }
+            b = (Buffer *)((char *)b + object_size);
+        }
+    }
+}
+
+#endif /* GC_IS_MALLOC */
 /* Put any buffers that are now unused, on to the free list
  * Avoid buffers that are immune from collection (ie, constant) */
 static void
@@ -317,13 +368,26 @@
     struct Small_Object_Arena *cur_arena;
     UINTVAL i, total_used = 0;
     UINTVAL object_size = pool->object_size;
-
+#ifdef GC_IS_MALLOC
+    char *tail;
+#endif /* GC_IS_MALLOC */
+
+#ifdef GC_IS_MALLOC
+    used_cow(interpreter, pool);
+#endif /* GC_IS_MALLOC */
     /* Run through all the buffer header pools and mark */
     for (cur_arena = pool->last_Arena;
          NULL != cur_arena;
          cur_arena = cur_arena->prev) {
         Buffer *b = cur_arena->start_objects;
         for (i = 0; i < cur_arena->used; i++) {
+#ifdef GC_IS_MALLOC
+            if ((b->flags & BUFFER_COW_FLAG) &&
+                    !(b->flags & BUFFER_external_FLAG))
+                tail = (char*)b->bufstart + b->buflen + 1;
+            else
+                tail = 0;
+#endif /* GC_IS_MALLOC */
             /* If it's not live or on the free list, put it on the free list.
              * Note that it is technically possible to have a Buffer be both
              * on_free_list and live, because of our conservative stack-walk
@@ -332,6 +396,7 @@
                              | BUFFER_constant_FLAG
                              | BUFFER_live_FLAG )))
             {
+#ifndef GC_IS_MALLOC
                 if (pool->mem_pool) {
                     if (!(b->flags & BUFFER_COW_FLAG)) {
                         ((struct Memory_Pool *)
@@ -341,10 +406,27 @@
                     ((struct Memory_Pool *)
                         pool->mem_pool)->possibly_reclaimable += b->buflen;
                 }
+#else /* GC_IS_MALLOC */
+                /* if only dead users, this one may be freed */
+                if (tail && *tail == 0x1)
+                    b->flags &= ~BUFFER_COW_FLAG;
+                /* don't free this bufstart this time, because
+                 * tail will be invalid then -
+                 * if we want to free immediately, we need extended
+                 * bookkeeping to free exactly the last user
+                 */
+                else
+#endif /* GC_IS_MALLOC */
                 add_free_buffer(interpreter, pool, b);
             } else if (!(b->flags & BUFFER_on_free_list_FLAG)) {
                 total_used++;
             }
+#ifdef GC_IS_MALLOC
+            /* clear tail for next dod run and
+             * don't unset COW on other possbily dead users */
+            if (tail)
+                *tail = 0;
+#endif /* GC_IS_MALLOC */
             b->flags &= ~BUFFER_live_FLAG;
             b = (Buffer *)((char *)b + object_size);
         }
@@ -429,11 +511,31 @@
 }
 #endif
 
+#ifdef GC_IS_MALLOC
+struct mallinfo {
+  int arena;    /* non-mmapped space allocated from system */
+  int ordblks;  /* number of free chunks */
+  int smblks;   /* number of fastbin blocks */
+  int hblks;    /* number of mmapped regions */
+  int hblkhd;   /* space in mmapped regions */
+  int usmblks;  /* maximum total allocated space */
+  int fsmblks;  /* space available in freed fastbin blocks */
+  int uordblks; /* total allocated space */
+  int fordblks; /* total free space */
+  int keepcost; /* top-most, releasable (via malloc_trim) space */
+};
+extern struct mallinfo mallinfo(void);
+#endif /* GC_IS_MALLOC */
 
 /* See if we can find some unused headers */
 void
 Parrot_do_dod_run(struct Parrot_Interp *interpreter)
 {
+#ifdef GC_IS_MALLOC
+    struct Small_Object_Pool *header_pool;
+    int j;
+
+#endif /* GC_IS_MALLOC */
     if (interpreter->DOD_block_level) {
         return;
     }
@@ -452,11 +554,29 @@
     free_unused_PMCs(interpreter);
 
     /* And unused buffers on the free list */
+#ifndef GC_IS_MALLOC
     free_unused_buffers(interpreter,
                         interpreter->arena_base->string_header_pool);
     free_unused_buffers(interpreter,
                         interpreter->arena_base->buffer_header_pool);
 
+#else /* GC_IS_MALLOC */
+    for (j = -2; j < (INTVAL) interpreter->arena_base->num_sized; j++) {
+        if (j == -2)
+            header_pool = interpreter->arena_base->string_header_pool;
+        else if (j == -1)
+            header_pool = interpreter->arena_base->buffer_header_pool;
+        else
+            header_pool = interpreter->arena_base->sized_header_pools[j];
+        if (header_pool && j < 0) {
+            free_unused_buffers(interpreter, header_pool);
+        }
+    }
+    /* update mem stats */
+#if 0
+    interpreter->memory_allocated = mallinfo().uordblks;
+#endif
+#endif /* GC_IS_MALLOC */
     /* Note it */
     interpreter->dod_runs++;
 
--- parrot/headers.c    Mon Sep  9 11:42:19 2002
+++ parrot-leo/headers.c        Thu Oct  3 08:36:09 2002
@@ -20,9 +20,15 @@
 #    define BUFFER_HEADERS_PER_ALLOC 1
 #    define STRING_HEADERS_PER_ALLOC 1
 #else
+#ifndef GC_IS_MALLOC
 #    define PMC_HEADERS_PER_ALLOC 256
 #    define BUFFER_HEADERS_PER_ALLOC 256
 #    define STRING_HEADERS_PER_ALLOC 256
+#else /* GC_IS_MALLOC */
+#    define PMC_HEADERS_PER_ALLOC 512
+#    define BUFFER_HEADERS_PER_ALLOC 512
+#    define STRING_HEADERS_PER_ALLOC 512
+#endif /* GC_IS_MALLOC */
 #endif
 
 /** PMC Header Functions for small-object lookup table **/
@@ -33,6 +39,9 @@
 {
     ((PMC *)pmc)->flags = PMC_on_free_list_FLAG;
     /* Don't let it point to garbage memory */
+#ifdef GC_IS_MALLOC
+    /* XXX custom destroy ?! */
+#endif /* GC_IS_MALLOC */
     ((PMC *)pmc)->data = NULL;
 
     /* Copied from add_free_object */
@@ -75,9 +84,20 @@
 add_free_buffer(struct Parrot_Interp *interpreter, 
                 struct Small_Object_Pool *pool, void *buffer)
 {
+#ifdef GC_IS_MALLOC
+    /* free allocated space at bufstart, but not if it is used
+     * COW or it is external
+     */
+    if (((Buffer *)buffer)->bufstart &&
+            !(((Buffer *)buffer)->flags &
+                (BUFFER_COW_FLAG|BUFFER_external_FLAG))) {
+        free(((Buffer *)buffer)->bufstart);
+    }
+#endif /* GC_IS_MALLOC */
     ((Buffer *)buffer)->flags = BUFFER_on_free_list_FLAG;
     /* Use the right length */
     ((Buffer *)buffer)->buflen = 0;
+    ((Buffer *)buffer)->bufstart = 0;
 
     /* Copied from add_free_object */
     *(void **)buffer = pool->free_list;
--- parrot/MANIFEST     Fri Sep 27 07:47:07 2002
+++ parrot-leo/MANIFEST Thu Oct  3 08:01:06 2002
@@ -45,6 +45,8 @@
 config/auto/format.pl
 config/auto/funcptr.pl
 config/auto/funcptr/test_c.in
+config/auto/gc.pl
+config/auto/gc/test_c.in
 config/auto/gcc.pl
 config/auto/gcc/test_c.in
 config/auto/headers.pl
@@ -554,6 +556,8 @@
 lib/Test/Simple.pm
 lib/Text/Balanced.pm
 make.pl
+malloc.c
+malloc-trace.c
 math.ops
 memory.c
 method_util.c
@@ -573,6 +577,7 @@
 pmc.c
 pxs.c
 register.c
+res_lea.c
 resources.c
 runops_cores.c
 rx.c
--- parrot/lea  Thu Oct  3 08:44:29 2002
+++ parrot-leo/lea      Thu Oct  3 07:34:07 2002
@@ -0,0 +1,13 @@
+#!/bin/sh
+files="dod.c headers.c"
+if [ $1 = 1 ] ; then
+       perl ./Configure.pl --gc=libc
+else
+       perl ./Configure.pl --gc=gc
+fi
+touch $files
+[ -e blib/lib/libparrot.a ] && rm blib/lib/libparrot.a
+make
+
+
+

Reply via email to