Hi all,

After digging into a problem observed with henrietta-cache on Alaric's
server, I filed #1058.  After some more digging, it turned out that
the OpenSSL egg creates mutexes which it waits for, which are put into
slot 11 of the waiting thread ("object on which thread is blocking").

According to the comments, slot 11 can hold either a pair of the FD and
the flags (:input/#t, :output/#f or :all) or a thread object.
This is wrong, because as you can see in srfi-18.scm, mutexes will be put
in there as well.  Furthermore, the create-fdset and fdset-set code
simply adds anything that's in slot 11 to the FD set for
poll()/select() regardless of whether it's a pair or something else.
This is fixed by checking whether it's a pair.

This bug was not caught by the paranoid checks for two reasons:
First, the scheduler has a (declare unsafe) statement (I didn't
see this until I added more checks and tried to cause tests to fail).
Secondly, the C_u_i_car and C_u_i_cdr accessors don't check whether
their argument is a pair, even in paranoid mode (they check whether
it's a block object, which a mutex is, so it'll just immediately
access the slots of the mutex, explaining the error message we get).

I've now added paranoid checks to these macros.  In doing so, I
noticed (because the compiler tests crashed) that the pair allocator
and accessors are abused for symbol table buckets (even though they
are their own distinct tagged type), so I've reworked that as well,
to get their own allocator and use block_item accessors instead of
pretending they're pairs.  C_u_i_car and C_u_i_cdr have been used
as a shorthand for C_block_item(x,0)/C_block_item(x,1) in quite a
few other places as well.  I think maybe these types should probably
eventually get their own (paranoia-checked) accessors as well, but
for now I haven't done that yet.

After adding these checks, I noticed that in a DEBUGBUILD, the
hash-table tests started failing consistently with the dreaded
"out of memory - heap full while resizing" error.  After spending the
weekend groveling through the garbage collector, I finally found a
very strange statement in runtime.c: after carefully calculating
the new heap size and ensuring it will fit both the current heap
and the stack, the heap size is HALVED, for no apparent reason!
There isn't even a comment stating why this is done.  Of course,
removing this line helped me get rid of this error message.
Hopefully this gets rid of it once and for all, and hopefully
this fixes the remaining cases in #1045 (except the "unknown
protocol, which is probably entirely unrelated).

I think that patches 0002 and 0003 should really go into stability.

Cheers,
Peter
-- 
http://www.more-magic.net
>From 223731240efb0045d4532c75a33270f209169d3a Mon Sep 17 00:00:00 2001
From: Peter Bex <peter....@xs4all.nl>
Date: Sat, 12 Oct 2013 11:12:57 +0200
Subject: [PATCH 1/3] Add paranoid checks to C_u_i_car and C_u_i_cdr.

Replace all calls to these two on non-pairs for "convenience" (use C_block_item)
---
 chicken.h | 17 ++++++++++++++---
 runtime.c | 51 +++++++++++++++++++++++++--------------------------
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/chicken.h b/chicken.h
index 07e4fe8..bbfdfc5 100644
--- a/chicken.h
+++ b/chicken.h
@@ -1279,7 +1279,7 @@ extern double trunc(double);
 #define C_pointer_address(x)            ((C_byte *)C_block_item((x), 0))
 #define C_block_address(ptr, n, x)      C_a_unsigned_int_to_num(ptr, n, x)
 #define C_offset_pointer(x, y)          (C_pointer_address(x) + (y))
-#define C_kontinue(k, r)                ((C_proc2)(void *)C_u_i_car(k))(2, 
(k), (r))
+#define C_kontinue(k, r)                ((C_proc2)(void 
*)C_block_item(k,0))(2, (k), (r))
 #define C_fetch_byte(x, p)              (((unsigned C_byte 
*)C_data_pointer(x))[ p ])
 #define C_poke_integer(x, i, n)         (C_set_block_item(x, C_unfix(i), 
C_num_to_int(n)), C_SCHEME_UNDEFINED)
 #define C_pointer_to_block(p, x)        (C_set_block_item(p, 0, 
(C_word)C_data_pointer(x)), C_SCHEME_UNDEFINED)
@@ -1385,8 +1385,8 @@ extern double trunc(double);
 #define C_i_list_ref(lst, i)            C_i_car(C_i_list_tail(lst, i))
 #define C_u_i_list_ref(lst, i)          C_u_i_car(C_i_list_tail(lst, i))
 
-#define C_u_i_car(x)                    C_block_item(x, 0)
-#define C_u_i_cdr(x)                    C_block_item(x, 1)
+#define C_u_i_car(x)                    
(*C_CHECKp(x,C_pairp(C_VAL1(x)),&C_block_item(C_VAL1(x), 0)))
+#define C_u_i_cdr(x)                    
(*C_CHECKp(x,C_pairp(C_VAL1(x)),&C_block_item(C_VAL1(x), 1)))
 #define C_u_i_caar(x)                   C_u_i_car( C_u_i_car( x ) )
 #define C_u_i_cadr(x)                   C_u_i_car( C_u_i_cdr( x ) )
 #define C_u_i_cdar(x)                   C_u_i_cdr( C_u_i_car( x ) )
@@ -2672,6 +2672,17 @@ C_inline C_word C_fcall C_a_pair(C_word **ptr, C_word 
car, C_word cdr)
   return (C_word)p0;
 }
 
+C_inline C_word C_fcall C_a_bucket(C_word **ptr, C_word head, C_word tail)
+{
+  C_word *p = *ptr, *p0 = p;
+
+  *(p++) = C_BUCKET_TYPE | (C_SIZEOF_BUCKET - 1);
+  *(p++) = head;
+  *(p++) = tail;
+  *ptr = p;
+  return (C_word)p0;
+}
+
 
 C_inline C_word C_a_i_list1(C_word **a, int n, C_word x1)
 {
diff --git a/runtime.c b/runtime.c
index ab61a0d..2c622cb 100644
--- a/runtime.c
+++ b/runtime.c
@@ -906,7 +906,7 @@ void *CHICKEN_global_lookup(char *name)
   void *root = CHICKEN_new_gc_root();
 
   if(C_truep(s = lookup(key, len, name, symbol_table))) {
-    if(C_u_i_car(s) != C_SCHEME_UNBOUND) {
+    if(C_block_item(s, 0) != C_SCHEME_UNBOUND) {
       CHICKEN_gc_root_set(root, s);
       return root;
     }
@@ -996,7 +996,7 @@ C_regparm C_word C_find_symbol(C_word str, C_SYMBOL_TABLE 
*stable)
   else return C_SCHEME_FALSE;
 }
 
-
+/* OBSOLETE */
 C_regparm C_word C_enumerate_symbols(C_SYMBOL_TABLE *stable, C_word pos)
 {
   int i;
@@ -1554,7 +1554,7 @@ void barf(int code, char *loc, ...)
   C_dbg_hook(C_SCHEME_UNDEFINED);
 
   C_temporary_stack = C_temporary_stack_bottom;
-  err = C_u_i_car(err);
+  err = C_block_item(err, 0);
 
   if(C_immediatep(err))
     panic(C_text("`##sys#error-hook' is not defined - the `library' unit was 
probably not linked with this executable"));
@@ -2013,8 +2013,8 @@ void C_zap_strings(C_word str)
 
     for(bucket = symbol_table->table[ i ];
         bucket != C_SCHEME_END_OF_LIST;
-        bucket = C_u_i_cdr(bucket)) {
-      sym = C_u_i_car(bucket);
+        bucket = C_block_item(bucket,1)) {
+      sym = C_block_item(bucket,0);
       C_set_block_item(sym, 1, str);
     }
   }
@@ -2171,7 +2171,7 @@ C_regparm C_word C_fcall C_intern3(C_word **ptr, C_char 
*str, C_word value)
 {
   C_word s = C_intern_in(ptr, C_strlen(str), str, symbol_table);
   
-  C_mutate2(&C_u_i_car(s), value);
+  C_mutate2(&C_block_item(s,0), value);
   return s;
 }
 
@@ -2194,8 +2194,8 @@ C_regparm C_word C_fcall lookup(C_word key, int len, 
C_char *str, C_SYMBOL_TABLE
   C_word bucket, sym, s;
 
   for(bucket = stable->table[ key ]; bucket != C_SCHEME_END_OF_LIST; 
-      bucket = C_u_i_cdr(bucket)) {
-    sym = C_u_i_car(bucket);
+      bucket = C_block_item(bucket,1)) {
+    sym = C_block_item(bucket,0);
     s = C_block_item(sym, 1);
 
     if(C_header_size(s) == (C_word)len
@@ -2216,7 +2216,7 @@ double compute_symbol_table_load(double *avg_bucket_len, 
int *total_n)
     bucket = symbol_table->table[ i ];
 
     for(j = 0; bucket != C_SCHEME_END_OF_LIST; ++j)
-      bucket = C_u_i_cdr(bucket);
+      bucket = C_block_item(bucket,1);
 
     if(j > 0) {
       alen += j;
@@ -2250,8 +2250,7 @@ C_word add_symbol(C_word **ptr, C_word key, C_word 
string, C_SYMBOL_TABLE *stabl
   C_set_block_item(sym, 2, C_SCHEME_END_OF_LIST);
   *ptr = p;
   b2 = stable->table[ key ];   /* previous bucket */
-  bucket = C_a_pair(ptr, sym, b2); /* create new bucket */
-  C_block_header(bucket) = (C_block_header(bucket) & ~C_HEADER_TYPE_BITS) | 
C_BUCKET_TYPE;
+  bucket = C_a_bucket(ptr, sym, b2); /* create new bucket */
 
   if(ptr != C_heaptop) C_mutate_slot(&stable->table[ key ], bucket);
   else {
@@ -2259,7 +2258,7 @@ C_word add_symbol(C_word **ptr, C_word key, C_word 
string, C_SYMBOL_TABLE *stabl
        heap-top (say, in a toplevel literal frame allocation) then we have
        to inform the memory manager that a 2nd gen. block points to a 
        1st gen. block, hence the mutation: */
-    C_mutate2(&C_u_i_cdr(bucket), b2);
+    C_mutate2(&C_block_item(bucket,1), b2);
     stable->table[ key ] = bucket;
   }
 
@@ -2969,7 +2968,7 @@ C_regparm void C_fcall C_reclaim(void *trampoline, void 
*proc)
          C_dbg(C_text("GC"), C_text("queueing %d finalizer(s)\n"), 
pending_finalizer_count);
 
        last = C_block_item(pending_finalizers_symbol, 0);
-       assert(C_u_i_car(last) == C_fix(0));
+       assert(C_block_item(last, 0) == C_fix(0));
        C_set_block_item(last, 0, C_fix(pending_finalizer_count));
 
        for(i = 0; i < pending_finalizer_count; ++i) {
@@ -3040,10 +3039,10 @@ C_regparm void C_fcall C_reclaim(void *trampoline, void 
*proc)
        for(i = 0; i < stp->size; ++i) {
          last = 0;
          
-         for(bucket = stp->table[ i ]; bucket != C_SCHEME_END_OF_LIST; bucket 
= C_u_i_cdr(bucket))
-           if(C_u_i_car(bucket) == C_SCHEME_UNDEFINED) {
-             if(last) C_set_block_item(last, 1, C_u_i_cdr(bucket));
-             else stp->table[ i ] = C_u_i_cdr(bucket);
+         for(bucket = stp->table[ i ]; bucket != C_SCHEME_END_OF_LIST; bucket 
= C_block_item(bucket,1))
+           if(C_block_item(bucket,0) == C_SCHEME_UNDEFINED) {
+             if(last) C_set_block_item(last, 1, C_block_item(bucket,1));
+             else stp->table[ i ] = C_block_item(bucket,1);
            }
            else last = bucket;
        }
@@ -3227,7 +3226,7 @@ C_regparm void C_fcall really_mark(C_word *x)
 #endif
 
     if(C_enable_gcweak && (h & C_HEADER_TYPE_BITS) == C_BUCKET_TYPE) {
-      item = C_u_i_car(val);
+      item = C_block_item(val,0);
 
       /* Lookup item in weak item table or add entry: */
       if((wep = lookup_weak_table_entry(item, (C_word)p2)) != NULL) {
@@ -5684,7 +5683,7 @@ C_regparm C_word C_fcall C_i_check_vector_2(C_word x, 
C_word loc)
 
 C_regparm C_word C_fcall C_i_check_structure_2(C_word x, C_word st, C_word loc)
 {
-  if(C_immediatep(x) || C_header_bits(x) != C_STRUCTURE_TYPE || C_u_i_car(x) 
!= st) {
+  if(C_immediatep(x) || C_header_bits(x) != C_STRUCTURE_TYPE || 
C_block_item(x,0) != st) {
     error_location = loc;
     barf(C_BAD_ARGUMENT_TYPE_BAD_STRUCT_ERROR, NULL, x, st);
   }
@@ -6159,13 +6158,13 @@ void C_ccall C_call_cc(C_word c, C_word closure, C_word 
k, C_word cont)
 {
   C_word *a = C_alloc(3),
          wrapper;
-  void *pr = (void *)C_u_i_car(cont);
+  void *pr = (void *)C_block_item(cont,0);
 
   if(C_immediatep(cont) || C_header_bits(cont) != C_CLOSURE_TYPE)
     barf(C_BAD_ARGUMENT_TYPE_ERROR, "call-with-current-continuation", cont);
 
   /* Check for values-continuation: */
-  if(C_u_i_car(k) == (C_word)values_continuation)
+  if(C_block_item(k,0) == (C_word)values_continuation)
     wrapper = C_closure(&a, 2, (C_word)call_cc_values_wrapper, k);
   else wrapper = C_closure(&a, 2, (C_word)call_cc_wrapper, k);
 
@@ -6175,7 +6174,7 @@ void C_ccall C_call_cc(C_word c, C_word closure, C_word 
k, C_word cont)
 
 void C_ccall call_cc_wrapper(C_word c, C_word closure, C_word k, C_word result)
 {
-  C_word cont = C_u_i_cdr(closure);
+  C_word cont = C_block_item(closure,1);
 
   if(c != 3) C_bad_argc(c, 3);
 
@@ -6186,7 +6185,7 @@ void C_ccall call_cc_wrapper(C_word c, C_word closure, 
C_word k, C_word result)
 void C_ccall call_cc_values_wrapper(C_word c, C_word closure, C_word k, ...)
 {
   va_list v;
-  C_word cont = C_u_i_cdr(closure),
+  C_word cont = C_block_item(closure,1),
          x1;
   int n = c;
 
@@ -6305,7 +6304,7 @@ void C_ccall C_u_call_with_values(C_word c, C_word 
closure, C_word k, C_word thu
 
 void C_ccall values_continuation(C_word c, C_word closure, C_word arg0, ...)
 {
-  C_word kont = C_u_i_cdr(closure),
+  C_word kont = C_block_item(closure, 1),
          k = C_block_item(closure, 2),
          n = c,
          *ptr;
@@ -8111,8 +8110,8 @@ void C_ccall C_context_switch(C_word c, C_word closure, 
C_word k, C_word state)
 
   C_temporary_stack = C_temporary_stack_bottom - n;
   C_memcpy(C_temporary_stack, (C_word *)state + 2, n * sizeof(C_word));
-  trampoline = (TRAMPOLINE)C_u_i_car(adrs);
-  trampoline((void *)C_u_i_cdr(adrs));
+  trampoline = (TRAMPOLINE)C_block_item(adrs,0);
+  trampoline((void *)C_block_item(adrs,1));
 }
 
 
-- 
1.8.3.4

>From f9fcd2e40b071d4b23399dadddbf7c85e2d7509d Mon Sep 17 00:00:00 2001
From: Peter Bex <peter....@xs4all.nl>
Date: Fri, 11 Oct 2013 22:22:40 +0200
Subject: [PATCH 2/3] Fix #1058: never add mutex objects to FD lists in the
 scheduler (causes panics!)

---
 scheduler.scm | 7 ++++---
 srfi-18.scm   | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/scheduler.scm b/scheduler.scm
index bdc7c52..f337dcf 100644
--- a/scheduler.scm
+++ b/scheduler.scm
@@ -361,7 +361,7 @@ EOF
 
 (define (##sys#thread-basic-unblock! t)
   (dbg "unblocking: " t)
-  (##sys#setislot t 11 #f)             ; (FD . RWFLAGS)
+  (##sys#setislot t 11 #f)             ; (FD . RWFLAGS) | #<MUTEX> | #<THREAD>
   (##sys#setislot t 4 #f)
   (##sys#add-to-ready-queue t) )
 
@@ -397,7 +397,8 @@ EOF
        (for-each
         (lambda (t)
           (let ((p (##sys#slot t 11)))
-            (fdset-set fd (cdr p))))
+             (when (pair? p) ; (FD . RWFLAGS)? (can also be mutex or thread)
+               (fdset-set fd (cdr p)))))
         (cdar lst))
        (loop (cdr lst))))))
 
@@ -580,7 +581,7 @@ EOF
        (define (suspend t)
          (unless (eq? t primordial)
            (##sys#setslot t 3 'suspended))
-         (##sys#setslot t 11 #f)      ; block-object (may be thread)
+         (##sys#setslot t 11 #f)      ; block-object (thread/mutex/fd & flags)
          (##sys#setslot t 12 '()))    ; recipients (waiting for join)
        (set! ##sys#primordial-thread primordial)
        (set! ready-queue-head (list primordial))
diff --git a/srfi-18.scm b/srfi-18.scm
index af4b9d5..3f8cf25 100644
--- a/srfi-18.scm
+++ b/srfi-18.scm
@@ -265,6 +265,7 @@
        (lambda (return)
         (let ([ct ##sys#current-thread])
           (define (switch)
+             (dbg ct " sleeping on mutex " (mutex-name mutex))
             (##sys#setslot mutex 3 (##sys#append (##sys#slot mutex 3) (list 
ct)))
             (##sys#schedule) )
           (define (check)
@@ -272,7 +273,7 @@
               (return
                (##sys#signal
                 (##sys#make-structure 'condition '(abandoned-mutex-exception) 
'()))) ) )
-          (dbg ct ": locking " mutex)
+          (dbg ct ": locking " (mutex-name mutex))
           (cond [(not (##sys#slot mutex 5))
                  (if (and threadsup (not thread))
                      (begin
-- 
1.8.3.4

>From 91c35bcf52f13f00b43516ea90513798875f4dd0 Mon Sep 17 00:00:00 2001
From: Peter Bex <peter....@xs4all.nl>
Date: Sun, 13 Oct 2013 12:33:06 +0200
Subject: [PATCH 3/3] Remove weird unexplained halving of new heap size which
 caused out of memory errors

---
 runtime.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/runtime.c b/runtime.c
index 2c622cb..030763e 100644
--- a/runtime.c
+++ b/runtime.c
@@ -3314,7 +3314,6 @@ C_regparm void C_fcall C_rereclaim2(C_uword size, int 
double_plus)
   }
 
   heap_size = size;
-  size /= 2;
 
   if ((new_heapspace = heap_alloc (size, &new_tospace_start)) == NULL)
     panic(C_text("out of memory - cannot allocate heap segment"));
-- 
1.8.3.4

_______________________________________________
Chicken-hackers mailing list
Chicken-hackers@nongnu.org
https://lists.nongnu.org/mailman/listinfo/chicken-hackers

Reply via email to