Author: Armin Rigo <[email protected]>
Branch: gc-more-incremental
Changeset: r79705:264a78afb911
Date: 2015-09-19 12:14 +0200
http://bitbucket.org/pypy/pypy/changeset/264a78afb911/

Log:    This is mostly an attempt to change the threshold in
        external_malloc()

diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -754,14 +754,30 @@
                 self.minor_collection()
                 if minor_collection_count == 1:
                     #
-                    # If the gc_state is not STATE_SCANNING, we're in the 
middle of
-                    # an incremental major collection.  In this case, always 
progress
-                    # one step.  If the gc_state is STATE_SCANNING, wait until 
there
-                    # is too much garbage before starting the next major 
collection.
+                    # If the gc_state is STATE_SCANNING, we're not in
+                    # the middle of an incremental major collection.
+                    # In that case, wait until there is too much
+                    # garbage before starting the next major
+                    # collection.  But if we are in the middle of an
+                    # incremental major collection, then always do (at
+                    # least) one step now.
+                    #
+                    # This will increment next_major_collection_threshold
+                    # by nursery_size//2.  If more than nursery_size//2
+                    # survives, then threshold_reached() might still be
+                    # true after that.  In that case we do a second step.
+                    # The goal is to avoid too high memory peaks if the
+                    # program allocates a lot of surviving objects.
+                    # 
                     if (self.gc_state != STATE_SCANNING or
-                                self.get_total_memory_used() >
-                                self.next_major_collection_threshold):
+                           self.threshold_reached()):
+
                         self.major_collection_step()
+
+                        if (self.gc_state != STATE_SCANNING and
+                               self.threshold_reached()):  # ^^but only if 
still
+                            self.minor_collection()        # the same 
collection
+                            self.major_collection_step()
                         #
                         # The nursery might not be empty now, because of
                         # execute_finalizers().  If it is almost full again,
@@ -825,17 +841,25 @@
             raise MemoryError
         #
         # If somebody calls this function a lot, we must eventually
-        # force a collection.  XXX make this more incremental!  For now
-        # the logic is to first do a minor GC only, and check if that
-        # was enough to free a bunch of large young objects.  If not,
-        # we do a complete major GC.
-        if self.get_total_memory_free() < raw_malloc_usage(totalsize):
+        # force a collection.  We use threshold_reached(), which might
+        # be true now but become false at some point after a few calls
+        # to major_collection_step().  If there is really no memory,
+        # then when the major collection finishes it will raise
+        # MemoryError.
+        #
+        # The logic is to first do a minor GC only, and check if that
+        # was enough to free a bunch of large young objects.  If it
+        # was, then we don't do any major collection step.
+        #
+        while self.threshold_reached(raw_malloc_usage(totalsize)):
             self.minor_collection()
-            if self.get_total_memory_free() < (raw_malloc_usage(totalsize) +
-                                               self.nursery_size // 2):
-                self.gc_step_until(STATE_SWEEPING)
-                self.gc_step_until(STATE_FINALIZING,
-                                   raw_malloc_usage(totalsize))
+            if self.threshold_reached(raw_malloc_usage(totalsize) +
+                                      self.nursery_size // 2):
+                self.major_collection_step(raw_malloc_usage(totalsize))
+            # note that this loop should not be infinite: when the
+            # last step of a major collection is done but
+            # threshold_reached(totalsize) is still true, then
+            # we should get a MemoryError from major_collection_step().
         #
         # Check if the object would fit in the ArenaCollection.
         if raw_malloc_usage(totalsize) <= self.small_request_threshold:
@@ -1112,9 +1136,9 @@
         """
         return self.ac.total_memory_used + self.rawmalloced_total_size
 
-    def get_total_memory_free(self):
+    def threshold_reached(self, extra=0):
         return (self.next_major_collection_threshold -
-                float(self.get_total_memory_used()))
+                float(self.get_total_memory_used())) < float(extra)
 
     def card_marking_words_for_length(self, length):
         # --- Unoptimized version:
@@ -2058,10 +2082,10 @@
         self.gc_step_until(STATE_MARKING)
         self.gc_step_until(STATE_SCANNING)
 
-    def gc_step_until(self, state, reserving_size=0):
+    def gc_step_until(self, state):
         while self.gc_state != state:
             self.minor_collection()
-            self.major_collection_step(reserving_size)
+            self.major_collection_step()
 
     debug_gc_step_until = gc_step_until   # xxx
 
@@ -2086,8 +2110,36 @@
             pass
         self.debug_check_consistency()
 
+        #
+        # Every call to major_collection_step() adds nursery_size//2
+        # to the threshold.  It is reset at the end of this function
+        # when the major collection is fully finished.
+        #
+        # In the common case, this is larger than the size of all
+        # objects that survive a minor collection.  After a few
+        # minor collections (each followed by one call to
+        # major_collection_step()) the threshold is much higher than
+        # the currently-in-use old memory.  Then threshold_reached()
+        # won't be true again until the major collection fully
+        # finishes, time passes, and it's time for the next major
+        # collection.
+        #
+        # However there are less common cases:
+        #
+        # * if more than half of the nursery consistently survives: we
+        #   call major_collection_step() twice after a minor
+        #   collection;
+        #
+        # * or if we're allocating a large number of bytes in
+        #   external_malloc().  In that case, we are likely to reach
+        #   again the threshold_reached() case, and more major
+        #   collection steps will be done immediately until
+        #   threshold_reached() returns false.
+        #
+        self.next_major_collection_threshold += self.nursery_size // 2
 
-        # XXX currently very course increments, get this working then split
+
+        # XXX currently very coarse increments, get this working then split
         # to smaller increments using stacks for resuming
         if self.gc_state == STATE_SCANNING:
             self.objects_to_trace = self.AddressStack()
@@ -2201,8 +2253,7 @@
                 #
                 # Max heap size: gives an upper bound on the threshold.  If we
                 # already have at least this much allocated, raise MemoryError.
-                if bounded and (float(self.get_total_memory_used()) + 
reserving_size >=
-                                self.next_major_collection_initial):
+                if bounded and self.threshold_reached(reserving_size):
                     #
                     # First raise MemoryError, giving the program a chance to
                     # quit cleanly.  It might still allocate in the nursery,
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to