Author: Armin Rigo <[email protected]>
Branch:
Changeset: r83837:490058ea54e6
Date: 2016-04-24 10:52 +0200
http://bitbucket.org/pypy/pypy/changeset/490058ea54e6/
Log: Issue #2226
Another tweak in the incremental GC: this should ensure that
progress in the major GC occurs quickly enough in all cases.
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -341,6 +341,20 @@
self.prebuilt_root_objects = self.AddressStack()
#
self._init_writebarrier_logic()
+ #
+ # The size of all the objects turned from 'young' to 'old'
+ # since we started the last major collection cycle. This is
+ # used to track progress of the incremental GC: normally, we
+ # run one major GC step after each minor collection, but if a
+ # lot of objects are made old, we need run two or more steps.
+ # Otherwise the risk is that we create old objects faster than
+ # we're collecting them. The 'threshold' is incremented after
+ # each major GC step at a fixed rate; the idea is that as long
+ # as 'size_objects_made_old > threshold_objects_made_old' then
+ # we must do more major GC steps. See major_collection_step()
+ # for more details.
+ self.size_objects_made_old = r_uint(0)
+ self.threshold_objects_made_old = r_uint(0)
def setup(self):
@@ -464,7 +478,7 @@
self.gc_nursery_debug = True
else:
self.gc_nursery_debug = False
- self.minor_collection() # to empty the nursery
+ self._minor_collection() # to empty the nursery
llarena.arena_free(self.nursery)
self.nursery_size = newsize
self.allocate_nursery()
@@ -509,8 +523,8 @@
self.min_heap_size = max(self.min_heap_size, self.nursery_size *
self.major_collection_threshold)
# the following two values are usually equal, but during raw mallocs
- # of arrays, next_major_collection_threshold is decremented to make
- # the next major collection arrive earlier.
+ # with memory pressure accounting, next_major_collection_threshold
+ # is decremented to make the next major collection arrive earlier.
# See translator/c/test/test_newgc, test_nongc_attached_to_gc
self.next_major_collection_initial = self.min_heap_size
self.next_major_collection_threshold = self.min_heap_size
@@ -700,21 +714,58 @@
def collect(self, gen=2):
"""Do a minor (gen=0), start a major (gen=1), or do a full
major (gen>=2) collection."""
- if gen <= 1:
- self.minor_collection()
- if gen == 1 or (self.gc_state != STATE_SCANNING and gen != -1):
+ if gen < 0:
+ self._minor_collection() # dangerous! no major GC cycle progress
+ elif gen <= 1:
+ self.minor_collection_with_major_progress()
+ if gen == 1 and self.gc_state == STATE_SCANNING:
self.major_collection_step()
else:
self.minor_and_major_collection()
self.rrc_invoke_callback()
+ def minor_collection_with_major_progress(self, extrasize=0):
+ """To a minor collection. Then, if there is already a major GC
+ in progress, run at least one major collection step. If there is
+ no major GC but the threshold is reached, start a major GC.
+ """
+ self._minor_collection()
+
+ # If the gc_state is STATE_SCANNING, we're not in the middle
+ # of an incremental major collection. In that case, wait
+ # until there is too much garbage before starting the next
+ # major collection. But if we are in the middle of an
+ # incremental major collection, then always do (at least) one
+ # step now.
+ #
+ # Within a major collection cycle, every call to
+ # major_collection_step() increments
+ # 'threshold_objects_made_old' by nursery_size/2.
+
+ if self.gc_state != STATE_SCANNING or
self.threshold_reached(extrasize):
+ self.major_collection_step(extrasize)
+
+ # See documentation in major_collection_step()
+ while self.gc_state != STATE_SCANNING: # target (A1)
+ threshold = self.threshold_objects_made_old
+ if threshold >= r_uint(extrasize):
+ threshold -= r_uint(extrasize)
+ if self.size_objects_made_old <= threshold: # target (A2)
+ break
+
+ self._minor_collection()
+ self.major_collection_step(extrasize)
+
+ self.rrc_invoke_callback()
+
+
def collect_and_reserve(self, totalsize):
"""To call when nursery_free overflows nursery_top.
First check if pinned objects are in front of nursery_top. If so,
jump over the pinned object and try again to reserve totalsize.
- Otherwise do a minor collection, and possibly a major collection, and
- finally reserve totalsize bytes.
+ Otherwise do a minor collection, and possibly some steps of a
+ major collection, and finally reserve totalsize bytes.
"""
minor_collection_count = 0
@@ -757,47 +808,27 @@
self.nursery_top = self.nursery_barriers.popleft()
else:
minor_collection_count += 1
- self.minor_collection()
if minor_collection_count == 1:
+ self.minor_collection_with_major_progress()
+ else:
+ # Nursery too full again. This is likely because of
+ # execute_finalizers() or rrc_invoke_callback().
+ # we need to fix it with another call to minor_collection()
+ # ---this time only the minor part so that we are sure that
+ # the nursery is empty (apart from pinned objects).
#
- # If the gc_state is STATE_SCANNING, we're not in
- # the middle of an incremental major collection.
- # In that case, wait until there is too much
- # garbage before starting the next major
- # collection. But if we are in the middle of an
- # incremental major collection, then always do (at
- # least) one step now.
+ # Note that this still works with the counters:
+ # 'size_objects_made_old' will be increased by
+ # the _minor_collection() below. We don't
+ # immediately restore the target invariant that
+ # 'size_objects_made_old <= threshold_objects_made_old'.
+ # But we will do it in the next call to
+ # minor_collection_with_major_progress().
#
- # This will increment next_major_collection_threshold
- # by nursery_size//2. If more than nursery_size//2
- # survives, then threshold_reached() might still be
- # true after that. In that case we do a second step.
- # The goal is to avoid too high memory peaks if the
- # program allocates a lot of surviving objects.
- #
- if (self.gc_state != STATE_SCANNING or
- self.threshold_reached()):
-
- self.major_collection_step()
-
- if (self.gc_state != STATE_SCANNING and
- self.threshold_reached()): # ^^but only if
still
- self.minor_collection() # the same
collection
- self.major_collection_step()
- #
- self.rrc_invoke_callback()
- #
- # The nursery might not be empty now, because of
- # execute_finalizers() or rrc_invoke_callback().
- # If it is almost full again,
- # we need to fix it with another call to
minor_collection().
- if self.nursery_free + totalsize > self.nursery_top:
- self.minor_collection()
- #
- else:
ll_assert(minor_collection_count == 2,
- "Seeing minor_collection() at least twice."
- "Too many pinned objects?")
+ "Calling minor_collection() twice is not "
+ "enough. Too many pinned objects?")
+ self._minor_collection()
#
# Tried to do something about nursery_free overflowing
# nursery_top before this point. Try to reserve totalsize now.
@@ -855,21 +886,9 @@
# to major_collection_step(). If there is really no memory,
# then when the major collection finishes it will raise
# MemoryError.
- #
- # The logic is to first do a minor GC only, and check if that
- # was enough to free a bunch of large young objects. If it
- # was, then we don't do any major collection step.
- #
- while self.threshold_reached(raw_malloc_usage(totalsize)):
- self.minor_collection()
- if self.threshold_reached(raw_malloc_usage(totalsize) +
- self.nursery_size // 2):
- self.major_collection_step(raw_malloc_usage(totalsize))
- self.rrc_invoke_callback()
- # note that this loop should not be infinite: when the
- # last step of a major collection is done but
- # threshold_reached(totalsize) is still true, then
- # we should get a MemoryError from major_collection_step().
+ if self.threshold_reached(raw_malloc_usage(totalsize)):
+ self.minor_collection_with_major_progress(
+ raw_malloc_usage(totalsize) + self.nursery_size // 2)
#
# Check if the object would fit in the ArenaCollection.
# Also, an object allocated from ArenaCollection must be old.
@@ -1547,7 +1566,7 @@
# ----------
# Nursery collection
- def minor_collection(self):
+ def _minor_collection(self):
"""Perform a minor collection: find the objects from the nursery
that remain alive and move them out."""
#
@@ -1718,6 +1737,10 @@
self.old_objects_pointing_to_pinned.foreach(
self._reset_flag_old_objects_pointing_to_pinned, None)
#
+ # Accounting: 'nursery_surviving_size' is the size of objects
+ # from the nursery that we just moved out.
+ self.size_objects_made_old += r_uint(self.nursery_surviving_size)
+ #
debug_print("minor collect, total memory used:",
self.get_total_memory_used())
debug_print("number of pinned objects:",
@@ -1958,6 +1981,7 @@
self.header(obj).tid &= ~GCFLAG_HAS_SHADOW
#
totalsize = size_gc_header + self.get_size(obj)
+ self.nursery_surviving_size += raw_malloc_usage(totalsize)
#
# Copy it. Note that references to other objects in the
# nursery are kept unchanged in this step.
@@ -2002,6 +2026,11 @@
return
hdr.tid |= GCFLAG_VISITED_RMY
#
+ # Accounting
+ size_gc_header = self.gcheaderbuilder.size_gc_header
+ size = size_gc_header + self.get_size(obj)
+ self.size_objects_made_old += r_uint(raw_malloc_usage(size))
+ #
# we just made 'obj' old, so we need to add it to the correct lists
added_somewhere = False
#
@@ -2084,14 +2113,14 @@
def gc_step_until(self, state):
while self.gc_state != state:
- self.minor_collection()
+ self._minor_collection()
self.major_collection_step()
debug_gc_step_until = gc_step_until # xxx
def debug_gc_step(self, n=1):
while n > 0:
- self.minor_collection()
+ self._minor_collection()
self.major_collection_step()
n -= 1
@@ -2111,37 +2140,44 @@
self.debug_check_consistency()
#
+ # 'threshold_objects_made_old', is used inside comparisons
+ # with 'size_objects_made_old' to know when we must do
+ # several major GC steps (i.e. several consecurive calls
+ # to the present function). Here is the target that
+ # we try to aim to: either (A1) or (A2)
+ #
+ # (A1) gc_state == STATE_SCANNING (i.e. major GC cycle ended)
+ # (A2) size_objects_made_old <= threshold_objects_made_old
+ #
# Every call to major_collection_step() adds nursery_size//2
- # to the threshold. It is reset at the end of this function
- # when the major collection is fully finished.
- #
+ # to 'threshold_objects_made_old'.
# In the common case, this is larger than the size of all
# objects that survive a minor collection. After a few
# minor collections (each followed by one call to
# major_collection_step()) the threshold is much higher than
- # the currently-in-use old memory. Then threshold_reached()
- # won't be true again until the major collection fully
- # finishes, time passes, and it's time for the next major
- # collection.
+ # the 'size_objects_made_old', making the target invariant (A2)
+ # true by a large margin.
#
# However there are less common cases:
#
- # * if more than half of the nursery consistently survives: we
- # call major_collection_step() twice after a minor
- # collection;
+ # * if more than half of the nursery consistently survives:
+ # then we need two calls to major_collection_step() after
+ # some minor collection;
#
# * or if we're allocating a large number of bytes in
- # external_malloc(). In that case, we are likely to reach
- # again the threshold_reached() case, and more major
- # collection steps will be done immediately until
- # threshold_reached() returns false.
+ # external_malloc() and some of them survive the following
+ # minor collection. In that case, more than two major
+ # collection steps must be done immediately, until we
+ # restore the target invariant (A2).
#
- self.next_major_collection_threshold += self.nursery_size // 2
+ self.threshold_objects_made_old += r_uint(self.nursery_size // 2)
- # XXX currently very coarse increments, get this working then split
- # to smaller increments using stacks for resuming
if self.gc_state == STATE_SCANNING:
+ # starting a major GC cycle: reset these two counters
+ self.size_objects_made_old = r_uint(0)
+ self.threshold_objects_made_old = r_uint(self.nursery_size // 2)
+
self.objects_to_trace = self.AddressStack()
self.collect_roots()
self.gc_state = STATE_MARKING
diff --git a/rpython/memory/gc/test/test_object_pinning.py
b/rpython/memory/gc/test/test_object_pinning.py
--- a/rpython/memory/gc/test/test_object_pinning.py
+++ b/rpython/memory/gc/test/test_object_pinning.py
@@ -19,6 +19,8 @@
BaseDirectGCTest.setup_method(self, meth)
max = getattr(meth, 'max_number_of_pinned_objects', 20)
self.gc.max_number_of_pinned_objects = max
+ if not hasattr(self.gc, 'minor_collection'):
+ self.gc.minor_collection = self.gc._minor_collection
def test_pin_can_move(self):
# even a pinned object is considered to be movable. Only the caller
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit