Avoid unbounded memory consumption when running `manage.py test`

Matt McClure Fri, 26 Jul 2013 07:04:16 -0700

TestSuite holds references to each TestCase instance. So attributes of my 
TestCase subclasses don't get freed by the garbage collector until the last 
reference to TestSuite disappears, which isn't until the entire test run 
ends. In a large test suite, the test run exhausts memory and the OS kills 
it before the suite finishes.


The most obvious solution to me is to put tearDown methods in all my 
TestCase subclasses that delete the references to their own attributes. But 
it seems like there has to be a more general, automatic solution. I can 
start to answer my own question. But I'm interested to know if others run 
into similar problems and how you solve them.

It looks like there are at least two participants that hold TestCase 
references longer than desirable.

1.  TestSuite. Here's a minimal hack that releases the suite's reference to 
each TestCase as soon as that test has run, instead of holding it until the 
remainder of the suite has finished.

diff --git a/django/utils/unittest/suite.py b/django/utils/unittest/suite.py
index f39569b..8530200 100644
--- a/django/utils/unittest/suite.py
+++ b/django/utils/unittest/suite.py
@@ -1,5 +1,6 @@
 """TestSuite"""
 
+import gc
 import sys
 import unittest
 from django.utils.unittest import case, util
@@ -96,7 +97,11 @@ class TestSuite(BaseTestSuite):
     ################################
     # private methods
     def _wrapped_run(self, result, debug=False):
-        for test in self:
+        while True:
+            try:
+                test = self._tests.pop(0)
+            except IndexError:
+                break
             if result.shouldStop:
                 break
 
@@ -116,6 +121,7 @@ class TestSuite(BaseTestSuite):
                 test(result)
             else:
                 test.debug()
+            #gc.collect()
 
     def _handleClassSetUp(self, test, result):
         previousClass = getattr(result, '_previousTestClass', None)

I believe the (commented-out) gc.collect() call in the patch above is 
unnecessary. Calling it explicitly accelerates the collection, but the 
automatic collector will free the TestCase from memory soon enough, as long 
as we manage its lifetime by eliminating all the references to it.

2. TestResult and subclasses may hold references to TestCase instances via 
addSuccess() and friends. For example, 
django.utils.unittest.result.TestResult.addError does:

        self.errors.append((test, self._exc_info_to_string(err, test)))

In my current scenario, I'm using unittest-xml-reporting's TestResult 
subclass, and the following patch eliminates the references to TestCase 
instances that it would otherwise save.

--- __init__.py~ 2013-05-02 16:45:19.000000000 -0400
+++ __init__.py 2013-07-18 21:39:13.000000000 -0400
@@ -8,6 +8,9 @@
 import os
 import sys
 import time
+
+import psutil
+
 from unittest import TestResult, _TextTestResult, TextTestRunner
 
 try:
@@ -27,13 +30,24 @@
         self.delegate = delegate
 
     def write(self, text):
-        self._captured.write(text)
+        #self._captured.write(text)
         self.delegate.write(text)
 
     def __getattr__(self, attr):
         return getattr(self._captured, attr)
 
 
+def testcase_name(test_method):
+    testcase = type(test_method)
+
+    # Ignore module name if it is '__main__'
+    module = testcase.__module__ + '.'
+    if module == '__main__.':
+        module = ''
+    result = module + testcase.__name__
+    return result
+
+
 class _TestInfo(object):
     """This class keeps useful information about the execution of a
     test method.
@@ -44,11 +58,21 @@
 
     def __init__(self, test_result, test_method, outcome=SUCCESS, err=None):
         self.test_result = test_result
-        self.test_method = test_method
         self.outcome = outcome
         self.elapsed_time = 0
         self.err = err
 
+        #self.test_method = test_method
+        self.test_description = self.test_result.getDescription(test_method)
+        self.test_exception_info = (
+            '' if not self.err
+            else self.test_result._exc_info_to_string(
+                    self.err, test_method)
+        )
+
+        self.test_name = testcase_name(test_method)
+        self.test_id = test_method.id()
+
     def test_finished(self):
         """Save info that can only be calculated once a test has run.
         """
@@ -58,16 +82,18 @@
     def get_description(self):
         """Return a text representation of the test method.
         """
-        return self.test_result.getDescription(self.test_method)
+        #return self.test_result.getDescription(self.test_method)
+        return self.test_description
 
     def get_error_info(self):
         """Return a text representation of an exception thrown by a test
         method.
         """
-        if not self.err:
-            return ''
-        return self.test_result._exc_info_to_string(self.err, \
-            self.test_method)
+        #if not self.err:
+        #    return ''
+        #return self.test_result._exc_info_to_string(self.err, \
+        #    self.test_method)
+        return self.test_exception_info
 
 
 class _XMLTestResult(_TextTestResult):
@@ -81,6 +107,8 @@
         self.successes = []
         self.callback = None
         self.elapsed_times = elapsed_times
+        self.start_meminfo = psutil.Process(os.getpid()).get_memory_info()
+        self.stop_meminfo = psutil.Process(os.getpid()).get_memory_info()
 
     def _prepare_callback(self, test_info, target_list, verbose_str,
         short_str):
@@ -103,6 +131,10 @@
             if self.showAll:
                 self.stream.writeln('%s (%.3fs)' % \
                     (verbose_str, test_info.elapsed_time))
+                self.stop_meminfo = psutil.Process(os.getpid()).get_memory_info()
+                self.delta_stop_meminfo = { 'rss': self.stop_meminfo.rss - self.start_meminfo.rss,
+                                            'vms': self.stop_meminfo.vms - self.start_meminfo.vms }
+                self.stream.write('on stop: status: {0}, delta: {1}\n'.format(self.stop_meminfo, self.delta_stop_meminfo))
             elif self.dots:
                 self.stream.write(short_str)
         self.callback = callback
@@ -114,6 +146,10 @@
         TestResult.startTest(self, test)
 
         if self.showAll:
+            self.start_meminfo = psutil.Process(os.getpid()).get_memory_info()
+            self.delta_start_meminfo = { 'rss': self.start_meminfo.rss - self.stop_meminfo.rss,
+                                         'vms': self.start_meminfo.vms - self.stop_meminfo.vms }
+            self.stream.write('on start: status: {0}, delta: {1}\n'.format(self.start_meminfo, self.delta_start_meminfo))
             self.stream.write('  ' + self.getDescription(test))
             self.stream.write(" ... ")
 
@@ -165,14 +201,7 @@
 
         for tests in (self.successes, self.failures, self.errors):
             for test_info in tests:
-                testcase = type(test_info.test_method)
-
-                # Ignore module name if it is '__main__'
-                module = testcase.__module__ + '.'
-                if module == '__main__.':
-                    module = ''
-                testcase_name = module + testcase.__name__
-
+                testcase_name = test_info.test_name
                 if not testcase_name in tests_by_testcase:
                     tests_by_testcase[testcase_name] = []
                 tests_by_testcase[testcase_name].append(test_info)
@@ -201,10 +230,9 @@
 
     _report_testsuite = staticmethod(_report_testsuite)
 
-    def _test_method_name(test_method):
+    def _test_method_name(test_id):
         """Returns the test method name.
         """
-        test_id = test_method.id()
         return test_id.split('.')[-1]
 
     _test_method_name = staticmethod(_test_method_name)
@@ -217,7 +245,7 @@
 
         testcase.setAttribute('classname', suite_name)
         testcase.setAttribute('name', \
-            _XMLTestResult._test_method_name(test_result.test_method))
+            _XMLTestResult._test_method_name(test_result.test_id))
         testcase.setAttribute('time', '%.3f' % test_result.elapsed_time)
 
         if (test_result.outcome != _TestInfo.SUCCESS):

With these two changes, I can see memory usage fluctuate: it increases with 
each test case run and decreases, less frequently, whenever the automatic 
garbage collector frees the memory used by finished TestCase instances.

Matt

I posted this on django-users[1] a week ago, but no one has replied.

[1]: https://groups.google.com/forum/#!topic/django-users/_MgmdnOZ4i0

-- 
You received this message because you are subscribed to the Google Groups 
"Django developers" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/django-developers.
For more options, visit https://groups.google.com/groups/opt_out.

Avoid unbounded memory consumption when running `manage.py test`

Reply via email to