Michael Droettboom wrote:
> Can you provide a complete, standalone example that reproduces the 
> problem. Otherwise all I can do is guess.
> The usual culprit is forgetting to close figures after you're done with 
> them.

Thanks, I learned that when matplotlib-1.3.0 spat out a warning message at me
some weeks ago. Yes, I do call _figure.clear() and pylab.clf(), but only after
savefig() returns, which has not happened yet in this case. I also call
gc.collect() a lot throughout the code, especially before and after I draw
every figure. That is not enough here.

from itertools import izip, imap, ifilter
import pylab
import matplotlib
# Force matplotlib not to use any X-windows backend.
import pylab

# Take pylab's current figure and remember its size in inches.
# NOTE(review): neither F nor DefaultSize is referenced by the
# draw_hist2d_plot() listing shown in this message.
F = pylab.gcf()

# convert the view of numpy array to tuple
DefaultSize = tuple(F.get_size_inches())

def draw_hist2d_plot(filename, mydata_x, mydata_y, colors, title_data, 
xlabel_data, ylabel_data, legends, legend_loc='upper right', 
legend_bbox_to_anchor=(1.0, 1.0), legend_ncol=None, xmin=None, xmax=None, 
ymin=None, ymax=None, fontsize=10, legend_fontsize=8, dpi=100, 
tight_layout=False, legend_inside=False, objsize=0.1):
    # Purpose: scatter the paired series in mydata_x / mydata_y (one color per
    # series) onto a newly created pylab figure, optionally add a legend on a
    # second axes, and save the result to `filename`.  Despite the name, the
    # visible body calls Axes.scatter(), not hist2d().
    #
    # Raises ValueError when mydata_x and mydata_y differ in length.
    #
    # NOTE(review): this listing was posted in an email and appears to have
    # lost text in transit -- several wrapped statements are cut off and some
    # `else:` branches seem to be missing -- so it is not runnable as shown.
    # hist2d(x, y, bins = None, range=None, weights=None, cmin=None, cmax=None 

    # The x and y inputs must pair up one-to-one; anything else is fatal.
    if len(mydata_x) != len(mydata_y):
        raise ValueError, "%s: len(mydata_x) != len(mydata_y): %s != %s" % 
(filename, len(mydata_x), len(mydata_y))

    # Mismatched color / legend counts only produce a warning on stderr.
    if colors and len(mydata_x) != len(colors):
        sys.stderr.write("Warning: draw_hist2d_plot(): %s: len(mydata_x) != 
len(colors): %s != %s.\n" % (filename, len(mydata_x), len(colors)))

    if colors and legends and len(colors) != len(legends):
        sys.stderr.write("Warning: draw_hist2d_plot(): %s, len(colors) != 
len(legends): %s != %s.\n" % (filename, len(colors), len(legends)))

    # Nothing is drawn or saved unless x data, y data and filename are all truthy.
    if mydata_x and mydata_y and filename:
        if legends:
            if not legend_ncol:
                _subfigs, _ax1_num, _ax2_num, _legend_ncol = get_ncol(legends, 
                _subfigs, _ax1_num, _ax2_num, _legend_ncol = 3, 213, 313, 
            _subfigs, _ax1_num, _legend_ncol = 3, 313, 0

        # A new figure is created on every call.
        # NOTE(review): nothing in the visible listing closes this figure
        # (e.g. pylab.close(_figure)); confirm whether the caller does.
        _figure = pylab.figure()

        if legends:
            # do not crash on too tall figures
            if 8.4 * _subfigs < 200:
                _figure.set_size_inches(11.2, 8.4 * (_subfigs + 1))
                # _figure.set_size_inches() silently accepts a large value but 
later on _figure.savefig() crashes with:
                # ValueError: width and height must each be below 32768
                _figure.set_size_inches(11.2, 200)
                sys.stderr.write("Warning: draw_hist2d_plot(): Wanted to set %s 
figure height to %s but is too high, forcing %s instead. You will likely get an 
incomplete image.\n" % (filename, 8.4 * _subfigs, 200))
            if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed 
%s figure size to: %s" % (filename, str(_figure.get_size_inches()))
            _ax1 = _figure.add_subplot(_ax1_num)
            _ax2 = _figure.add_subplot(_ax2_num)
            _figure.set_size_inches(11.2, 8.4 * 2)
            _ax1 = _figure.gca()
        if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s 
figure size to: %s" % (filename, str(_figure.get_size_inches()))

        # NOTE(review): _series is never appended to in the visible listing,
        # yet it is passed to _ax2.legend() below as the list of handles.
        _series = []
        # One scatter() call per (x, y, color) triple.
        #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
        for _x, _y, _c in izip(mydata_x, mydata_y, colors):
            # _Line2D = _ax1.plot(_x, _y) # returns Line2D object
            _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , 
label=_l) # returns PathCollection object

        # NOTE(review): as posted, the same triples are scattered again in the
        # two loops below; presumably these were if/else alternatives in the
        # original file -- confirm against the real source.
        if legends:
            #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
            for _x, _y, _c in izip(mydata_x, mydata_y, colors):
                _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) 
# , label=_l)

            _ax2.legend(_series, legends, loc='upper left', 
bbox_to_anchor=(0,0,1,1), borderaxespad=0., ncol=_legend_ncol, mode='expand', 
            _ax2.tick_params(bottom='off', left='off', right='off', top='off')
            pylab.setp(_ax2.get_yticklabels(), visible=False)
            pylab.setp(_ax2.get_xticklabels(), visible=False)
            for _x, _y, _c in izip(mydata_x, mydata_y, colors):
                _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # 
keeps eating memory in:
                #     draw_hist2d_plot(filename, _data_xrow, _data_yrow, 
_my_colors, _title, _xlabel, _ylabel, [], xmin=None, xmax=None, ymin=None, 
ymax=None, fontsize=10, dpi=100)
                #  File "/blah.py", line 14080, in draw_hist2d_plot
                #     _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^')
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 6247, in scatter
                #     self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 1685, in 
                #     self.xaxis.update_units(xdata)
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axis.py", line 1332, in 
                #     converter = munits.registry.get_converter(data)

            # pylab.subplots_adjust(left = (5/25.4)/_figure.xsize, bottom = 
(4/25.4)/_figure.ysize, right = 1 - (1/25.4)/_figure.xsize, top = 1 - 

        _ax1.set_xlabel(xlabel_data, fontsize=fontsize)
        _ax1.set_ylabel(ylabel_data, fontsize=fontsize)

        set_limits(_ax1, xmin, xmax, ymin, ymax)

        # The title is wrapped to a width chosen by font size.
        if fontsize == 10:
            _ax1.set_title('\n'.join(wrap(title_data, 100)), 
        elif fontsize == 12:
            _ax1.set_title('\n'.join(wrap(title_data, 90)), fontsize=fontsize+2)
            _ax1.set_title('\n'.join(wrap(title_data, 100)), 

        # NOTE(review): both savefig() calls hard-code dpi=100; the `dpi`
        # parameter is not used anywhere in the visible listing.
        if legends:
            _figure.savefig(filename, dpi=100) #, bbox_inches='tight')
            _figure.savefig(filename, dpi=100)

        # pylab.rcdefaults()


That's the whole function. I used to suspect _ax1.scatter() in the past, but
only because I had hit memory problems earlier. Those are now worked around by
an on-disk bsddb3 or gdbm file somewhere upstream. This particular function is
just fed a huge list of numbers, and that is not the issue in itself.

I would be glad if I could tell matplotlib: here you have 100 colors, use them
for all the data as you wish, just spread them evenly over the whole dataset so
that the first 1/100th of the data gets the first color, the second 1/100th of
the data gets the second color, and so on. Alternatively, I would like to be
able to say: use the 100 colors in cycles for all data points, just loop
through the colors for as long as you need them. In both scenarios, I could
have avoided the two for loops in the above code and the necessity to generate
those objects. Same for legend 


> Mike
> On 10/10/2013 09:05 AM, Martin MOKREJŠ wrote:
>> Hi,
>>    rendering some of my charts takes almost 50GB of RAM. I believe below is 
>> a stack trace
>> of one such situation when it had already taken 15GB. Would somebody comment on 
>> what
>> matplotlib is doing at that very moment? Why the recursion?
>>    The charts had to have 262422 data points in a 2D scatter plot, each 
>> point has assigned
>> its own color. They are in batches so that there are 153 distinct colors but 
>> nevertheless,
>> I assigned to each data point a color value. There are 153 legend items also 
>> (one color
>> won't be used).
>> ^CTraceback (most recent call last):
>> ...
>>      _figure.savefig(filename, dpi=100)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 
>> 1421, in savefig
>>      self.canvas.print_figure(*args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", 
>> line 2220, in print_figure
>>      **kwargs)
>>    File 
>> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", 
>> line 505, in print_png
>>      FigureCanvasAgg.draw(self)
>>    File 
>> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", 
>> line 451, in draw
>>      self.figure.draw(self.renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 
>> 1034, in draw
>>      func(*args)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 2086, 
>> in draw
>>      a.draw(renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 718, in draw
>>      return Collection.draw(self, renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 276, in draw
>>      offsets, transOffset, self.get_facecolor(), self.get_edgecolor(),
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 551, in get_edgecolor
>>      return self._edgecolors
>> KeyboardInterrupt
>> ^CError in atexit._run_exitfuncs:
>> Traceback (most recent call last):
>>    File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>>      func(*targs, **kargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", 
>> line 90, in destroy_all
>>      gc.collect()
>> KeyboardInterrupt
>> Error in sys.exitfunc:
>> Traceback (most recent call last):
>>    File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>>      func(*targs, **kargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", 
>> line 90, in destroy_all
>>      gc.collect()
>> KeyboardInterrupt
>> ^C
>> Clues what is the code doing? I use mpl-1.3.0.
>> Thank you,
>> Martin
Martin Mokrejs, Ph.D.
Donovalska 1658
149 00 Prague
Czech Republic

