Michael Droettboom wrote:
> Can you provide a complete, standalone example that reproduces the 
> problem. Otherwise all I can do is guess.
> 
> The usual culprit is forgetting to close figures after you're done with 
> them.

Thanks, I learned that through matplotlib-1.3.0 give spit over me a warning 
message some weeks
ago. Yes, i do call _figure.clear() and pylab.clf()  but only after the 
savefig() returns, which
is not the case here. Also use gc.collect() a lot through the code, especially 
before and after
I draw every figure. That is not enough here.





from itertools import izip, imap, ifilter
import pylab
import matplotlib
# Force matplotlib not to use any X-windows backend.
matplotlib.use('Agg')
import pylab

F = pylab.gcf()

# convert the view of numpy array to tuple
# 
http://matplotlib.1069221.n5.nabble.com/RendererAgg-int-width-int-height-dpi-debug-False-ValueError-width-and-height-must-each-be-below-32768-td27756.html
DefaultSize = tuple(F.get_size_inches())



def draw_hist2d_plot(filename, mydata_x, mydata_y, colors, title_data, 
xlabel_data, ylabel_data, legends, legend_loc='upper right', 
legend_bbox_to_anchor=(1.0, 1.0), legend_ncol=None, xmin=None, xmax=None, 
ymin=None, ymax=None, fontsize=10, legend_fontsize=8, dpi=100, 
tight_layout=False, legend_inside=False, objsize=0.1):
    # hist2d(x, y, bins = None, range=None, weights=None, cmin=None, cmax=None 
**kwargs)

    if len(mydata_x) != len(mydata_y):
        raise ValueError, "%s: len(mydata_x) != len(mydata_y): %s != %s" % 
(filename, len(mydata_x), len(mydata_y))

    if colors and len(mydata_x) != len(colors):
        sys.stderr.write("Warning: draw_hist2d_plot(): %s: len(mydata_x) != 
len(colors): %s != %s.\n" % (filename, len(mydata_x), len(colors)))

    if colors and legends and len(colors) != len(legends):
        sys.stderr.write("Warning: draw_hist2d_plot(): %s, len(colors) != 
len(legends): %s != %s.\n" % (filename, len(colors), len(legends)))

    if mydata_x and mydata_y and filename:
        if legends:
            if not legend_ncol:
                _subfigs, _ax1_num, _ax2_num, _legend_ncol = get_ncol(legends, 
fontsize=legend_fontsize)
            else:
                _subfigs, _ax1_num, _ax2_num, _legend_ncol = 3, 213, 313, 
legend_ncol
        else:
            _subfigs, _ax1_num, _legend_ncol = 3, 313, 0

        set_my_pylab_defaults()
        pylab.clf()
        _figure = pylab.figure()
        _figure.clear()
        _figure.set_tight_layout(True)
        gc.collect()

        if legends:
            # do not crash on too tall figures
            if 8.4 * _subfigs < 200:
                _figure.set_size_inches(11.2, 8.4 * (_subfigs + 1))
            else:
                # _figure.set_size_inches() silently accepts a large value but 
later on _figure.savefig() crashes with:
                # ValueError: width and height must each be below 32768
                _figure.set_size_inches(11.2, 200)
                sys.stderr.write("Warning: draw_hist2d_plot(): Wanted to set %s 
figure height to %s but is too high, forcing %s instead. You will likely get an 
incomplete image.\n" % (filename, 8.4 * _subfigs, 200))
            if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed 
%s figure size to: %s" % (filename, str(_figure.get_size_inches()))
            _ax1 = _figure.add_subplot(_ax1_num)
            _ax2 = _figure.add_subplot(_ax2_num)
        else:
            _figure.set_size_inches(11.2, 8.4 * 2)
            _ax1 = _figure.gca()
        if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s 
figure size to: %s" % (filename, str(_figure.get_size_inches()))

        _series = []
        #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
        for _x, _y, _c in izip(mydata_x, mydata_y, colors):
            # _Line2D = _ax1.plot(_x, _y) # returns Line2D object
            _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , 
label=_l) # returns PathCollection object
            _series.append(_my_PathCollection)

        if legends:
            #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends):
            for _x, _y, _c in izip(mydata_x, mydata_y, colors):
                _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) 
# , label=_l)
                _series.append(_my_PathCollection)

            _ax2.legend(_series, legends, loc='upper left', 
bbox_to_anchor=(0,0,1,1), borderaxespad=0., ncol=_legend_ncol, mode='expand', 
fontsize=legend_fontsize)
            _ax2.set_frame_on(False)
            _ax2.tick_params(bottom='off', left='off', right='off', top='off')
            pylab.setp(_ax2.get_yticklabels(), visible=False)
            pylab.setp(_ax2.get_xticklabels(), visible=False)
        else:
            for _x, _y, _c in izip(mydata_x, mydata_y, colors):
                _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # 
keeps eating memory in:
                #
                #     draw_hist2d_plot(filename, _data_xrow, _data_yrow, 
_my_colors, _title, _xlabel, _ylabel, [], xmin=None, xmax=None, ymin=None, 
ymax=None, fontsize=10, dpi=100)
                #  File "/blah.py", line 14080, in draw_hist2d_plot
                #     _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^')
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 6247, in scatter
                #     self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 1685, in 
_process_unit_info
                #     self.xaxis.update_units(xdata)
                #  File 
"/usr/lib64/python2.7/site-packages/matplotlib/axis.py", line 1332, in 
update_units
                #     converter = munits.registry.get_converter(data)

            # pylab.subplots_adjust(left = (5/25.4)/_figure.xsize, bottom = 
(4/25.4)/_figure.ysize, right = 1 - (1/25.4)/_figure.xsize, top = 1 - 
(3/25.4)/_figure.ysize)

        _ax1.set_xlabel(xlabel_data, fontsize=fontsize)
        _ax1.set_ylabel(ylabel_data, fontsize=fontsize)
        _ax1.set_xmargin(0.05)
        _ax1.set_ymargin(0.05)
        _ax1.set_autoscale_on(False)


        set_limits(_ax1, xmin, xmax, ymin, ymax)

        if fontsize == 10:
            _ax1.set_title('\n'.join(wrap(title_data, 100)), 
fontsize=fontsize+2)
        elif fontsize == 12:
            _ax1.set_title('\n'.join(wrap(title_data, 90)), fontsize=fontsize+2)
        else:
            _ax1.set_title('\n'.join(wrap(title_data, 100)), 
fontsize=fontsize+2)

        if legends:
            _figure.savefig(filename, dpi=100) #, bbox_inches='tight')
            del(_my_PathCollection)
            del(_ax2)
        else:
            _figure.savefig(filename, dpi=100)

        del(_series)
        del(_ax1)
        _figure.clear()
        del(_figure)
        pylab.clf()
        pylab.close()
        # pylab.rcdefaults()

        gc.collect()



That's the whole function. I used to suspect _ax1.scatter() in the past but 
probably
only because I hit the memory problems earlier. That is worked around now by 
using
on disk bsddb3 file or gdbm somewhere upstream. This particular function is 
nevertheless
fed with just a huge list numbers, and that is not the issue in itself.

I would be glad if I could tell matplotlib: Here you have 100 colors, use them 
for all data
as you wish, just spread them evenly over the whole dataset so that first 
1/100th of the data
gets the first color, second 1/100th of the data gets the second color, and so 
on. Optionally,
if you would like to say: use the 100 colors in cycles for all data points, 
just loop through
the colors as long as you need some. In both scenarios, I could have avoided 
the two for loops
in the above code and necessity to generate those objects. Same for legend 
stuff.

Martin

> 
> Mike
> 
> On 10/10/2013 09:05 AM, Martin MOKREJŠ wrote:
>> Hi,
>>    rendering some of my charts takes almost 50GB of RAM. I believe below is 
>> a stracktrace
>> of one such situation when it already took 15GB. Would somebody comments on 
>> what is
>> matplotlib doing at the very moment? Why the recursion?
>>
>>    The charts had to have 262422 data points in a 2D scatter plot, each 
>> point has assigned
>> its own color. They are in batches so that there are 153 distinct colors but 
>> nevertheless,
>> I assigned to each data point a color value. There are 153 legend items also 
>> (one color
>> won't be used).
>>
>> ^CTraceback (most recent call last):
>> ...
>>      _figure.savefig(filename, dpi=100)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 
>> 1421, in savefig
>>      self.canvas.print_figure(*args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", 
>> line 2220, in print_figure
>>      **kwargs)
>>    File 
>> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", 
>> line 505, in print_png
>>      FigureCanvasAgg.draw(self)
>>    File 
>> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", 
>> line 451, in draw
>>      self.figure.draw(self.renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line 
>> 1034, in draw
>>      func(*args)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 2086, 
>> in draw
>>      a.draw(renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 718, in draw
>>      return Collection.draw(self, renderer)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, 
>> in draw_wrapper
>>      draw(artist, renderer, *args, **kwargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 276, in draw
>>      offsets, transOffset, self.get_facecolor(), self.get_edgecolor(),
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line 
>> 551, in get_edgecolor
>>      return self._edgecolors
>> KeyboardInterrupt
>> ^CError in atexit._run_exitfuncs:
>> Traceback (most recent call last):
>>    File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>>      func(*targs, **kargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", 
>> line 90, in destroy_all
>>      gc.collect()
>> KeyboardInterrupt
>> Error in sys.exitfunc:
>> Traceback (most recent call last):
>>    File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
>>      func(*targs, **kargs)
>>    File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", 
>> line 90, in destroy_all
>>      gc.collect()
>> KeyboardInterrupt
>>
>> ^C
>>
>>
>> Clues what is the code doing? I use mpl-1.3.0.
>> Thank you,
>> Martin
>>
>> ------------------------------------------------------------------------------
>> October Webinars: Code for Performance
>> Free Intel webinars can help you accelerate application performance.
>> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from
>> the latest Intel processors and coprocessors. See abstracts and register >
>> http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk
>> _______________________________________________
>> Matplotlib-users mailing list
>> Matplotlib-users@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/matplotlib-users
> 
> 

-- 
Martin Mokrejs, Ph.D.
Bioinformatics
Donovalska 1658
149 00 Prague
Czech Republic
http://www.iresite.org
http://www.iresite.org/~mmokrejs

------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk
_______________________________________________
Matplotlib-users mailing list
Matplotlib-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/matplotlib-users

Reply via email to