Michael Droettboom wrote: > Can you provide a complete, standalone example that reproduces the > problem? Otherwise all I can do is guess. > > The usual culprit is forgetting to close figures after you're done with > them.
Thanks, I learned that when matplotlib-1.3.0 spat out a warning message at me some weeks ago. Yes, I do call _figure.clear() and pylab.clf(), but only after savefig() returns, which does not happen here. I also call gc.collect() a lot throughout the code, especially before and after I draw every figure. That is not enough here. from itertools import izip, imap, ifilter import pylab import matplotlib # Force matplotlib not to use any X-windows backend. matplotlib.use('Agg') import pylab F = pylab.gcf() # convert the view of numpy array to tuple # http://matplotlib.1069221.n5.nabble.com/RendererAgg-int-width-int-height-dpi-debug-False-ValueError-width-and-height-must-each-be-below-32768-td27756.html DefaultSize = tuple(F.get_size_inches()) def draw_hist2d_plot(filename, mydata_x, mydata_y, colors, title_data, xlabel_data, ylabel_data, legends, legend_loc='upper right', legend_bbox_to_anchor=(1.0, 1.0), legend_ncol=None, xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, legend_fontsize=8, dpi=100, tight_layout=False, legend_inside=False, objsize=0.1): # hist2d(x, y, bins = None, range=None, weights=None, cmin=None, cmax=None **kwargs) if len(mydata_x) != len(mydata_y): raise ValueError, "%s: len(mydata_x) != len(mydata_y): %s != %s" % (filename, len(mydata_x), len(mydata_y)) if colors and len(mydata_x) != len(colors): sys.stderr.write("Warning: draw_hist2d_plot(): %s: len(mydata_x) != len(colors): %s != %s.\n" % (filename, len(mydata_x), len(colors))) if colors and legends and len(colors) != len(legends): sys.stderr.write("Warning: draw_hist2d_plot(): %s, len(colors) != len(legends): %s != %s.\n" % (filename, len(colors), len(legends))) if mydata_x and mydata_y and filename: if legends: if not legend_ncol: _subfigs, _ax1_num, _ax2_num, _legend_ncol = get_ncol(legends, fontsize=legend_fontsize) else: _subfigs, _ax1_num, _ax2_num, _legend_ncol = 3, 213, 313, legend_ncol else: _subfigs, _ax1_num, _legend_ncol = 3, 313, 0 set_my_pylab_defaults() pylab.clf() 
_figure = pylab.figure() _figure.clear() _figure.set_tight_layout(True) gc.collect() if legends: # do not crash on too tall figures if 8.4 * _subfigs < 200: _figure.set_size_inches(11.2, 8.4 * (_subfigs + 1)) else: # _figure.set_size_inches() silently accepts a large value but later on _figure.savefig() crashes with: # ValueError: width and height must each be below 32768 _figure.set_size_inches(11.2, 200) sys.stderr.write("Warning: draw_hist2d_plot(): Wanted to set %s figure height to %s but is too high, forcing %s instead. You will likely get an incomplete image.\n" % (filename, 8.4 * _subfigs, 200)) if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches())) _ax1 = _figure.add_subplot(_ax1_num) _ax2 = _figure.add_subplot(_ax2_num) else: _figure.set_size_inches(11.2, 8.4 * 2) _ax1 = _figure.gca() if myoptions.debug > 5: print "Debug: draw_hist2d_plot(): Changed %s figure size to: %s" % (filename, str(_figure.get_size_inches())) _series = [] #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends): for _x, _y, _c in izip(mydata_x, mydata_y, colors): # _Line2D = _ax1.plot(_x, _y) # returns Line2D object _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l) # returns PathCollection object _series.append(_my_PathCollection) if legends: #for _x, _y, _c, _l in izip(mydata_x, mydata_y, colors, legends): for _x, _y, _c in izip(mydata_x, mydata_y, colors): _my_PathCollection = _ax1.scatter(_x, _y, color=_c, s=objsize) # , label=_l) _series.append(_my_PathCollection) _ax2.legend(_series, legends, loc='upper left', bbox_to_anchor=(0,0,1,1), borderaxespad=0., ncol=_legend_ncol, mode='expand', fontsize=legend_fontsize) _ax2.set_frame_on(False) _ax2.tick_params(bottom='off', left='off', right='off', top='off') pylab.setp(_ax2.get_yticklabels(), visible=False) pylab.setp(_ax2.get_xticklabels(), visible=False) else: for _x, _y, _c in izip(mydata_x, mydata_y, colors): 
_ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # keeps eating memory in: # # draw_hist2d_plot(filename, _data_xrow, _data_yrow, _my_colors, _title, _xlabel, _ylabel, [], xmin=None, xmax=None, ymin=None, ymax=None, fontsize=10, dpi=100) # File "/blah.py", line 14080, in draw_hist2d_plot # _ax1.scatter(_x, _y, color=_c, s=objsize) #, marker='^') # File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 6247, in scatter # self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs) # File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 1685, in _process_unit_info # self.xaxis.update_units(xdata) # File "/usr/lib64/python2.7/site-packages/matplotlib/axis.py", line 1332, in update_units # converter = munits.registry.get_converter(data) # pylab.subplots_adjust(left = (5/25.4)/_figure.xsize, bottom = (4/25.4)/_figure.ysize, right = 1 - (1/25.4)/_figure.xsize, top = 1 - (3/25.4)/_figure.ysize) _ax1.set_xlabel(xlabel_data, fontsize=fontsize) _ax1.set_ylabel(ylabel_data, fontsize=fontsize) _ax1.set_xmargin(0.05) _ax1.set_ymargin(0.05) _ax1.set_autoscale_on(False) set_limits(_ax1, xmin, xmax, ymin, ymax) if fontsize == 10: _ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2) elif fontsize == 12: _ax1.set_title('\n'.join(wrap(title_data, 90)), fontsize=fontsize+2) else: _ax1.set_title('\n'.join(wrap(title_data, 100)), fontsize=fontsize+2) if legends: _figure.savefig(filename, dpi=100) #, bbox_inches='tight') del(_my_PathCollection) del(_ax2) else: _figure.savefig(filename, dpi=100) del(_series) del(_ax1) _figure.clear() del(_figure) pylab.clf() pylab.close() # pylab.rcdefaults() gc.collect() That's the whole function. I used to suspect _ax1.scatter() in the past, but probably only because I hit the memory problems earlier. That is worked around now by using an on-disk bsddb3 or gdbm file somewhere upstream. This particular function is nevertheless fed just a huge list of numbers, and that is not the issue in itself. 
I would be glad if I could tell matplotlib: Here you have 100 colors, use them for all data as you wish, just spread them evenly over the whole dataset so that the first 1/100th of the data gets the first color, the second 1/100th of the data gets the second color, and so on. Alternatively, I would like to be able to say: use the 100 colors in cycles for all data points, just loop through the colors as long as you need some. In both scenarios, I could have avoided the two for loops in the above code and the need to generate those objects. Same for the legend stuff. Martin > > Mike > > On 10/10/2013 09:05 AM, Martin MOKREJŠ wrote: >> Hi, >> rendering some of my charts takes almost 50GB of RAM. I believe below is >> a stack trace >> of one such situation when it already took 15GB. Would somebody comment on >> what is >> matplotlib doing at the very moment? Why the recursion? >> >> The charts had to have 262422 data points in a 2D scatter plot, each >> point is assigned >> its own color. They are in batches so that there are 153 distinct colors but >> nevertheless, >> I assigned to each data point a color value. There are 153 legend items also >> (one color >> won't be used). >> >> ^CTraceback (most recent call last): >> ... 
>> _figure.savefig(filename, dpi=100) >> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line >> 1421, in savefig >> self.canvas.print_figure(*args, **kwargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/backend_bases.py", >> line 2220, in print_figure >> **kwargs) >> File >> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", >> line 505, in print_png >> FigureCanvasAgg.draw(self) >> File >> "/usr/lib64/python2.7/site-packages/matplotlib/backends/backend_agg.py", >> line 451, in draw >> self.figure.draw(self.renderer) >> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, >> in draw_wrapper >> draw(artist, renderer, *args, **kwargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/figure.py", line >> 1034, in draw >> func(*args) >> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, >> in draw_wrapper >> draw(artist, renderer, *args, **kwargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/axes.py", line 2086, >> in draw >> a.draw(renderer) >> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, >> in draw_wrapper >> draw(artist, renderer, *args, **kwargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line >> 718, in draw >> return Collection.draw(self, renderer) >> File "/usr/lib64/python2.7/site-packages/matplotlib/artist.py", line 54, >> in draw_wrapper >> draw(artist, renderer, *args, **kwargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line >> 276, in draw >> offsets, transOffset, self.get_facecolor(), self.get_edgecolor(), >> File "/usr/lib64/python2.7/site-packages/matplotlib/collections.py", line >> 551, in get_edgecolor >> return self._edgecolors >> KeyboardInterrupt >> ^CError in atexit._run_exitfuncs: >> Traceback (most recent call last): >> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs >> func(*targs, **kargs) >> File 
"/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", >> line 90, in destroy_all >> gc.collect() >> KeyboardInterrupt >> Error in sys.exitfunc: >> Traceback (most recent call last): >> File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs >> func(*targs, **kargs) >> File "/usr/lib64/python2.7/site-packages/matplotlib/_pylab_helpers.py", >> line 90, in destroy_all >> gc.collect() >> KeyboardInterrupt >> >> ^C >> >> >> Clues what is the code doing? I use mpl-1.3.0. >> Thank you, >> Martin >> >> ------------------------------------------------------------------------------ >> October Webinars: Code for Performance >> Free Intel webinars can help you accelerate application performance. >> Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from >> the latest Intel processors and coprocessors. See abstracts and register > >> http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk >> _______________________________________________ >> Matplotlib-users mailing list >> Matplotlib-users@lists.sourceforge.net >> https://lists.sourceforge.net/lists/listinfo/matplotlib-users > > -- Martin Mokrejs, Ph.D. Bioinformatics Donovalska 1658 149 00 Prague Czech Republic http://www.iresite.org http://www.iresite.org/~mmokrejs ------------------------------------------------------------------------------ October Webinars: Code for Performance Free Intel webinars can help you accelerate application performance. Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from the latest Intel processors and coprocessors. See abstracts and register > http://pubads.g.doubleclick.net/gampad/clk?id=60134071&iu=/4140/ostg.clktrk _______________________________________________ Matplotlib-users mailing list Matplotlib-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/matplotlib-users