Hi!
I'm using matplotlib and pygtk to write a viewer for multiple sequence
alignments (MSAs): big matrices (~100x1000) of characters, usually colored
by cell according to chemical properties. Now I've run into trouble since
the rendering of the actual characters is very slow. I have included a
small example that illustrates this: toggle SHOW_LETTERS between True and
False to compare rendering speed. The difference is even greater when using
real data, so I would very much appreciate suggestions as to how I could
improve it.
My strategy:
I create an image with the same dimensions as the MSA, in which each
pixel is colored according to properties of the corresponding character
in the MSA. I plot this in the background of the figure using imshow().
At each redraw (refresh, resize, zoom...) I determine the size that each
image pixel occupies at the current canvas size, and pick a font where
one letter will fit inside this square. Then I put the corresponding msa
letter into this square using text()/add_artist().
Would it be possible to use only one Text instance that would hold all
letters, and somehow stretch that to the desired size? If so, could I
expect better performance?
Does anyone have a better idea?
Thanks for an awesome plotting package!
/Joel Hedlund
# Toggle this to compare rendering speed: when True, replot() attaches one
# Text artist per alignment character to the axes; when False, the Text
# objects are still created but never attached.
SHOW_LETTERS = False
import matplotlib
matplotlib.use('GTKAgg')
import pylab
from numpy import array, uint8
# RGB triples (uint8) used to colour alignment cells.
RED = array([229, 51, 25], uint8)
BLUE = array([25, 127, 229], uint8)
GREEN = array([25, 204, 25], uint8)
CYAN = array([25, 178, 178], uint8)
PINK = array([229, 127, 127], uint8)
MAGENTA = array([204, 76, 204], uint8)
YELLOW = array([204, 204, 0], uint8)
ORANGE = array([229, 153, 76], uint8)
WHITE = array([255, 255, 255], uint8)

# Residue letter -> cell colour.
colors = {
    'G': ORANGE,
    'P': YELLOW,
    'T': GREEN,
    'S': GREEN,
    'N': GREEN,
    'Q': GREEN,
    'W': BLUE,
    'L': BLUE,
    'V': BLUE,
    'I': BLUE,
    'M': BLUE,
    'A': BLUE,
    'F': BLUE,
    'C': BLUE,
    'H': CYAN,
    'Y': CYAN,
    'E': MAGENTA,
    'D': MAGENTA,
    'K': RED,
    'R': RED,
}

# The example alignment: one string per sequence, '-' marking gaps.
msa = """\
KYFALQ-----------------------------------------------------------
KYFALQPDDVYYCGIKYIKDDVILNEPSAD---APAALY--QTIEENIKIFEEEEVEFISVPVPE
KYFALQPDDVYYCGIKYIKDDVLLNEPSAD---APAARY--QTIEENIKIFEEDEVEFISVPVPE
KYFAFQPDDVYYCGIKYIKDDVVLNEPSAD---APASRY--QTIEENIKIFEEDEVEFISVPVPE
KYFAFQQDDVYYCGIKYIKDDLILNEPSAN---APAARF--QTIEENIKILEEDNVEFISVPVPE
KYFAMQ-RGVYFCGIRYLEDDAALTEPYAE----TPVRY--HSFRESIQILEEEEVELINVPVPE
KYFAMQ-RGVYFCGIRYLEDDAALTEPYAE----TPVRY--HSFKESIQILEEEDVELINVPVPE
KYFAMQ-RGVYFCGIRYLEDDAALTEPYAE----TPVRY--HSFKESIQILEEEDVELINVPVPE
KYFAFQ-GGVYFCGIKYIEDGLSLPESGAE---AQSARY--HTIEQNIQILEEEDVEFISVPVPE
KYFAFQQGGVYFCGIKYIEDGLSLPESGAQ---LKSARY--HTIEQNIQILEEEDVEFISVPVPE
RYYVLERDQVFICGLKYYEEDYELNEEVDP-EIGAP----LRLIEENVSFFEDDEVELISVPVPE
HHYY------------NPEVSYQHHGGWED-EVELPNQFPLRQMEERIRVLEREQVELISVPVPE
RYYFIEEERMYFCGVNYREENYKMQDSQEDTDMDLPAAY--KRIEEKVRVLEDEGVELINIPVPK
""".splitlines(False)

# Background image: one RGB pixel per alignment character. Characters that
# have no entry in `colors` (e.g. the gap character) fall back to WHITE.
# The pixels are collected row by row and then folded into a
# (sequences, columns, 3) array; this requires every sequence to be as long
# as the first one, otherwise reshape raises ValueError.
bg = array([colors.get(residue, WHITE)
            for sequence in msa
            for residue in sequence])
bg = bg.reshape(len(msa), len(msa[0]), 3)
class CharSize(object):
    """Size of one character together with the font it belongs to.

    Attributes are stored exactly as passed to the constructor:
    ``width`` and ``height`` are the character's extents and ``fontprops``
    is the font description they apply to.
    """

    def __init__(self, width=None, height=None, fontprops=None):
        self.width = width
        self.height = height
        self.fontprops = fontprops

    def __lt__(self, other):
        """Return True if the character fits inside half of a cell.

        ``other`` is an ``(x, y)`` tuple giving the available cell size.
        The character "fits" only when its width is below half of ``x``
        and its height is below half of ``y``.

        Raises:
            TypeError: if ``other`` is not a tuple.
        """
        if not isinstance(other, tuple):
            raise TypeError("can only be compared to (x, y) tuple")
        return (self.width < other[0] * 0.5) and (self.height < other[1] * 0.5)
class FontSelector(object):
    """Choose a font whose characters fit into a cell of a given size.

    ``charsizes`` is a list of CharSize-like entries; by_cell_size() scans
    it in order and returns the font of the first entry that fits.
    """

    def __init__(self, charsizes=None):
        # A fresh list per instance avoids sharing one mutable default.
        if charsizes is None:
            charsizes = []
        self.charsizes = charsizes

    @classmethod
    def from_sizes(cls, renderer, *fontsizes, **fontprops):
        """Build a selector by measuring the letter 'M' at several sizes.

        Args:
            renderer: object providing ``get_text_width_height(text,
                fontproperties, ismath)``, used to measure each font.
            *fontsizes: arguments forwarded unchanged to ``range()``, so
                the upper bound is exclusive.
            **fontprops: extra keyword arguments for
                ``matplotlib.font_manager.FontProperties``; any ``size``
                entry is overwritten for each measured size.

        Returns:
            A new selector whose entries are ordered from the largest font
            size to the smallest.

        Raises:
            TypeError: if no font size arguments are given.
        """
        if not fontsizes:
            raise TypeError("need a font size range")
        selector = cls()
        # Largest first, so by_cell_size() finds the biggest font that fits.
        for size in sorted(range(*fontsizes), reverse=True):
            fontprops['size'] = size
            props = matplotlib.font_manager.FontProperties(**fontprops)
            # NOTE(review): some matplotlib versions expose
            # get_text_width_height_descent() instead of this method --
            # confirm against the installed version.
            width, height = renderer.get_text_width_height('M', props, False)
            selector.charsizes.append(CharSize(width, height, props))
        return selector

    def by_cell_size(self, width, height):
        """Return the font of the first stored entry that fits the cell.

        Entries are compared with ``entry < (width, height)``. Returns
        None when no entry fits (or when there are no entries).
        """
        available = (width, height)
        for charsize in self.charsizes:
            if charsize < available:
                return charsize.fontprops
        return None
# Create the figure and fetch its canvas renderer; the renderer is what
# FontSelector.from_sizes() uses to measure text.
figure = pylab.figure()
renderer = figure.canvas.get_renderer()
# Measure monospace fonts for every size in range(4, 18).
font_selector = FontSelector.from_sizes(renderer, 4, 18, family='monospace')
def replot(widget):
    """Redraw the alignment: coloured background plus one letter per cell.

    Clears the current axes, draws ``bg`` with imshow(), then picks a font
    that fits the cell size implied by the current canvas size and creates
    one Text object per alignment character.

    Args:
        widget: the argument supplied by the 'resize_event' callback;
            it is not used.
    """
    pylab.cla()
    pylab.imshow(bg, origin='lower', interpolation='nearest', aspect='auto')
    pylab.ylim(len(msa), 0)

    # Canvas size divided by the alignment dimensions gives the space
    # available to a single character.
    canvas_width, canvas_height = figure.canvas.get_width_height()
    cell_width = canvas_width / float(len(msa[0]))
    cell_height = canvas_height / float(len(msa))

    font = font_selector.by_cell_size(cell_width, cell_height)
    if not font:
        # No measured font fits the cells: show the background only.
        return

    # Look the axes up once instead of calling pylab.axes() for every
    # single character inside the loop.
    ax = pylab.gca()
    for row, sequence in enumerate(msa):
        for col, char in enumerate(sequence):
            # NOTE(review): the +0.5 offsets assume image cell (col, row)
            # spans [col, col + 1] x [row, row + 1] -- confirm against the
            # default image extent of the installed matplotlib.
            label = matplotlib.text.Text(x=col + 0.5,
                                         y=row + 0.5,
                                         text=char,
                                         fontproperties=font,
                                         horizontalalignment='center',
                                         verticalalignment='center')
            # The Text objects are built regardless of SHOW_LETTERS; only
            # attaching them to the axes is switched by the flag.
            if SHOW_LETTERS:
                ax.add_artist(label)
# Call replot() on every 'resize_event', then show the figure.
pylab.connect('resize_event', replot)
pylab.show()
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier.
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Matplotlib-users mailing list
Matplotlib-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/matplotlib-users