|
The previous version of the code I posted was pretty badly broken -- a new version is attached.
I've fixed up some of the worst bugs in the last version of pigtables/easygrid -- the attached code should now work well enough to demonstrate the concept, at least. Usage: python pigtables -f filename.hd5 John |
""" An easy to use PyGTK grid that performs well with very large datasets.
Supports sorting and filtering of data. The gtk treeview is a very powerful widget, but has serious performance issues as the number of rows in the store grows. Sorted and Filtered stores can cause significant bottlenecks. For very large datasets, just loading the data into the store can be a problem. In some cases you might have very large tables that a user might want to browse. For instance, you might be using PyTables: http://pytables.sourceforge.net/html/WelcomePage.html Whilst you can create your own custom store to avoid having to read the data into memory, as soon as you attach this store to a view the view sets up a gtkrbtree for as many elements as your store says it has :( EasyGrid works around this by using a small ListStore as a buffer between the view and your actual data. The View is embedded in a ScrolledWindow, with the vertical scrollbar hidden. A separate vertical scrollbar is used to show the actual position in your data. There is lots of hacking to intercept various events and make the whole thing just about work. TODO: 1. Tidy things up, remove debugging debris 2. Get the sorting working, support reverse. 3. Get filtering working nicely. 4. Get the API for data objects worked out so users don't have to provide any more sorting or filtering code than absolutely necessary. 5. Make auto_setup much smarter, allow it to be fed formatting hints. 6. Support a data object where the data is a list of dictionaries. 7. Have an automatic option for the vertical scrollbar. 8. Anything else I can think of. """ import math from sets import Set import gtk import gobject class SortableFilterableData(object): def __init__(self, data): self.data = data self.order = None self.sort_order = None self.filter = None self.column_lookup = {} def __len__(self): if self.filter is not None: return len(self.filter) return len(self.data) def __getattr__(self, name): getattr(self.data, name) def __getitem__(self, n): """ Return n'th item from the sorted data. 
""" if self.order is not None: n = self.order[n] return self.data[n] def sort_on_column(self, col): """ Set up data to sort on column. """ col = self.column_lookup.get(col, col) data = self.data sort_data = zip([data[row][col] for row in xrange(len(data))], range(len(self.data))) sort_data.sort() self.sort_order = [x[1] for x in sort_data] self._set_order() def reverse(self): """ Reverse current sort. """ self.sort_order.reverse() self._set_order() def remove_sort(self): """ Remove current sort. """ self.sort_order = None self._set_order() def _set_order(self): """ Internal helper method to set order. """ if self.filter is None: self.order = self.sort_order else: if self.sort_order is not None: visible = Set(self.filter) self.order = [x for x in self.sort_order if x in visible] else: self.order = self.filter def apply_filters(self, filters): if len(filters) == 0: self.filter = None self._set_order() return data = self.data fields = self.column_lookup self.filter = [] for ix in xrange(len(data)): good = True for col, values in filters.iteritems(): if data[ix][fields.get(col, col)] not in values: good = False break if good: self.filter.append(ix) self._set_order() from numarray.random_array import randint class RandomData(SortableFilterableData): def __init__(self, n=2000): data = (zip(range(n), list(randint(n/10, shape=n)), list(randint(n/10, shape=n)), range(n))) SortableFilterableData.__init__(self, data) class EasyGrid(gtk.Frame): MAX_TABLE_SIZE = 2000 def __init__(self): """ Build the control. We put the treeview in a scrolled window. However, we need to provide our own vertical scroll-bar, so we hide the one from the scrolled window and insert our own. 
""" gtk.Frame.__init__(self) # Create a box to hold view and scrollbar hbox = gtk.HBox() self.add(hbox) # Create scrolled window and add it to the box self.scroll = gtk.ScrolledWindow() self.scroll.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC) self.scroll.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_NEVER) hbox.pack_start(self.scroll) self.old_vadj = self.scroll.get_vadjustment() self.old_vadj.connect('value_changed', self.old_vadj_value_changed) # Create tree-view and attach it to the scrolled window self.tree = gtk.TreeView() self.scroll.add(self.tree) # Catch move cursor events self.tree.connect('move_cursor', self.on_move_cursor) self.tree.connect('size_allocate', self.on_size_allocate) self.tree.connect('cursor_changed', self.on_cursor_changed) self.tree.connect('button_press_event', self.on_button_press_event) self.tree.connect('key_press_event', self.on_key_press_event) self.set_rules_hint(True) self.filters = {} self.current_sort = None, None self.first_table_row = 0 self.model = None # Create vertical scrollbar sb = gtk.VScrollbar() hbox.pack_start(sb, expand=False) adj = sb.get_adjustment() self.adj = adj self.adj.connect('value_changed', self.on_adj_value_changed) def __getattr__(self, name): """ Delegate anything we can't handle to the treeview. """ return getattr(self.tree, name) def attach_data(self, data): """ Attach data to the view. """ self.data = data print 'Data length stuff', type(data), len(data) print 'Data length stuff', EasyGrid.MAX_TABLE_SIZE self.table_size = min(EasyGrid.MAX_TABLE_SIZE, len(data)) self.load_store(0) self.ignore_sb_adjustment = False self.new_cursor_pos = None #self.set_cursor(0) # Wonder how this is done for normal treeviews? self.adj.set_property('upper', len(self.data)) self.adj.set_property('lower', 0.0) self.adj.set_property('step_increment', 1.0) def set_store(self, store): """ Attach the store to the view. 
""" self.model = store self.set_model(store) def auto_setup(self, data, colnames=None): """ Automagically set up store and view from data. """ sample = data[0] types = [type(x) for x in sample] if colnames is None: colnames = [str(x) for x in range(1, len(sample) + 1)] # Remove existing columns for col in self.get_columns(): self.remove_column(col) # Create the store self.model = gtk.ListStore(*types) # Set up columns self.column_lookup = {} for mix, ctype in enumerate(types): r = gtk.CellRendererText() column = gtk.TreeViewColumn(colnames[mix], r, text=mix) self.append_column(column) if hasattr(data, 'sort_on_column'): column.set_clickable(True) column.connect('clicked', data.sort_on_column, mix) self.column_lookup[column] = mix self.attach_data(data) # Attach model to the tree self.set_model(self.model) def load_store(self, pos): self.first_table_row = pos lastpos = min(pos+self.table_size, len(self.data)) print 'buffer reload', pos, lastpos if len(self.model) == 0: for x in xrange(int(pos), int(lastpos)): self.model.append(self.data[x]) else: for row, x in enumerate(xrange(int(pos), int(lastpos))): self.model[row] = self.data[x] def on_key_press_event(self, widget, event): if widget.is_focus(): return False key_lookup = {} #for name in ['Page_Up', 'Page_Down', 'Home', 'End', 'Up', 'Down']: for name in ['Page_Up', 'Page_Down', 'Home', 'End']: key_lookup[gtk.gdk.keyval_from_name(name)] = name name = key_lookup.get(event.keyval) if name is None: return False top, bottom, nrows = self.get_pagesize() offsets = { 'Page_Up': 2 - nrows, 'Page_Down': nrows -2, 'Home': -len(self.data), 'End': len(self.data), 'Up': -1, 'Down': 1, } sb_value = self.adj.get_value() new_sb_value = sb_value+offsets[name] upper = self.adj.get_property('upper') ps = self.adj.get_property('page_size') new_sb_value = max(min(new_sb_value, upper-ps), 0) self.adj.set_value(new_sb_value) return True def on_move_cursor(self, tree, movetype, direction): """ Call-back for keys which move the cursor in 
the view. """ top, bottom, nrows = self.get_pagesize() sb_value = self.adj.get_value() path = tree.get_cursor()[0] if path is None: return False current_tr = path[0] # See what type of move this is. if movetype == gtk.MOVEMENT_DISPLAY_LINES: offset = direction elif movetype == gtk.MOVEMENT_PAGES: offset = (nrows-2) * direction elif movetype == gtk.MOVEMENT_BUFFER_ENDS: if direction < 0: offset = -(self.first_table_row + current_tr) else: offset = len(self.data) - (self.first_table_row + current_tr) else: # If we can't tell it is probably a horizontal movement return False # Work out new row (in the store) that we should show. new_tr = current_tr + offset self.new_cursor_pos = self.first_table_row + new_tr return False def get_pagesize(self): """ Return to and bottom rows in view and nrows. """ if self.model is None: return 0,0,0 r = self.get_visible_rect() x, y = self.tree_to_widget_coords(r.x, r.y) top = self.get_path_at_pos(x, y) x, y = self.tree_to_widget_coords(r.x, r.y + r.height) bottom = self.get_path_at_pos(x, y-1) if top is not None: top = top[0][0] else: top = self.first_table_row if bottom is not None: bottom = bottom[0][0] else: # bottom of view is beyond the store bottom = self.first_table_row + len(self.model) nrows = 1 + bottom - top return top, bottom, nrows def on_size_allocate(self, *args): if self.tree.window is None: return 0,0,0 print 'SIZE ALLOCATE', top, bottom, nrows = self.get_pagesize() # Any time we learn the page size, set the scrollbar paging size # FIXME -- should do this on some sort of widget resize event self.adj.set_property('page_increment', max(nrows-2, 1)) self.adj.set_property('page_size', nrows) print 'MY SB', self.adj.get_value(), self.adj.get_property('upper'), print self.adj.get_property('page_size') print 'ORIG SB', self.old_vadj.get_value(), self.old_vadj.get_property('upper'), print self.old_vadj.get_property('page_size') def on_cursor_changed(self, *args): """ Debugging aid to see when the tree is getting cursor events. 
""" print 'ON_CURSOR_CHANGED' print self.get_pagesize() cursor = self.tree.get_cursor()[0] if cursor is None: return False print 'CURSOR', cursor cursor = cursor[0] print 'old/new cursor', cursor, self.new_cursor_pos if self.new_cursor_pos is None: return False top, bottom, nrows = self.get_pagesize() cursor_pos = self.first_table_row + cursor ncp = self.new_cursor_pos ncp = min(ncp, len(self.data)-1) self.new_cursor_pos = None if cursor_pos == ncp: # Nothing to do # set scrollbar value self.ignore_sb_adjustment = True self.adj.set_value(self.first_table_row+top) return False # fault -- need a new buffer of data if ncp < cursor_pos: # want ncp, nrows above bottom of table => ncp-ftr == ts-nrows ftr = max(0, ncp+nrows-self.table_size) else: # want ncp at the top, if possible ftr = min(ncp, len(self.data)-self.table_size) self.load_store(ftr) # Make sure target row is visible path = int(ncp-ftr) path = min(path, len(self.model)-1) path = max(path, 0) print 'making visible', path self.ignore_sb_adjustment = True print 'setting sb' self.adj.set_value(self.first_table_row+top) print 'adding idle kludge' gobject.idle_add(self.cursor_kludge, path) return True def cursor_kludge(self, path): print 'making visible kludge', path self.new_cursor_pos = path + self.first_table_row self.set_cursor(path) self.scroll_to_cell(path) def old_vadj_value_changed(self, adj): print 'OLD VADJ CHANGED', adj.get_value() def on_adj_value_changed(self, adj): """ Call-back for the adjustment for the vertical scrollbar. """ # Check in case we've been told to ignore this one. self.new_cursor_pos = adj.get_value() if not self.ignore_sb_adjustment: self.sync_view_with_scrollbar() # Unset the ignore flag. self.ignore_sb_adjustment = False return True def sync_view_with_scrollbar(self): """ Called when vertical scrollbar value has changed. If we are lucky, current pos is already visible. Otherwise we need to re-load the store and/or adjust our view into that store. 
""" # Extract some clues as to where we are print 'scrollabar', self.adj.get_value(), self.adj.get_property('page_size') ftr = self.first_table_row top, bottom, nrows = self.get_pagesize() pos = self.adj.get_value() last_visible_pos = pos + nrows-1 # Don't move beyond the end of the data end_of_data = False pos = min(pos, len(self.data) - 1) if last_visible_pos >= (len(self.data) - 1): last_visible_pos = len(self.data)-1 end_of_data = True print 'scrolling to', pos, last_visible_pos # Is the current pos already visible? If so nothing to do. #if pos == (ftr + top): # return if pos < ftr or last_visible_pos >= (ftr + self.table_size): # Need a new buffer of data if pos < ftr: print 'pos < ftr', pos, ftr # want pos, nrows above bottom of table => pos-ftr == ts-nrows ftr = max(0, pos+nrows-self.table_size) else: # want pos at the top, if possible print 'lvp >= ftr + ts', last_visible_pos, ftr, self.table_size ftr = min(pos, len(self.data)-self.table_size) self.load_store(ftr) # Make sure target row is visible path = int(pos-ftr) path = min(path, len(self.model)-1) #self.set_cursor(path) #self.scroll_to_cell(path) upper = self.old_vadj.get_property('upper') page_size = self.old_vadj.get_property('page_size') if end_of_data: sb_value = upper - page_size else: sb_value = path * upper / len(self.model) print 'making visible', path, pos, ftr, sb_value, self.old_vadj.get_value(), page_size, upper unit = upper / len(self.model) print 'sb stats', upper / unit, page_size / unit self.old_vadj.set_value(sb_value) return def sort_on_column(self, col, column): """ Sort data on column """ self.set_model(None) current_col, sort_type = self.current_sort if current_col != col: sort_type = None if current_col is not None: current_col.set_sort_indicator(False) print sort_type if sort_type is None: self.data.sort_on_column(column) self.current_sort = col, gtk.SORT_ASCENDING elif sort_type == gtk.SORT_ASCENDING: self.data.reverse() self.current_sort = col, gtk.SORT_DESCENDING else: print 
'removing sort' self.data.remove_sort() self.current_sort = None, None col.set_sort_indicator(False) self.load_store(self.first_table_row) self.set_model(self.model) self.set_sort_indicator() def set_sort_indicator(self): col, sort_type = self.current_sort if sort_type is not None: col.set_sort_indicator(True) col.set_sort_order(sort_type) def on_button_press_event(self, view, event): x = int(event.x) y = int(event.y) time = event.time current = view.get_path_at_pos(x, y) button = event.button # If we are pressed with control modifier treat as button 2 if event.state & gtk.gdk.CONTROL_MASK: button = 2 if current is None: if button == 3: # Guess that the grid is filtered and offer to show the # user the unfiltered version. menu = gtk.Menu() item = gtk.MenuItem('Show all') item.connect('activate', self.show_all) menu.append(item) menu.show_all() menu.popup(None, None, None, event.button, event.time) elif button == 2: self.show_all() return 0 path, col, cellx, celly = current if button == 1: row = self.model.get_iter(path) row = self.row_to_dict(row) if self._selection_callback is not None: self._selection_callback(row) elif button in [2, 3]: # Filter on current cell contents self.apply_filter_for_column_and_path(col, path) return 0 def apply_filter_for_column_and_path(self, col, path): self.grab_focus() self.set_cursor(path, col, 0) row = self.model.get_iter(path) name = col.get_title() column = self.column_lookup[col] value = self.data[path[0] + self.first_table_row] filters = [value[column]] self.set_model(None) self.model.clear() current_filters = self.filters.get(column, []) if current_filters == filters: del self.filters[column] else: self.filters[column] = filters self.data.apply_filters(self.filters) self.attach_data(self.data) self.set_model(self.model) def show_all(self, evt=None): self.set_model(None) self.model.clear() self.filters = {} self.data.apply_filters(self.filters) self.attach_data(self.data) self.set_model(self.model) if __name__ == '__main__': 
import sys from optparse import OptionParser parser = OptionParser() parser.add_option('-n', type='int', default=2000, dest='n', help='Size of data to use.') parser.add_option('-r', action='store_true', dest='random', default=False) options, args = parser.parse_args() w = gtk.Window() w.connect('destroy', gtk.main_quit) vbox = gtk.VBox(homogeneous=True) w.add(vbox) if options.random: view = EasyGrid() vbox.pack_start(view) view.auto_setup(RandomData(options.n)) view = EasyGrid() vbox.pack_start(view) view.auto_setup(Data(options.n)) w.show_all() gtk.main()
import gtk
import gobject
import tables
from easygrid import EasyGrid, SortableFilterableData
class TableView(gtk.Frame):
def __init__(self, small):
gtk.Frame.__init__(self)
self.grid = EasyGrid()
self.small = small
self.add(self.grid)
def load_hd5file(self, filename, table):
table = self.get_table(filename, table)
print 'Length of table is', len(table)
def load_table(self, table):
print table.colnames
print len(table)
if len(table) <= self.small:
data = SortableFilterableData(table)
else:
data = table
self.grid.auto_setup(data, table.colnames)
#self.grid.load_data(data)
def set_formats_for_table(self, table):
formats = []
coltypes = table.coltypes
print coltypes
for colname in table.colnames:
thetype = coltypes[colname]
try:
typename = thetype.name
except:
typename = 'unknown'
if typename.startswith('Float'):
typename = 'float'
elif typename.startswith('UInt') or typename.startswith('Int'):
typename = 'integer'
else:
typename = 'string'
formats.append([colname, colname, typename, {'width': 100}])
print 'xxxx', type(table.colnames)
#self.grid.store.set_column_lookup(table.colnames)
self.grid.set_column_formats(formats)
def get_table(self, filename, table):
hd5 = tables.openFile(filename)
path = table.split('/')
current = hd5.root
for item in path:
current = getattr(current, item)
return current
class Hdf5View(gtk.TreeView):
    """ Tree view listing the nodes of an HDF5 file by name.

    `table_lookup` maps the tree path of every Table row to its node so
    that an activated row can be turned back into a table.
    """

    def __init__(self, filename):
        gtk.TreeView.__init__(self)
        self.store = gtk.TreeStore(gobject.TYPE_STRING)
        # Use the argument; the posted code read the module-level
        # `options.filename`, which only exists when run as a script.
        self.load_tables(filename)
        renderer = gtk.CellRendererText()
        column = gtk.TreeViewColumn('Table', renderer, text=0)
        self.append_column(column)
        self.set_model(self.store)
        self.show_all()

    def load_tables(self, filename):
        """ Open `filename` and add every node under its root to the store. """
        self.table_lookup = {}
        h = tables.openFile(filename)
        for node in h.root:
            self.load_node(None, node)

    def load_node(self, parent, node):
        """ Add `node` under store row `parent`, then recurse into children. """
        if isinstance(node, tables.Table):
            is_table = True
            name = node.name
            subnodes = []
        else:
            is_table = False
            name = node._v_name
            # Only nodes with a `_v_children` mapping are descended into;
            # any other non-Table node is listed without children.
            children = getattr(node, '_v_children', None)
            if children is None:
                subnodes = []
            else:
                subnodes = list(children.keys())
                subnodes.sort()

        child = self.store.insert_before(parent, None, [name])
        if is_table:
            path = self.store.get_path(child)
            self.table_lookup[path] = node

        for subnode in subnodes:
            the_node = getattr(node, subnode)
            self.load_node(child, the_node)
class PigTables(gtk.Window):
def __init__(self, options):
gtk.Window.__init__(self)
self.split = gtk.HPaned()
self.add(self.split)
self.tree = Hdf5View(options.filename)
self.view = TableView(options.small)
scroll = gtk.ScrolledWindow()
scroll.add(self.tree)
self.split.add1(scroll)
self.split.add2(self.view)
self.split.set_position(300)
self.tree.connect('row_activated', self.on_row_activate)
def on_row_activate(self, *args):
print args
view, path, column = args
table = self.tree.table_lookup.get(path)
if table is not None:
self.view.load_table(table)
if __name__ == '__main__':
    # Script entry point: parse options, build the window, run the GTK loop.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        '--small',
        dest='small',
        type='int',
        default=20000,
        help='Max rows for a table to be considered small enough to sort')
    parser.add_option(
        '-f',
        dest='filename',
        default='data.hd5')

    # `options` is deliberately left at module level.
    options, args = parser.parse_args()

    w = PigTables(options)
    w.connect('destroy', gtk.main_quit)
    w.show_all()
    gtk.main()
_______________________________________________ pygtk mailing list [email protected] http://www.daa.com.au/mailman/listinfo/pygtk Read the PyGTK FAQ: http://www.async.com.br/faq/pygtk/
