The previous version of the code I posted was pretty badly broken -- new version attached.

I've fixed up some of the worst bugs in the last version of pigtables/easygrid -- attached code should now work well enough to demonstrate the concept at least.

Usage:

python pigtables -f filename.hd5

John

"""
An easy to use PyGTK grid that performs well with very large datasets.

Supports sorting and filtering of data.

The gtk treeview is a very powerful widget, but has serious performance issues
as the number of rows in the store grows.  Sorted and Filtered stores can cause
significant bottlenecks.

For very large datasets, just loading the data into the store can be a problem.

In some cases you might have very large tables that a user might want
to browse.  For instance, you might be using PyTables:

http://pytables.sourceforge.net/html/WelcomePage.html

Whilst you can create your own custom store to avoid having to read
the data into memory, as soon as you attach this store to a view the
view sets up a gtkrbtree for as many elements as your store says it has :(

EasyGrid works around this by using a small ListStore as a buffer between the
view and your actual data.  The View is embedded in a ScrolledWindow, with the
vertical scrollbar hidden.  A separate vertical scrollbar is used to show the actual
position in your data.

There is lots of hacking to intercept various events and make the
whole thing just about work.


TODO:
1. Tidy things up, remove debugging debris
2. Get the sorting working, support reverse.
3. Get filtering working nicely.
4. Get the API for data objects worked out so users don't have to provide any more
   sorting or filtering code than absolutely necessary.
5. Make auto_setup much smarter, allow it to be fed formatting hints.
6. Support a data object where the data is a list of dictionaries.
7. Have an automatic option for the vertical scrollbar.
8. Anything else I can think of.
"""


import math
from sets import Set

import gtk
import gobject

class SortableFilterableData(object):
    """ Wrap an indexable container of rows with sort and filter views.

    The wrapped container is never modified.  Sorting and filtering only
    build lists of row indices; ``__getitem__`` maps a visible position
    through the active index list to a row of the wrapped data.

    Attributes:
        data          -- the wrapped container (needs len() and integer
                         indexing, each row indexable by column)
        sort_order    -- all row indices in sorted order, or None
        filter        -- indices of rows that pass the filters, or None
        order         -- index list used by __getitem__, or None for the
                         natural order
        column_lookup -- optional mapping from a caller-side column key to
                         the index/key used inside each row
    """

    def __init__(self, data):

        self.data = data
        self.order = None
        self.sort_order = None
        self.filter = None
        self.column_lookup = {}

    def __len__(self):
        """ Return the number of visible (i.e. unfiltered) rows. """
        if self.filter is not None:
            return len(self.filter)

        return len(self.data)

    def __getattr__(self, name):
        """ Delegate unknown attributes to the wrapped data. """
        # Only called when normal lookup fails.  If 'data' itself is missing
        # (e.g. during unpickling) looking it up again would recurse forever.
        if name == 'data':
            raise AttributeError(name)

        return getattr(self.data, name)

    def __getitem__(self, n):
        """ Return n'th item from the sorted data. """
        if self.order is not None:
            n = self.order[n]

        return self.data[n]

    def sort_on_column(self, col):
        """ Set up data to sort on column. """
        col = self.column_lookup.get(col, col)

        data = self.data

        # The sort is stable, so rows with equal values keep their original
        # relative order -- the same result as sorting (value, index) pairs.
        self.sort_order = sorted(range(len(data)),
                                 key=lambda row: data[row][col])

        self._set_order()

    def reverse(self):
        """ Reverse current sort.  Does nothing when no sort is active. """
        if self.sort_order is None:
            return

        self.sort_order.reverse()
        self._set_order()

    def remove_sort(self):
        """ Remove current sort. """
        self.sort_order = None
        self._set_order()

    def _set_order(self):
        """ Internal helper method to set order. """
        if self.filter is None:
            self.order = self.sort_order
        elif self.sort_order is None:
            self.order = self.filter
        else:
            # Keep the sorted sequence, dropping rows the filter hides.
            visible = set(self.filter)
            self.order = [x for x in self.sort_order if x in visible]

    def apply_filters(self, filters):
        """ Restrict the visible rows.

        filters maps a column to a container of accepted values; a row is
        kept only if it matches every entry.  An empty (or None) filters
        removes all filtering.
        """
        if not filters:
            self.filter = None
            self._set_order()
            return

        data = self.data
        fields = self.column_lookup

        # Resolve the column keys once rather than once per row.
        resolved = [(fields.get(col, col), values)
                    for col, values in filters.items()]

        self.filter = []
        for ix in range(len(data)):
            row = data[ix]
            for col, values in resolved:
                if row[col] not in values:
                    break
            else:
                self.filter.append(ix)

        self._set_order()

from numarray.random_array import randint

class RandomData(SortableFilterableData):
    """ Demo data set: n four-column rows for exercising the grid.

    Columns one and four both hold the row number; columns two and three
    are filled from numarray's randint called with n/10 and shape=n.
    """

    def __init__(self, n=2000):

        index_column = range(n)
        first_random = list(randint(n/10, shape=n))
        second_random = list(randint(n/10, shape=n))
        trailing_column = range(n)

        rows = zip(index_column, first_random, second_random, trailing_column)
        SortableFilterableData.__init__(self, rows)

    
class EasyGrid(gtk.Frame):

    MAX_TABLE_SIZE = 2000
    def __init__(self):
        """ Build the control.

        We put the treeview in a scrolled window.  However, we
        need to provide our own vertical scroll-bar, so we hide
        the one from the scrolled window and insert our own.
        """

        gtk.Frame.__init__(self)

        # State read by the signal handlers.  It is set up before any
        # handler is connected so that a signal arriving before data is
        # attached never falls through __getattr__ to the treeview.
        self.filters = {}
        self.current_sort = None, None
        self.first_table_row = 0
        self.model = None
        self.data = None
        self.table_size = 0
        self.column_lookup = {}
        self.ignore_sb_adjustment = False
        self.new_cursor_pos = None
        self._selection_callback = None

        # Create a box to hold view and scrollbar
        hbox = gtk.HBox()
        self.add(hbox)

        # Create scrolled window and add it to the box.  The built-in
        # vertical scrollbar is never shown; ours replaces it.
        self.scroll = gtk.ScrolledWindow()
        self.scroll.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_NEVER)
        hbox.pack_start(self.scroll)
        self.old_vadj = self.scroll.get_vadjustment()
        self.old_vadj.connect('value_changed', self.old_vadj_value_changed)

        # Create tree-view and attach it to the scrolled window
        self.tree = gtk.TreeView()
        self.scroll.add(self.tree)

        # Catch move cursor events
        self.tree.connect('move_cursor', self.on_move_cursor)
        self.tree.connect('size_allocate', self.on_size_allocate)
        self.tree.connect('cursor_changed', self.on_cursor_changed)
        self.tree.connect('button_press_event', self.on_button_press_event)
        self.tree.connect('key_press_event', self.on_key_press_event)

        # Delegated to the treeview through __getattr__.
        self.set_rules_hint(True)

        # Create vertical scrollbar
        sb = gtk.VScrollbar()
        hbox.pack_start(sb, expand=False)

        # This adjustment tracks the position in the full data set, not in
        # the small buffer store.
        adj = sb.get_adjustment()
        self.adj = adj
        self.adj.connect('value_changed', self.on_adj_value_changed)

    def __getattr__(self, name):
        """ Delegate anything we can't handle to the treeview. """
        # __getattr__ only runs when normal lookup fails.  If 'tree' itself
        # has not been set yet, asking for self.tree would re-enter this
        # method forever, so report the attribute as missing instead.
        if name == 'tree':
            raise AttributeError(name)
        return getattr(self.tree, name)

    def attach_data(self, data):
        """ Attach data to the view. """
        self.data = data
        print 'Data length stuff', type(data), len(data)
        print 'Data length stuff', EasyGrid.MAX_TABLE_SIZE
        self.table_size = min(EasyGrid.MAX_TABLE_SIZE, len(data))

        self.load_store(0)
        
        self.ignore_sb_adjustment = False
        self.new_cursor_pos = None
        #self.set_cursor(0)

        # Wonder how this is done for normal treeviews?
        self.adj.set_property('upper', len(self.data))
        self.adj.set_property('lower', 0.0)
        self.adj.set_property('step_increment', 1.0)

    def set_store(self, store):
        """ Remember store as our buffer model and show it in the treeview. """
        self.model = store
        # The frame has no set_model of its own; this is the treeview's.
        self.tree.set_model(store)

    def auto_setup(self, data, colnames=None):
        """ Automagically set up store and view from data.

        data     -- indexable rows; the first row is sampled to work out
                    the column types, so data must not be empty.
        colnames -- optional column titles; defaults to '1', '2', ...

        Any existing columns are removed.  One text column is created per
        field of the sample row.  If data offers sort_on_column the column
        headers become clickable and sort the grid.
        """
        sample = data[0]
        types = [type(x) for x in sample]
        if colnames is None:
            colnames = [str(x) for x in range(1, len(types) + 1)]

        # Remove existing columns
        for col in self.get_columns():
            self.remove_column(col)

        # Create the store
        self.model = gtk.ListStore(*types)

        sortable = hasattr(data, 'sort_on_column')

        # Set up columns
        self.column_lookup = {}
        for mix in range(len(types)):
            r = gtk.CellRendererText()
            column = gtk.TreeViewColumn(colnames[mix], r, text=mix)
            self.append_column(column)

            if sortable:
                column.set_clickable(True)
                # 'clicked' calls handler(column, user_data).  That matches
                # our own sort_on_column(col, column), which sorts the data
                # and then refreshes the store and the sort indicator.  The
                # data object's sort_on_column takes one argument only, so
                # connecting it directly fails on every click.
                column.connect('clicked', self.sort_on_column, mix)
            self.column_lookup[column] = mix

        self.attach_data(data)

        # Attach model to the tree
        self.set_model(self.model)

    def load_store(self, pos):

        self.first_table_row = pos
        lastpos = min(pos+self.table_size, len(self.data))
        print 'buffer reload', pos, lastpos

        if len(self.model) == 0:
            for x in xrange(int(pos), int(lastpos)):
                self.model.append(self.data[x])
        else:
            for row, x in enumerate(xrange(int(pos), int(lastpos))):
                self.model[row] = self.data[x]
            


    def on_key_press_event(self, widget, event):
        """ Turn paging keys into moves of our own scrollbar.

        Returns False (event not handled) when the widget reports it has
        the focus or the key is not Page_Up, Page_Down, Home or End.
        Otherwise moves the scrollbar value, clamped to
        [0, upper - page_size], and returns True.
        """
        if widget.is_focus():
            return False

        pressed = None
        for candidate in ('Page_Up', 'Page_Down', 'Home', 'End'):
            if gtk.gdk.keyval_from_name(candidate) == event.keyval:
                pressed = candidate

        if pressed is None:
            return False

        nrows = self.get_pagesize()[2]
        total = len(self.data)

        # Paging keeps two rows of overlap; Home/End overshoot and rely on
        # the clamp below.
        offsets = {
            'Page_Up': 2 - nrows,
            'Page_Down': nrows -2,
            'Home': -total,
            'End': total,
            }

        adj = self.adj
        target = adj.get_value() + offsets[pressed]
        ceiling = adj.get_property('upper') - adj.get_property('page_size')
        adj.set_value(max(min(target, ceiling), 0))

        return True
        

    def on_move_cursor(self, tree, movetype, direction):
        """ Work out where a cursor-movement key wants to go.

        Records the target data row in self.new_cursor_pos and always
        returns False; on_cursor_changed later compares the recorded
        target with where the cursor actually ended up.
        """
        nrows = self.get_pagesize()[2]

        path = tree.get_cursor()[0]
        if path is None:
            return False

        row_in_store = path[0]
        row_in_data = self.first_table_row + row_in_store

        # See what type of move this is.
        if movetype == gtk.MOVEMENT_DISPLAY_LINES:
            step = direction
        elif movetype == gtk.MOVEMENT_PAGES:
            step = (nrows-2) * direction
        elif movetype == gtk.MOVEMENT_BUFFER_ENDS:
            if direction < 0:
                step = -row_in_data
            else:
                step = len(self.data) - row_in_data
        else:
            # If we can't tell it is probably a horizontal movement
            return False

        # Target expressed as a row in the full data set.
        self.new_cursor_pos = self.first_table_row + (row_in_store + step)

        return False

    def get_pagesize(self):
        """ Return top and bottom rows in view, and the number of rows.

        Returns (0, 0, 0) when no model has been set.
        """
        if self.model is None:
            return 0, 0, 0

        rect = self.get_visible_rect()
        left, upper_y = self.tree_to_widget_coords(rect.x, rect.y)
        first_hit = self.get_path_at_pos(left, upper_y)
        left, lower_y = self.tree_to_widget_coords(rect.x, rect.y + rect.height)
        last_hit = self.get_path_at_pos(left, lower_y - 1)

        # NOTE(review): a row found by get_path_at_pos is numbered within
        # the store, while the fallbacks below include first_table_row --
        # confirm the mix is intended.
        if first_hit is None:
            top = self.first_table_row
        else:
            top = first_hit[0][0]

        if last_hit is None:
            # bottom of view is beyond the store
            bottom = self.first_table_row + len(self.model)
        else:
            bottom = last_hit[0][0]

        return top, bottom, 1 + bottom - top

    def on_size_allocate(self, *args):

        if self.tree.window is None:
            return 0,0,0
        
        print 'SIZE ALLOCATE',
        top, bottom, nrows = self.get_pagesize()

        # Any time we learn the page size, set the scrollbar paging size
        # FIXME -- should do this on some sort of widget resize event
        self.adj.set_property('page_increment', max(nrows-2, 1))
        self.adj.set_property('page_size', nrows)

        print 'MY   SB', self.adj.get_value(),  self.adj.get_property('upper'),
        print self.adj.get_property('page_size')
        print 'ORIG SB', self.old_vadj.get_value(), self.old_vadj.get_property('upper'),
        print self.old_vadj.get_property('page_size')

    def on_cursor_changed(self, *args):
        """ Reconcile the cursor with a pending target row.

        A target row (in data coordinates) is left in self.new_cursor_pos
        by on_move_cursor, on_adj_value_changed or cursor_kludge.  If the
        cursor already sits on the target only our scrollbar is updated.
        Otherwise the store is reloaded around the target and an idle
        callback is queued to move the cursor there.

        Returns False when nothing had to be reloaded, True after a reload.
        Also prints a fair amount of debugging output.
        """
        print 'ON_CURSOR_CHANGED'
        print self.get_pagesize()
        cursor = self.tree.get_cursor()[0]
        if cursor is None:
            return False
        print 'CURSOR', cursor
        # The cursor path's first element is the row number within the store.
        cursor = cursor[0]
        print 'old/new cursor', cursor, self.new_cursor_pos

        # No pending target: this cursor change needs no follow-up.
        if self.new_cursor_pos is None:
            return False

        top, bottom, nrows = self.get_pagesize()
        # Convert the store row to a row of the full data.
        cursor_pos = self.first_table_row + cursor
        ncp = self.new_cursor_pos
        # Never aim beyond the last data row.
        ncp = min(ncp, len(self.data)-1)
        # The target is consumed here; cursor_kludge sets a fresh one.
        self.new_cursor_pos = None
        if cursor_pos == ncp:
            # Nothing to do
            # set scrollbar value
            # The flag stops on_adj_value_changed from re-syncing the view
            # in response to our own set_value call.
            self.ignore_sb_adjustment = True
            self.adj.set_value(self.first_table_row+top)

            return False

        # fault -- need a new buffer of data
        if ncp < cursor_pos:
            # want ncp, nrows above bottom of table => ncp-ftr == ts-nrows
            ftr = max(0, ncp+nrows-self.table_size)
        else:
            # want ncp at the top, if possible
            ftr = min(ncp, len(self.data)-self.table_size)

        self.load_store(ftr)

        # Make sure target row is visible
        # Clamp the target's store row to the rows the store actually has.
        path = int(ncp-ftr)
        path = min(path, len(self.model)-1)
        path = max(path, 0)
        print 'making visible', path
        self.ignore_sb_adjustment = True
        print 'setting sb'
        self.adj.set_value(self.first_table_row+top)
        print 'adding idle kludge'
        # The cursor is moved from an idle callback rather than from inside
        # this handler.
        gobject.idle_add(self.cursor_kludge, path)
        return True

    def cursor_kludge(self, path):
        """ Idle callback: put the cursor on store row path and show it.

        Queued by on_cursor_changed after a store reload.  Returns None, so
        the idle source runs only once.
        """

        print 'making visible kludge', path
        # Record the target (in data rows) before moving the cursor, so it
        # is already in place when set_cursor causes a cursor change.
        self.new_cursor_pos = path + self.first_table_row
        self.set_cursor(path)
        self.scroll_to_cell(path)

    def old_vadj_value_changed(self, adj):
        print 'OLD VADJ CHANGED', adj.get_value()
        
    def on_adj_value_changed(self, adj):
        """ Call-back for the adjustment for the vertical scrollbar.

        Records the new scrollbar value as the pending cursor target and,
        unless the change was flagged as our own doing, brings the view in
        line with the scrollbar.  Always returns True.
        """

        # The scrollbar value becomes the pending target row; note that
        # get_value returns a float.
        self.new_cursor_pos = adj.get_value()
        # Check in case we've been told to ignore this one.
        if not self.ignore_sb_adjustment:
            self.sync_view_with_scrollbar()

        # Unset the ignore flag.
        # It only ever suppresses a single change notification.
        self.ignore_sb_adjustment = False

        return True

    def sync_view_with_scrollbar(self):
        """ Called when vertical scrollbar value has changed.

        If we are lucky, current pos is already visible.

        Otherwise we need to re-load the store and/or adjust our
        view into that store.
        """

        # Extract some clues as to where we are
        print 'scrollabar', self.adj.get_value(), self.adj.get_property('page_size')
        ftr = self.first_table_row
        top, bottom, nrows = self.get_pagesize()
        pos = self.adj.get_value()
        last_visible_pos = pos + nrows-1

        # Don't move beyond the end of the data
        end_of_data = False
        pos = min(pos, len(self.data) - 1)
        if last_visible_pos >= (len(self.data) - 1):
            last_visible_pos = len(self.data)-1
            end_of_data = True
        print 'scrolling to', pos, last_visible_pos
        # Is the current pos already visible? If so nothing to do.
        #if pos == (ftr + top):
        #    return

        if pos < ftr or last_visible_pos >= (ftr + self.table_size):
            # Need a new buffer of data
            if pos < ftr:
                print 'pos < ftr', pos, ftr
                # want pos, nrows above bottom of table => pos-ftr == ts-nrows
                ftr = max(0, pos+nrows-self.table_size)
            else:
                # want pos at the top, if possible
                print 'lvp >= ftr + ts', last_visible_pos, ftr, self.table_size
                ftr = min(pos, len(self.data)-self.table_size)

            self.load_store(ftr)
               
        # Make sure target row is visible
        path = int(pos-ftr)
        path = min(path, len(self.model)-1)
        #self.set_cursor(path)
        #self.scroll_to_cell(path)
        upper = self.old_vadj.get_property('upper')
        page_size = self.old_vadj.get_property('page_size')
        if end_of_data:
            sb_value = upper - page_size
        else:
            sb_value = path * upper / len(self.model)
        print 'making visible', path, pos, ftr, sb_value, self.old_vadj.get_value(), page_size, upper
        unit = upper / len(self.model)
        print 'sb stats', upper / unit, page_size / unit
        self.old_vadj.set_value(sb_value)
        
        return

    def sort_on_column(self, col, column):
        """ Sort data on column """

        self.set_model(None)
        current_col, sort_type = self.current_sort
        if current_col != col:
            sort_type = None
            if current_col is not None:
                current_col.set_sort_indicator(False)

        print sort_type
        if sort_type is None:
            self.data.sort_on_column(column)
            self.current_sort = col, gtk.SORT_ASCENDING
        elif sort_type == gtk.SORT_ASCENDING:
            self.data.reverse()
            self.current_sort = col, gtk.SORT_DESCENDING
        else:
            print 'removing sort'
            self.data.remove_sort()
            self.current_sort = None, None
            col.set_sort_indicator(False)
            

        self.load_store(self.first_table_row)
        self.set_model(self.model)
        self.set_sort_indicator()

    def set_sort_indicator(self):
        """ Show the sort arrow on the current sort column, if there is one. """
        col, sort_type = self.current_sort
        if sort_type is None:
            return

        col.set_sort_indicator(True)
        col.set_sort_order(sort_type)
            

    def on_button_press_event(self, view, event):
        """ Handle mouse clicks on the treeview.

        Off any row:  button 3 pops up a 'Show all' menu, button 2 removes
                      all filters straight away.
        On a row:     button 1 reports the row to the selection callback if
                      one is set; buttons 2 and 3 filter on the clicked
                      cell.

        A click with the control key held counts as button 2.  Always
        returns 0.
        """
        x = int(event.x)
        y = int(event.y)
        current = view.get_path_at_pos(x, y)

        button = event.button

        # If we are pressed with control modifier treat as button 2
        if event.state & gtk.gdk.CONTROL_MASK:
            button = 2

        if current is None:
            if button == 3:
                # Guess that the grid is filtered and offer to show the
                # user the unfiltered version.
                menu = gtk.Menu()

                item = gtk.MenuItem('Show all')
                item.connect('activate', self.show_all)
                menu.append(item)
                menu.show_all()
                menu.popup(None, None, None, event.button, event.time)
            elif button == 2:
                self.show_all()

            return 0

        path, col, cellx, celly = current
        if button == 1:
            # Look the callback up first, and with a default: when none is
            # set, an ordinary left click must not raise.
            callback = getattr(self, '_selection_callback', None)
            if callback is not None:
                row = self.model.get_iter(path)
                # NOTE(review): row_to_dict is not defined in the visible
                # source; whoever sets _selection_callback must supply it.
                callback(self.row_to_dict(row))

        elif button in [2, 3]:
            # Filter on current cell contents
            self.apply_filter_for_column_and_path(col, path)

        return 0

    def apply_filter_for_column_and_path(self, col, path):
        """ Toggle a filter on the value of the cell at (path, col).

        Moves the cursor to the cell.  If the column is already filtered on
        exactly this value the filter is removed, otherwise it is set (or
        replaced).  The data is then re-filtered and re-attached.

        Data objects without an apply_filters method cannot be filtered; for
        those only the cursor is moved.
        """
        self.grab_focus()
        self.set_cursor(path, col, 0)

        # Check before the store is touched: failing after the clear below
        # would leave the user looking at an empty grid.
        if not hasattr(self.data, 'apply_filters'):
            return

        column = self.column_lookup[col]

        # first_table_row can be a float when it came from the scrollbar;
        # a row index has to be an integer.
        value = self.data[path[0] + int(self.first_table_row)]
        filters = [value[column]]

        self.set_model(None)
        self.model.clear()

        if self.filters.get(column) == filters:
            del self.filters[column]
        else:
            self.filters[column] = filters

        self.data.apply_filters(self.filters)

        self.attach_data(self.data)
        self.set_model(self.model)

    def show_all(self, evt=None):
        """ Remove all filters and show the complete data again.

        evt is the menu item when called from the 'Show all' menu and is
        not used.  Does nothing when no model is set or the data object has
        no apply_filters method.

        NOTE(review): this name hides gtk.Widget.show_all for Python
        callers of the grid -- confirm that is intended.
        """
        data = getattr(self, 'data', None)
        # Check before clearing the store, so that data which cannot be
        # filtered is left on screen untouched.
        if self.model is None or not hasattr(data, 'apply_filters'):
            return

        self.set_model(None)
        self.model.clear()
        self.filters = {}
        data.apply_filters(self.filters)
        self.attach_data(data)
        self.set_model(self.model)


if __name__ == '__main__':

    # Small demo: one window with one grid of random data (two with -r).
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option('-n', type='int',
                      default=2000, dest='n',
                      help='Size of data to use.')
    parser.add_option('-r', action='store_true', dest='random',
                      default=False)
    options, args = parser.parse_args()

    w = gtk.Window()
    w.connect('destroy', gtk.main_quit)
    vbox = gtk.VBox(homogeneous=True)
    w.add(vbox)


    if options.random:
        view = EasyGrid()
        vbox.pack_start(view)
        view.auto_setup(RandomData(options.n))

    # This grid used to be built from Data(options.n), but no Data class
    # exists in this module, so the demo died with a NameError.  RandomData
    # is the only data source defined here.
    view = EasyGrid()
    vbox.pack_start(view)
    view.auto_setup(RandomData(options.n))

    w.show_all()

    gtk.main()


import gtk
import gobject

import tables

from easygrid import EasyGrid, SortableFilterableData

class TableView(gtk.Frame):

    def __init__(self, small):

        gtk.Frame.__init__(self)

        self.grid = EasyGrid()
        self.small = small

        self.add(self.grid)

    def load_hd5file(self, filename, table):

        table = self.get_table(filename, table)

        print 'Length of table is', len(table)


    def load_table(self, table):

        print table.colnames
        print len(table)
        if len(table) <= self.small:
            data = SortableFilterableData(table)
        else:
            data = table
        self.grid.auto_setup(data, table.colnames)
        #self.grid.load_data(data)

    def set_formats_for_table(self, table):

        formats = []
        coltypes = table.coltypes
        print coltypes
        for colname in table.colnames:
            thetype = coltypes[colname]
            try:
                typename = thetype.name
            except:
                typename = 'unknown'
            if typename.startswith('Float'):
                typename = 'float'
            elif typename.startswith('UInt') or typename.startswith('Int'):
                typename = 'integer'
            else:
                typename = 'string'

            formats.append([colname, colname, typename, {'width': 100}])

        print 'xxxx', type(table.colnames)
        #self.grid.store.set_column_lookup(table.colnames)
        self.grid.set_column_formats(formats)

    def get_table(self, filename, table):
        
        hd5 = tables.openFile(filename)

        path = table.split('/')

        current = hd5.root

        for item in path:
            current = getattr(current, item)

        return current
        


class Hdf5View(gtk.TreeView):
    """ Tree view listing the nodes of an HDF5 file by name.

    self.table_lookup maps the tree path of every Table node to the node
    itself.
    """

    def __init__(self, filename):

        gtk.TreeView.__init__(self)

        self.store = gtk.TreeStore(gobject.TYPE_STRING)
        # Use the argument.  The module-level 'options' only exists when
        # this file is run as a script.
        self.load_tables(filename)

        renderer = gtk.CellRendererText()

        column = gtk.TreeViewColumn('Table', renderer, text=0)
        self.append_column(column)

        self.set_model(self.store)

        self.show_all()

    def load_tables(self, filename):
        """ Open filename and load every node below its root into the store. """
        self.table_lookup = {}

        h = tables.openFile(filename)

        for node in h.root:
            self.load_node(None, node)

    def load_node(self, parent, node):
        """ Add node under parent in the store, then recurse into its children. """
        if isinstance(node, tables.Table):
            is_table = True
            name = node.name
            subnodes = []
        else:
            is_table = False
            name = node._v_name
            # Leaves that are not tables have no _v_children; treat them
            # as having no children instead of failing.
            children = getattr(node, '_v_children', None)
            if children is None:
                subnodes = []
            else:
                subnodes = sorted(children.keys())

        child = self.store.insert_before(parent, None, [name])

        if is_table:
            path = self.store.get_path(child)
            self.table_lookup[path] = node

        for subnode in subnodes:
            the_node = getattr(node, subnode)
            self.load_node(child, the_node)


class PigTables(gtk.Window):

    def __init__(self, options):

        gtk.Window.__init__(self)

        self.split = gtk.HPaned()
        self.add(self.split)
        self.tree = Hdf5View(options.filename)
        self.view = TableView(options.small)
        scroll = gtk.ScrolledWindow()
        scroll.add(self.tree)
        self.split.add1(scroll)
        self.split.add2(self.view)
        self.split.set_position(300)

        self.tree.connect('row_activated', self.on_row_activate)

    def on_row_activate(self, *args):

        print args
        view, path, column = args

        table = self.tree.table_lookup.get(path)

        if table is not None:
            self.view.load_table(table)

if __name__ == '__main__':

    # Script entry point: parse the options, show the browser window and
    # run the gtk main loop.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        '--small', dest='small', type='int', default=20000,
        help='Max rows for a table to be considered small enough to sort')
    parser.add_option('-f', dest='filename', default='data.hd5')
    options, args = parser.parse_args()

    window = PigTables(options)
    window.connect('destroy', gtk.main_quit)
    window.show_all()

    gtk.main()

          
    

    

_______________________________________________
pygtk mailing list   [email protected]
http://www.daa.com.au/mailman/listinfo/pygtk
Read the PyGTK FAQ: http://www.async.com.br/faq/pygtk/

Reply via email to