[galaxy-dev] sqlite galaxy datatype metadata

2014-07-25 Thread Jim Johnson

Ira,

Thanks for getting a sqlite datatype into galaxy.  I was wanting to subclass a 
sqlite datatype for another application: cistrome CEAS

I had made a sqlite datatype that captured a little metadata that I thought 
could be useful for the display peek, and perhaps for filtering.
Is this worth considering?


The peek for a CEAS DB would include table names, column names, and row count:

   SQLite Database
   GeneTable 
(chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2)
 [24892]
   GenomeBGP 
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7]
   GenomeBGS 
(chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7]
   GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7]
   GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]




$ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py
diff -r f002131cb905 lib/galaxy/datatypes/binary.py
--- a/lib/galaxy/datatypes/binary.pyFri Jul 25 12:01:34 2014 -0400
+++ b/lib/galaxy/datatypes/binary.pyFri Jul 25 12:13:05 2014 -0500
@@ -20,7 +20,7 @@

 from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, 
TWOBIT_MAGIC_SIZE

-from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.metadata import 
MetadataElement,ListParameter,DictParameter
 from galaxy.datatypes import metadata
 from galaxy.datatypes.sniff import *
 import dataproviders
@@ -550,8 +550,36 @@

 @dataproviders.decorators.has_dataproviders
 class SQlite ( Binary ):
+"""Class describing a Sqlite database """
+MetadataElement( name="tables", default=[], param=ListParameter, desc="Database 
Tables", readonly=True, visible=True, no_value=[] )
+MetadataElement( name="table_columns", default={}, param=DictParameter, 
desc="Database Table Columns", readonly=True, visible=True, no_value={} )
+MetadataElement( name="table_row_count", default={}, param=DictParameter, 
desc="Database Table Row Count", readonly=True, visible=True, no_value={} )
 file_ext = "sqlite"

+def init_meta( self, dataset, copy_from=None ):
+Binary.init_meta( self, dataset, copy_from=copy_from )
+
+def set_meta( self, dataset, overwrite = True, **kwd ):
+try:
+tables = []
+columns = dict()
+rowcounts = dict()
+conn = sqlite3.connect(dataset.file_name)
+c = conn.cursor()
+tables_query = "SELECT name,sql FROM sqlite_master WHERE type='table' 
ORDER BY name"
+rslt = c.execute(tables_query).fetchall();
+for table,sql in rslt:
+tables.append(table)
+columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',')
+for table in tables:
+row_query = "SELECT count(*) FROM %s" % table
+rowcounts[table] = c.execute(row_query).fetchone()[0];
+dataset.metadata.tables = tables
+dataset.metadata.table_columns = columns
+dataset.metadata.table_row_count = rowcounts
+except Exception, exc:
+pass
+
 # Connects and runs a query that should work on any real database
 # If the file is not sqlite, an exception will be thrown and the sniffer 
will return false
 def sniff( self, filename ):
@@ -567,7 +595,14 @@

 def set_peek( self, dataset, is_multi_byte=False ):
 if not dataset.dataset.purged:
-dataset.peek  = "SQLite Database"
+lines = ['SQLite Database']
+if dataset.metadata.tables:
+for table in dataset.metadata.tables:
+try:
+lines.append('%s (%s) [%s]' % 
(table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
+except:
+continue
+dataset.peek = '\n'.join(lines)
 dataset.blurb = data.nice_size( dataset.get_size() )
 else:
 dataset.peek = 'file does not exist'


Thanks,

JJ
--
James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota
___
Please keep all replies on the list by using "reply all"
in your mail client.  To manage your subscriptions to this
and other Galaxy lists, please use the interface at:
  http://lists.bx.psu.edu/

To search Galaxy mailing lists use the unified search at:
  http://galaxyproject.org/search/mailinglists/

Re: [galaxy-dev] sqlite galaxy datatype metadata

2014-07-25 Thread Ira Cooke
Hi JJ, 

I think this is a great idea.   

Do you want to make the pull request for this?  Or I can merge it into my fork 
if you like.

Cheers
Ira



On 26 Jul 2014, at 3:22 am, Jim Johnson  wrote:

> Ira,
> 
> Thanks for getting a sqlite datatype into galaxy.  I was wanting to subclass 
> a sqlite datatype for another application: cistrome CEAS
> 
> I had made a sqlite datatype that captured a little metadata that I thought 
> could be useful for the display peek, and perhaps for filtering.  
> Is this worth considering?  
> 
> 
> The peek for a CEAS DB would include table names, column names, and row count:
> SQLite Database
> GeneTable 
> (chrom,name,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds,name2)
>  [24892]
> GenomeBGP 
> (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,chroms) [7]
> GenomeBGS 
> (chrom,promoter,bipromoter,downstream,gene,rel_loc,rel_loc_cds,roi,Ns) [7]
> GenomePieP (chrom,promoter,downstream,gene,enhancer,chroms) [7]
> GenomePieS (chrom,promoter,downstream,gene,enhancer,total) [7]
> 
> 
> 
> $ hg diff /Users/jj/gxt/gxt/lib/galaxy/datatypes/binary.py
> diff -r f002131cb905 lib/galaxy/datatypes/binary.py
> --- a/lib/galaxy/datatypes/binary.pyFri Jul 25 12:01:34 2014 -0400
> +++ b/lib/galaxy/datatypes/binary.pyFri Jul 25 12:13:05 2014 -0500
> @@ -20,7 +20,7 @@
>  
>  from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, 
> TWOBIT_MAGIC_SIZE
>  
> -from galaxy.datatypes.metadata import MetadataElement
> +from galaxy.datatypes.metadata import 
> MetadataElement,ListParameter,DictParameter
>  from galaxy.datatypes import metadata
>  from galaxy.datatypes.sniff import *
>  import dataproviders
> @@ -550,8 +550,36 @@
>  
>  @dataproviders.decorators.has_dataproviders
>  class SQlite ( Binary ):
> +"""Class describing a Sqlite database """
> +MetadataElement( name="tables", default=[], param=ListParameter, 
> desc="Database Tables", readonly=True, visible=True, no_value=[] )
> +MetadataElement( name="table_columns", default={}, param=DictParameter, 
> desc="Database Table Columns", readonly=True, visible=True, no_value={} )
> +MetadataElement( name="table_row_count", default={}, 
> param=DictParameter, desc="Database Table Row Count", readonly=True, 
> visible=True, no_value={} )
>  file_ext = "sqlite"
>  
> +def init_meta( self, dataset, copy_from=None ):
> +Binary.init_meta( self, dataset, copy_from=copy_from )
> +
> +def set_meta( self, dataset, overwrite = True, **kwd ):
> +try:
> +tables = []
> +columns = dict()
> +rowcounts = dict()
> +conn = sqlite3.connect(dataset.file_name)
> +c = conn.cursor()
> +tables_query = "SELECT name,sql FROM sqlite_master WHERE 
> type='table' ORDER BY name"
> +rslt = c.execute(tables_query).fetchall();
> +for table,sql in rslt:
> +tables.append(table)
> +columns[table] = re.sub('^.*\((.*)\)$','\\1',sql).split(',')
> +for table in tables:
> +row_query = "SELECT count(*) FROM %s" % table
> +rowcounts[table] = c.execute(row_query).fetchone()[0];
> +dataset.metadata.tables = tables
> +dataset.metadata.table_columns = columns
> +dataset.metadata.table_row_count = rowcounts
> +except Exception, exc:
> +pass
> +
>  # Connects and runs a query that should work on any real database 
>  # If the file is not sqlite, an exception will be thrown and the sniffer 
> will return false
>  def sniff( self, filename ):
> @@ -567,7 +595,14 @@
>  
>  def set_peek( self, dataset, is_multi_byte=False ):
>  if not dataset.dataset.purged:
> -dataset.peek  = "SQLite Database"
> +lines = ['SQLite Database']
> +if dataset.metadata.tables:
> +for table in dataset.metadata.tables:
> +try:
> +lines.append('%s (%s) [%s]' % 
> (table,','.join(dataset.metadata.table_columns.get(table,[])),dataset.metadata.table_row_count[table]))
> +except:
> +continue
> +dataset.peek = '\n'.join(lines)
>  dataset.blurb = data.nice_size( dataset.get_size() )
>  else:
>  dataset.peek = 'file does not exist'
> 
> 
> Thanks,
> 
> JJ
> -- 
> James E. Johnson, Minnesota Supercomputing Institute, University of Minnesota

___
Please keep all replies on the list by using "reply all"
in your mail client.  To manage your subscriptions to this
and other Galaxy lists, please use the interface at:
  http://lists.bx.psu.edu/

To search Galaxy mailing lists use the unified search at:
  http://galaxyproject.org/search/mailinglists/