[hg] galaxy 1568: Add a new metadata type of Metadata Files.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1568: Add a new metadata type of Metadata Files.

Nate Coraor (nate@bx.psu.edu)
details:   http://www.bx.psu.edu/hg/galaxy/rev/0b022adfdc34
changeset: 1568:0b022adfdc34
user:      Dan Blankenberg <[hidden email]>
date:      Wed Oct 22 13:49:22 2008 -0400
description:
Add a new metadata type of Metadata Files.

These are now used to store the list of chromosomes for each species, as well as the index for MAF files.

MAF tools have been enhanced to make use of index files when available.

TODO: When datasets are purged from disk, these files should also be purged.

23 file(s) affected in this change:

lib/galaxy/datatypes/data.py
lib/galaxy/datatypes/metadata.py
lib/galaxy/datatypes/sequence.py
lib/galaxy/jobs/__init__.py
lib/galaxy/model/__init__.py
lib/galaxy/model/mapping.py
lib/galaxy/tools/actions/upload.py
lib/galaxy/tools/parameters/__init__.py
lib/galaxy/tools/parameters/basic.py
lib/galaxy/tools/parameters/validation.py
lib/galaxy/tools/util/maf_utilities.py
lib/galaxy/util/__init__.py
templates/dataset/edit_attributes.mako
tools/data_source/data_source.py
tools/maf/genebed_maf_to_fasta.xml
tools/maf/interval2maf.py
tools/maf/interval2maf.xml
tools/maf/interval_maf_to_merged_fasta.py
tools/maf/interval_maf_to_merged_fasta.xml
tools/maf/maf_stats.py
tools/maf/maf_stats.xml
tools/stats/filtering.py
tools/visualization/GMAJ.xml

diffs (672 lines):

diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/datatypes/data.py Wed Oct 22 13:49:22 2008 -0400
@@ -9,7 +9,6 @@
 # Valid first column and strand column values vor bed, other formats
 col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho']
 valid_strand = ['+', '-', '.']
-gzip_magic = '\037\213'
 
 class DataMeta( type ):
     """
@@ -86,10 +85,19 @@
     def set_readonly_meta( self, dataset ):
         """Unimplemented method, resets the readonly metadata values"""
         return True
-    def missing_meta( self, dataset ):
-        """Checks for empty metadata values, Returns True if non-optional metadata is missing"""
-        for key, value in dataset.metadata.items():
-            if dataset.metadata.spec[key].get("optional"): continue #we skip check for optional values here
+    def missing_meta( self, dataset, check = [], skip = [] ):
+        """
+        Checks for empty metadata values, Returns True if non-optional metadata is missing
+        Specifying a list of 'check' values will only check those names provided; when used, optionality is ignored
+        Specifying a list of 'skip' items will return True even when a named metadata value is missing
+        """
+        if check:
+            to_check = [ ( to_check, dataset.metadata.get( to_check ) ) for to_check in check ]
+        else:
+            to_check = dataset.metadata.items()
+        for key, value in to_check:
+            if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ):
+                continue #we skip check for optional and nonrequested values here
             if not value:
                 return True
         return False
@@ -328,7 +336,7 @@
         line = line[ :WIDTH ]
         if not data_checked and line:
             data_checked = True
-            if line[0:2] == gzip_magic:
+            if line[0:2] == util.gzip_magic:
                 file_type = 'gzipped'
                 break
             else:
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/metadata.py
--- a/lib/galaxy/datatypes/metadata.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/datatypes/metadata.py Wed Oct 22 13:49:22 2008 -0400
@@ -1,8 +1,9 @@
-import sys, logging
+import sys, logging, copy, shutil
 
 from galaxy.util import string_as_bool
 from galaxy.util.odict import odict
 from galaxy.web import form_builder
+import galaxy.model
 
 log = logging.getLogger( __name__ )
 
@@ -75,7 +76,13 @@
     def get_html_by_name( self, name, **kwd ):
         if name in self.spec:
             return self.spec[name].param.get_html( value=getattr( self, name ), context=self, **kwd )
-
+    def make_dict_copy( self, to_copy ):
+        """Makes a deep copy of input iterable to_copy according to self.spec"""
+        rval = {}
+        for key, value in to_copy.items():
+            if key in self.spec:
+                rval[key] = self.spec[key].param.make_copy( value, target_context=self, source_context=to_copy )
+        return rval
 
 class MetadataSpecCollection( odict ):
     """
@@ -121,7 +128,10 @@
     
     def to_string( self, value ):
         return str( value )
-
+    
+    def make_copy( self, value, target_context = None, source_context = None ):
+        return copy.deepcopy( value )
+    
     @classmethod
     def marshal ( cls, value ):
         """
@@ -150,7 +160,6 @@
         Turns a value into its usable form.
         """
         return value
-
 
 class MetadataElementSpec( object ):
     """
@@ -280,16 +289,14 @@
         return ",".join( map( str, value ) )
 
 class PythonObjectParameter( MetadataParameter ):
-    def __init__( self, spec ):
-        MetadataParameter.__init__( self, spec )
     
     def to_string( self, value ):
         if not value:
-            return self.spec.to_string( self.spec.no_value )
-        return self.spec.to_string( value )
+            return self.spec._to_string( self.spec.no_value )
+        return self.spec._to_string( value )
     
     def get_html_field( self, value=None, context={}, other_values={}, **kwd ):
-        return form_builder.TextField( self.spec.name, value=self.to_string( value ) )
+        return form_builder.TextField( self.spec.name, value=self._to_string( value ) )
 
     def get_html( self, value=None, context={}, other_values={}, **kwd ):
         return str( self )
@@ -297,3 +304,40 @@
     @classmethod
     def marshal( cls, value ):
         return value
+
+class FileParameter( MetadataParameter ):
+    
+    def to_string( self, value ):
+        if not value:
+            return str( self.spec.no_value )
+        return value.file_name
+    
+    def get_html_field( self, value=None, context={}, other_values={}, **kwd ):
+        return form_builder.TextField( self.spec.name, value=str( value.id ) )
+
+    def get_html( self, value=None, context={}, other_values={}, **kwd ):
+        return "<div>No display available for Metadata Files</div>"
+
+    def wrap( self, value ):
+        if isinstance( value, galaxy.model.MetadataFile ):
+            return value
+        try:
+            return galaxy.model.MetadataFile.get( value )
+        except:
+            #value was not a valid id
+            return None
+
+    def make_copy( self, value, target_context = None, source_context = None ):
+        value = self.wrap( value )
+        if value:
+            new_value = galaxy.model.MetadataFile( dataset = target_context.parent, name = self.spec.name )
+            new_value.flush()
+            shutil.copy( value.file_name, new_value.file_name )
+            return self.unwrap( new_value )
+        return None
+
+    @classmethod
+    def marshal( cls, value ):
+        if isinstance( value, galaxy.model.MetadataFile ):
+            value = value.id
+        return value
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/datatypes/sequence.py
--- a/lib/galaxy/datatypes/sequence.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/datatypes/sequence.py Wed Oct 22 13:49:22 2008 -0400
@@ -8,6 +8,7 @@
 from cgi import escape
 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes import metadata
+import galaxy.model
 from galaxy import util
 from sniff import *
 
@@ -24,7 +25,6 @@
 
     """Add metadata elements"""
     MetadataElement( name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True, no_value=None )
-    MetadataElement( name="species_chromosomes", desc="Species Chromosomes", value={}, param=metadata.PythonObjectParameter, readonly=True, no_value={}, to_string=str, visible=False )
 
 class Fasta( Sequence ):
     """Class representing a FASTA sequence"""
@@ -192,23 +192,30 @@
 class Maf( Alignment ):
     """Class describing a Maf alignment"""
     file_ext = "maf"
+    
+    #Readonly and optional, users can't unset it, but if it is not set, we are generally ok; if required use a metadata validator in the tool definition
+    MetadataElement( name="species_chromosomes", desc="Species Chromosomes", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True )
+    MetadataElement( name="maf_index", desc="MAF Index File", param=metadata.FileParameter, readonly=True, no_value=None, visible=False, optional=True )
 
     def init_meta( self, dataset, copy_from=None ):
         Alignment.init_meta( self, dataset, copy_from=copy_from )
     
     def set_meta( self, dataset, overwrite = True, **kwd ):
         """
-        Parses and sets species and chromosomes from MAF files.
+        Parses and sets species, chromosomes, index from MAF file.
         """
+        #these metadata values are not accessable by users, always overwrite
+        
         species = []
         species_chromosomes = {}
+        maf_reader = bx.align.maf.Reader( open( dataset.file_name ) )
+        indexes = bx.interval_index_file.Indexes()
         try:
-            for i, m in enumerate( bx.align.maf.Reader( open(dataset.file_name) ) ):
-                for c in m.components:
-                    ## spec,chrom = bx.align.maf.src_split( c.src )
-                    ## if not spec or not chrom: spec = chrom = c.src
-                    # "src_split" finds the rightmost dot, which is probably
-                    # wrong in general, and certainly here.
+            while True:
+                pos = maf_reader.file.tell()
+                block = maf_reader.next()
+                if block is None: break
+                for c in block.components:
                     spec = c.src
                     chrom = None
                     if "." in spec:
@@ -218,20 +225,44 @@
                         species_chromosomes[spec] = []
                     if chrom and chrom not in species_chromosomes[spec]:
                         species_chromosomes[spec].append( chrom )
-                # only check first 100,000 blocks for species
-                if i > 100000: break
-        except:
+                    indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size )
+        except: #bad MAF file
             pass
-        #these metadata values are not accessable by users, always overwrite
         dataset.metadata.species = species
-        dataset.metadata.species_chromosomes = species_chromosomes
+        #only overwrite the contents if our newly determined chromosomes don't match stored
+        chrom_file = dataset.metadata.species_chromosomes
+        compare_chroms = {}
+        if chrom_file:
+            try:
+                for line in open( chrom_file.file_name ):
+                    fields = line.split( "\t" )
+                    if fields:
+                        spec = fields.pop( 0 )
+                        if spec:
+                            compare_chroms[spec] = fields
+            except:
+                pass
+        #write out species chromosomes again only if values are different
+        if not species_chromosomes or compare_chroms != species_chromosomes:
+            tmp_file = tempfile.TemporaryFile( 'w+b' )
+            for spec, chroms in species_chromosomes.items():
+                tmp_file.write( "%s\t%s\n" % ( spec, "\t".join( chroms ) ) )
+            
+            if not chrom_file:
+                chrom_file = galaxy.model.MetadataFile( dataset = dataset, name = "species_chromosomes" )
+                chrom_file.flush()
+            tmp_file.seek( 0 )
+            open( chrom_file.file_name, 'wb' ).write( tmp_file.read() )
+            dataset.metadata.species_chromosomes = chrom_file
+            tmp_file.close()
+        
+        index_file = dataset.metadata.maf_index
+        if not index_file:
+            index_file = galaxy.model.MetadataFile( dataset = dataset, name="maf_index" )
+            index_file.flush()
+        indexes.write( open( index_file.file_name, 'w' ) )
+        dataset.metadata.maf_index = index_file
     
-    def missing_meta( self, dataset ):
-        """Checks to see if species is set"""
-        if dataset.metadata.species in [None, []]:
-            return True
-        return False
-
     def display_peek( self, dataset ):
         """Returns formated html of peek"""
         return self.make_html_table( dataset )
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/jobs/__init__.py Wed Oct 22 13:49:22 2008 -0400
@@ -475,7 +475,16 @@
 
     def get_input_fnames( self ):
         job = model.Job.get( self.job_id )
-        return [ da.dataset.file_name for da in job.input_datasets if da.dataset ]
+        filenames = []
+        for da in job.input_datasets: #da is JobToInputDatasetAssociation object
+            if da.dataset:
+                filenames.append( da.dataset.file_name )
+                #we will need to stage in metadata file names also
+                #TODO: would be better to only stage in metadata files that are actually needed (found in command line, referenced in config files, etc.)
+                for key, value in da.dataset.metadata.items():
+                    if isinstance( value, model.MetadataFile ):
+                        filenames.append( value.file_name )
+        return filenames
 
     def get_output_fnames( self ):
         job = model.Job.get( self.job_id )
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/model/__init__.py Wed Oct 22 13:49:22 2008 -0400
@@ -5,7 +5,7 @@
 the relationship cardinalities are obvious (e.g. prefer Dataset to Data)
 """
 
-import os.path, os, errno, copy
+import os.path, os, errno
 import sha
 import galaxy.datatypes
 from galaxy.util.bunch import Bunch
@@ -165,7 +165,7 @@
         return self._metadata_collection
     def set_metadata( self, bunch ):
         # Needs to accept a MetadataCollection, a bunch, or a dict
-        self._metadata = dict( [ ( key, copy.deepcopy( value ) ) for key, value in bunch.items() ] )
+        self._metadata = self.metadata.make_dict_copy( bunch )
     metadata = property( get_metadata, set_metadata )
 
     """
@@ -223,8 +223,8 @@
         return self.datatype.set_meta( self, **kwd )
     def set_readonly_meta( self, **kwd ):
         return self.datatype.set_readonly_meta( self, **kwd )
-    def missing_meta( self ):
-        return self.datatype.missing_meta( self )
+    def missing_meta( self, **kwd ):
+        return self.datatype.missing_meta( self, **kwd )
     def as_display_type( self, type, **kwd ):
         return self.datatype.as_display_type( self, type, **kwd )
     def display_peek( self ):
@@ -258,8 +258,9 @@
         return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
     
     def copy( self, copy_children = False, parent_id = None ):
-        des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, metadata=self._metadata, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self )
+        des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self )
         des.flush()
+        des.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
         if copy_children:
             for child in self.children:
                 child_copy = child.copy( copy_children = copy_children, parent_id = des.id )
@@ -564,6 +565,24 @@
         self.user = None
         self.order_index = None
 
+class MetadataFile( object ):
+    def __init__( self, dataset = None, name = None ):
+        self.dataset = dataset
+        self.name = name
+    @property
+    def file_name( self ):
+        assert self.id is not None, "ID must be set before filename used (commit the object)"
+        path = os.path.join( Dataset.file_path, '_metadata_files', *directory_hash_id( self.id ) )
+        # Create directory if it does not exist
+        try:
+            os.makedirs( path )
+        except OSError, e:
+            # File Exists is okay, otherwise reraise
+            if e.errno != errno.EEXIST:
+                raise
+        # Return filename inside hashed directory
+        return os.path.abspath( os.path.join( path, "metadata_%d.dat" % self.id ) )
+
 ## ---- Utility methods -------------------------------------------------------
 
 def directory_hash_id( id ):
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/model/mapping.py
--- a/lib/galaxy/model/mapping.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/model/mapping.py Wed Oct 22 13:49:22 2008 -0400
@@ -238,6 +238,15 @@
     Column( "user_id", Integer, ForeignKey( "galaxy_user.id" ), index=True ),                              
     Column( "order_index", Integer ) )
 
+MetadataFile.table = Table( "metadata_file", metadata,
+    Column( "id", Integer, primary_key=True ),
+    Column( "name", String ),
+    Column( "hda_id", Integer, ForeignKey( "history_dataset_association.id" ), index=True, nullable=True ),
+    Column( "create_time", DateTime, default=now ),
+    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
+    Column( "deleted", Boolean, index=True, default=False ),
+    Column( "purged", Boolean, index=True, default=False ) )
+
 # With the tables defined we can define the mappers and setup the
 # relationships between the model objects.
 
@@ -363,6 +372,9 @@
 assign_mapper( context, StoredWorkflowMenuEntry, StoredWorkflowMenuEntry.table,
     properties=dict( stored_workflow=relation( StoredWorkflow ) ) )
 
+assign_mapper( context, MetadataFile, MetadataFile.table,
+    properties=dict( dataset=relation( HistoryDatasetAssociation ) ) )
+
 def db_next_hid( self ):
     """
     Override __next_hid to generate from the database in a concurrency
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/actions/upload.py
--- a/lib/galaxy/tools/actions/upload.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/tools/actions/upload.py Wed Oct 22 13:49:22 2008 -0400
@@ -196,7 +196,7 @@
         temp = open( temp_name, "U" )
         magic_check = temp.read( 2 )
         temp.close()
-        if magic_check != datatypes.data.gzip_magic:
+        if magic_check != util.gzip_magic:
             return ( False, False )
         CHUNK_SIZE = 2**15 # 32Kb
         gzipped_file = gzip.GzipFile( temp_name )
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/__init__.py
--- a/lib/galaxy/tools/parameters/__init__.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/tools/parameters/__init__.py Wed Oct 22 13:49:22 2008 -0400
@@ -60,4 +60,4 @@
         if key in params:
             value = params[key].value_from_basic( value, app, ignore_errors )
         rval[ key ] = value
-    return rval
\ No newline at end of file
+    return rval
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/basic.py
--- a/lib/galaxy/tools/parameters/basic.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/tools/parameters/basic.py Wed Oct 22 13:49:22 2008 -0400
@@ -829,6 +829,8 @@
             options = []
             for filter_key, filter_value in self.filtered.iteritems():
                 dataset = other_values[filter_key]
+                if dataset.__class__.__name__.endswith( "DatasetFilenameWrapper" ): #this is a bad way to check for this, but problems importing class ( due to circular imports? )
+                    dataset = dataset.dataset
                 if dataset:
                     for meta_key, meta_dict in filter_value.iteritems():
                         if dataset.metadata.spec[meta_key].param.to_string( dataset.metadata.get( meta_key ) ) == meta_dict['value']:
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/parameters/validation.py
--- a/lib/galaxy/tools/parameters/validation.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/tools/parameters/validation.py Wed Oct 22 13:49:22 2008 -0400
@@ -163,13 +163,15 @@
     """
     Validator that checks for missing metadata
     """
-    def __init__( self, message=None ):
+    def __init__( self, message = None, check = "", skip = "" ):
         self.message = message
+        self.check = check.split( "," )
+        self.skip = skip.split( "," )
     @classmethod
     def from_element( cls, param, elem ):
-        return cls( elem.get( 'message', None ) )
+        return cls( message=elem.get( 'message', None ), check=elem.get( 'check', "" ), skip=elem.get( 'skip', "" ) )
     def validate( self, value, history=None ):
-        if value and value.missing_meta():
+        if value and value.missing_meta( check = self.check, skip = self.skip ):
             if self.message is None:
                 self.message = "Metadata missing, click the pencil icon in the history item to edit / save the metadata attributes"
             raise ValueError( self.message )
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/tools/util/maf_utilities.py
--- a/lib/galaxy/tools/util/maf_utilities.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/tools/util/maf_utilities.py Wed Oct 22 13:49:22 2008 -0400
@@ -145,8 +145,16 @@
         except:
             pass
     return None
+
+#return ( index, temp_index_filename ) for user maf, if available, or build one and return it, return None when no tempfile is created
+def open_or_build_maf_index( maf_file, index_filename, species = None ):
+    try:
+        return ( bx.align.maf.Indexed( maf_file, index_filename = index_filename, keep_open = True, parse_e_rows = False ), None )
+    except:
+        return build_maf_index( maf_file, species = species )
+    
 
-#builds and returns (index, index_filename) for specified maf_file
+#builds and returns ( index, index_filename ) for specified maf_file
 def build_maf_index( maf_file, species = None ):
     indexes = bx.interval_index_file.Indexes()
     try:
diff -r b02b8d9196a8 -r 0b022adfdc34 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py Wed Oct 22 12:15:06 2008 -0400
+++ b/lib/galaxy/util/__init__.py Wed Oct 22 13:49:22 2008 -0400
@@ -16,6 +16,8 @@
 
 log   = logging.getLogger(__name__)
 _lock = threading.RLock()
+
+gzip_magic = '\037\213'
 
 def synchronized(func):
     """This wrapper will serialize access to 'func' to a single thread. Use it as a decorator."""
diff -r b02b8d9196a8 -r 0b022adfdc34 templates/dataset/edit_attributes.mako
--- a/templates/dataset/edit_attributes.mako Wed Oct 22 12:15:06 2008 -0400
+++ b/templates/dataset/edit_attributes.mako Wed Oct 22 13:49:22 2008 -0400
@@ -65,6 +65,9 @@
               if they are not accurate.
           </div>
       </form>
+      %if data.missing_meta():
+          <div class="errormessagesmall">Required metadata values are missing. Some of these values may not be editable by the user. Selecting "Auto-detect" will attempt to fix these values.</div>
+      %endif
   </div>
   </div>
 
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/data_source/data_source.py
--- a/tools/data_source/data_source.py Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/data_source/data_source.py Wed Oct 22 13:49:22 2008 -0400
@@ -2,7 +2,8 @@
 #Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file.
 import urllib, sys, os, gzip, tempfile, shutil
 from galaxy import eggs
-from galaxy.datatypes import data
+#from galaxy.datatypes import data
+from galaxy.util import gzip_magic
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
@@ -14,7 +15,7 @@
     temp = open( filename, "U" )
     magic_check = temp.read( 2 )
     temp.close()
-    if magic_check != data.gzip_magic:
+    if magic_check != gzip_magic:
         return False
     return True
 
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/genebed_maf_to_fasta.xml
--- a/tools/maf/genebed_maf_to_fasta.xml Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/genebed_maf_to_fasta.xml Wed Oct 22 13:49:22 2008 -0400
@@ -1,6 +1,6 @@
 <tool id="GeneBed_Maf_Fasta2" name="Stitch Gene blocks">
   <description>given a set of coding exon intervals</description>
-  <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
+  <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --mafSourceType=$maf_source_type.maf_source  --geneBED --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #end if
   </command>
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval2maf.py
--- a/tools/maf/interval2maf.py Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/interval2maf.py Wed Oct 22 13:49:22 2008 -0400
@@ -16,6 +16,7 @@
    -S, --strandCol=S: Column of Strand
    -t, --mafType=t: Type of MAF source to use
    -m, --mafFile=m: Path of source MAF file, if not using cached version
+   -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
    -i, --interval_file=i:       Input interval file
    -o, --output_file=o:      Output MAF file
    -p, --species=p: Species to include in output
@@ -92,7 +93,7 @@
             print >> sys.stderr, "The MAF source specified (%s) appears to be invalid." % ( options.mafType )
             sys.exit()
     elif options.mafFile:
-        index, index_filename = maf_utilities.build_maf_index( options.mafFile, species = [dbkey] )
+        index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
         if index is None:
             print >> sys.stderr, "Your MAF file appears to be malformed."
             sys.exit()
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval2maf.xml
--- a/tools/maf/interval2maf.xml Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/interval2maf.xml Wed Oct 22 13:49:22 2008 -0400
@@ -1,7 +1,7 @@
 <tool id="Interval2Maf1" name="Extract MAF blocks">
   <description>given a set of genomic intervals</description>
   <command interpreter="python">
-    #if $maf_source_type.maf_source == "user":#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc
+    #if $maf_source_type.maf_source == "user":#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafFile=$maf_source_type.mafFile --mafIndex=$maf_source_type.mafFile.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc
     #else:#interval2maf.py --dbkey=${input1.dbkey} --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafType=$maf_source_type.mafType --interval_file=$input1 --output_file=$out_file1 --mafIndexFile=${GALAXY_DATA_INDEX_DIR}/maf_index.loc
     #end if
   </command>
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval_maf_to_merged_fasta.py
--- a/tools/maf/interval_maf_to_merged_fasta.py Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/interval_maf_to_merged_fasta.py Wed Oct 22 13:49:22 2008 -0400
@@ -15,6 +15,7 @@
    -G, --geneBED: Input is a Gene BED file, process and join exons as one region
    -t, --mafSourceType=t: Type of MAF source to use
    -m, --mafSource=m: Path of source MAF file, if not using cached version
+   -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
    -i, --interval_file=i:       Input interval file
    -o, --output_file=o:      Output MAF file
    -p, --species=p: Species to include in output
@@ -105,7 +106,7 @@
             stop_err( "The MAF source specified (%s) appears to be invalid." % ( options.mafSource ) )
     elif options.mafSourceType.lower() in ["user"]:
         #index maf for use here, need to remove index_file when finished
-        index, index_filename = maf_utilities.build_maf_index( options.mafSource, species = [primary_species] )
+        index, index_filename = maf_utilities.open_or_build_maf_index( options.mafSource, options.mafIndex, species = [primary_species] )
         if index is None:
             stop_err( "Your MAF file appears to be malformed." )
     else:
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/interval_maf_to_merged_fasta.xml
--- a/tools/maf/interval_maf_to_merged_fasta.xml Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/interval_maf_to_merged_fasta.xml Wed Oct 22 13:49:22 2008 -0400
@@ -1,6 +1,6 @@
 <tool id="Interval_Maf_Merged_Fasta2" name="Stitch MAF blocks">
   <description>given a set of genomic intervals</description>
-  <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
+  <command interpreter="python">#if $maf_source_type.maf_source == "user":#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_file --mafIndex=$maf_source_type.maf_file.metadata.maf_index --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #else:#interval_maf_to_merged_fasta.py --dbkey=$dbkey --species=$maf_source_type.species --mafSource=$maf_source_type.maf_identifier --interval_file=$input1 --output_file=$out_file1 --chromCol=${input1.metadata.chromCol} --startCol=${input1.metadata.startCol} --endCol=${input1.metadata.endCol} --strandCol=${input1.metadata.strandCol} --mafSourceType=$maf_source_type.maf_source --mafIndexFileDir=${GALAXY_DATA_INDEX_DIR}
 #end if
   </command>
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/maf_stats.py
--- a/tools/maf/maf_stats.py Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/maf_stats.py Wed Oct 22 13:49:22 2008 -0400
@@ -31,10 +31,14 @@
     else: summary = False
 
     mafIndexFile = "%s/maf_index.loc" % sys.argv[9]
+    try:
+        maf_index_filename = sys.argv[10].strip()
+    except:
+        maf_index_filename = None
     index = index_filename = None
     if maf_source_type == "user":
         #index maf for use here
-        index, index_filename = maf_utilities.build_maf_index( input_maf_filename, species = [dbkey] )
+        index, index_filename = maf_utilities.open_or_build_maf_index( input_maf_filename, maf_index_filename, species = [dbkey] )
         if index is None:
             print >>sys.stderr, "Your MAF file appears to be malformed."
             sys.exit()
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/maf/maf_stats.xml
--- a/tools/maf/maf_stats.xml Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/maf/maf_stats.xml Wed Oct 22 13:49:22 2008 -0400
@@ -7,7 +7,10 @@
     #else:
       $maf_source_type.maf_source $maf_source_type.mafType $input1 $out_file1 $dbkey ${input1.metadata.chromCol} ${input1.metadata.startCol} ${input1.metadata.endCol} $summary
     #end if
-    ${GALAXY_DATA_INDEX_DIR}
+    ${GALAXY_DATA_INDEX_DIR}
+    #if $maf_source_type.maf_source == "user":
+    $input2.metadata.maf_index
+    #end if
   </command>
   <inputs>
     <param format="interval" name="input1" label="Interval File" type="data">
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/stats/filtering.py
--- a/tools/stats/filtering.py Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/stats/filtering.py Wed Oct 22 13:49:22 2008 -0400
@@ -4,7 +4,6 @@
 
 import sys, sets, re, os.path
 from galaxy import eggs
-from galaxy.datatypes import metadata
 
 assert sys.version_info[:2] >= ( 2, 4 )
 
diff -r b02b8d9196a8 -r 0b022adfdc34 tools/visualization/GMAJ.xml
--- a/tools/visualization/GMAJ.xml Wed Oct 22 12:15:06 2008 -0400
+++ b/tools/visualization/GMAJ.xml Wed Oct 22 13:49:22 2008 -0400
@@ -2,7 +2,9 @@
 <description>Multiple Alignment Viewer</description>
   <command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command>
   <inputs>
-      <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"/>
+      <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False">
+        <validator type="metadata" check="species_chromosomes" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/>
+      </param>
       <param name="refseq" label="Reference Sequence" type="select">
         <option value="first" selected="true">First sequence in each block</option>
         <option value="any">Any sequence</option>
@@ -103,9 +105,19 @@
 
 #set $seq_count = 0
 #for $annotation_count, $annotation in $enumerate( $annotations ):
-#if $annotation.annotation_style.style == "galaxy":
-#if $maf_input.dataset.metadata.species_chromosomes and $annotation.annotation_style['species'].value in $maf_input.dataset.metadata.species_chromosomes and $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]:
-#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $maf_input.dataset.metadata.species_chromosomes[$annotation.annotation_style['species'].value]]
+#if $annotation.annotation_style.style == "galaxy":
+#set $species_chromosomes = {}
+#if $maf_input.dataset.metadata.species_chromosomes:
+#for $line in open( $maf_input.dataset.metadata.species_chromosomes.file_name ):
+#set $fields = $line.split( "\t" )
+#if $fields:
+#set $spec = $fields.pop( 0 )
+#set $species_chromosomes[spec] = $fields
+#end if
+#end for
+#end if
+#if $species_chromosomes and $annotation.annotation_style['species'].value in $species_chromosomes and $species_chromosomes[$annotation.annotation_style['species'].value]:
+#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $species_chromosomes[$annotation.annotation_style['species'].value]]
 #else:
 #set $seq_names = [$annotation.annotation_style['species']]
 #end if
@@ -171,4 +183,4 @@
 Gmaj is a tool for viewing and manipulating Generalized Multiple Alignments (GMAs) produced by programs such as TBA (though it can also be used with maf-format alignments from other sources). It can display interactive graphical and text representations of the alignments, a diagram showing the locations of exons and repeats, and other annotations -- all with the user's choice of reference sequence.
 
   </help>
-</tool>
\ No newline at end of file
+</tool>