[hg] galaxy 1550: Use only 1 underlying executable ( data_source...

Nate Coraor (nate@bx.psu.edu)
details:   http://www.bx.psu.edu/hg/galaxy/rev/64c0734ff262
changeset: 1550:64c0734ff262
user:      Greg Von Kuster <[hidden email]>
date:      Tue Oct 07 15:21:46 2008 -0400
description:
Use only 1 underlying executable ( data_source.py ) for data source tools.  A new tag set is added to the data source tool configs to handle translation of request param names sent by remote apps ( something like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" /> ).
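
For context, the new tag set as it appears in the updated tool configs below looks roughly like this (abridged from the ucsc_tablebrowser.xml diff; only a couple of the request_param entries are shown):

    <tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source">
        <command interpreter="python">data_source.py $output</command>
        <request_param_translation>
            <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
            <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" />
        </request_param_translation>
        ...
    </tool>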

21 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
lib/galaxy/tools/__init__.py
lib/galaxy/util/__init__.py
lib/galaxy/web/controllers/tool_runner.py
tool_conf.xml.sample
tools/data_source/biomart.py
tools/data_source/biomart.xml
tools/data_source/biomart_filter.py
tools/data_source/biomart_test.xml
tools/data_source/data_source.py
tools/data_source/epigraph.py
tools/data_source/epigraph_code.py
tools/data_source/epigraph_import.xml
tools/data_source/flymine.xml
tools/data_source/flymine_filter_code.py
tools/data_source/intermine.py
tools/data_source/ucsc_tablebrowser.py
tools/data_source/ucsc_tablebrowser.xml
tools/data_source/ucsc_tablebrowser_archaea.xml
tools/data_source/ucsc_tablebrowser_code.py
tools/data_source/ucsc_tablebrowser_test.xml

diffs (1016 lines):

diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py Tue Oct 07 11:58:32 2008 -0400
+++ b/lib/galaxy/jobs/__init__.py Tue Oct 07 15:21:46 2008 -0400
@@ -270,6 +270,10 @@
         incoming['userEmail'] = userEmail
         # Build params, done before hook so hook can use
         param_dict = self.tool.build_param_dict( incoming, inp_data, out_data )
+        # Certain tools require tasks to be completed prior to job execution
+        # ( this used to be performed in the "exec_before_job" hook, but hooks are deprecated ).
+        if self.tool.tool_type is not None:
+            out_data = self.tool.exec_before_job( self.queue.app, inp_data, out_data, param_dict )
         # Run the before queue ("exec_before_job") hook
         self.tool.call_hook( 'exec_before_job', self.queue.app, inp_data=inp_data,
                              out_data=out_data, tool=self.tool, param_dict=incoming)
@@ -437,6 +441,10 @@
         # Create generated output children and primary datasets and add to param_dict
         collected_datasets = {'children':self.tool.collect_child_datasets(out_data),'primary':self.tool.collect_primary_datasets(out_data)}
         param_dict.update({'__collected_datasets__':collected_datasets})
+        # Certain tools require tasks to be completed after job execution
+        # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
+        if self.tool.tool_type is not None:
+            self.tool.exec_after_process( self.queue.app, inp_data, out_data, param_dict )
         # Call 'exec_after_process' hook
         self.tool.call_hook( 'exec_after_process', self.queue.app, inp_data=inp_data,
                              out_data=out_data, param_dict=param_dict,
diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Tue Oct 07 11:58:32 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Tue Oct 07 15:21:46 2008 -0400
@@ -225,8 +225,22 @@
         if not self.version:
             # For backward compatibility, some tools may not have versions yet.
             self.version = "1.0.0"
-        # Command line (template). Optional for tools that do not invoke a
-        # local program  
+        # Type of tool
+        self.tool_type = root.get( "tool_type", None )
+        if self.tool_type is not None:
+            # data_source tool
+            if self.tool_type == "data_source":
+                self.param_trans_dict = {}
+                req_param_trans = root.find( "request_param_translation" )
+                if req_param_trans is not None:
+                    for req_param in req_param_trans.findall( "request_param" ):
+                        # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" />
+                        trans_list = []
+                        remote_name = req_param.get( "remote_name" )
+                        trans_list.append( req_param.get( "galaxy_name" ) )
+                        trans_list.append( req_param.get( "missing" ) )
+                        self.param_trans_dict[ remote_name ] = trans_list
+        # Command line (template). Optional for tools that do not invoke a local program  
         command = root.find("command")
         if command is not None and command.text is not None:
             self.command = command.text.lstrip() # get rid of leading whitespace
@@ -1115,7 +1129,56 @@
         except Exception, e:
             e.args = ( "Error in '%s' hook '%s', original message: %s" % ( self.name, hook_name, e.args[0] ) )
             raise
-    
+
+    def exec_before_job( self, app, inp_data, out_data, param_dict={} ):
+        if self.tool_type == 'data_source':
+            # List for converting UCSC to Galaxy exts, if not in following dictionary, use provided datatype
+            data_type_to_ext = { 'wigdata':'wig', 'tab':'interval', 'hyperlinks':'html', 'sequence':'fasta' }
+            dbkey = param_dict.get( 'dbkey ' )
+            organism = param_dict.get( 'organism' )
+            table = param_dict.get( 'table' )
+            description = param_dict.get( 'description' )
+            if description == 'range':
+                description = param_dict.get( 'position', '' )
+                if not description:
+                    description = 'unknown position'
+            data_type = param_dict.get( 'data_type ')
+            items = out_data.items()
+            for name, data in items:
+                if organism and table and description:
+                    data.name  = '%s on %s: %s (%s)' % ( data.name, organism, table, description )
+                data.dbkey = dbkey
+                ext = data_type
+                try:
+                    ext = data_type_to_ext[ data_type ]
+                except:
+                    pass
+                if ext not in app.datatypes_registry.datatypes_by_extension:
+                    ext = 'interval'
+                data = app.datatypes_registry.change_datatype( data, ext )
+                # store external data source's request parameters temporarily in output file
+                out = open( data.file_name, 'w' )
+                for key, value in param_dict.items():
+                    print >> out, '%s\t%s' % ( key, value )
+                out.close()
+                out_data[ name ] = data
+            return out_data
+
+    def exec_after_process( self, app, inp_data, out_data, param_dict ):
+        # TODO: for data_source tools at least, this code can probably be handled more optimally by adding a new
+        # tag set in the tool config.
+        if self.tool_type == 'data_source':
+            name, data = out_data.items()[0]
+            if data.state == data.states.OK:
+                data.info = data.name
+            if not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ):
+                data.set_meta()
+                if data.missing_meta():
+                    data = app.datatypes_registry.change_datatype( data, 'tabular' )
+            data.set_peek()
+            data.set_size()
+            data.flush()
+
     def collect_associated_files( self, output ):
         for name, outdata in output.items():
             temp_file_path = os.path.join( self.app.config.new_file_path, "dataset_%s_files" % ( outdata.id ) )
diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py Tue Oct 07 11:58:32 2008 -0400
+++ b/lib/galaxy/util/__init__.py Tue Oct 07 15:21:46 2008 -0400
@@ -141,13 +141,30 @@
     #       different parameters can be sanitized in different ways.
     NEVER_SANITIZE = ['file_data', 'url_paste', 'URL']
     
-    def __init__(self, params, safe=True, sanitize=True):
+    def __init__( self, params, safe=True, sanitize=True, tool_type=None, param_trans_dict={} ):
         if safe:
             for key, value in params.items():
+                # Check to see if we should translate certain parameter names.  For example,
+                # in data_source tools, the external data source application may send back
+                # parameter names like GENOME which is translated to dbkey in Galaxy.
+                # param_trans_dict looks like { "GENOME" : [ "dbkey" "?" ] }
+                new_key = key
+                new_value = value
+                if tool_type == 'data_source':
+                    if key in param_trans_dict:
+                        new_key = param_trans_dict[ key ][0]
+                        if not value:
+                            new_value = param_trans_dict[ key ][1]
                 if key not in self.NEVER_SANITIZE and sanitize:
-                    self.__dict__[key] = sanitize_param(value)
+                    self.__dict__[ new_key ] = sanitize_param( new_value )
                 else:
-                    self.__dict__[key] = value
+                    self.__dict__[ new_key ] = new_value
+            for key, value in param_trans_dict.items():
+                # Make sure that all translated values used in Galaxy are added to the params
+                galaxy_name = param_trans_dict[ key ][0]
+                if galaxy_name not in self.__dict__:
+                    # This will set the galaxy_name to the "missing" value
+                    self.__dict__[ galaxy_name ] = param_trans_dict[ key ][1]
         else:
             self.__dict__.update(params)
 
diff -r 960820cccaaa -r 64c0734ff262 lib/galaxy/web/controllers/tool_runner.py
--- a/lib/galaxy/web/controllers/tool_runner.py Tue Oct 07 11:58:32 2008 -0400
+++ b/lib/galaxy/web/controllers/tool_runner.py Tue Oct 07 15:21:46 2008 -0400
@@ -39,7 +39,11 @@
             log.error( "index called with tool id '%s' but no such tool exists", tool_id )
             trans.log_event( "Tool id '%s' does not exist" % tool_id )
             return "Tool '%s' does not exist, kwd=%s " % (tool_id, kwd)
-        params = util.Params(kwd, sanitize = tool.options.sanitize)
+        try:
+            param_trans_dict = tool.param_trans_dict
+        except:
+            param_trans_dict = {}
+        params = util.Params( kwd, sanitize=tool.options.sanitize, tool_type=tool.tool_type, param_trans_dict=param_trans_dict )
         history = trans.get_history()
         trans.ensure_valid_galaxy_session()
         template, vars = tool.handle_input( trans, params.__dict__ )
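
As a rough illustration of the translation path above (not Galaxy code): util.Params now receives param_trans_dict, which lib/galaxy/tools/__init__.py builds in the shape { remote_name : [ galaxy_name, missing ] }. A minimal sketch of the behavior, with a hypothetical helper name and illustrative values taken from the epigraph_import.xml mapping:

    # Sketch only: map remote request param names to Galaxy names, falling back
    # to the "missing" value when the remote app sends an empty value or omits
    # the parameter entirely.
    def translate_params( incoming, param_trans_dict ):
        translated = {}
        for key, value in incoming.items():
            if key in param_trans_dict:
                galaxy_name, missing = param_trans_dict[ key ]
                translated[ galaxy_name ] = value if value else missing
            else:
                translated[ key ] = value
        # Any translated name the remote app never sent gets its "missing" value.
        for galaxy_name, missing in param_trans_dict.values():
            translated.setdefault( galaxy_name, missing )
        return translated

    # Example: an EpiGRAPH-style request (values illustrative)
    param_trans_dict = { 'GENOME' : [ 'dbkey', '?' ], 'NAME' : [ 'name', 'EpiGRAPH query' ] }
    print( translate_params( { 'GENOME' : 'hg18', 'URL' : 'http://example.org/data' }, param_trans_dict ) )
    # -> { 'dbkey' : 'hg18', 'URL' : 'http://example.org/data', 'name' : 'EpiGRAPH query' }

The real Params class additionally applies sanitization and only performs the translation when tool_type is 'data_source'.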
diff -r 960820cccaaa -r 64c0734ff262 tool_conf.xml.sample
--- a/tool_conf.xml.sample Tue Oct 07 11:58:32 2008 -0400
+++ b/tool_conf.xml.sample Tue Oct 07 15:21:46 2008 -0400
@@ -2,6 +2,7 @@
 <toolbox>
   <section name="Get Data" id="getext">
     <tool file="data_source/upload.xml"/>
+    <tool file="data_source/access_libraries.xml" />
     <tool file="data_source/ucsc_tablebrowser.xml" />
     <tool file="data_source/ucsc_tablebrowser_test.xml" />
     <tool file="data_source/ucsc_tablebrowser_archaea.xml" />
@@ -123,7 +124,6 @@
     <tool file="visualization/GMAJ.xml" />
     <tool file="visualization/LAJ.xml" />
     <tool file="visualization/build_ucsc_custom_track.xml" />
-    <tool file="visualization/build_gbrowse_custom_track.xml" />
   </section>
   <section name="Regional Variation" id="regVar">
     <tool file="regVariation/windowSplitter.xml" />
@@ -156,8 +156,8 @@
     <tool file="taxonomy/poisson2test.xml" />
   </section>
   <section name="Solexa tools" id="solexa_tools">
-    <tool file="solexa/fastq_statistics.xml" />
-    <tool file="solexa/lastz_wrapper.xml" />
+    <tool file="sr_mapping/fastq_statistics.xml" />
+    <tool file="sr_mapping/lastz_wrapper.xml" />
   </section>
   <!--
   TODO: uncomment the following EMBOSS section whenever
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart.py
--- a/tools/data_source/biomart.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-#!/usr/bin/env python
-#Retreives data from BIOMART and stores in a file. Biomart parameters are provided in the input/output file.
-#guruprasad Ananda
-
-import urllib, sys, os, gzip, tempfile, shutil
-from galaxy import eggs
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    filename = sys.argv[1]
-    params = {}
-    for line in open(filename, 'r'):
-        try:
-            line = line.strip()
-            fields = line.split('\t')
-            params[fields[0]] = fields[1]
-        except:
-            continue
-    
-    URL = params.get( 'URL', None )
-    if not URL:
-        open( filename, 'w' ).write( "" )
-        stop_err( 'Datasource has not sent back a URL parameter.' )
-    URL = URL + '&_export=1&GALAXY_URL=0'
-    CHUNK_SIZE = 2**20 # 1Mb
-    MAX_SIZE   = CHUNK_SIZE * 100
-    try:
-        page = urllib.urlopen(URL)
-    except Exception, exc:
-        stop_err('Problems connecting to %s (%s)' % (URL, exc) )
-    
-    fp = open(filename, 'w')
-    size = 0
-    max_size_exceeded = False
-    while 1:
-        chunk = page.read(CHUNK_SIZE)
-        if not chunk:
-            break
-        size += len(chunk)
-        if size > MAX_SIZE:
-            max_size_exceeded = True
-            break
-        fp.write(chunk)
-    fp.close()
-    
-    if max_size_exceeded:
-        print 'Maximum data size of 100 MB exceeded, incomplete data retrieval.'
-    
-if __name__ == "__main__":
-    __main__()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart.xml
--- a/tools/data_source/biomart.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/biomart.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,24 +1,24 @@
 <?xml version="1.0"?>
-<tool name="BioMart" id="biomart">
+<tool name="BioMart" id="biomart" tool_type="data_source">
  <description>Central server</description>
- <command interpreter="python">
-        biomart.py
-        $output
-    </command>
+ <command interpreter="python">data_source.py $output</command>
  <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
  <display>go to BioMart Central $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
     </inputs>
-
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Biomart query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="type" missing="txt" />
+    </request_param_translation>
  <uihints minwidth="800"/>
-
- <code file="biomart_filter.py"/>
-
  <outputs>
  <data name="output" format="txt" />
  </outputs>
-
  <options sanitize="False" refresh="True"/>
-
 </tool>
-
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart_filter.py
--- a/tools/data_source/biomart_filter.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-# Greg Von Kuster
-import urllib
-from galaxy import eggs
-from galaxy.datatypes import sniff
-from galaxy import datatypes, config
-import tempfile, shutil
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
-    """Sets the name of the data"""
-    data_name = param_dict.get( 'name', 'Biomart query' )
-    data_type = param_dict.get( 'type', 'txt' )
-    name, data = out_data.items()[0]
-    if data_type == 'txt':
-        data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
-    data = app.datatypes_registry.change_datatype(data, data_type)
-    data.name = data_name
-    #store BIOMART parameters temporarily in output file
-    out = open(data.file_name,'w')
-    for key, value in param_dict.items():
-        print >> out, "%s\t%s" % (key,value)
-    out.close()    
-    out_data[name] = data
-    
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    name, data = out_data.items()[0]
-    if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval):
-        #Set meta data, format file to be valid interval type
-        data.set_meta(first_line_is_header=True)
-        #check for missing meta data, if all there, comment first line and process file
-        if not data.missing_meta():
-            line_ctr = -1
-            temp = tempfile.NamedTemporaryFile('w')
-            temp_filename = temp.name
-            temp.close()
-            temp = open(temp_filename,'w')
-            chromCol = int(data.metadata.chromCol) - 1
-            startCol = int(data.metadata.startCol) - 1
-            strandCol = int(data.metadata.strandCol) - 1
-            
-            for line in open(data.file_name, 'r'):
-                line_ctr += 1
-                #First line is a non-commented header line, lets comment it out here
-                if line_ctr == 0:
-                    temp.write("#%s" % line)
-                    continue
-                fields = line.strip().split('\t')
-                #If chrom col is an int, make it chrInt
-                try:
-                    int(fields[chromCol])
-                    fields[chromCol] = "chr%s" % fields[chromCol]
-                except:
-                    try:
-                        if fields[chromCol].upper()== "X" or fields[chromCol].upper()== "Y":
-                            fields[chromCol] = "chr%s" % fields[chromCol].upper()
-                    except:
-                        pass
-                #change to BED coordinate system
-                try:
-                    fields[startCol] = str(int(fields[startCol]) - 1)
-                except:
-                    pass
-                #set strand to +/-, instead of +1/-1
-                try:
-                    if strandCol > 0:
-                        if int(fields[strandCol]) > 0:
-                            fields[strandCol] = "+"
-                        else:
-                            fields[strandCol] = "-"
-                except:
-                    pass
-                temp.write("%s\n" % '\t'.join(fields))
-            temp.close()
-            shutil.move(temp_filename,data.file_name)
-        else:
-            data_type = sniff.guess_ext(data.file_name)
-            data = app.datatypes_registry.change_datatype(data, data_type)
-            if data.missing_meta():
-                data.set_meta()
-    else:
-        data_type = sniff.guess_ext(data.file_name)
-        data = app.datatypes_registry.change_datatype(data, data_type)
-        if data.missing_meta():
-            data.set_meta()
-    data.set_peek()
-    data.set_size()
-    data.flush()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/biomart_test.xml
--- a/tools/data_source/biomart_test.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/biomart_test.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,27 +1,24 @@
 <?xml version="1.0"?>
-<tool name="BioMart" id="biomart">
-
+<tool name="BioMart" id="biomart" tool_type="data_source">
  <description>Test server</description>
-
- <command interpreter="python">
-        biomart.py
-        $output
-    </command>
-
+ <command interpreter="python">data_source.py $output</command>
  <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
  <display>go to BioMart Central $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
  </inputs>
-
- <uihints minwidth="800"/>
-
- <code file="biomart_filter.py"/>
-
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="dbkey" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="Biomart test query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="type" missing="txt" />
+    </request_param_translation>
+ <uihints minwidth="800"/>
  <outputs>
  <data name="output" format="txt" />
  </outputs>
-
  <options sanitize="False" refresh="True"/>
-
 </tool>
-
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/data_source.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/data_source.py Tue Oct 07 15:21:46 2008 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+#Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file.
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+from galaxy.datatypes import data
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def check_gzip( filename ):
+    temp = open( filename, "U" )
+    magic_check = temp.read( 2 )
+    temp.close()
+    if magic_check != data.gzip_magic:
+        return False
+    return True
+
+def __main__():
+    filename = sys.argv[1]
+    params = {}
+    for line in open( filename, 'r' ):
+        try:
+            line = line.strip()
+            fields = line.split( '\t' )
+            params[ fields[0] ] = fields[1]
+        except:
+            continue
+    URL = params.get( 'URL', None )
+    if not URL:
+        open( filename, 'w' ).write( "" )
+        stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
+    out = open( filename, 'w' )
+    CHUNK_SIZE = 2**20 # 1Mb
+    try:
+        page = urllib.urlopen( URL, urllib.urlencode( params ) )
+    except:
+        stop_err( 'It appears that the remote data source application is currently off line. Please try again later.' )
+    while 1:
+        chunk = page.read( CHUNK_SIZE )
+        if not chunk:
+            break
+        out.write( chunk )
+    out.close()
+    if check_gzip( filename ):
+        fd, uncompressed = tempfile.mkstemp()
+        gzipped_file = gzip.GzipFile( filename )
+        while 1:
+            try:
+                chunk = gzipped_file.read( CHUNK_SIZE )
+            except IOError:
+                os.close( fd )
+                os.remove( uncompressed )
+                gzipped_file.close()
+                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
+            if not chunk:
+                break
+            os.write( fd, chunk )
+        os.close( fd )
+        gzipped_file.close()
+        # Replace the gzipped file with the uncompressed file
+        shutil.move( uncompressed, filename )        
+    
+if __name__ == "__main__": __main__()
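
As a note on the mechanics: data_source.py expects the job's output file to have been pre-populated by exec_before_job with one tab-separated key/value pair per line (the translated request params). A request written this way might look roughly like the following (values illustrative):

    URL	http://genome.ucsc.edu/cgi-bin/hgTables
    dbkey	hg18
    organism	Human
    table	knownGene
    data_type	bed

The script parses those lines, posts the params back to URL, overwrites the same file with the response, and transparently uncompresses it if the remote application returned gzipped data.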
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph.py
--- a/tools/data_source/epigraph.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-#!/usr/bin/env python
-#Retreives data from EpiGRAPH and stores in a file. EpiGRAPH request parameters are provided in the input/output file.
-import urllib, sys, os, gzip, tempfile, shutil
-from galaxy import eggs
-from galaxy.datatypes import data
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def check_gzip( filename ):
-    temp = open( filename, "U" )
-    magic_check = temp.read( 2 )
-    temp.close()
-    if magic_check != data.gzip_magic:
-        return False
-    return True
-
-def __main__():
-    filename = sys.argv[1]
-    params = {}
-    for line in open( filename, 'r' ):
-        try:
-            line = line.strip()
-            fields = line.split( '\t' )
-            params[ fields[0] ] = fields[1]
-        except:
-            continue
-    URL = params.get( 'URL', None )
-    if not URL:
-        open( filename, 'w' ).write( "" )
-        stop_err( 'EpiGRAPH has not sent back a URL parameter.' )
-    out = open( filename, 'w' )
-    CHUNK_SIZE = 2**20 # 1Mb
-    try:
-        page = urllib.urlopen( URL, urllib.urlencode( params ) )
-    except:
-        stop_err( 'It appears that the EpiGRAPH server is currently off-line. Please try again later.' )
-    while 1:
-        chunk = page.read( CHUNK_SIZE )
-        if not chunk:
-            break
-        out.write( chunk )
-    out.close()
-    if check_gzip( filename ):
-        fd, uncompressed = tempfile.mkstemp()
-        gzipped_file = gzip.GzipFile( filename )
-        while 1:
-            try:
-                chunk = gzipped_file.read( CHUNK_SIZE )
-            except IOError:
-                os.close( fd )
-                os.remove( uncompressed )
-                gzipped_file.close()
-                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
-            if not chunk:
-                break
-            os.write( fd, chunk )
-        os.close( fd )
-        gzipped_file.close()
-        # Replace the gzipped file with the uncompressed file
-        shutil.move( uncompressed, filename )        
-    
-if __name__ == "__main__": __main__()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph_code.py
--- a/tools/data_source/epigraph_code.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#Code for direct connection to EpiGRAPH
-from galaxy.datatypes import sniff
-import urllib
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None ):
-    """
-    EpiGRAPH sends data to Galaxy by passing the following parameters in the request:
-    1. URL - the url to which Galaxy should post a request to retrieve the data
-    2. GENOME - the name of the UCSC genome assembly (e.g. hg18), dbkey in Galaxy
-    3. NAME - data.name in Galaxy
-    4. INFO - data.info in Galaxy
-    """
-    items = out_data.items()
-    for name, data in items:
-        NAME = urllib.unquote( param_dict.get( 'NAME', None ) )
-        if NAME is not None:
-            data.name = NAME
-        INFO = urllib.unquote( param_dict.get( 'INFO', None ) )
-        if INFO is not None:
-            data.info = INFO
-        GENOME = urllib.unquote( param_dict.get( 'GENOME', None ) )
-        if GENOME is not None:
-            data.dbkey = GENOME
-        else:
-            data.dbkey = '?'
-        # Store EpiGRAPH request parameters temporarily in output file
-        out = open( data.file_name, 'w' )
-        for key, value in param_dict.items():
-            print >> out, "%s\t%s" % ( key, value )
-        out.close()
-        out_data[ name ] = data
-
-def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ):
-    """Verifies the datatype after the run"""
-    name, data = out_data.items()[0]
-    if data.extension == 'txt':
-        data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
-        data = app.datatypes_registry.change_datatype( data, data_type )
-    data.set_peek()
-    data.set_size()
-    data.flush()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/epigraph_import.xml
--- a/tools/data_source/epigraph_import.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/epigraph_import.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,15 +1,24 @@
 <?xml version="1.0"?>
-<tool name="EpiGRAPH" id="epigraph_import">
-  <description> server</description>
-  <command interpreter="python">epigraph.py $output</command>
-  <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
-    <display>go to EpiGRAPH server $GALAXY_URL</display>
-    <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
-  </inputs>
-  <uihints minwidth="800"/>  
-  <code file="epigraph_code.py"/>
-  <outputs>
-    <data name="output" format="txt" />
-  </outputs>
-  <options sanitize="False" refresh="True"/>
+<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source">
+    <description> server</description>
+    <command interpreter="python">data_source.py $output</command>
+    <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
+        <display>go to EpiGRAPH server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+    </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="GENOME" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="NAME" missing="EpiGRAPH query" />
+        <request_param galaxy_name="info" remote_name="INFO" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="txt" />
+    </request_param_translation>
+    <uihints minwidth="800"/>  
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
 </tool>
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/flymine.xml
--- a/tools/data_source/flymine.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/flymine.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,13 +1,22 @@
 <?xml version="1.0"?>
-<tool name="Flymine" id="flymine">
+<tool name="Flymine" id="flymine" tool_type="data_source">
     <description>server</description>
-    <command interpreter="python">intermine.py $output</command>
+    <command interpreter="python">data_source.py $output</command>
     <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top">
         <display>go to Flymine server $GALAXY_URL</display>
         <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" />
     </inputs>
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="organism" missing="" />
+        <request_param galaxy_name="table" remote_name="table" missing="" />
+        <request_param galaxy_name="description" remote_name="description" missing="" />
+        <request_param galaxy_name="name" remote_name="name" missing="FlyMine query" />
+        <request_param galaxy_name="info" remote_name="info" missing="" />
+        <request_param galaxy_name="data_type" remote_name="data_type" missing="interval" />
+    </request_param_translation>
     <uihints minwidth="800"/>
-    <code file="flymine_filter_code.py"/>
     <outputs>
         <data name="output" format="txt" />
     </outputs>
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/flymine_filter_code.py
--- a/tools/data_source/flymine_filter_code.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-# Code for direct connection to flymine
-from galaxy.datatypes import sniff
-import urllib
-
-import logging
-log = logging.getLogger( __name__ )
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None ):
-    """Sets the attributes of the data"""
-    items = out_data.items()
-    for name, data in items:
-        data.dbkey = param_dict.get( 'dbkey', '?' )    
-        # Store flymine parameters temporarily in output file
-        out = open( data.file_name, 'w' )
-        for key, value in param_dict.items():
-            out.write( "%s\t%s\n" % ( key, value ) )
-        out.close()
-        out_data[ name ] = data
-
-def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ):
-    """Verifies the data after the run"""
-    name, data = out_data.items()[0]
-    if data.state == data.states.OK:
-        data.info = data.name
-    if data.extension == 'txt':
-        data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
-        data = app.datatypes_registry.change_datatype( data, data_type )
-    data.set_peek()
-    data.set_size()
-    data.flush()
-
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/intermine.py
--- a/tools/data_source/intermine.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-#!/usr/bin/env python
-#Retreives data from intermine and stores in a file. Intermine parameters are provided in the input/output file.
-import urllib, sys, os, gzip, tempfile, shutil
-from galaxy import eggs
-from galaxy.datatypes import data
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def __main__():
-    filename = sys.argv[1]
-    params = {}
-    
-    for line in open( filename, 'r' ):
-        try:
-            line = line.strip()
-            fields = line.split( '\t' )
-            params[ fields[0] ] = fields[1]
-        except:
-            continue
-    
-    URL = params.get( 'URL', None )
-    if not URL:
-        open( filename, 'w' ).write( "" )
-        stop_err( 'Datasource has not sent back a URL parameter.' )
-
-    CHUNK_SIZE = 2**20 # 1Mb
-    try:
-        page = urllib.urlopen( URL )
-    except Exception, exc:
-        raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) )
-        sys.exit( 1 )
-    
-    fp = open( filename, 'wb' )
-    while 1:
-        chunk = page.read( CHUNK_SIZE )
-        if not chunk:
-            break
-        fp.write( chunk )
-    fp.close()    
-    
-if __name__ == "__main__": __main__()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser.py
--- a/tools/data_source/ucsc_tablebrowser.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-#Retreives data from UCSC and stores in a file. UCSC parameters are provided in the input/output file.
-import urllib, sys, os, gzip, tempfile, shutil
-from galaxy import eggs
-from galaxy.datatypes import data
-
-assert sys.version_info[:2] >= ( 2, 4 )
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def check_gzip( filename ):
-    temp = open( filename, "U" )
-    magic_check = temp.read( 2 )
-    temp.close()
-    if magic_check != data.gzip_magic:
-        return False
-    return True
-
-def __main__():
-    filename = sys.argv[1]
-    params = {}
-    
-    for line in open(filename, 'r'):
-        try:
-            line = line.strip()
-            fields = line.split('\t')
-            params[fields[0]] = fields[1]
-        except:
-            continue
-    
-    URL = params.get( 'URL', None )
-    if not URL:
-        open( filename, 'w' ).write( "" )
-        #raise Exception('Datasource has not sent back a URL parameter')
-        stop_err( 'Datasource has not sent back a URL parameter.' )
-    out = open( filename, 'w' )
-    CHUNK_SIZE = 2**20 # 1Mb
-    try:
-        page = urllib.urlopen( URL, urllib.urlencode( params ) )
-    except:
-        stop_err( 'It appears that the UCSC Table Browser is currently offline. Please try again later.' )
-    
-    while 1:
-        chunk = page.read( CHUNK_SIZE )
-        if not chunk:
-            break
-        out.write( chunk )
-    out.close()
-    if check_gzip( filename ):
-        fd, uncompressed = tempfile.mkstemp()
-        gzipped_file = gzip.GzipFile( filename )
-        while 1:
-            try:
-                chunk = gzipped_file.read( CHUNK_SIZE )
-            except IOError:
-                os.close( fd )
-                os.remove( uncompressed )
-                gzipped_file.close()
-                stop_err( 'Problem decompressing gzipped data, please try retrieving the data uncompressed.' )
-            if not chunk:
-                break
-            os.write( fd, chunk )
-        os.close( fd )
-        gzipped_file.close()
-        # Replace the gzipped file with the decompressed file
-        shutil.move( uncompressed, filename )        
-    
-if __name__ == "__main__": __main__()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser.xml
--- a/tools/data_source/ucsc_tablebrowser.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,10 +1,7 @@
 <?xml version="1.0"?>
-<tool name="UCSC Main" id="ucsc_table_direct1">
-
+<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source">
  <description>table browser</description>
-
- <command interpreter="python">ucsc_tablebrowser.py $output</command>
-
+ <command interpreter="python">data_source.py $output</command>
  <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
  <display>go to UCSC Table Browser $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
@@ -13,15 +10,17 @@
  <param name="hgta_compressType" type="hidden" value="none" />
  <param name="hgta_outputType" type="hidden" value="bed" />
  </inputs>
-
+ <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+    <request_param galaxy_name="table" remote_name="hgta_track" missing="unknown table" />
+    <request_param galaxy_name="description" remote_name="hgta_regionType" missing="no description" />
+    <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" />
+ </request_param_translation>
  <uihints minwidth="800"/>
-
- <code file="ucsc_tablebrowser_code.py"/>
-
  <outputs>
  <data name="output" format="bed" />
  </outputs>
  <options sanitize="False" refresh="True"/>
-
 </tool>
-
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_archaea.xml
--- a/tools/data_source/ucsc_tablebrowser_archaea.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,10 +1,7 @@
 <?xml version="1.0"?>
-<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1">
-
+<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source">
  <description>table browser</description>
-
- <command interpreter="python">ucsc_tablebrowser.py $output</command>
-
+ <command interpreter="python">data_source.py $output</command>
  <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
  <display>go to UCSC Table Browser $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
@@ -13,15 +10,17 @@
  <param name="hgta_compressType" type="hidden" value="none" />
  <param name="hgta_outputType" type="hidden" value="bed" />
  </inputs>
-
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_track" missing="" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" />
+    </request_param_translation>
  <uihints minwidth="800"/>
-
- <code file="ucsc_tablebrowser_code.py"/>
-
  <outputs>
  <data name="output" format="bed" />
  </outputs>
  <options sanitize="False" refresh="True"/>
-
 </tool>
-
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_code.py
--- a/tools/data_source/ucsc_tablebrowser_code.py Tue Oct 07 11:58:32 2008 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#Code for direct connection to UCSC
-from galaxy import datatypes
-
-def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
-    """Sets the name of the data"""
-    outputType = param_dict.get( 'hgta_outputType', "interval" ).lower() #assume all data is interval, we will fix later if not the case
-    #list for converting ucsc to galaxy exts, if not in following dictionary, use provided datatype
-    outputType_to_ext = {'wigdata':'wig','tab':'interval','hyperlinks':'html','sequence':'fasta'}
-    items = out_data.items()
-    description = param_dict.get('hgta_regionType',"")
-    organism = param_dict.get('org',"unkown species")
-    table = param_dict.get('hgta_track',"")
-    if description == 'range':
-        try:
-            description = param_dict.get('position',"")
-        except:
-            description = "unkown position"
-    for name, data in items:
-        data.name  = "%s on %s: %s (%s)" % (data.name, organism, table, description)
-        data.dbkey = param_dict.get('db', '?')
-        ext = outputType
-        try:
-            ext = outputType_to_ext[outputType]
-        except:
-            pass
-        if ext not in app.datatypes_registry.datatypes_by_extension:
-            ext = 'interval'
-        data = app.datatypes_registry.change_datatype(data, ext)
-        
-        #store ucsc parameters temporarily in output file
-        out = open(data.file_name,'w')
-        for key, value in param_dict.items():
-            print >> out, "%s\t%s" % (key,value)
-        out.close()
-        
-        out_data[name] = data
-
-def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """Verifies the datatype after the run"""
-    
-    name, data = out_data.items()[0]
-    if data.state == data.states.OK:
-        data.info = data.name
-    
-    if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval):
-        data.set_meta()
-        if data.missing_meta():
-            data = app.datatypes_registry.change_datatype(data, 'tabular')
-    data.set_peek()
-    data.set_size()
-    data.flush()
diff -r 960820cccaaa -r 64c0734ff262 tools/data_source/ucsc_tablebrowser_test.xml
--- a/tools/data_source/ucsc_tablebrowser_test.xml Tue Oct 07 11:58:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser_test.xml Tue Oct 07 15:21:46 2008 -0400
@@ -1,10 +1,7 @@
 <?xml version="1.0"?>
-<tool name="UCSC Test" id="ucsc_table_direct_test1">
-
+<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source">
  <description>table browser</description>
-
- <command interpreter="python">ucsc_tablebrowser.py $output</command>
-
+ <command interpreter="python">data_source.py $output</command>
  <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
  <display>go to UCSC Table Browser $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner" />
@@ -13,15 +10,17 @@
  <param name="hgta_compressType" type="hidden" value="none" />
  <param name="hgta_outputType" type="hidden" value="bed" />
  </inputs>
-
+    <request_param_translation>
+        <request_param galaxy_name="URL" remote_name="URL" missing="" />
+        <request_param galaxy_name="dbkey" remote_name="db" missing="?" />
+        <request_param galaxy_name="organism" remote_name="org" missing="unknown species" />
+        <request_param galaxy_name="table" remote_name="hgta_track" missing="" />
+        <request_param galaxy_name="description" remote_name="hgta_regionType" missing="" />
+        <request_param galaxy_name="data_type" remote_name="hgta_outputType" missing="interval" />
+    </request_param_translation>
  <uihints minwidth="800"/>
-
- <code file="ucsc_tablebrowser_code.py"/>
-
  <outputs>
  <data name="output" format="bed" />
  </outputs>
  <options sanitize="False" refresh="True"/>
-
 </tool>
-