[hg] galaxy 1524: Integrate with intermine ( data source ) and e...

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1524: Integrate with intermine ( data source ) and e...

greg
details:   http://www.bx.psu.edu/hg/galaxy/rev/aae4754d6828
changeset: 1524:aae4754d6828
user:      Greg Von Kuster <[hidden email]>
date:      Mon Sep 22 10:36:34 2008 -0400
description:
Integrate with intermine ( data source ) and epigraph ( data destination ).  Receiving data from epigraph coming soon.  Data is sent to epigraph using a combination of DATA_URL and REDIRECT_URL tool params.  This tool creates jobs, but does not queue them for execution.

12 file(s) affected in this change:

lib/galaxy/tools/__init__.py
lib/galaxy/tools/actions/__init__.py
lib/galaxy/tools/parameters/basic.py
lib/galaxy/web/controllers/async.py
lib/galaxy/web/controllers/tool_runner.py
templates/root/redirect.mako
templates/tool_form.tmpl
tool_conf.xml.sample
tools/data_destination/epigraph.xml
tools/data_source/flymine.xml
tools/data_source/flymine_filter_code.py
tools/data_source/intermine.py

diffs (429 lines):

diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Mon Sep 22 10:36:34 2008 -0400
@@ -239,6 +239,16 @@
                 self.command = interpreter + " " + self.command
         else:
             self.command = ''
+        # Parameters used to build URL for redirection to external app
+        redirect_url_params = root.find( "redirect_url_params" )
+        if redirect_url_params is not None and redirect_url_params.text is not None:
+            # get rid of leading / trailing white space
+            redirect_url_params = redirect_url_params.text.strip()
+            # Replace remaining white space with something we can safely split on later
+            # when we are building the params
+            self.redirect_url_params = redirect_url_params.replace( ' ', '**^**' )
+        else:
+            self.redirect_url_params = ''
         # Short description of the tool
         self.description = util.xml_text(root, "description")
         # Job runner
@@ -677,7 +687,7 @@
                 return "tool_form.tmpl", dict( errors=errors, tool_state=state, incoming=incoming, error_message=error_message )
             # If we've completed the last page we can execute the tool
             elif state.page == self.last_page:
-                out_data = self.execute( trans, params )
+                out_data = self.execute( trans, incoming=params )
                 return 'tool_executed.tmpl', dict( out_data=out_data )
             # Otherwise move on to the next page
             else:
@@ -689,8 +699,8 @@
             # Just a refresh, render the form with updated state and errors.
             return 'tool_form.tmpl', dict( errors=errors, tool_state=state )
       
-    def update_state( self, trans, inputs, state, incoming,
-                      prefix="", context=None, update_only=False, old_errors={}, changed_dependencies={} ):
+    def update_state( self, trans, inputs, state, incoming, prefix="", context=None,
+                      update_only=False, old_errors={}, changed_dependencies={} ):
         """
         Update the tool state in `state` using the user input in `incoming`.
         This is designed to be called recursively: `inputs` contains the
@@ -877,14 +887,14 @@
                 raise Exception( "Unexpected parameter type" )
         return args
             
-    def execute( self, trans, incoming={}, set_output_hid = True ):
+    def execute( self, trans, incoming={}, set_output_hid=True ):
         """
         Execute the tool using parameter values in `incoming`. This just
         dispatches to the `ToolAction` instance specified by
         `self.tool_action`. In general this will create a `Job` that
         when run will build the tool's outputs, e.g. `DefaultToolAction`.
         """
-        return self.tool_action.execute( self, trans, incoming, set_output_hid = set_output_hid )
+        return self.tool_action.execute( self, trans, incoming=incoming, set_output_hid=set_output_hid )
         
     def params_to_strings( self, params, app ):
         return params_to_strings( self.inputs, params, app )
@@ -1045,7 +1055,54 @@
             #e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ) )
             raise
         return command_line
-        
+
+    def build_redirect_url_params( self, param_dict ):
+        """Substitute parameter values into self.redirect_url_params"""
+        if not self.redirect_url_params:
+            return
+        redirect_url_params = None            
+        # Substituting parameter values into the url params
+        redirect_url_params = fill_template( self.redirect_url_params, context=param_dict )
+        # Remove newlines
+        redirect_url_params = redirect_url_params.replace( "\n", " " ).replace( "\r", " " )
+        return redirect_url_params
+
+    def parse_redirect_url( self, inp_data, param_dict ):
+        """Parse the REDIRECT_URL tool param"""
+        # Tools that send data to an external application via a redirect must include the following 3
+        # tool params:
+        # REDIRECT_URL - the url to which the data is being sent
+        # DATA_URL - the url to which the receiving application will send an http post to retrieve the Galaxy data
+        # GALAXY_URL - the url to which the external application may post data as a response
+        redirect_url = param_dict.get( 'REDIRECT_URL' )
+        redirect_url_params = self.build_redirect_url_params( param_dict )
+        # Add the parameters to the redirect url.  We're splitting the param string on '**^**'
+        # because the self.parse() method replaced white space with that separator.
+        params = redirect_url_params.split( '**^**' )
+        rup_dict = {}
+        for param in params:
+            p_list = param.split( '=' )
+            p_name = p_list[0]
+            p_val = p_list[1]
+            rup_dict[ p_name ] = p_val
+        DATA_URL = param_dict.get( 'DATA_URL', None )
+        assert DATA_URL is not None, "DATA_URL parameter missing in tool config."
+        # Get the dataset - there should only be 1
+        for name in inp_data.keys():
+            data = inp_data[ name ]
+        DATA_URL += "/%s/display" % str( data.id )
+        redirect_url += "?DATA_URL=%s" % DATA_URL
+        # Add the redirect_url_params to redirect_url
+        for p_name in rup_dict:
+            redirect_url += "&%s=%s" % ( p_name, rup_dict[ p_name ] )
+        # Add the current user email to redirect_url
+        if data.history.user:
+             USERNAME = str( data.history.user.email )
+        else:
+             USERNAME = 'Anonymous'
+        redirect_url += "&USERNAME=%s" % USERNAME
+        return redirect_url
+
     def call_hook( self, hook_name, *args, **kwargs ):
         """
         Call the custom code hook function identified by 'hook_name' if any,
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/actions/__init__.py
--- a/lib/galaxy/tools/actions/__init__.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/actions/__init__.py Mon Sep 22 10:36:34 2008 -0400
@@ -2,6 +2,8 @@
 from galaxy.tools.parameters import *
 from galaxy.util.template import fill_template
 from galaxy.util.none_like import NoneDataset
+from galaxy.web import url_for
+from galaxy.jobs import JOB_OK
 
 import logging
 log = logging.getLogger( __name__ )
@@ -63,7 +65,7 @@
         tool.visit_inputs( param_values, visitor )
         return input_datasets
     
-    def execute(self, tool, trans, incoming={}, set_output_hid = True ):
+    def execute(self, tool, trans, incoming={}, set_output_hid=True ):
         out_data = {}
         # Collect any input datasets from the incoming parameters
         inp_data = self.collect_input_datasets( tool, incoming, trans )
@@ -90,15 +92,12 @@
             on_text = '%s, %s, and others' % tuple(input_names[0:2])
         else:
             on_text = ""
-        
         # Add the dbkey to the incoming parameters
         incoming[ "dbkey" ] = input_dbkey
-        
         # Keep track of parent / child relationships, we'll create all the
         # datasets first, then create the associations
         parent_to_child_pairs = []
         child_dataset_names = set()
-        
         for name, output in tool.outputs.items():
             if output.parent:
                 parent_to_child_pairs.append( ( output.parent, name ) )
@@ -149,23 +148,19 @@
             out_data[ name ] = data
             # Store all changes to database
             trans.app.model.flush()
-            
         # Add all the top-level (non-child) datasets to the history
         for name in out_data.keys():
             if name not in child_dataset_names and name not in incoming: #don't add children; or already existing datasets, i.e. async created
                 data = out_data[ name ]
                 trans.history.add_dataset( data, set_hid = set_output_hid )
                 data.flush()
-                
         # Add all the children to their parents
         for parent_name, child_name in parent_to_child_pairs:
             parent_dataset = out_data[ parent_name ]
             child_dataset = out_data[ child_name ]
             parent_dataset.children.append( child_dataset )
-            
         # Store data after custom code runs
         trans.app.model.flush()
-        
         # Create the job object
         job = trans.app.model.Job()
         job.session_id = trans.get_galaxy_session( create=True ).id
@@ -189,8 +184,19 @@
         for name, dataset in out_data.iteritems():
             job.add_output_dataset( name, dataset )
         trans.app.model.flush()
-        
-        # Queue the job for execution
-        trans.app.job_queue.put( job.id, tool )
-        trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
-        return out_data
+        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
+        # Examples include tools that redirect to other applications ( epigraph ).  These special tools must
+        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
+        # from being queued.
+        if 'REDIRECT_URL' in incoming:
+            redirect_url = tool.parse_redirect_url( inp_data, incoming )
+            # Job should not be queued, so set state to ok
+            job.state = JOB_OK
+            job.info = "Redirected to: %s" % redirect_url
+            job.flush()
+            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
+        else:
+            # Queue the job for execution
+            trans.app.job_queue.put( job.id, tool )
+            trans.log_event( "Added job to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
+            return out_data
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/tools/parameters/basic.py
--- a/lib/galaxy/tools/parameters/basic.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/tools/parameters/basic.py Mon Sep 22 10:36:34 2008 -0400
@@ -332,6 +332,8 @@
         return form_builder.HiddenField( self.name, self.value )
     def get_initial_value( self, trans, context ):
         return self.value
+    def get_label( self ):
+        return None
     
 ## This is clearly a HACK, parameters should only be used for things the user
 ## can change, there needs to be a different way to specify this. I'm leaving
@@ -354,6 +356,9 @@
         return form_builder.HiddenField( self.name, self.get_value( trans ) )
     def get_initial_value( self, trans, context ):
         return self.value
+    def get_label( self ):
+        # BaseURLToolParameters are ultimately "hidden" parameters
+        return None
 
 class SelectToolParameter( ToolParameter ):
     """
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/async.py
--- a/lib/galaxy/web/controllers/async.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/web/controllers/async.py Mon Sep 22 10:36:34 2008 -0400
@@ -68,8 +68,8 @@
                 galaxy_url  = trans.request.base + '/async/%s/%s/%s' % ( tool_id, data.id, key )
                 galaxy_url = params.get("GALAXY_URL",galaxy_url)
                 params = dict( url=URL, GALAXY_URL=galaxy_url )
-                params[tool.outputs.keys()[0]] = data.id #assume there is exactly one output file possible
-                #tool.execute( app=self.app, history=history, incoming=params )
+                # Assume there is exactly one output file possible
+                params[tool.outputs.keys()[0]] = data.id
                 tool.execute( trans, incoming=params )
             else:
                 log.debug('async error -> %s' % STATUS)
diff -r dabed25dfbaf -r aae4754d6828 lib/galaxy/web/controllers/tool_runner.py
--- a/lib/galaxy/web/controllers/tool_runner.py Sun Sep 21 17:36:28 2008 -0400
+++ b/lib/galaxy/web/controllers/tool_runner.py Mon Sep 22 10:36:34 2008 -0400
@@ -51,3 +51,10 @@
             add_frame.wiki_url = trans.app.config.wiki_url
             add_frame.from_noframe = True
         return trans.fill_template( template, history=history, toolbox=toolbox, tool=tool, util=util, add_frame=add_frame, **vars )
+
+    @web.expose
+    def redirect( self, trans, redirect_url=None, **kwd ):
+        if not redirect_url:
+            return trans.show_error_message( "Required URL for redirection missing" )
+        trans.log_event( "Redirecting to: %s" % redirect_url )
+        return trans.fill_template( 'root/redirect.mako', redirect_url=redirect_url )
diff -r dabed25dfbaf -r aae4754d6828 templates/root/redirect.mako
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/templates/root/redirect.mako Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,5 @@
+<%inherit file="/base.mako"/>
+
+<script type="text/javascript">  
+    top.location.href = '${redirect_url}';
+</script>
\ No newline at end of file
diff -r dabed25dfbaf -r aae4754d6828 templates/tool_form.tmpl
--- a/templates/tool_form.tmpl Sun Sep 21 17:36:28 2008 -0400
+++ b/templates/tool_form.tmpl Mon Sep 22 10:36:34 2008 -0400
@@ -73,10 +73,12 @@
         #set cls = "form-row"
     #end if
     <div class="$cls">
-        <label>
-            ${param.get_label()}:
-        </label>
-
+        #set label = $param.get_label()
+        #if $label:
+            <label>
+                $label:
+            </label>
+        #end if
         #set field = $param.get_html_field( $caller, $parent_state[ $param.name ], $context )
         #set $field.refresh_on_change = $param.refresh_on_change
         <div style="float: left; width: 250px; margin-right: 10px;">$field.get_html( $prefix )</div>
diff -r dabed25dfbaf -r aae4754d6828 tool_conf.xml.sample
--- a/tool_conf.xml.sample Sun Sep 21 17:36:28 2008 -0400
+++ b/tool_conf.xml.sample Mon Sep 22 10:36:34 2008 -0400
@@ -9,6 +9,7 @@
     <tool file="data_source/biomart.xml" />
     <tool file="data_source/biomart_test.xml" />
     <tool file="data_source/gbrowse_elegans.xml" />
+    <tool file="data_source/flymine.xml" />
     <tool file="data_source/encode_db.xml" />
     <tool file="data_source/hbvar.xml" />
     <tool file="validation/fix_errors.xml" />
@@ -20,6 +21,9 @@
     <tool file="data_source/encode_import_transcription_regulation.xml"/>
     <tool file="data_source/encode_import_all_latest_datasets.xml" />
     <tool file="data_source/encode_import_gencode.xml" />
+  </section>
+  <section name="Send Data" id="send">
+    <tool file="data_destination/epigraph.xml" />
   </section>
   <section name="ENCODE Tools" id="EncodeTools">
     <tool file="encode/gencode_partition.xml" />
diff -r dabed25dfbaf -r aae4754d6828 tools/data_destination/epigraph.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_destination/epigraph.xml Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<tool name="Perform EpiGRAPH" id="epigraph">
+  <description> Genome analysis and prediction</description>
+  <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
+  <inputs>
+    <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
+      <validator type="unspecified_build" />
+    </param>
+    <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" />
+    <param name="DATA_URL" type="baseurl" value="/datasets" />
+    <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+  </inputs>
+  <outputs/>
+  <help>
+**What it does**
+
+This tool sends the selected dataset to EpiGRAPH for in-depth analysis and prediction.
+
+  </help>
+</tool>
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine.xml Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<tool name="Flymine" id="flymine">
+    <description>server</description>
+    <command interpreter="python">intermine.py $output</command>
+    <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top">
+        <display>go to Flymine server $GALAXY_URL</display>
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=flymine" />
+    </inputs>
+    <uihints minwidth="800"/>
+    <code file="flymine_filter_code.py"/>
+    <outputs>
+        <data name="output" format="txt" />
+    </outputs>
+    <options sanitize="False" refresh="True"/>
+</tool>
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/flymine_filter_code.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/flymine_filter_code.py Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,31 @@
+# Code for direct connection to flymine
+from galaxy.datatypes import sniff
+import urllib
+
+import logging
+log = logging.getLogger( __name__ )
+
+def exec_before_job( app, inp_data, out_data, param_dict, tool=None ):
+    """Sets the attributes of the data"""
+    items = out_data.items()
+    for name, data in items:
+        data.dbkey = param_dict.get( 'dbkey', '?' )    
+        # Store flymine parameters temporarily in output file
+        out = open( data.file_name, 'w' )
+        for key, value in param_dict.items():
+            out.write( "%s\t%s\n" % ( key, value ) )
+        out.close()
+        out_data[ name ] = data
+
+def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ):
+    """Verifies the data after the run"""
+    name, data = out_data.items()[0]
+    if data.state == data.states.OK:
+        data.info = data.name
+    if data.extension == 'txt':
+        data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
+        data = app.datatypes_registry.change_datatype( data, data_type )
+    data.set_peek()
+    data.set_size()
+    data.flush()
+
diff -r dabed25dfbaf -r aae4754d6828 tools/data_source/intermine.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/intermine.py Mon Sep 22 10:36:34 2008 -0400
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+#Retrieves data from intermine and stores it in a file. Intermine parameters are provided in the input/output file.
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+from galaxy.datatypes import data
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def __main__():
+    filename = sys.argv[1]
+    params = {}
+    
+    for line in open( filename, 'r' ):
+        try:
+            line = line.strip()
+            fields = line.split( '\t' )
+            params[ fields[0] ] = fields[1]
+        except:
+            continue
+    
+    URL = params.get( 'URL', None )
+    if not URL:
+        open( filename, 'w' ).write( "" )
+        stop_err( 'Datasource has not sent back a URL parameter.' )
+
+    CHUNK_SIZE = 2**20 # 1Mb
+    try:
+        page = urllib.urlopen( URL )
+    except Exception, exc:
+        raise Exception( 'Problems connecting to %s (%s)' % ( URL, exc ) )
+        sys.exit( 1 )
+    
+    fp = open( filename, 'wb' )
+    while 1:
+        chunk = page.read( CHUNK_SIZE )
+        if not chunk:
+            break
+        fp.write( chunk )
+    fp.close()    
+    
+if __name__ == "__main__": __main__()