[hg] galaxy 1575: Add a new URL_method attribute to data_source ...

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1575: Add a new URL_method attribute to data_source ...

Greg Von Kuster
details:   http://www.bx.psu.edu/hg/galaxy/rev/415cc6dc8e35
changeset: 1575:415cc6dc8e35
user:      Greg Von Kuster <[hidden email]>
date:      Mon Oct 27 16:03:43 2008 -0400
description:
Add a new URL_method attribute to data_source tool types, whose value is either "get" or "post" ( some data sources require a get request while others require a post request ).  This fixes the Biomart problem ( along with a new, well-documented hack that can be eliminated once Biomart encodes the value of URL in the initial response - they'll tell us when they've fixed this ).  Also add some requested info to the "send to EpiGRAPH" tool.

12 file(s) affected in this change:

lib/galaxy/tools/__init__.py
lib/galaxy/util/__init__.py
lib/galaxy/web/controllers/tool_runner.py
tools/data_destination/epigraph.xml
tools/data_source/biomart.xml
tools/data_source/biomart_test.xml
tools/data_source/data_source.py
tools/data_source/epigraph_import.xml
tools/data_source/flymine.xml
tools/data_source/ucsc_tablebrowser.xml
tools/data_source/ucsc_tablebrowser_archaea.xml
tools/data_source/ucsc_tablebrowser_test.xml

diffs (299 lines):

diff -r 36a7ff82faf0 -r 415cc6dc8e35 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Fri Oct 24 12:32:32 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Mon Oct 27 16:03:43 2008 -0400
@@ -228,19 +228,21 @@
             self.version = "1.0.0"
         # Type of tool
         self.tool_type = root.get( "tool_type", None )
-        if self.tool_type is not None:
-            # data_source tool
-            if self.tool_type == "data_source":
-                self.param_trans_dict = {}
-                req_param_trans = root.find( "request_param_translation" )
-                if req_param_trans is not None:
-                    for req_param in req_param_trans.findall( "request_param" ):
-                        # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" />
-                        trans_list = []
-                        remote_name = req_param.get( "remote_name" )
-                        trans_list.append( req_param.get( "galaxy_name" ) )
-                        trans_list.append( req_param.get( "missing" ) )
-                        self.param_trans_dict[ remote_name ] = trans_list
+        # data_source tool
+        if self.tool_type == "data_source":
+            self.URL_method = root.get( "URL_method", "get" ) # get is the default
+            # TODO: Biomart hack - eliminate when they encode URL - they'll let us know when...
+            self.add_to_URL = root.get( "add_to_URL", None )
+            self.param_trans_dict = {}
+            req_param_trans = root.find( "request_param_translation" )
+            if req_param_trans is not None:
+                for req_param in req_param_trans.findall( "request_param" ):
+                    # req_param tags must look like <request_param galaxy_name="dbkey" remote_name="GENOME" missing="" />
+                    trans_list = []
+                    remote_name = req_param.get( "remote_name" )
+                    trans_list.append( req_param.get( "galaxy_name" ) )
+                    trans_list.append( req_param.get( "missing" ) )
+                    self.param_trans_dict[ remote_name ] = trans_list
         # Command line (template). Optional for tools that do not invoke a local program  
         command = root.find("command")
         if command is not None and command.text is not None:
diff -r 36a7ff82faf0 -r 415cc6dc8e35 lib/galaxy/util/__init__.py
--- a/lib/galaxy/util/__init__.py Fri Oct 24 12:32:32 2008 -0400
+++ b/lib/galaxy/util/__init__.py Mon Oct 27 16:03:43 2008 -0400
@@ -143,7 +143,7 @@
     #       different parameters can be sanitized in different ways.
     NEVER_SANITIZE = ['file_data', 'url_paste', 'URL']
     
-    def __init__( self, params, safe=True, sanitize=True, tool_type=None, param_trans_dict={} ):
+    def __init__( self, params, safe=True, sanitize=True, tool=None ):
         if safe:
             for key, value in params.items():
                 # Check to see if we should translate certain parameter names.  For example,
@@ -152,21 +152,27 @@
                 # param_trans_dict looks like { "GENOME" : [ "dbkey" "?" ] }
                 new_key = key
                 new_value = value
-                if tool_type == 'data_source':
-                    if key in param_trans_dict:
-                        new_key = param_trans_dict[ key ][0]
+                if tool and tool.tool_type == 'data_source':
+                    if key in tool.param_trans_dict:
+                        new_key = tool.param_trans_dict[ key ][0]
                         if not value:
-                            new_value = param_trans_dict[ key ][1]
+                            new_value = tool.param_trans_dict[ key ][1]
                 if key not in self.NEVER_SANITIZE and sanitize:
                     self.__dict__[ new_key ] = sanitize_param( new_value )
                 else:
                     self.__dict__[ new_key ] = new_value
-            for key, value in param_trans_dict.items():
-                # Make sure that all translated values used in Galaxy are added to the params
-                galaxy_name = param_trans_dict[ key ][0]
-                if galaxy_name not in self.__dict__:
-                    # This will set the galaxy_name to the "missing" value
-                    self.__dict__[ galaxy_name ] = param_trans_dict[ key ][1]
+            if tool and tool.tool_type == 'data_source':
+                # Add the tool's URL_method to params
+                self.__dict__[ 'URL_method' ] = tool.URL_method
+                # TODO: Biomart hack - eliminate when they encode URL - they'll let us know when...
+                if tool.add_to_URL is not None:
+                    self.__dict__[ 'add_to_URL' ] = tool.add_to_URL
+                for key, value in tool.param_trans_dict.items():
+                    # Make sure that all translated values used in Galaxy are added to the params
+                    galaxy_name = tool.param_trans_dict[ key ][0]
+                    if galaxy_name not in self.__dict__:
+                        # This will set the galaxy_name to the "missing" value
+                        self.__dict__[ galaxy_name ] = tool.param_trans_dict[ key ][1]
         else:
             self.__dict__.update(params)
 
diff -r 36a7ff82faf0 -r 415cc6dc8e35 lib/galaxy/web/controllers/tool_runner.py
--- a/lib/galaxy/web/controllers/tool_runner.py Fri Oct 24 12:32:32 2008 -0400
+++ b/lib/galaxy/web/controllers/tool_runner.py Mon Oct 27 16:03:43 2008 -0400
@@ -39,11 +39,7 @@
             log.error( "index called with tool id '%s' but no such tool exists", tool_id )
             trans.log_event( "Tool id '%s' does not exist" % tool_id )
             return "Tool '%s' does not exist, kwd=%s " % (tool_id, kwd)
-        try:
-            param_trans_dict = tool.param_trans_dict
-        except:
-            param_trans_dict = {}
-        params = util.Params( kwd, sanitize=tool.options.sanitize, tool_type=tool.tool_type, param_trans_dict=param_trans_dict )
+        params = util.Params( kwd, sanitize=tool.options.sanitize, tool=tool )
         history = trans.get_history()
         trans.ensure_valid_galaxy_session()
         template, vars = tool.handle_input( trans, params.__dict__ )
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_destination/epigraph.xml
--- a/tools/data_destination/epigraph.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_destination/epigraph.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,21 +1,40 @@
 <?xml version="1.0"?>
-<tool name="Perform genome" id="epigraph_export">
-  <description> analysis and prediction with EpiGRAPH</description>
-  <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
-  <inputs>
-    <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
-      <validator type="unspecified_build" />
-    </param>
-    <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" />
-    <param name="DATA_URL" type="baseurl" value="/datasets" />
-    <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
-  </inputs>
-  <outputs/>
-  <help>
+<tool name="Perform genome analysis" id="epigraph_export">
+    <description> and prediction with EpiGRAPH</description>
+    <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
+    <inputs>
+        <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
+            <validator type="unspecified_build" />
+        </param>
+        <param name="REDIRECT_URL" type="hidden" value="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/DataImport.jsp" />
+        <param name="DATA_URL" type="baseurl" value="/datasets" />
+        <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+    </inputs>
+    <outputs/>
+    <help>
+
+.. class:: warningmark
+
+After clicking the **Execute** button, you will be redirected to the EpiGRAPH website. Please be patient while the dataset is being imported. Inside EpiGRAPH, buttons are available to send the results of the EpiGRAPH analysis back to Galaxy. In addition, you can always abandon an EpiGRAPH session and return to Galaxy by directing your browser to your current Galaxy instance.
+
+-----
+
+.. class:: infomark
+
 **What it does**
 
-This tool sends the selected dataset to EpiGRAPH for in-depth analysis and prediction.
+This tool sends the selected dataset to EpiGRAPH in order to perform an in-depth analysis with statistical and machine learning methods.
 
-  </help>
+-----
+
+.. class:: infomark
+
+**EpiGRAPH outline**
+
+The EpiGRAPH_ web service enables biologists to uncover hidden associations in vertebrate genome and epigenome datasets. Users can upload or import sets of genomic regions and EpiGRAPH will test a wide range of attributes (including DNA sequence and structure, gene density, chromatin modifications and evolutionary conservation) for enrichment or depletion among these regions. Furthermore, EpiGRAPH learns to predictively identify genomic regions that exhibit similar properties.
+
+.. _EpiGRAPH: http://epigraph.mpi-inf.mpg.de/
+
+    </help>
 </tool>
 
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/biomart.xml
--- a/tools/data_source/biomart.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/biomart.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,13 @@
 <?xml version="1.0"?>
-<tool name="BioMart" id="biomart" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="BioMart" id="biomart" tool_type="data_source" URL_method="get" add_to_URL="biomart_hack">
  <description>Central server</description>
  <command interpreter="python">data_source.py $output</command>
  <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/biomart_test.xml
--- a/tools/data_source/biomart_test.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/biomart_test.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,13 @@
 <?xml version="1.0"?>
-<tool name="BioMart" id="biomart" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+
+    TODO: Hack to get biomart to work - the 'add_to_URL' param can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed at their end.
+-->
+<tool name="BioMart" id="biomart_test" tool_type="data_source" URL_method="get" add_to_URL="biomart_hack">
  <description>Test server</description>
  <command interpreter="python">data_source.py $output</command>
  <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/data_source.py
--- a/tools/data_source/data_source.py Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/data_source.py Mon Oct 27 16:03:43 2008 -0400
@@ -33,10 +33,20 @@
     if not URL:
         open( filename, 'w' ).write( "" )
         stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
+    # TODO: Hack to get biomart to work - this can be eliminated when the Biomart team encodes URL prior to sending, meanwhile
+    # everything including and beyond the first '&' is truncated from URL.  They said they'll let us know when this is fixed
+    # at their end.
+    add_to_URL = params.get( 'add_to_URL', None )
+    if add_to_URL:
+        URL += '&_export=1&GALAXY_URL=0'
+    URL_method = params.get( 'URL_method', None )
     out = open( filename, 'w' )
     CHUNK_SIZE = 2**20 # 1Mb
     try:
-        page = urllib.urlopen( URL, urllib.urlencode( params ) )
+        if not URL_method or URL_method == 'get':
+            page = urllib.urlopen( URL )
+        elif URL_method == 'post':
+            page = urllib.urlopen( URL, urllib.urlencode( params ) )
     except:
         stop_err( 'It appears that the remote data source application is currently off line. Please try again later.' )
     while 1:
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/epigraph_import.xml
--- a/tools/data_source/epigraph_import.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/epigraph_import.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,10 @@
 <?xml version="1.0"?>
-<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+-->
+<tool name="EpiGRAPH" id="epigraph_import" tool_type="data_source" URL_method="get">
     <description> server</description>
     <command interpreter="python">data_source.py $output</command>
     <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/flymine.xml
--- a/tools/data_source/flymine.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/flymine.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,10 @@
 <?xml version="1.0"?>
-<tool name="Flymine" id="flymine" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+-->
+<tool name="Flymine" id="flymine" tool_type="data_source" URL_method="post">
     <description>server</description>
     <command interpreter="python">data_source.py $output</command>
     <inputs action="http://preview.flymine.org/preview/begin.do" check_values="false" method="get" target="_top">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/ucsc_tablebrowser.xml
--- a/tools/data_source/ucsc_tablebrowser.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,10 @@
 <?xml version="1.0"?>
-<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+-->
+<tool name="UCSC Main" id="ucsc_table_direct1" tool_type="data_source" URL_method="post">
  <description>table browser</description>
  <command interpreter="python">data_source.py $output</command>
  <inputs action="http://genome.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/ucsc_tablebrowser_archaea.xml
--- a/tools/data_source/ucsc_tablebrowser_archaea.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser_archaea.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,10 @@
 <?xml version="1.0"?>
-<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+-->
+<tool name="UCSC Archaea" id="ucsc_table_direct_archaea1" tool_type="data_source" URL_method="post">
  <description>table browser</description>
  <command interpreter="python">data_source.py $output</command>
  <inputs action="http://archaea.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">
diff -r 36a7ff82faf0 -r 415cc6dc8e35 tools/data_source/ucsc_tablebrowser_test.xml
--- a/tools/data_source/ucsc_tablebrowser_test.xml Fri Oct 24 12:32:32 2008 -0400
+++ b/tools/data_source/ucsc_tablebrowser_test.xml Mon Oct 27 16:03:43 2008 -0400
@@ -1,5 +1,10 @@
 <?xml version="1.0"?>
-<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source">
+<!--
+    If the value of 'URL_method' is 'get', the request will consist of the value of 'URL' coming back in
+    the initial response.  If the value of 'URL_method' is 'post', any additional params coming back in the
+    initial response ( in addition to 'URL' ) will be encoded and sent as the data of a post request to URL.
+-->
+<tool name="UCSC Test" id="ucsc_table_direct_test1" tool_type="data_source" URL_method="post">
  <description>table browser</description>
  <command interpreter="python">data_source.py $output</command>
  <inputs action="http://genome-test.cse.ucsc.edu/cgi-bin/hgTables" check_values="false" method="get">