[hg] galaxy 1548: Treat EpiGRAPH as a data source much like the UCSC Table Browser

1 message (threaded view)

[hg] galaxy 1548: Treat EpiGRAPH as a data source much like the UCSC Table Browser

Nate Coraor (nate@bx.psu.edu)
details:   http://www.bx.psu.edu/hg/galaxy/rev/ae341e281c89
changeset: 1548:ae341e281c89
user:      Greg Von Kuster <[hidden email]>
date:      Mon Oct 06 13:34:51 2008 -0400
description:
Treat EpiGRAPH as a data source, much like the UCSC Table Browser.

6 file(s) affected in this change:

lib/galaxy/tools/__init__.py
tool_conf.xml.sample
tools/data_destination/epigraph.xml
tools/data_source/epigraph.py
tools/data_source/epigraph_code.py
tools/data_source/epigraph_import.xml

diffs (174 lines):

diff -r 684b78f79f8b -r ae341e281c89 lib/galaxy/tools/__init__.py
--- a/lib/galaxy/tools/__init__.py Mon Oct 06 11:48:35 2008 -0400
+++ b/lib/galaxy/tools/__init__.py Mon Oct 06 13:34:51 2008 -0400
@@ -1070,11 +1070,10 @@
 
     def parse_redirect_url( self, inp_data, param_dict ):
         """Parse the REDIRECT_URL tool param"""
-        # Tools that send data to an external application via a redirect must include the following 3
-        # tool params:
+        # Tools that send data to an external application via a redirect must include the following 3 tool params:
         # REDIRECT_URL - the url to which the data is being sent
         # DATA_URL - the url to which the receiving application will send an http post to retrieve the Galaxy data
-        # GALAXY_URL - the to which the external application may post data as a response
+        # GALAXY_URL - the url to which the external application may post data as a response
         redirect_url = param_dict.get( 'REDIRECT_URL' )
         redirect_url_params = self.build_redirect_url_params( param_dict )
         # Add the parameters to the redirect url.  We're splitting the param string on '**^**'
diff -r 684b78f79f8b -r ae341e281c89 tool_conf.xml.sample
--- a/tool_conf.xml.sample Mon Oct 06 11:48:35 2008 -0400
+++ b/tool_conf.xml.sample Mon Oct 06 13:34:51 2008 -0400
@@ -10,6 +10,7 @@
     <tool file="data_source/biomart_test.xml" />
     <tool file="data_source/gbrowse_elegans.xml" />
     <tool file="data_source/flymine.xml" />
+    <tool file="data_source/epigraph_import.xml" />
     <tool file="data_source/encode_db.xml" />
     <tool file="data_source/hbvar.xml" />
     <tool file="validation/fix_errors.xml" />
diff -r 684b78f79f8b -r ae341e281c89 tools/data_destination/epigraph.xml
--- a/tools/data_destination/epigraph.xml Mon Oct 06 11:48:35 2008 -0400
+++ b/tools/data_destination/epigraph.xml Mon Oct 06 13:34:51 2008 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
-<tool name="Perform EpiGRAPH" id="epigraph">
-  <description> Genome analysis and prediction</description>
+<tool name="Perform genome" id="epigraph_export">
+  <description> analysis and prediction with EpiGRAPH</description>
   <redirect_url_params>GENOME=${input1.dbkey} NAME=${input1.name} INFO=${input1.info}</redirect_url_params>
   <inputs>
     <param format="bed" name="input1" type="data" label="Send this dataset to EpiGRAPH">
diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/epigraph.py Mon Oct 06 13:34:51 2008 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+#Retrieves data from EpiGRAPH and stores in a file. EpiGRAPH request parameters are provided in the input/output file.
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+from galaxy.datatypes import data
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def check_gzip( filename ):
+    temp = open( filename, "U" )
+    magic_check = temp.read( 2 )
+    temp.close()
+    if magic_check != data.gzip_magic:
+        return False
+    return True
+
+def __main__():
+    filename = sys.argv[1]
+    params = {}
+    for line in open( filename, 'r' ):
+        try:
+            line = line.strip()
+            fields = line.split( '\t' )
+            params[ fields[0] ] = fields[1]
+        except:
+            continue
+    URL = params.get( 'URL', None )
+    if not URL:
+        open( filename, 'w' ).write( "" )
+        stop_err( 'EpiGRAPH has not sent back a URL parameter.' )
+    out = open( filename, 'w' )
+    CHUNK_SIZE = 2**20 # 1Mb
+    try:
+        page = urllib.urlopen( URL, urllib.urlencode( params ) )
+    except:
+        stop_err( 'It appears that the EpiGRAPH server is currently off-line. Please try again later.' )
+    while 1:
+        chunk = page.read( CHUNK_SIZE )
+        if not chunk:
+            break
+        out.write( chunk )
+    out.close()
+    if check_gzip( filename ):
+        fd, uncompressed = tempfile.mkstemp()
+        gzipped_file = gzip.GzipFile( filename )
+        while 1:
+            try:
+                chunk = gzipped_file.read( CHUNK_SIZE )
+            except IOError:
+                os.close( fd )
+                os.remove( uncompressed )
+                gzipped_file.close()
+                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
+            if not chunk:
+                break
+            os.write( fd, chunk )
+        os.close( fd )
+        gzipped_file.close()
+        # Replace the gzipped file with the uncompressed file
+        shutil.move( uncompressed, filename )        
+    
+if __name__ == "__main__": __main__()
diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph_code.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/epigraph_code.py Mon Oct 06 13:34:51 2008 -0400
@@ -0,0 +1,41 @@
+#Code for direct connection to EpiGRAPH
+from galaxy.datatypes import sniff
+import urllib
+
+def exec_before_job( app, inp_data, out_data, param_dict, tool=None ):
+    """
+    EpiGRAPH sends data to Galaxy by passing the following parameters in the request:
+    1. URL - the url to which Galaxy should post a request to retrieve the data
+    2. GENOME - the name of the UCSC genome assembly (e.g. hg18), dbkey in Galaxy
+    3. NAME - data.name in Galaxy
+    4. INFO - data.info in Galaxy
+    """
+    items = out_data.items()
+    for name, data in items:
+        NAME = urllib.unquote( param_dict.get( 'NAME', None ) )
+        if NAME is not None:
+            data.name = NAME
+        INFO = urllib.unquote( param_dict.get( 'INFO', None ) )
+        if INFO is not None:
+            data.info = INFO
+        GENOME = urllib.unquote( param_dict.get( 'GENOME', None ) )
+        if GENOME is not None:
+            data.dbkey = GENOME
+        else:
+            data.dbkey = '?'
+        # Store EpiGRAPH request parameters temporarily in output file
+        out = open( data.file_name, 'w' )
+        for key, value in param_dict.items():
+            print >> out, "%s\t%s" % ( key, value )
+        out.close()
+        out_data[ name ] = data
+
+def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None ):
+    """Verifies the datatype after the run"""
+    name, data = out_data.items()[0]
+    if data.extension == 'txt':
+        data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
+        data = app.datatypes_registry.change_datatype( data, data_type )
+    data.set_peek()
+    data.set_size()
+    data.flush()
diff -r 684b78f79f8b -r ae341e281c89 tools/data_source/epigraph_import.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/epigraph_import.xml Mon Oct 06 13:34:51 2008 -0400
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<tool name="EpiGRAPH" id="epigraph_import">
+  <description> server</description>
+  <command interpreter="python">epigraph.py $output</command>
+  <inputs action="http://epigraph.mpi-inf.mpg.de/WebGRAPH_Public_Test/faces/Login.jsp" check_values="false" method="get">
+    <display>go to EpiGRAPH server $GALAXY_URL</display>
+    <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=epigraph_import" />
+  </inputs>
+  <uihints minwidth="800"/>  
+  <code file="epigraph_code.py"/>
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+  <options sanitize="False" refresh="True"/>
+</tool>