[hg] galaxy 1535: Modified the way biomart tool runs: output gen...

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1535: Modified the way biomart tool runs: output gen...

Greg Von Kuster
details:   http://www.bx.psu.edu/hg/galaxy/rev/931d6ca549d3
changeset: 1535:931d6ca549d3
user:      guru
date:      Tue Sep 30 15:30:57 2008 -0400
description:
Modified the way biomart tool runs: output generation will be completed before exec_after_process hook is called.

4 file(s) affected in this change:

tools/data_source/biomart.py
tools/data_source/biomart.xml
tools/data_source/biomart_filter.py
tools/data_source/biomart_test.xml

diffs (148 lines):

diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/biomart.py Tue Sep 30 15:30:57 2008 -0400
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#Retreives data from BIOMART and stores in a file. Biomart parameters are provided in the input/output file.
+#guruprasad Ananda
+
+import urllib, sys, os, gzip, tempfile, shutil
+from galaxy import eggs
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def __main__():
+    filename = sys.argv[1]
+    params = {}
+    for line in open(filename, 'r'):
+        try:
+            line = line.strip()
+            fields = line.split('\t')
+            params[fields[0]] = fields[1]
+        except:
+            continue
+    
+    URL = params.get( 'URL', None )
+    if not URL:
+        open( filename, 'w' ).write( "" )
+        stop_err( 'Datasource has not sent back a URL parameter.' )
+    URL = URL + '&_export=1&GALAXY_URL=0'
+    CHUNK_SIZE = 2**20 # 1Mb
+    MAX_SIZE   = CHUNK_SIZE * 100
+    try:
+        page = urllib.urlopen(URL)
+    except Exception, exc:
+        stop_err('Problems connecting to %s (%s)' % (URL, exc) )
+    
+    fp = open(filename, 'w')
+    size = 0
+    max_size_exceeded = False
+    while 1:
+        chunk = page.read(CHUNK_SIZE)
+        if not chunk:
+            break
+        size += len(chunk)
+        if size > MAX_SIZE:
+            max_size_exceeded = True
+            break
+        fp.write(chunk)
+    fp.close()
+    
+    if max_size_exceeded:
+        print 'Maximum data size of 100 MB exceeded, incomplete data retrieval.'
+    
+if __name__ == "__main__":
+    __main__()
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart.xml
--- a/tools/data_source/biomart.xml Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart.xml Tue Sep 30 15:30:57 2008 -0400
@@ -1,14 +1,14 @@
 <?xml version="1.0"?>
 <tool name="BioMart" id="biomart">
-
  <description>Central server</description>
-
- <command/>
-
+ <command interpreter="python">
+        biomart.py
+        $output
+    </command>
  <inputs action="http://www.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
  <display>go to BioMart Central $GALAXY_URL</display>
  <param name="GALAXY_URL" type="baseurl" value="/tool_runner/biomart" />
- </inputs>
+    </inputs>
 
  <uihints minwidth="800"/>
 
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_filter.py
--- a/tools/data_source/biomart_filter.py Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart_filter.py Tue Sep 30 15:30:57 2008 -0400
@@ -14,41 +14,16 @@
         data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order )
     data = app.datatypes_registry.change_datatype(data, data_type)
     data.name = data_name
+    #store BIOMART parameters temporarily in output file
+    out = open(data.file_name,'w')
+    for key, value in param_dict.items():
+        print >> out, "%s\t%s" % (key,value)
+    out.close()    
     out_data[name] = data
+    
 
 def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
-    """Verifies the data after the run"""
-    URL = param_dict.get( 'URL', None )
-    if not URL:
-        raise Exception('Datasource has not sent back a URL parameter')
-    URL = URL + '&_export=1&GALAXY_URL=0'
-    CHUNK_SIZE = 2**20 # 1Mb
-    MAX_SIZE   = CHUNK_SIZE * 100
-    try:
-        page = urllib.urlopen(URL)
-    except Exception, exc:
-        raise Exception('Problems connecting to %s (%s)' % (URL, exc) )
     name, data = out_data.items()[0]
-    fp = open(data.file_name, 'wb')
-    size = 0
-    max_size_exceeded = False
-
-    while 1:
-        chunk = page.read(CHUNK_SIZE)
-        if not chunk:
-            break
-        size += len(chunk)
-        if size > MAX_SIZE:
-            max_size_exceeded = True
-            break
-        fp.write(chunk)
-    fp.close()
-    
-    if max_size_exceeded:
-        data.info = 'Maximum data size of 100 MB exceeded, incomplete data retrieval.'
-    else:
-        data.info = data.name
-
     if not isinstance(data.datatype, datatypes.interval.Bed) and isinstance(data.datatype, datatypes.interval.Interval):
         #Set meta data, format file to be valid interval type
         data.set_meta(first_line_is_header=True)
diff -r 1706aadf16b2 -r 931d6ca549d3 tools/data_source/biomart_test.xml
--- a/tools/data_source/biomart_test.xml Mon Sep 29 10:37:54 2008 -0400
+++ b/tools/data_source/biomart_test.xml Tue Sep 30 15:30:57 2008 -0400
@@ -3,7 +3,10 @@
 
  <description>Test server</description>
 
- <command/>
+ <command interpreter="python">
+        biomart.py
+        $output
+    </command>
 
  <inputs action="http://test.biomart.org/biomart/martview" check_values="false" method="get" target="_top">
  <display>go to BioMart Central $GALAXY_URL</display>