[hg] galaxy 1509: Rewrote "Compare two queries" tool in Python.

classic Classic list List threaded Threaded
1 message Options
| Threaded
Open this post in threaded view
|

[hg] galaxy 1509: Rewrote "Compare two queries" tool in Python.

greg
details:   http://www.bx.psu.edu/hg/galaxy/rev/eb941905fd70
changeset: 1509:eb941905fd70
user:      guru
date:      Tue Sep 16 14:09:16 2008 -0400
description:
Rewrote "Compare two queries" tool in Python.

2 file(s) affected in this change:

tools/filters/compare.xml
tools/filters/joinWrapper.py

diffs (68 lines):

diff -r ec547440ec97 -r eb941905fd70 tools/filters/compare.xml
--- a/tools/filters/compare.xml Tue Sep 16 13:25:42 2008 -0400
+++ b/tools/filters/compare.xml Tue Sep 16 14:09:16 2008 -0400
@@ -1,6 +1,6 @@
 <tool id="comp1" name="Compare two Queries">
   <description>to find common or distinct rows</description>
-  <command interpreter="perl">joinWrapper.pl $input1 $input2 $field1 $field2 $mode "Y" $out_file1</command>
+  <command interpreter="python">joinWrapper.py $input1 $input2 $field1 $field2 $mode $out_file1</command>
   <inputs>
     <param format="tabular" name="input1" type="data" label="Compare"/>
     <param name="field1" label="Using column" type="data_column" data_ref="input1" />
diff -r ec547440ec97 -r eb941905fd70 tools/filters/joinWrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/filters/joinWrapper.py Tue Sep 16 14:09:16 2008 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+#Guruprasad Ananda
+"""
+This tool provides the UNIX "join" functionality.
+"""
+import sys, os, tempfile
+
+def stop_err(msg):  # Report a fatal error: write msg to stderr, then terminate the script.
+    sys.stderr.write(msg)  # msg is written exactly as given; no newline is appended here
+    sys.exit()  # NOTE(review): called with no argument, so the process exit status is 0 even on this error path
+
+def main():  # Sort both inputs on their join columns, run UNIX join on them, and write the result to outfile.
+    infile1 = sys.argv[1]  # path of the first dataset
+    infile2 = sys.argv[2]  # path of the second dataset
+    field1 = int(sys.argv[3])  # column number in infile1; passed to sort -k and join -1
+    field2 = int(sys.argv[4])  # column number in infile2; passed to sort -k and join -2
+    mode =sys.argv[5]  # "V" selects join -v 1; any other value selects a plain join
+    outfile = sys.argv[6]  # path the shell pipeline redirects its output to
+    
+    tmpfile1 = tempfile.NamedTemporaryFile()  # its .name serves as the sort -o target and later as a join input
+    tmpfile2 = tempfile.NamedTemporaryFile()  # same role for the second dataset
+    
+    try:
+        #Sort the two files based on specified fields
+        os.system("sort -k %d -o %s %s" %(field1, tmpfile1.name, infile1))  # paths are interpolated unquoted into a shell command line
+        os.system("sort -k %d -o %s %s" %(field2, tmpfile2.name, infile2))  # return value (exit status) is not checked
+    except Exception, exc:  # NOTE(review): os.system returns the exit status instead of raising when the command fails -- confirm this handler is reachable
+        stop_err( 'Initialization error -> %s' %str(exc) )
+        
+    option = ""  # becomes the join -o field list "1.1,1.2,...,1.N"; stays empty if no non-blank line is found
+    for line in file(tmpfile1.name):  # file() is the Python 2 builtin for opening a file
+        line = line.strip()  # strip() also removes leading/trailing tabs before the split below
+        if line:
+            elems = line.split('\t')  # tab-separated columns of the first non-blank sorted line
+            for j in range(1,len(elems)+1):  # j is the 1-based column index used in the "1.j" specifiers
+                if j == 1:
+                    option = "1.1"
+                else:
+                    option = option + ",1." + str(j)
+            break  # stop after the first non-blank line; later lines are not examined
+    
+    if mode == "V":
+        cmdline = 'join -v 1 -o %s -1 %d -2 %d %s %s | tr " " "\t" > %s' %(option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)  # -v 1: emit lines of file 1 that have no match in file 2
+    else:
+        cmdline = 'join -o %s -1 %d -2 %d %s %s | tr " " "\t" > %s' %(option, field1, field2, tmpfile1.name, tmpfile2.name, outfile)  # paired lines only; -o lists file 1 columns, and tr turns every space in the output into a tab
+    
+    try:
+        os.system(cmdline)  # runs the whole pipeline through the shell; exit status is not checked
+    except Exception, exj:  # NOTE(review): as above, a failing join does not raise here -- confirm
+        stop_err('Error joining the two datasets -> %s' %str(exj))
+      
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+    main()