[hg] galaxy 1537: add error msg for shrimp_wrapper when memory e...

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1537: add error msg for shrimp_wrapper when memory e...

Greg Von Kuster
details:   http://www.bx.psu.edu/hg/galaxy/rev/1dbd88fb9e57
changeset: 1537:1dbd88fb9e57
user:      wychung
date:      Tue Sep 30 16:40:42 2008 -0400
description:
add error msg for shrimp_wrapper when memory error and zero hits.
convert last column of megablast output to float.

3 file(s) affected in this change:

tools/metag_tools/megablast_wrapper.py
tools/metag_tools/shrimp_wrapper.py
tools/metag_tools/shrimp_wrapper.xml

diffs (111 lines):

diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/megablast_wrapper.py
--- a/tools/metag_tools/megablast_wrapper.py Tue Sep 30 16:17:12 2008 -0400
+++ b/tools/metag_tools/megablast_wrapper.py Tue Sep 30 16:40:42 2008 -0400
@@ -55,7 +55,9 @@
     chunk = db[(db_build)]
     megablast_command = "megablast -d %s -i %s -o %s -m 8 -a 8 -W %s -p %s -e %s -F %s > /dev/null 2>&1 " \
         % ( chunk, query_filename, mega_temp_output, mega_word_size, mega_iden_cutoff, mega_evalue_cutoff, mega_filter )
-        
+    
+    print megablast_command
+      
     try:
         os.system( megablast_command )
     except Exception, e:
@@ -67,8 +69,12 @@
         line = line.rstrip( '\r\n' )
         fields = line.split()
         try:
+            # get gi and length of that gi seq
             gi, gi_len = fields[1].split('_')
-            new_line = "%s\t%s\t%s\t%s" % ( fields[0], gi, gi_len, '\t'.join( fields[2:] ) )
+            # convert the last column (causing problem in filter tool) to float
+            fields[-1] = float(fields[-1])
+            
+            new_line = "%s\t%s\t%s\t%s\t%0.1f" % ( fields[0], gi, gi_len, '\t'.join( fields[2:-1] ), fields[-1] )
         except:
             new_line = line
             invalid_lines += 1
diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/shrimp_wrapper.py
--- a/tools/metag_tools/shrimp_wrapper.py Tue Sep 30 16:17:12 2008 -0400
+++ b/tools/metag_tools/shrimp_wrapper.py Tue Sep 30 16:40:42 2008 -0400
@@ -1,15 +1,21 @@
 #! /usr/bin/python
 
 """
+TODO
+1. decrease memory usage
+2. multi-fasta fastq file, ex. 454
+3. split reads into small chuncks?
+
 SHRiMP wrapper
 
 Inputs:
-    reference seq and reads
+1. reference seq
+2. reads
 
 Outputs:
-    table of 8 columns:
+1. table of 8 columns:
          chrom   ref_loc     read_id     read_loc    ref_nuc     read_nuc    quality     coverage
-    SHRiMP output
+2. SHRiMP output
         
 Parameters:
     -s    Spaced Seed                             (default: 111111011111)
@@ -37,13 +43,9 @@
 >7:2:1147:982/1 chr3    +   95338194    95338225    4   35  36  2700    9T7C14
 >7:2:587:93/1   chr3    +   14913541    14913577    1   35  36  2960    19--16
 
-Testing:
-%python shrimp_wrapper.py single ~/Desktop/shrimp_wrapper/phix_anc.fa tmp tmp1 ~/Desktop/shrimp_wrapper/phix.10.solexa.fastq
-%python shrimp_wrapper.py paired ~/Desktop/shrimp_wrapper/eca_ref_chrMT.fa tmp tmp1 ~/Desktop/shrimp_wrapper/eca.5.solexa_1.fastq ~/Desktop/shrimp_wrapper/eca.5.solexa_2.fastq
-
 """
 
-import os, sys, tempfile, os.path
+import os, sys, tempfile, os.path, re
 
 assert sys.version_info[:2] >= (2.4)
 
@@ -575,6 +577,29 @@
             if os.path.exists(query_qual_end1): os.remove(query_qual_end1)
             if os.path.exists(query_qual_end2): os.remove(query_qual_end2)
             stop_err(str(e))
+    
+    # check SHRiMP output: count number of lines
+    num_hits = 0
+    if shrimp_outfile:
+        for i, line in enumerate(file(shrimp_outfile)):
+            line = line.rstrip('\r\n')
+            if not line or line.startswith('#'): continue
+            try:
+                fields = line.split()
+                num_hits += 1
+            except Exception, e:
+                stop_err(str(e))
+                
+    if num_hits == 0:   # no hits generated
+        err_msg = ''
+        if shrimp_log:
+            for i, line in enumerate(file(shrimp_log)):
+                if line.startswith('error'):            # deal with memory error:
+                    err_msg += line                     # error: realloc failed: Cannot allocate memory
+                if re.search('Reads Matched', line):    # deal with zero hits
+                    if int(line[8:].split()[2]) == 0:
+                        err_msg = 'Zero hits found.\n'
+        stop_err('SHRiMP Failed due to:\n' + err_msg)
         
     # convert to table
     if type_of_reads == 'single':
diff -r 8ab38aa72998 -r 1dbd88fb9e57 tools/metag_tools/shrimp_wrapper.xml
--- a/tools/metag_tools/shrimp_wrapper.xml Tue Sep 30 16:17:12 2008 -0400
+++ b/tools/metag_tools/shrimp_wrapper.xml Tue Sep 30 16:40:42 2008 -0400
@@ -1,5 +1,5 @@
 <tool id="shrimp_wrapper" name="SHRiMP" version="1.0.0">
-  <description>SHort Read Mapping Package</description>
+  <description>: SHort Read Mapping Package</description>
   <command interpreter="python">
     #if     ($type_of_reads.single_or_paired=="single" and $param.skip_or_full=="skip"):#shrimp_wrapper.py $input_target $output1 $output2 $input_query
     #elif   ($type_of_reads.single_or_paired=="paired" and $param.skip_or_full=="skip"):#shrimp_wrapper.py $input_target $output1 $output2 $type_of_reads.input1,$type_of_reads.input2,$type_of_reads.insertion_size