[hg] galaxy 1508: Small update for maf stats tool.

classic Classic list List threaded Threaded
1 message Options
| Threaded
Open this post in threaded view
|

[hg] galaxy 1508: Small update for maf stats tool.

greg
details:   http://www.bx.psu.edu/hg/galaxy/rev/ec547440ec97
changeset: 1508:ec547440ec97
user:      Dan Blankenberg <[hidden email]>
date:      Tue Sep 16 13:25:42 2008 -0400
description:
Small update for maf stats tool.

2 file(s) affected in this change:

lib/galaxy/tools/util/maf_utilities.py
tools/maf/maf_stats.py

diffs (99 lines):

diff -r 842f1883cf53 -r ec547440ec97 lib/galaxy/tools/util/maf_utilities.py
--- a/lib/galaxy/tools/util/maf_utilities.py Mon Sep 15 15:04:41 2008 -0400
+++ b/lib/galaxy/tools/util/maf_utilities.py Tue Sep 16 13:25:42 2008 -0400
@@ -199,7 +199,7 @@
         yield block
 def get_chopped_blocks_with_index_offset_for_region( index, src, region, species = None, mincols = 0, force_strand = None ):
     for block, idx, offset in index.get_as_iterator_with_index_and_offset( src, region.start, region.end ):
-        block = chop_block_by_region( block, src, region, species, mincols )
+        block = chop_block_by_region( block, src, region, species, mincols, force_strand )
         if block is not None:
             yield block, idx, offset
 
@@ -209,6 +209,25 @@
     else: alignment = RegionAlignment( end - start, primary_species )
     return fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand, species, mincols )
 
+#reduces a block to only positions exisiting in the src provided
+def reduce_block_by_primary_genome( block, species, chromosome, region_start ):
+    #returns ( startIndex, {species:texts}
+    #where texts' contents are reduced to only positions existing in the primary genome
+    src = "%s.%s" % ( species, chromosome )
+    ref = block.get_component_by_src( src )
+    start_offset = ref.start - region_start
+    species_texts = {}
+    for c in block.components:
+        species_texts[ c.src.split( '.' )[0] ] = list( c.text )
+    #remove locations which are gaps in the primary species, starting from the downstream end
+    for i in range( len( species_texts[ species ] ) - 1, -1, -1 ):
+        if species_texts[ species ][i] == '-':
+            for text in species_texts.values():
+                text.pop( i )
+    for spec, text in species_texts.items():
+        species_texts[spec] = ''.join( text )
+    return ( start_offset, species_texts )
+
 #fills a region alignment
 def fill_region_alignment( alignment, index, primary_species, chrom, start, end, strand = '+', species = None, mincols = 0 ):
     region = bx.intervals.Interval( start, end )
@@ -216,22 +235,7 @@
     region.strand = strand
     primary_src = "%s.%s" % ( primary_species, chrom )
     
-    def reduce_block_by_primary_genome( block ):
-        #returns ( startIndex, {species:texts}
-        #where texts' contents are reduced to only positions existing in the primary genome
-        ref = block.get_component_by_src( primary_src )
-        start_offset = ref.start - start
-        species_texts = {}
-        for c in block.components:
-            species_texts[ c.src.split( '.' )[0] ] = list( c.text )
-        #remove locations which are gaps in the primary species, starting from the downstream end
-        for i in range( len( species_texts[ primary_species ] ) - 1, -1, -1 ):
-            if species_texts[ primary_species ][i] == '-':
-                for text in species_texts.values():
-                    text.pop( i )
-        for spec, text in species_texts.items():
-            species_texts[spec] = ''.join( text )
-        return ( start_offset, species_texts )
+
     
     #Order blocks overlaping this position by score, lowest first
     blocks = []
@@ -248,7 +252,7 @@
     for block_dict in blocks:
         block = chop_block_by_region( block_dict[1].get_at_offset( block_dict[2] ), primary_src, region, species, mincols, strand )
         if block is None: continue
-        start_offset, species_texts = reduce_block_by_primary_genome( block )
+        start_offset, species_texts = reduce_block_by_primary_genome( block, primary_species, chrom, start )
         for spec, text in species_texts.items():
             try:
                 alignment.set_range( start_offset, spec, text )
diff -r 842f1883cf53 -r ec547440ec97 tools/maf/maf_stats.py
--- a/tools/maf/maf_stats.py Mon Sep 15 15:04:41 2008 -0400
+++ b/tools/maf/maf_stats.py Tue Sep 16 13:25:42 2008 -0400
@@ -64,19 +64,11 @@
             for c in block.components:
                 spec = c.src.split( '.' )[0]
                 if spec not in coverage: coverage[spec] = zeros( region.end - region.start, dtype = bool )
-            ref = block.get_component_by_src( src )
-            #skip gap locations due to insertions in secondary species relative to primary species
-            start_offset = ref.start - region.start
-            num_gaps = 0
-            for i in range( len( ref.text.rstrip().rstrip( "-" ) ) ):
-                if ref.text[i] in ["-"]:
-                    num_gaps += 1
-                    continue
-                #Toggle base if covered
-                for comp in block.components:
-                    spec = comp.src.split( '.' )[0]
-                    if comp.text and comp.text[i] not in ['-']:
-                        coverage[spec][start_offset + i - num_gaps] = True
+            start_offset, alignment = maf_utilities.reduce_block_by_primary_genome( block, dbkey, region.chrom, region.start )
+            for i in range( len( alignment[dbkey] ) ):
+                for spec, text in alignment.items():
+                    if text[i] != '-':
+                        coverage[spec][start_offset + i] = True
         if summary:
             #record summary
             for key in coverage.keys():