[hg] galaxy 1533: Update to implicit converter discovery. Will n...

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1533: Update to implicit converter discovery. Will n...

Greg Von Kuster
details:   http://www.bx.psu.edu/hg/galaxy/rev/3830fefb07da
changeset: 1533:3830fefb07da
user:      Dan Blankenberg <[hidden email]>
date:      Fri Sep 26 14:22:37 2008 -0400
description:
Update to implicit converter discovery. Converters that create a datatype that is a subclass of the format required by the tool can now be used.

For example, if a tool input requires 'interval', and a datatype has a BED converter available but no interval converter, the BED converter can now be used.

5 file(s) affected in this change:

lib/galaxy/datatypes/data.py
lib/galaxy/datatypes/registry.py
lib/galaxy/model/__init__.py
lib/galaxy/tools/actions/__init__.py
lib/galaxy/tools/parameters/basic.py

diffs (138 lines):

diff -r 3b6771227a43 -r 3830fefb07da lib/galaxy/datatypes/data.py
--- a/lib/galaxy/datatypes/data.py Fri Sep 26 09:10:04 2008 -0400
+++ b/lib/galaxy/datatypes/data.py Fri Sep 26 14:22:37 2008 -0400
@@ -185,6 +185,10 @@
         """Returns available converters by type for this dataset"""
         return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
     
+    def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
+        """Returns ( target_ext, exisiting converted dataset )"""
+        return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
+    
     def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True ):
         """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
         converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )
diff -r 3b6771227a43 -r 3830fefb07da lib/galaxy/datatypes/registry.py
--- a/lib/galaxy/datatypes/registry.py Fri Sep 26 09:10:04 2008 -0400
+++ b/lib/galaxy/datatypes/registry.py Fri Sep 26 14:22:37 2008 -0400
@@ -233,3 +233,16 @@
         if target_ext in converters.keys():
             return converters[target_ext]
         return None
+    def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ):
+        """Returns ( target_ext, exisiting converted dataset )"""
+        for convert_ext in self.get_converters_by_datatype( dataset.ext ):
+            if isinstance( self.get_datatype_by_extension( convert_ext ), accepted_formats ):
+                datasets = dataset.get_converted_files_by_type( convert_ext )
+                if datasets:
+                    ret_data = datasets[0]
+                elif not converter_safe:
+                    continue
+                else:
+                    ret_data = None
+                return ( convert_ext, ret_data )
+        return ( None, None )
\ No newline at end of file
diff -r 3b6771227a43 -r 3830fefb07da lib/galaxy/model/__init__.py
--- a/lib/galaxy/model/__init__.py Fri Sep 26 09:10:04 2008 -0400
+++ b/lib/galaxy/model/__init__.py Fri Sep 26 14:22:37 2008 -0400
@@ -254,6 +254,10 @@
 
     def get_converter_types(self):
         return self.datatype.get_converter_types( self, datatypes_registry)
+    
+    def find_conversion_destination( self, accepted_formats, **kwd ):
+        """Returns ( target_ext, exisiting converted dataset )"""
+        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
     
     def copy( self, copy_children = False, parent_id = None ):
         des = HistoryDatasetAssociation( hid=self.hid, name=self.name, info=self.info, blurb=self.blurb, peek=self.peek, extension=self.extension, dbkey=self.dbkey, metadata=self._metadata, dataset = self.dataset, visible=self.visible, deleted=self.deleted, parent_id=parent_id, copied_from_history_dataset_association = self )
diff -r 3b6771227a43 -r 3830fefb07da lib/galaxy/tools/actions/__init__.py
--- a/lib/galaxy/tools/actions/__init__.py Fri Sep 26 09:10:04 2008 -0400
+++ b/lib/galaxy/tools/actions/__init__.py Fri Sep 26 14:22:37 2008 -0400
@@ -29,22 +29,21 @@
         def visitor( prefix, input, value, parent = None ):
             def process_dataset( data ):
                 if data and not isinstance( data.datatype, input.formats ):
-                    for target_ext in input.extensions:
-                        if target_ext in data.get_converter_types():
-                            data.refresh() #need to refresh incase this conversion just took place, i.e. input above in tool performed the same conversion
-                            datasets = data.get_converted_files_by_type( target_ext )
-                            if datasets: data = datasets[0]
-                            elif input.converter_safe( param_values, trans ):
-                                #run converter here
-                                assoc = trans.app.model.ImplicitlyConvertedDatasetAssociation( parent = data, file_type = target_ext, metadata_safe = False )
-                                new_data = data.datatype.convert_dataset( trans, data, target_ext, return_output = True, visible = False ).values()[0]
-                                new_data.hid = data.hid
-                                new_data.name = data.name
-                                new_data.flush()
-                                assoc.dataset = new_data
-                                assoc.flush()
-                                data = new_data
-                            break
+                    data.refresh() #need to refresh in case this conversion just took place, i.e. input above in tool performed the same conversion
+                    target_ext, converted_dataset = data.find_conversion_destination( input.formats, converter_safe = input.converter_safe( param_values, trans ) )
+                    if target_ext:
+                        if converted_dataset:
+                            data = converted_dataset
+                        else:
+                            #run converter here
+                            assoc = trans.app.model.ImplicitlyConvertedDatasetAssociation( parent = data, file_type = target_ext, metadata_safe = False )
+                            new_data = data.datatype.convert_dataset( trans, data, target_ext, return_output = True, visible = False ).values()[0]
+                            new_data.hid = data.hid
+                            new_data.name = data.name
+                            new_data.flush()
+                            assoc.dataset = new_data
+                            assoc.flush()
+                            data = new_data
                 return data
             if isinstance( input, DataToolParameter ):
                 if isinstance( value, list ):
diff -r 3b6771227a43 -r 3830fefb07da lib/galaxy/tools/parameters/basic.py
--- a/lib/galaxy/tools/parameters/basic.py Fri Sep 26 09:10:04 2008 -0400
+++ b/lib/galaxy/tools/parameters/basic.py Fri Sep 26 14:22:37 2008 -0400
@@ -1059,16 +1059,12 @@
                         selected = ( value and ( data in value ) )
                         field.add_option( "%s: %s" % ( hid, data.name[:30] ), data.id, selected )
                     else:
-                        for target_ext in self.extensions:
-                            if target_ext in data.get_converter_types():
-                                datasets = data.get_converted_files_by_type( target_ext )
-                                if datasets:
-                                    data = datasets[0]
-                                elif not self.converter_safe( other_values, trans ):
-                                    continue
-                                selected = ( value and ( data in value ) )
-                                field.add_option( "%s: (as %s) %s" % ( hid, target_ext, data.name[:30] ), data.id, selected )
-                                break #we only report the first valid converter, assume self.extensions is a priority list
+                        target_ext, converted_dataset = data.find_conversion_destination( self.formats, converter_safe = self.converter_safe( other_values, trans ) )
+                        if target_ext:
+                            if converted_dataset:
+                                data = converted_dataset
+                            selected = ( value and ( data in value ) )
+                            field.add_option( "%s: (as %s) %s" % ( hid, target_ext, data.name[:30] ), data.id, selected )
                 # Also collect children via association object
                 dataset_collector( data.children, hid )
         dataset_collector( history.datasets, None )
@@ -1111,9 +1107,9 @@
                 pass #no valid options
         def dataset_collector( datasets ):
             def is_convertable( dataset ):
-                for target_ext in self.extensions:
-                    if target_ext in data.get_converter_types():
-                        return True
+                target_ext, converted_dataset = dataset.find_conversion_destination( self.formats, converter_safe = True ) #need to assume converter_safe = True, since we don't know about other parameter values here
+                if target_ext is not None:
+                    return True
                 return False
             for i, data in enumerate( datasets ):
                 if data.visible and not data.deleted and data.state not in [data.states.ERROR] and ( isinstance( data.datatype, self.formats) or is_convertable( data ) ):
@@ -1184,7 +1180,7 @@
             return []
 
     def converter_safe( self, other_values, trans ):
-        if trans.workflow_building_mode:
+        if not hasattr( trans, 'workflow_building_mode' ) or trans.workflow_building_mode:
             return False
         converter_safe = [True]
         def visitor( prefix, input, value, parent = None ):