[hg] galaxy 1577: Improve job error messaging, some fixes for setting job state, job info, dataset state, and dataset info when job ends in error.

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[hg] galaxy 1577: Improve job error messaging, some fixes for setting job state, job info, dataset state, and dataset info when job ends in error.

Greg Von Kuster
details:   http://www.bx.psu.edu/hg/galaxy/rev/0f4fd4c20cd6
changeset: 1577:0f4fd4c20cd6
user:      Greg Von Kuster <[hidden email]>
date:      Tue Oct 28 10:21:02 2008 -0400
description:
Improve job error messaging, some fixes for setting job state, job info, dataset state, and dataset info when job ends in error.

2 file(s) affected in this change:

lib/galaxy/jobs/__init__.py
templates/dataset/errors.tmpl

diffs (293 lines):

diff -r 8eec48aaca6e -r 0f4fd4c20cd6 lib/galaxy/jobs/__init__.py
--- a/lib/galaxy/jobs/__init__.py Mon Oct 27 17:03:50 2008 -0400
+++ b/lib/galaxy/jobs/__init__.py Tue Oct 28 10:21:02 2008 -0400
@@ -176,11 +176,17 @@
                         self.dispatcher.put( job )
                         log.debug( "job %d dispatched" % job.job_id)
                 elif job_state == JOB_DELETED:
-                    log.debug( "job %d deleted by user while still queued" % job.job_id )
+                    msg = "job %d deleted by user while still queued" % job.job_id
+                    job.info = msg
+                    log.debug( msg )
                 else:
-                    log.error( "unknown job state '%s' for job %d" % ( job_state, job.job_id ))
-            except:
-                log.exception( "failure running job %d" % job.job_id )
+                    msg = "unknown job state '%s' for job %d" % ( job_state, job.job_id )
+                    job.info = msg
+                    log.error( msg )
+            except Exception, e:
+                msg = "failure running job %d: %s" % ( job.job_id, str( e ) )
+                job.info = msg
+                log.exception( msg )
         # Update the waiting list
         self.waiting = new_waiting
         # If special (e.g. fair) scheduling is enabled, dispatch all jobs
@@ -194,9 +200,10 @@
                 except Empty:
                     # squeue is empty, so stop dispatching
                     break
-                except: # if something else breaks while dispatching
-                    job.fail( "failure dispatching job" )
-                    log.exception( "failure running job %d" % sjob.job_id )
+                except Exception, e: # if something else breaks while dispatching
+                    msg = "failure running job %d: %s" % ( sjob.job_id, str( e ) )
+                    job.fail( msg )
+                    log.exception( msg )
             
     def put( self, job_id, tool ):
         """Add a job to the queue (by job identifier)"""
@@ -301,7 +308,7 @@
         self.extra_filenames = extra_filenames
         return extra_filenames
         
-    def fail( self, message, exception=False ):
+    def fail( self, message, state=None, exception=False ):
         """
         Indicate job failure by setting state and message on all output
         datasets.
@@ -309,25 +316,26 @@
         job = model.Job.get( self.job_id )
         job.refresh()
         # if the job was deleted, don't fail it
-        if job.state == job.states.DELETED:
-            self.cleanup()
-            return
-        for dataset_assoc in job.output_datasets:
-            dataset = dataset_assoc.dataset
-            dataset.refresh()
-            dataset.state = dataset.states.ERROR
-            dataset.blurb = 'tool error'
-            dataset.info = message
-            dataset.set_size()
-            dataset.flush()
-        job.state = model.Job.states.ERROR
-        job.command_line = self.command_line
-        job.info = message
-        # If the failure is due to a Galaxy framework exception, save
-        # the traceback
-        if exception:
-            job.traceback = traceback.format_exc()
-        job.flush()
+        if not job.state == job.states.DELETED:
+            for dataset_assoc in job.output_datasets:
+                dataset = dataset_assoc.dataset
+                dataset.refresh()
+                dataset.state = dataset.states.ERROR
+                dataset.blurb = 'tool error'
+                dataset.info = message
+                dataset.set_size()
+                dataset.flush()
+            if state is not None:
+                job.state = state
+            else:
+                job.state = model.Job.states.ERROR
+            job.command_line = self.command_line
+            job.info = message
+            # If the failure is due to a Galaxy framework exception, save the traceback
+            if exception:
+                job.traceback = traceback.format_exc()
+            job.flush()
+        # If the job was deleted, just clean up
         self.cleanup()
         
     def change_state( self, state, info = False ):
@@ -371,16 +379,19 @@
         job.refresh()
         for dataset_assoc in job.input_datasets:
             idata = dataset_assoc.dataset
-            if not idata: continue
+            if not idata:
+                continue
             idata.refresh()
             idata.dataset.refresh() #we need to refresh the base Dataset, since that is where 'state' is stored
             # don't run jobs for which the input dataset was deleted
-            if idata.deleted == True:
-                self.fail( "input data %d was deleted before this job ran" % idata.hid )
+            if idata.deleted:
+                msg = "input data %d was deleted before this job started" % idata.hid
+                self.fail( msg, state=JOB_INPUT_DELETED )
                 return JOB_INPUT_DELETED
             # an error in the input data causes us to bail immediately
             elif idata.state == idata.states.ERROR:
-                self.fail( "error in input data %d" % idata.hid )
+                msg = "input data %d is in an error state" % idata.hid
+                self.fail( msg, state=JOB_INPUT_ERROR )
                 return JOB_INPUT_ERROR
             elif idata.state != idata.states.OK:
                 # need to requeue
@@ -467,8 +478,8 @@
                 os.remove( fname )
             if self.working_directory is not None:
                 os.rmdir( self.working_directory )
-        except:
-            log.exception( "Unable to cleanup job %s" % self.job_id )
+        except Exception, e:
+            log.exception( "Unable to cleanup job %s, exception: %s" % ( str( self.job_id ), str( e ) ) )
         
     def get_command_line( self ):
         return self.command_line
@@ -617,7 +628,7 @@
         job = model.Job.get( job_id )
         job.refresh()
         job.state = job.states.DELETED
-        job.info = "Job deleted by user before it completed."
+        job.info = "Job output deleted by user before job completed."
         job.flush()
         for dataset_assoc in job.output_datasets:
             dataset = dataset_assoc.dataset
@@ -630,7 +641,7 @@
                 dataset.deleted = True
                 dataset.blurb = 'deleted'
                 dataset.peek = 'Job deleted'
-                dataset.info = 'Job deleted by user before it completed'
+                dataset.info = 'Job output deleted by user before job completed'
                 dataset.flush()
 
     def put( self, job ):
diff -r 8eec48aaca6e -r 0f4fd4c20cd6 templates/dataset/errors.tmpl
--- a/templates/dataset/errors.tmpl Mon Oct 27 17:03:50 2008 -0400
+++ b/templates/dataset/errors.tmpl Tue Oct 28 10:21:02 2008 -0400
@@ -1,79 +1,69 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
+    <head>
+        <title>Dataset generation errors</title>
+        <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
+        <link href="/static/style/base.css" rel="stylesheet" type="text/css" />
+        <style>
+            pre
+            {
+                background: white;
+                color: black;
+                border: dotted black 1px;
+                overflow: auto;
+                padding: 10px;
+            }
+        </style>
+    </head>
 
-<head>
-<title>Dataset generation errors</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
-<link href="/static/style/base.css" rel="stylesheet" type="text/css" />
-<style>
-pre
-{
-    background: white;
-    color: black;
-    border: dotted black 1px;
-    overflow: auto;
-    padding: 10px;
-}
-</style>
-</head>
+    <body>
+        <h2>Dataset generation errors</h2>
+        <p><b>Dataset $dataset.hid: $dataset.display_name</b></p>
 
-<body>
-  
-  <h2>Dataset generation errors</h2>
-
-  <p><b>Dataset $dataset.hid: $dataset.display_name</b></p>
-  
-  #if $dataset.creating_job_associations
-  
-    #set job = $dataset.creating_job_associations[0].job
-  
-    #if job.traceback
-      The Galaxy framework encountered the following error while attempting
-      to run the tool:
+        #if $dataset.creating_job_associations
+            #set job = $dataset.creating_job_associations[0].job
+            #if job.traceback
+                The Galaxy framework encountered the following error while attempting to run the tool:
+                <pre>${job.traceback}</pre>
+            #end if
+            #if $job.stderr or $job.info
+                Tool execution generated the following error message:
+                #if $job.stderr
+                    <pre>${job.stderr}</pre>
+                #elif $job.info
+                    <pre>${job.info}</pre>
+                #end if
+            #else
+                Tool execution did not generate any error messages.
+            #end if
+            #if $job.stdout
+                The tool produced the following additional output:
+                <pre>${job.stdout}</pre>
+            #end if
+        #else
+            The tool did not create any additional job / error info.
+        #end if
       
-      <pre>${job.traceback}</pre>
-  
-    #end if
-  
-    #if $job.stderr
-      Tool execution generated the following error message:
-      <pre>${job.stderr}</pre>
-    #else
-      Tool execution did not generate any error messages.
-    #end if
-
-    #if $job.stdout
-      The tool produced the following additional output:
-      <pre>${job.stdout}</pre>
-    #end if
-
-  #else
-  
-    The tool did not create any additional job / error info.
-  
-  #end if
-  
-  <h2>Report this error to the Galaxy Team</h2>
-  
-  <p>The Galaxy team regularly reviews errors that occur in the application.
-  However, if you would like to provide additional information (such as
-  what you were trying to do when the error occurred) and a contact e-mail
-  address, we will be better able to investigate your problem and get back
-  to you.</p>
-  
-  <div class="toolForm">
-    <div class="toolFormTitle">Error Report</div>
-    <div class="toolFormBody">
-    <form name="report_error" action="${h.url_for( action='report_error')}" method="post" >
-      <input type="hidden" name="id" value="$dataset.id" />
-      <table>
-        <tr valign="top"><td>Your Email:</td><td><input type="text" name="email" size="40" /></td></tr>
-        <tr valign="top"><td>Message:</td><td><textarea name="message", rows="10" cols="40" /></textarea></td></tr>
-   <tr><td></td><td><input type="submit" value="Report">
-      </table>
-    </form>
-    </div>
-  </div>
-  
-</body>
+        <h2>Report this error to the Galaxy Team</h2>
+        <p>
+            The Galaxy team regularly reviews errors that occur in the application.
+            However, if you would like to provide additional information (such as
+            what you were trying to do when the error occurred) and a contact e-mail
+            address, we will be better able to investigate your problem and get back
+            to you.
+        </p>
+        <div class="toolForm">
+            <div class="toolFormTitle">Error Report</div>
+            <div class="toolFormBody">
+                <form name="report_error" action="${h.url_for( action='report_error')}" method="post" >
+                    <input type="hidden" name="id" value="$dataset.id" />
+                    <table>
+                        <tr valign="top"><td>Your Email:</td><td><input type="text" name="email" size="40" /></td></tr>
+                        <tr valign="top"><td>Message:</td><td><textarea name="message", rows="10" cols="40" /></textarea></td></tr>
+                        <tr><td></td><td><input type="submit" value="Report">
+                    </table>
+                </form>
+            </div>
+      </div>
+    </body>
 </html>