Changeset 1581:4841a9e393c7
- Timestamp:
- 10/28/08 14:31:02
(2 months ago)
- Author:
- Greg Von Kuster <greg@bx.psu.edu>
- branch:
- default
- Message:
Purge metadata files associated with a dataset when the dataset is purged. Also remembered log.exception logs the exception, so corrected a few things in jobs.init.
-
Files:
-
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
| r1578 |
r1581 |
|
| 63 | 63 | self.use_policy = False |
|---|
| 64 | 64 | log.info("Scheduler policy not defined as expected, defaulting to FIFO") |
|---|
| 65 | | except AttributeError, detail : # try may throw AttributeError |
|---|
| | 65 | except AttributeError, detail: # try may throw AttributeError |
|---|
| 66 | 66 | self.use_policy = False |
|---|
| 67 | 67 | log.exception("Error while loading scheduler policy class, defaulting to FIFO") |
|---|
| … | … | |
| 118 | 118 | try: |
|---|
| 119 | 119 | self.monitor_step() |
|---|
| 120 | | except Exception, e: |
|---|
| 121 | | log.exception( "Exception in monitor_step: %s" % str( e ) ) |
|---|
| | 120 | except: |
|---|
| | 121 | log.exception( "Exception in monitor_step" ) |
|---|
| 122 | 122 | # Sleep |
|---|
| 123 | 123 | self.sleeper.sleep( 1 ) |
|---|
| … | … | |
| 185 | 185 | log.error( msg ) |
|---|
| 186 | 186 | except Exception, e: |
|---|
| 187 | | msg = "failure running job %d: %s" % ( job.job_id, str( e ) ) |
|---|
| 188 | | job.info = msg |
|---|
| 189 | | log.exception( msg ) |
|---|
| | 187 | job.info = "failure running job %d: %s" % ( job.job_id, str( e ) ) |
|---|
| | 188 | log.exception( "failure running job %d" % job.job_id ) |
|---|
| 190 | 189 | # Update the waiting list |
|---|
| 191 | 190 | self.waiting = new_waiting |
|---|
| … | … | |
| 202 | 201 | break |
|---|
| 203 | 202 | except Exception, e: # if something else breaks while dispatching |
|---|
| 204 | | msg = "failure running job %d: %s" % ( sjob.job_id, str( e ) ) |
|---|
| 205 | | job.fail( msg ) |
|---|
| 206 | | log.exception( msg ) |
|---|
| | 203 | job.fail( "failure running job %d: %s" % ( sjob.job_id, str( e ) ) ) |
|---|
| | 204 | log.exception( "failure running job %d" % sjob.job_id ) |
|---|
| 207 | 205 | |
|---|
| 208 | 206 | def put( self, job_id, tool ): |
|---|
| … | … | |
| 474 | 472 | if self.working_directory is not None: |
|---|
| 475 | 473 | os.rmdir( self.working_directory ) |
|---|
| 476 | | except Exception, e: |
|---|
| 477 | | log.exception( "Unable to cleanup job %s, exception: %s" % ( str( self.job_id ), str( e ) ) ) |
|---|
| | 474 | except: |
|---|
| | 475 | log.exception( "Unable to cleanup job %d" % self.job_id ) |
|---|
| 478 | 476 | |
|---|
| 479 | 477 | def get_command_line( self ): |
|---|
| … | … | |
| 574 | 572 | try: |
|---|
| 575 | 573 | self.monitor_step() |
|---|
| 576 | | except Exception, e: |
|---|
| 577 | | log.exception( "Exception in monitor_step: %s" % str( e ) ) |
|---|
| | 574 | except: |
|---|
| | 575 | log.exception( "Exception in monitor_step" ) |
|---|
| 578 | 576 | # Sleep |
|---|
| 579 | 577 | self.sleeper.sleep( 1 ) |
|---|
| r1407 |
r1581 |
|
| 48 | 48 | h = app.model.History |
|---|
| 49 | 49 | d = app.model.Dataset |
|---|
| | 50 | m = app.model.MetadataFile |
|---|
| 50 | 51 | cutoff_time = datetime.utcnow() - timedelta( days=options.days ) |
|---|
| 51 | 52 | now = strftime( "%Y-%m-%d %H:%M:%S" ) |
|---|
| … | … | |
| 64 | 65 | else: |
|---|
| 65 | 66 | print "# Datasets will NOT be removed from disk...\n" |
|---|
| 66 | | purge_histories( h, d, cutoff_time, options.remove_from_disk ) |
|---|
| | 67 | purge_histories( h, d, m, cutoff_time, options.remove_from_disk ) |
|---|
| 67 | 68 | elif options.info_purge_datasets: |
|---|
| 68 | 69 | info_purge_datasets( d, cutoff_time ) |
|---|
| … | … | |
| 72 | 73 | else: |
|---|
| 73 | 74 | print "# Datasets will NOT be removed from disk...\n" |
|---|
| 74 | | purge_datasets( d, cutoff_time, options.remove_from_disk ) |
|---|
| | 75 | purge_datasets( d, m, cutoff_time, options.remove_from_disk ) |
|---|
| 75 | 76 | sys.exit(0) |
|---|
| 76 | 77 | |
|---|
| … | … | |
| 80 | 81 | dataset_count = 0 |
|---|
| 81 | 82 | where = ( h.table.c.user_id==None ) & ( h.table.c.deleted=='f' ) & ( h.table.c.update_time < cutoff_time ) |
|---|
| 82 | | histories = h.query().filter( where ).options( eagerload( 'active_datasets' ) ) |
|---|
| | 83 | histories = h.query().filter( where ).options( eagerload( 'active_datasets' ) ).all() |
|---|
| 83 | 84 | |
|---|
| 84 | 85 | print '# The following datasets and associated userless histories will be deleted' |
|---|
| … | … | |
| 106 | 107 | print '# The following datasets and associated userless histories have been deleted' |
|---|
| 107 | 108 | start = time.clock() |
|---|
| 108 | | histories = h.query().filter( h_where ).options( eagerload( 'active_datasets' ) ) |
|---|
| | 109 | histories = h.query().filter( h_where ).options( eagerload( 'active_datasets' ) ).all() |
|---|
| 109 | 110 | for history in histories: |
|---|
| 110 | 111 | for dataset_assoc in history.active_datasets: |
|---|
| … | … | |
| 112 | 113 | # Mark all datasets as deleted |
|---|
| 113 | 114 | d_where = ( d.table.c.id==dataset_assoc.dataset_id ) |
|---|
| 114 | | datasets = d.query().filter( d_where ) |
|---|
| | 115 | datasets = d.query().filter( d_where ).all() |
|---|
| 115 | 116 | for dataset in datasets: |
|---|
| 116 | 117 | if not dataset.deleted: |
|---|
| … | … | |
| 140 | 141 | print '# The following datasets and associated deleted histories will be purged' |
|---|
| 141 | 142 | start = time.clock() |
|---|
| 142 | | histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ) |
|---|
| | 143 | histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ).all() |
|---|
| 143 | 144 | for history in histories: |
|---|
| 144 | 145 | for dataset_assoc in history.datasets: |
|---|
| … | … | |
| 146 | 147 | if dataset_assoc.deleted: |
|---|
| 147 | 148 | d_where = ( d.table.c.id==dataset_assoc.dataset_id ) |
|---|
| 148 | | datasets = d.query().filter( d_where ) |
|---|
| | 149 | datasets = d.query().filter( d_where ).all() |
|---|
| 149 | 150 | for dataset in datasets: |
|---|
| 150 | 151 | if dataset.purgable and not dataset.purged: |
|---|
| … | … | |
| 161 | 162 | print "Elapsed time: ", stop - start, "\n" |
|---|
| 162 | 163 | |
|---|
| 163 | | def purge_histories( h, d, cutoff_time, remove_from_disk ): |
|---|
| | 164 | def purge_histories( h, d, m, cutoff_time, remove_from_disk ): |
|---|
| 164 | 165 | # Purges deleted histories whose update_time is older than the cutoff_time. |
|---|
| 165 | 166 | # The datasets associated with each history are also purged. |
|---|
| … | … | |
| 173 | 174 | print '# The following datasets and associated deleted histories have been purged' |
|---|
| 174 | 175 | start = time.clock() |
|---|
| 175 | | histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ) |
|---|
| | 176 | histories = h.query().filter( h_where ).options( eagerload( 'datasets' ) ).all() |
|---|
| 176 | 177 | for history in histories: |
|---|
| 177 | 178 | errors = False |
|---|
| … | … | |
| 179 | 180 | if dataset_assoc.deleted: |
|---|
| 180 | 181 | d_where = ( d.table.c.id==dataset_assoc.dataset_id ) |
|---|
| 181 | | datasets = d.query().filter( d_where ) |
|---|
| | 182 | datasets = d.query().filter( d_where ).all() |
|---|
| 182 | 183 | for dataset in datasets: |
|---|
| 183 | 184 | if dataset.purgable and not dataset.purged: |
|---|
| … | … | |
| 187 | 188 | if remove_from_disk: |
|---|
| 188 | 189 | dataset.flush() |
|---|
| 189 | | errmsg = purge_dataset( dataset ) |
|---|
| | 190 | errmsg = purge_dataset( dataset, m ) |
|---|
| 190 | 191 | if errmsg: |
|---|
| 191 | 192 | errors = True |
|---|
| … | … | |
| 197 | 198 | dataset.flush() |
|---|
| 198 | 199 | print "%s" % dataset.file_name |
|---|
| | 200 | # Mark all associated MetadataFiles as deleted and purged |
|---|
| | 201 | print "The following metadata files associated with dataset '%s' have been marked purged" % dataset.file_name |
|---|
| | 202 | for hda in dataset.history_associations: |
|---|
| | 203 | for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all(): |
|---|
| | 204 | metadata_file.deleted = True |
|---|
| | 205 | metadata_file.purged = True |
|---|
| | 206 | metadata_file.flush() |
|---|
| | 207 | print "%s" % metadata_file.file_name() |
|---|
| 199 | 208 | dataset_count += 1 |
|---|
| 200 | 209 | try: |
|---|
| … | … | |
| 219 | 228 | print '# The following deleted datasets will be purged' |
|---|
| 220 | 229 | start = time.clock() |
|---|
| 221 | | datasets = d.query().filter( where ) |
|---|
| | 230 | datasets = d.query().filter( where ).all() |
|---|
| 222 | 231 | for dataset in datasets: |
|---|
| 223 | 232 | print "%s" % dataset.file_name |
|---|
| … | … | |
| 231 | 240 | print "Elapsed time: ", stop - start, "\n" |
|---|
| 232 | 241 | |
|---|
| 233 | | def purge_datasets( d, cutoff_time, remove_from_disk ): |
|---|
| | 242 | def purge_datasets( d, m, cutoff_time, remove_from_disk ): |
|---|
| 234 | 243 | # Purges deleted datasets whose update_time is older than cutoff_time. Files may or may |
|---|
| 235 | 244 | # not be removed from disk. |
|---|
| … | … | |
| 241 | 250 | print '# The following deleted datasets have been purged' |
|---|
| 242 | 251 | start = time.clock() |
|---|
| 243 | | datasets = d.query().filter( where ) |
|---|
| | 252 | datasets = d.query().filter( where ).all() |
|---|
| 244 | 253 | for dataset in datasets: |
|---|
| 245 | 254 | file_size = dataset.file_size |
|---|
| 246 | 255 | if remove_from_disk: |
|---|
| 247 | | errmsg = purge_dataset( dataset ) |
|---|
| | 256 | errmsg = purge_dataset( dataset, m ) |
|---|
| 248 | 257 | if errmsg: |
|---|
| 249 | 258 | print errmsg |
|---|
| … | … | |
| 256 | 265 | dataset.flush() |
|---|
| 257 | 266 | print "%s" % dataset.file_name |
|---|
| | 267 | # Mark all associated MetadataFiles as deleted and purged |
|---|
| | 268 | print "The following metadata files associated with dataset '%s' have been marked purged" % dataset.file_name |
|---|
| | 269 | for hda in dataset.history_associations: |
|---|
| | 270 | for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all(): |
|---|
| | 271 | metadata_file.deleted = True |
|---|
| | 272 | metadata_file.purged = True |
|---|
| | 273 | metadata_file.flush() |
|---|
| | 274 | print "%s" % metadata_file.file_name() |
|---|
| 258 | 275 | dataset_count += 1 |
|---|
| 259 | 276 | try: |
|---|
| … | … | |
| 267 | 284 | print "Elapsed time: ", stop - start, "\n" |
|---|
| 268 | 285 | |
|---|
| 269 | | def purge_dataset( dataset ): |
|---|
| | 286 | def purge_dataset( dataset, m ): |
|---|
| 270 | 287 | # Removes the file from disk and updates the database accordingly. |
|---|
| 271 | 288 | if dataset.deleted: |
|---|
| 272 | 289 | # Remove files from disk and update the database |
|---|
| 273 | | purgable = False |
|---|
| 274 | 290 | try: |
|---|
| 275 | 291 | dataset.purged = True |
|---|
| … | … | |
| 285 | 301 | break #only purge when not shared |
|---|
| 286 | 302 | else: |
|---|
| | 303 | # Remove dataset file from disk |
|---|
| 287 | 304 | os.unlink( dataset.file_name ) |
|---|
| 288 | | purgable = True |
|---|
| | 305 | # Mark all associated MetadataFiles as deleted and purged and remove them from disk |
|---|
| | 306 | print "The following metadata files associated with dataset '%s' have been purged" % dataset.file_name |
|---|
| | 307 | for hda in dataset.history_associations: |
|---|
| | 308 | for metadata_file in m.filter( m.table.c.hda_id==hda.id ).all(): |
|---|
| | 309 | os.unlink( metadata_file.file_name() ) |
|---|
| | 310 | metadata_file.deleted = True |
|---|
| | 311 | metadata_file.purged = True |
|---|
| | 312 | metadata_file.flush() |
|---|
| | 313 | print "%s" % metadata_file.file_name() |
|---|
| | 314 | try: |
|---|
| | 315 | # Remove associated extra files from disk if they exist |
|---|
| | 316 | os.unlink( dataset.extra_files_path ) |
|---|
| | 317 | except: |
|---|
| | 318 | pass |
|---|
| 289 | 319 | except Exception, exc: |
|---|
| 290 | 320 | return "# Error, exception: %s caught attempting to purge %s\n" %( str( exc ), dataset.file_name ) |
|---|
| 291 | | try: |
|---|
| 292 | | if purgable: |
|---|
| 293 | | os.unlink( dataset.extra_files_path ) |
|---|
| 294 | | except: |
|---|
| 295 | | pass |
|---|
| 296 | 321 | else: |
|---|
| 297 | 322 | return "# Error: '%s' has not previously been deleted, so it cannot be purged\n" %dataset.file_name |
|---|