root/cron/cleanup_datasets.py

Revision 13:d2cdc485fdcd, 2.3 kB (checked in by Dan Blankenberg <dan@bx.psu.edu>, 2 years ago)

Pushing my changes from james-wsgi branch onto the trunk.

Biomart doesn't work on the trunk yet (Before or after this commit).

The axt to lav tool won't be functional until multiple datasets per history item are available on trunk - although I could
make it generate only the lav file and no fasta files, in which case this wouldn't be an issue.

Line 
#!/usr/bin/env python

# This script removes dataset files whose ids are no longer in the database.
# Takes 3 arguments:
#   1: database directory to clean
#   2: postgres database name
#   3 (optional): number of days to allow as a buffer, defaults to 2
# python cleanup_datasets.py /home/universe/server-home/wsgi-postgres/database/files/ galaxy_test 2

import os
import subprocess
import sys
import tempfile
import time

DATASET_PREFIX = "dataset_"
DATASET_SUFFIX = ".dat"
DEFAULT_NUM_DAYS = 2
SECONDS_PER_DAY = 60 * 60 * 24


def parse_args(argv):
    """Return (database_dir, database_name, num_days) parsed from argv.

    Prints the usage line and exits when either required argument is
    missing.  A missing or non-integer third argument falls back to
    DEFAULT_NUM_DAYS with a notice.
    """
    try:
        database_dir = argv[1]
        database_name = argv[2]
    except IndexError:
        print("Usage: python %s path_to_files:/home/universe/server-home/wsgi-postgres/database/files/ database_name:galaxy_test [num_days_buffer:2]" % argv[0])
        # Exit status kept at 0 so existing cron wrappers see no change.
        sys.exit(0)
    num_days = DEFAULT_NUM_DAYS
    try:
        num_days = int(argv[3])
    except (IndexError, ValueError):
        print("Using Default of 2 days buffer on delete")
    return database_dir, database_name, num_days


def parse_ids(lines):
    """Return the set of integer ids found in an iterable of text lines.

    Lines that are not integers (psql prints a header, a rule and a row
    count around the data) are reported and skipped.
    """
    ids = set()
    for line in lines:
        text = line.strip()
        try:
            ids.add(int(text))
        except ValueError:
            print("%s is not a valid id, skipping." % text)
    return ids


def fetch_dataset_ids(database_name):
    """Return the set of dataset ids stored in the given postgres database.

    Runs psql with its output redirected to a temporary file.  Exits with
    status 1 when psql fails, so that an unreadable database can never be
    mistaken for "every file is orphaned".
    """
    # mkstemp creates the file atomically; closing a NamedTemporaryFile and
    # reusing its name would let another process claim the path in between.
    fd, id_filename = tempfile.mkstemp()
    os.close(fd)
    try:
        # Argument list (no shell), so the database name is never
        # interpreted by a shell.
        command = ["psql", "-d", database_name,
                   "-c", "select id from dataset;",
                   "-o", id_filename]
        print("Getting IDs: %s" % " ".join(command))
        if subprocess.call(command) != 0:
            print("psql failed; no files have been deleted.")
            sys.exit(1)
        id_file = open(id_filename, "r")
        try:
            return parse_ids(id_file)
        finally:
            id_file.close()
    finally:
        os.unlink(id_filename)


def dataset_id_from_filename(filename):
    """Return the integer id in a 'dataset_<id>.dat' name, or None.

    None is returned both for names that do not follow the pattern and for
    names whose middle part is not an integer; such files are never
    candidates for deletion.
    """
    if not (filename.startswith(DATASET_PREFIX)
            and filename.endswith(DATASET_SUFFIX)):
        return None
    stem = filename[len(DATASET_PREFIX):-len(DATASET_SUFFIX)]
    try:
        return int(stem)
    except ValueError:
        return None


def remove_orphaned_files(database_dir, live_ids, num_days):
    """Delete dataset files under database_dir whose id is not in live_ids.

    A file is only removed when os.path.getctime() reports a time more
    than num_days days in the past.  Returns (files_deleted, bytes_freed).
    """
    buffer_seconds = num_days * SECONDS_PER_DAY
    bytes_freed = 0
    files_deleted = 0
    for base_dir, _sub_dirs, filenames in os.walk(database_dir):
        for filename in filenames:
            dataset_id = dataset_id_from_filename(filename)
            if dataset_id is None or dataset_id in live_ids:
                continue
            path = os.path.join(base_dir, filename)
            try:
                if time.time() <= os.path.getctime(path) + buffer_seconds:
                    continue  # still inside the buffer window
                size = os.path.getsize(path)
                os.unlink(path)
            except OSError:
                # The file vanished or is not removable; keep going so one
                # bad file does not abort the whole cleanup.
                print("Could not remove %s, skipping." % path)
                continue
            # Count only after the unlink actually succeeded.
            files_deleted += 1
            bytes_freed += size
    return files_deleted, bytes_freed


def main():
    """Command-line entry point: fetch live ids, then purge orphaned files."""
    database_dir, database_name, num_days = parse_args(sys.argv)
    ids = fetch_dataset_ids(database_name)
    if not ids:
        # An empty id list would mark every file as orphaned; refuse to run.
        print("Less than 1 IDs have been found! Deleting proccess has been canceled.")
        sys.exit(0)
    print("-----%i IDs Retrieved -----" % len(ids))
    print("----- Checking database directory for deleted ids: %s -----" % database_dir)
    num_delete, file_size = remove_orphaned_files(database_dir, ids, num_days)
    print("%s bytes" % file_size)
    print("%s kilobytes" % (float(file_size) / 1024))
    print("%s Megabytes" % (float(file_size) / 1024 / 1024))
    print("%s Gigabytes" % (float(file_size) / 1024 / 1024 / 1024))
    print("%i files deleted" % num_delete)
    sys.exit(0)


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the browser.