#!/usr/bin/env python

# This script removes dataset files from disk whose ids are no longer
# present in the database's "dataset" table.
# Takes up to 3 arguments:
# 1: database directory to clean
# 2: postgres database name
# 3 (optional): number of days to allow as a buffer, defaults to 2
# Example:
# python cleanup_datasets.py /home/universe/server-home/wsgi-postgres/database/files/ galaxy_test 2

import os
import subprocess
import sys
import tempfile
import time
# Dataset files on disk are named dataset_<id>.dat.
DATASET_PREFIX = "dataset_"
DATASET_SUFFIX = ".dat"
SECONDS_PER_DAY = 60 * 60 * 24


def parse_ids(lines):
    """Return the set of integer ids found in an iterable of text lines.

    Each line is stripped and converted with int(). Lines that are not
    integers (psql prints a column header, a separator and a row-count
    footer around the values) are reported and skipped.
    """
    ids = set()
    for line in lines:
        text = line.strip()
        try:
            ids.add(int(text))
        except ValueError:
            print("%s is not a valid id, skipping." % text)
    return ids


def fetch_dataset_ids(database_name):
    """Query the "dataset" table with psql and return its ids as a set.

    The command is run from an argument list, so the database name is
    never interpreted by a shell, and the result is read from psql's
    standard output rather than from a temporary file.

    Raises:
        OSError: if psql cannot be started.
        RuntimeError: if psql exits with a non-zero status. A failed or
            partial query must never be used to decide what to delete.
    """
    command = ["psql", "-d", database_name, "-c", "select id from dataset;"]
    print("Getting IDs: %s" % " ".join(command))
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, universal_newlines=True
    )
    output = process.communicate()[0]
    if process.returncode != 0:
        raise RuntimeError("psql exited with status %d" % process.returncode)
    return parse_ids(output.splitlines())


def dataset_id_from_name(file_name):
    """Return the integer id encoded in a dataset_<id>.dat file name.

    Returns None when the name does not follow that pattern or when the
    part between prefix and suffix is not an integer, so that unexpected
    files are left alone instead of aborting the run.
    """
    if not file_name.startswith(DATASET_PREFIX):
        return None
    if not file_name.endswith(DATASET_SUFFIX):
        return None
    middle = file_name[len(DATASET_PREFIX):-len(DATASET_SUFFIX)]
    try:
        return int(middle)
    except ValueError:
        return None


def remove_stale_datasets(database_dir, ids, num_days):
    """Delete dataset files under database_dir whose id is not in ids.

    A file is only removed when its ctime is more than num_days days in
    the past, which leaves a buffer for files that are newer than the id
    snapshot.

    Args:
        database_dir: directory tree to walk.
        ids: container of ids that must be kept (a set gives O(1) lookups).
        num_days: size of the buffer in days.

    Returns:
        A (number_of_files_deleted, total_bytes_deleted) tuple.
    """
    buffer_seconds = num_days * SECONDS_PER_DAY
    num_delete = 0
    file_size = 0
    for base_dir, _sub_dirs, file_names in os.walk(database_dir):
        for name in file_names:
            dataset_id = dataset_id_from_name(name)
            if dataset_id is None or dataset_id in ids:
                continue
            path = os.path.join(base_dir, name)
            # NOTE: on Unix getctime() is the last metadata change time,
            # not the creation time (see the os.path documentation).
            if time.time() > os.path.getctime(path) + buffer_seconds:
                file_size += os.path.getsize(path)
                os.unlink(path)
                num_delete += 1
    return num_delete, file_size


def main(argv):
    """Run the cleanup for the given argument vector; return the exit status.

    argv[1] is the directory to clean, argv[2] the postgres database name
    and the optional argv[3] the buffer in days (default 2).
    """
    try:
        database_dir = argv[1]
        database_name = argv[2]
    except IndexError:
        print("Usage: python %s path_to_files:/home/universe/server-home/wsgi-postgres/database/files/ database_name:galaxy_test [num_days_buffer:2]" % argv[0])
        # A usage error is a failure; report it through the exit status.
        return 1

    num_days = 2
    try:
        num_days = int(argv[3])
    except (IndexError, ValueError):
        print("Using Default of 2 days buffer on delete")

    try:
        ids = fetch_dataset_ids(database_name)
    except (OSError, RuntimeError) as err:
        print("Could not retrieve dataset IDs: %s" % err)
        return 1

    # An empty id list would make every dataset file look deleted.
    if not ids:
        print("Less than 1 IDs have been found! Deleting proccess has been canceled.")
        return 0

    print("-----%i IDs Retrieved -----" % len(ids))
    print("----- Checking database directory for deleted ids: %s -----" % database_dir)
    num_delete, file_size = remove_stale_datasets(database_dir, ids, num_days)

    print("%s bytes" % file_size)
    print("%s kilobytes" % (float(file_size) / 1024))
    print("%s Megabytes" % (float(file_size) / 1024 / 1024))
    print("%s Gigabytes" % (float(file_size) / 1024 / 1024 / 1024))
    print("%i files deleted" % num_delete)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))