root/cron/check_galaxy.sh

Revision 1278:bf8211920d1d, 6.5 kB (checked in by Nate Coraor <nate@bx.psu.edu>, 8 months ago)

Forgot to getopt -n...

  • Property exe set to *
Line 
#!/bin/sh
#set -xv
#
# Runs the scripts/check_galaxy.py script in a way that's easy to handle from cron
#
# The check is run once; if it fails, the wrapper sleeps and runs it a second
# time, and only when the second run also fails are the -m / -p addresses
# mailed.  When a later run succeeds, everyone who was told about the outage
# is mailed an "okay" notice.  State (lock file, counters, and one marker
# file per notified address) lives under $HOME/.check_galaxy/<galaxy_host>.
#
# Exit status: 1 on a usage error, 0 on every other path (including
# configuration problems, which are only reported when -d is given).
#
# NOTE(review): backticks and expr are kept instead of $(...) / $((...)),
# presumably so the script keeps working under legacy Bourne shells (the
# SunOS case below suggests such systems are targets) -- confirm before
# modernizing.

# defaults (note: default sleep is below since it depends on debug)
DEBUG=0
STAGGER=0
INTERVAL=3
MAIL=
PAGE=
NEWHIST=
BADARG=0

# get commandline opts
while getopts dsi:l:m:p:n optname
do
    case $optname in
        d)  DEBUG=1 ;;
        s)  STAGGER=1 ;;
        i)  INTERVAL=$OPTARG ;;
        l)  SLEEP=$OPTARG ;;
        # -m and -p may be repeated; addresses accumulate as a
        # whitespace-separated list that is word-split later on purpose
        m)  MAIL="$MAIL $OPTARG" ;;
        p)  PAGE="$PAGE $OPTARG" ;;
        n)  NEWHIST="-n" ;;
        *)  BADARG=1 ;;
    esac
done
shift `expr $OPTIND - 1`

# The Galaxy host is the single remaining positional argument.  It is copied
# to a named variable because $1 means something else inside functions.
HOST=$1

if [ -z "$HOST" ] || [ "$BADARG" = 1 ]; then
    cat <<EOF
usage: `basename "$0"` [-dsn] [-i interval] [-l seconds] [-m email_address]+ [-p pager_address]+ <galaxy_host>
  -d            Print debugging information.
  -s            Stagger mailing the pagers/emails, instead of all at once when
                there's a problem.  Useful for running check_galaxy at night.
  -i <interval> The number of times this wrapper should execute before mailing
                the next address, when staggering is enabled.  Mail is sent
                every <interval> runs of the program, so the actual time
                between emails is:
                  time = (<interval>) * (how often wrapper runs from cron)
  -l <seconds>  This wrapper runs check_galaxy a second time if the first check
                fails, in case the problem is intermittent.  <seconds> is how
                many seconds to sleep between checks.
  -m <address>  Email addresses to send the full check_galaxy output to, if
                Galaxy is down.  Use multiple -m options to specify multiple
                addresses.  When staggering, email will be sent in the order
                which you specify -m options on the command line.
  -p <address>  Like -m, but sends just the last line of check_galaxy's output.
                Useful for pagers.  When staggering is enabled and both -m and
                -p options are present, the first -m address and the first -p
                address are mailed simultaneously, followed by the second -m
                and second -p, and so on.
  -n            Create a new history (passes the -n option to check_galaxy.py).
  <galaxy_host> The hostname of the Galaxy server to check.  Use a : if running
                on a non-80 port (e.g. galaxy.example.com:8080).
EOF
    exit 1
fi

# Default pause between the first and second check: short when debugging,
# a minute otherwise.  DEBUG is always "0" or "1", so it has to be compared
# explicitly -- a bare [ $DEBUG ] is true for both values.
if [ -z "$SLEEP" ]; then
    if [ "$DEBUG" = 1 ]; then
        SLEEP=2
    else
        SLEEP=60
    fi
fi

# globals
CRON_DIR=`dirname "$0"`
SCRIPTS_DIR="$CRON_DIR/../scripts"
CHECK_GALAXY="$SCRIPTS_DIR/check_galaxy.py"
VAR="$HOME/.check_galaxy"

# sanity
if [ ! -f "$CHECK_GALAXY" ]; then
    [ "$DEBUG" = 1 ] && echo "$CHECK_GALAXY is missing"
    exit 0
fi

# Do any other systems' default ps not take BSD ps args?
case `uname -s` in
    SunOS)  PS="/usr/ucb/ps" ;;
    *)      PS="ps" ;;
esac

# per-host state
NOTIFIED_MAIL="$VAR/$HOST/mail"     # one empty file per -m address already mailed
NOTIFIED_PAGE="$VAR/$HOST/page"     # one empty file per -p address already mailed
MUTEX="$VAR/$HOST/wrap.mutex"       # pid of the wrapper currently running
COUNT="$VAR/$HOST/wrap.count"       # runs skipped because the mutex was held
STAGGER_FILE="$VAR/$HOST/wrap.stagger"  # failed runs since the last staggered mail

for dir in "$VAR/$HOST" "$NOTIFIED_MAIL" "$NOTIFIED_PAGE"; do
    if [ ! -d "$dir" ]; then
        mkdir -p -m 0700 "$dir" || {
            [ "$DEBUG" = 1 ] && echo "unable to create dir: $dir"
            exit 0
        }
    fi
done

# Mail an "okay" notice to every address that was previously told HOST was
# down (one marker file per address), then remove the marker.
notify_recovered() {
    for file in "$NOTIFIED_MAIL"/* "$NOTIFIED_PAGE"/*; do
        # an unmatched glob is passed through literally when the dir is empty
        [ -f "$file" ] || continue
        recip=`basename "$file"`
        echo "$HOST is now okay" | mail -s "$HOST OK" "$recip"
        rm -f "$file"
        [ "$DEBUG" = 1 ] && echo "up: mailed $recip"
    done
}

if [ ! -f "$VAR/$HOST/login" ]; then
    [ "$DEBUG" = 1 ] && cat <<EOF
Please create the file:
  $VAR/$HOST/login
This should contain a username and password to log in to
Galaxy with, on one line, separated by whitespace, e.g.:

check_galaxy@example.com password

If the user does not exist, check_galaxy will create it
for you.
EOF
    exit 0
fi

# With no stagger file yet, start at INTERVAL so the first failure mails
# immediately.
if [ "$STAGGER" = 1 ]; then
    if [ -f "$STAGGER_FILE" ]; then
        STAGGER_COUNT=`cat "$STAGGER_FILE"`
    else
        STAGGER_COUNT=$INTERVAL
    fi
fi

# only run one at once
if [ -f "$MUTEX" ]; then
    pid=`cat "$MUTEX"`
    # an empty mutex file cannot name a live process; treat it as stale
    if [ -n "$pid" ] && $PS p "$pid" >/dev/null 2>&1; then
        if [ -f "$COUNT" ]; then
            count=`cat "$COUNT"`
        else
            count=0
        fi
        # Complain once, on the third consecutive skipped run, and only if
        # there is somebody to complain to.
        if [ "$count" -eq 3 ] && [ -n "$MAIL" ]; then
            echo "A check_galaxy process for $HOST has been running for an unusually long time.  Something is broken." \
                | mail -s "$HOST problems" $MAIL
        fi
        expr "$count" + 1 > "$COUNT"
        exit 0
    else
        # stale mutex
        rm -f "$MUTEX"
    fi
fi

rm -f "$COUNT"
echo $$ > "$MUTEX"

[ "$DEBUG" = 1 ] && echo "running first check"
# The output of the first run is captured only to keep it off stdout; just
# the exit status is used.  NEWHIST is deliberately unquoted so that it
# vanishes when empty.
first_try=`"$CHECK_GALAXY" $NEWHIST "$HOST" 2>&1`

if [ $? -ne 0 ]; then
    # if failure, wait and try again
    [ "$DEBUG" = 1 ] && echo "first check failed, sleeping $SLEEP seconds for second run"
    sleep "$SLEEP"
else
    # if successful
    [ "$DEBUG" = 1 ] && echo "first check succeeded"
    notify_recovered
    rm -f "$MUTEX" "$STAGGER_FILE"
    exit 0
fi

[ "$DEBUG" = 1 ] && echo "running second check"
second_try=`"$CHECK_GALAXY" $NEWHIST "$HOST" 2>&1`

if [ $? -ne 0 ]; then
    [ "$DEBUG" = 1 ] && echo "second check failed"
    if [ "$STAGGER" = 1 ]; then
        # -ge rather than -eq: if -i was lowered since the count was stored,
        # the stored count may already exceed INTERVAL and would otherwise
        # never match again.
        if [ "$STAGGER_COUNT" -ge "$INTERVAL" ]; then
            # send notification this run
            echo 1 > "$STAGGER_FILE"
        else
            # don't send notification this run
            [ "$DEBUG" = 1 ] && echo "$HOST is down, but it's not time to send an email.  STAGGER_COUNT was $STAGGER_COUNT"
            expr "$STAGGER_COUNT" + 1 > "$STAGGER_FILE"
            rm -f "$MUTEX"
            exit 0
        fi
    fi
    # Full output to the first not-yet-notified -m address (or to all of
    # them when not staggering).
    for recip in $MAIL; do
        if [ ! -f "$NOTIFIED_MAIL/$recip" ]; then
            mail -s "$HOST problems" "$recip" <<HERE
$second_try
HERE
            touch "$NOTIFIED_MAIL/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
    # Same for -p addresses, but only the last line of the output.
    for recip in $PAGE; do
        if [ ! -f "$NOTIFIED_PAGE/$recip" ]; then
            tail -1 <<HERE | mail -s "$HOST problems" "$recip"
$second_try
HERE
            touch "$NOTIFIED_PAGE/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
else
    [ "$DEBUG" = 1 ] && echo "second check succeeded"
    notify_recovered
    rm -f "$STAGGER_FILE"
fi

rm -f "$MUTEX"
exit 0
Note: See TracBrowser for help on using the browser.