source: trunk/server/common/oursrc/hacron/hacron @ 2540

Last change on this file since 2540 was 1468, checked in by gdb, 14 years ago
Fixed the exception type problem in this code; fixed up remove-servers.
  • Property svn:executable set to *
File size: 10.9 KB
Line 
1#!/usr/bin/env python
2from __future__ import with_statement
3import glob
4import logging.handlers
5import fcntl
6import optparse
7import os
8import socket
9import shutil
10import subprocess
11import sys
12import time
13from os import path
14
# Exit statuses returned by the command handlers and handed to sys.exit().
OCF_SUCCESS = 0
OCF_ERR_GENERIC = 1
OCF_ERR_ARGS = 2
OCF_ERR_UNIMPLEMENTED = 3
OCF_ERR_PERM = 4
OCF_ERR_INSTALLED = 5
OCF_ERR_CONFIGURED = 6
OCF_NOT_RUNNING = 7

# Logger shared by every function in this script.
logger = logging.getLogger('cron')

# True only when the HA_LOGD environment variable is exactly 'yes'; selects
# HaLogHandler over syslog when logging is configured.
HA_LOGD = (os.environ.get('HA_LOGD') == 'yes')
27
class HacronError(Exception):
    """Failure that carries the exit status the script should return.

    Attributes:
      ocf_errno: one of the OCF_* exit statuses.
      msg: human-readable description of the failure.

    Constructing the exception also logs msg at ERROR level on the
    module logger.
    """
    def __init__(self, ocf_errno, msg='Something went wrong'):
        # Hand msg to the base class so str(e) and tracebacks show it
        # instead of an empty string.
        Exception.__init__(self, msg)
        self.ocf_errno = ocf_errno
        self.msg = msg
        logger.error(msg)
33   
class HaLogHandler(logging.Handler):
    """
    A handler class which writes to ha_logger.

    Each record is formatted and passed to /usr/sbin/ha_logger as one
    message, tagged with ha_tag.
    """
    def __init__(self, ha_tag):
        """
        Initialize the handler.  ha_tag is the name of this resource.
        """
        logging.Handler.__init__(self)
        self.ha_tag = ha_tag

    def emit(self, record):
        """
        Emit a record.

        Nothing is written to stdout here.  Any failure to format the
        record or to run ha_logger is routed to handleError() rather
        than raised.
        """
        try:
            msg = self.format(record)
            subprocess.call(['/usr/sbin/ha_logger', '-t', self.ha_tag, msg])
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they still propagate to the caller.
            self.handleError(record)
58
class lock(object):
    """Context manager that holds an exclusive flock() on a lock file.

    The file is created (if needed) when the object is constructed; the
    lock is taken on entry to the with-block and dropped on exit.

    Raises IOError from the constructor if the file cannot be created.
    """
    def __init__(self, filename):
        self.filename = filename
        self._file = None
        if not _touch(filename):
            raise IOError('Could not create lock file %s' % filename)

    def __enter__(self):
        lockfile = open(self.filename)
        try:
            fcntl.flock(lockfile, fcntl.LOCK_EX)
        except Exception:
            lockfile.close()
            raise
        # Keep a reference for the whole with-block: closing the file
        # (which happens as soon as it is garbage collected) releases
        # the flock.
        self._file = lockfile
        return self

    def __exit__(self, type, value, traceback):
        # Unlock the very handle that was locked in __enter__, then
        # close it so the descriptor is not leaked.
        lockfile, self._file = self._file, None
        if lockfile is not None:
            try:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
            finally:
                lockfile.close()
72       
73def _touch(path):
74    """Effectively touches a file.  Returns true if successful, false
75    otherwise"""
76    try:
77        open(path, 'a').close()
78    except IOError:
79        return False
80    else:
81        return True
82
83def _remove(dest):
84    if not path.exists(dest) and not path.islink(dest):
85        logger.error('Tried to remove nonexistant path %s' % dest)
86        return True
87
88    try:
89        if path.isdir(dest):
90            os.rmdir(dest)
91        else:
92            os.remove(dest)
93    except OSError, e:
94        logging.error('Could not remove %s: %s' % (dest, e))
95        return False
96    else:
97        return True
98
99def _mkdir(dir):
100    try:
101        os.mkdir(dir)
102    except OSError, e:
103        logging.error('Could not mkdir %s: %s' % (dir, e))
104        return False
105    else:
106        return True
107   
108def _strip(name):
109    """Strip off the file extension, and leading /'s, if they exist"""
110    return path.splitext(path.basename(name))[0]
111
112def _suffix(name, suffix):
113    return '%s.%s' % (name, suffix)
114
def _crondir(server):
    """Return the path of server's '<server>.cronspool' entry in CRONSPOOL_DIR."""
    entry_name = _suffix(server, 'cronspool')
    return path.join(CRONSPOOL_DIR, entry_name)
117
def _serverfile(server):
    """Return the path of the file in SERVER_DIR that represents server."""
    server_path = path.join(SERVER_DIR, server)
    return server_path
120
def _servers():
    """Get a list of the servers.

    One name is produced per entry found directly inside SERVER_DIR.
    """
    # NOTE(review): _strip() drops everything after the last '.', so an
    # entry named like a dotted hostname would come back truncated --
    # confirm server names never contain dots.
    names = []
    for entry in glob.glob(path.join(SERVER_DIR, '*')):
        names.append(_strip(entry))
    return names
124
def _is_master(server):
    """Return True if server's cronspool entry is a symlink."""
    return path.islink(_crondir(server))
128
def _restart_crond(args, options):
    """Ask crond to reload.

    When options.development is set, a harmless echo is run instead of
    the real service command.  args is not used.

    Raises HacronError(OCF_ERR_GENERIC) if the command exits with a
    non-zero status or cannot be executed at all.
    """
    # TODO: insert correct cmd here.  Also, should we capture and log
    # stdout?
    if options.development:
        cmd = ['echo', 'called crond reset']
    else:
        cmd = ['service', 'crond', 'reload']
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError as e:
        # check_call reports a non-zero exit status with this exception,
        # not with OSError.
        raise HacronError(OCF_ERR_GENERIC, 'Cron restart exited with return code %d' % e.returncode)
    except OSError as e:
        # The command itself could not be started.
        raise HacronError(OCF_ERR_GENERIC, 'Could not run %s: %s' % (' '.join(cmd), e))
    else:
        logger.info('Restarted crond')
142
def start_cron(args, options):
    """Make HOSTNAME the master and reload crond.

    For every server found by _servers(): HOSTNAME's cronspool entry is
    replaced by a symlink to '../cronspool'; any other server's entry
    that is a symlink is removed and a plain directory is created in
    its place.

    Returns OCF_ERR_CONFIGURED if the server file cannot be touched,
    OCF_SUCCESS if HOSTNAME already is the master or the switch-over
    worked, OCF_ERR_GENERIC if HOSTNAME's entry cannot be replaced, or
    the status carried by a HacronError from the crond reload.
    """
    serverfile = _serverfile(HOSTNAME)
    if not _touch(serverfile):
        logger.error('Could not touch %s' % serverfile)
        return OCF_ERR_CONFIGURED
    elif _is_master(HOSTNAME):
        logger.error('%s is already the master!' % HOSTNAME)
        return OCF_SUCCESS

    logger.info('Starting %s' % HOSTNAME)
    for server in _servers():
        crondir = _crondir(server)
        if server == HOSTNAME:
            # Get rid of current crondir, and leave if that fails.
            if not _remove(crondir):
                logger.error("Could not remove dummy cronspool dir %s" % crondir)
                return OCF_ERR_GENERIC
            try:
                os.symlink('../cronspool', crondir)
            except OSError as e:
                logger.error('Could not create master symlink %s: %s' % (crondir, e))
                return OCF_ERR_GENERIC
            logger.info('Created master symlink %s' % crondir)
        else:
            # Demoting the other servers stays best-effort, but success
            # is only reported when the helper actually succeeded (the
            # helpers log their own failures).
            if path.islink(crondir):
                if _remove(crondir):
                    logger.info('Removed old master symlink: %s' % crondir)
            if not path.exists(crondir):
                if _mkdir(crondir):
                    logger.info('Created slave dummy directory %s' % crondir)
    try:
        _restart_crond(args, options)
    except HacronError as e:
        return e.ocf_errno
    return OCF_SUCCESS
174
def stop_cron(args, options):
    """Stop cron.

    If HOSTNAME is the master, its cronspool symlink is replaced by a
    plain directory and crond is reloaded.

    Returns OCF_NOT_RUNNING if HOSTNAME is not the master, the status
    carried by a HacronError if the crond reload fails, and OCF_SUCCESS
    otherwise.
    """
    # NOTE(review): OCF agents conventionally report success when asked
    # to stop an already-stopped resource -- confirm that returning
    # OCF_NOT_RUNNING here is intended.
    if not _is_master(HOSTNAME):
        logger.error('I am not the master!')
        return OCF_NOT_RUNNING

    crondir = _crondir(HOSTNAME)
    logger.info('Removing symlink %s' % crondir)
    # Both helpers log their own failures; their results are
    # deliberately not turned into a failed stop.
    _remove(crondir)
    _mkdir(crondir)
    # TODO: should we do something else here?
    try:
        _restart_crond(args, options)
    except HacronError as e:
        return e.ocf_errno
    return OCF_SUCCESS
191
def monitor_cron(args, options):
    """Report whether cron is running here.

    For now this only checks that the current machine is the master,
    although this should likely be fixed.  Returns OCF_SUCCESS when it
    is and OCF_NOT_RUNNING when it is not.
    """
    if not _is_master(HOSTNAME):
        return OCF_NOT_RUNNING
    return OCF_SUCCESS
199
def validate_all_cron(args, options):
    """Check that HOSTNAME's server file is writable and CRONSPOOL_DIR exists.

    Returns OCF_SUCCESS when both checks pass, OCF_ERR_GENERIC otherwise.
    """
    serverfile = _serverfile(HOSTNAME)
    if not _touch(serverfile):
        logger.error('Could not touch %s' % serverfile)
        return OCF_ERR_GENERIC
    if not path.exists(CRONSPOOL_DIR):
        return OCF_ERR_GENERIC
    return OCF_SUCCESS
208
def setup(args, options):
    """Create CRONSPOOL_DIR and SERVER_DIR if they do not exist yet.

    Logs, per directory, whether it was created or already present.
    """
    for directory in (CRONSPOOL_DIR, SERVER_DIR):
        if path.exists(directory):
            logger.info('Already exists: %s' % directory)
            continue
        os.makedirs(directory)
        logger.info('Created %s' % directory)
216
def remove_servers(servers, options):
    """Remove servers from the list of available ones.

    For each name, both its server file and its cronspool entry are
    removed.
    """
    for name in servers:
        for target in (_serverfile(name), _crondir(name)):
            _remove(target)
        logger.info('Removed %s from list of available ones' % name)
223
224
def meta_data_cron(args, options):
    """Write the resource-agent meta-data XML to stdout.

    Neither args nor options is used.  Always returns OCF_SUCCESS.
    """
    metadata = """<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="hacron" version="0.1">
<version>1.0</version>

<longdesc lang="en">
This is the high-availability cron manager.  It uses an extremely overpowered
clustering solution to make it so that people can have their crontabs.  Yay.
</longdesc>
<shortdesc lang="en">HA Cron</shortdesc>

<parameters>
<parameter name="cron_root" required="1">
<longdesc lang="en">
Base directory for storage of crontabs and server information.
</longdesc>
<shortdesc lang="en">Cron base directory</shortdesc>
<content type="string" />
</parameter>
</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" start-delay="0" />
<action name="reload"       timeout="90" />
<action name="meta-data"    timeout="5" />
<action name="validate-all"   timeout="30" />
</actions>
</resource-agent>
"""
    # The document already ends in a newline; the extra one leaves a
    # blank line after the closing tag.
    sys.stdout.write(metadata + '\n')
    return OCF_SUCCESS
258
def usage(parser):
    """Print parser's help text and return exit status 1."""
    exit_status = 1
    parser.print_help()
    return exit_status
262
def _set_globals(args, options):
    """Configure logging and fill in the module-level settings.

    Sets HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, HA_RSCTMP and
    OCF_RESOURCE_INSTANCE from the command-line options, falling back
    to the environment.  args is not used.

    Returns OCF_SUCCESS.
    Raises HacronError(OCF_ERR_CONFIGURED) when no cron root is given
    by either -c or the OCF_RESKEY_cron_root environment variable.
    """
    global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, \
        HA_RSCTMP, OCF_RESOURCE_INSTANCE
    if options.development:
        logging.basicConfig(level=logging.DEBUG)
    else:
        if HA_LOGD:
            handler = HaLogHandler('hacron')
        else:
            handler = logging.handlers.SysLogHandler('/dev/log')
        formatter = logging.Formatter("%(module)s: %(levelname)s %(message)s")
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # Without an explicit level the logger inherits the root
        # logger's default of WARNING, and every logger.info() call is
        # dropped before it can reach the INFO-level handler above.
        logger.setLevel(logging.INFO)
    HOSTNAME = options.server or os.environ.get('HA_CURHOST') or socket.gethostname()
    CRONROOT = options.cronroot or os.environ.get('OCF_RESKEY_cron_root')
    if not CRONROOT:
        raise HacronError(OCF_ERR_CONFIGURED, 'No cron_root specified.')
    CRONSPOOL_DIR = path.join(CRONROOT, 'server-cronspools')
    SERVER_DIR = path.join(CRONROOT, 'servers')
    HA_RSCTMP = os.environ.get('HA_RSCTMP', '/tmp')
    OCF_RESOURCE_INSTANCE = os.environ.get('OCF_RESOURCE_INSTANCE', 'default')
    return OCF_SUCCESS
286
def main():
    """Parse the command line, run the requested command, return its status.

    'meta-data' is answered before any configuration is read.  Every
    other command runs after _set_globals() and while holding the
    per-instance lock file.  An unknown command prints the usage text
    and returns OCF_ERR_UNIMPLEMENTED; a missing command prints the
    usage text and returns usage()'s status.
    """
    usage_str = """usage: %prog [-s server] [-c cronroot] [-d] cmd

Script for starting and stopping cron in a multiserver environment.
One server is designated the master.

== HA available commands: ==
start: Make this server into the master and reload crond.
reload: Same as start.
stop: Demote this server to a spare and reload crond.
monitor: Indicate whether this server is successfully the master.
validate-all: Make sure that things look right and this server is
  ready to be promoted to master.
meta-data: Print out the XML meta data for this service

== User-only commands: ==
setup: Create the folders, etc. necessary for running hacron.
remove-servers server1 server2 ...: Take a list of servers out of the
  list of available ones.
    """
    parser = optparse.OptionParser(usage=usage_str)
    parser.add_option("-s", "--server", action="store", dest="server",
                      default=None,
                      help="choose which server to run script as")
    parser.add_option("-c", "--cronroot", action="store", dest="cronroot",
                      default=None,
                      help="pick root of cron dir")
    parser.add_option("-d", "--development", action="store_true",
                      dest="development", default=False,
                      help="run in development mode")
    (options, positional) = parser.parse_args()
    if not positional:
        return usage(parser)
    command, args = positional[0], positional[1:]

    if command == 'meta-data':
        return meta_data_cron(args, options)

    try:
        _set_globals(args, options)
    except HacronError as e:
        return e.ocf_errno

    # Commands that run under the lock, keyed by their command-line name.
    handlers = {
        'start': start_cron,
        'reload': start_cron,
        'stop': stop_cron,
        'monitor': monitor_cron,
        'validate-all': validate_all_cron,
        'setup': setup,
        'remove-servers': remove_servers,
    }
    with lock('%s/hacron-%s.lock' % (HA_RSCTMP, OCF_RESOURCE_INSTANCE)):
        handler = handlers.get(command)
        if handler is None:
            usage(parser)
            return OCF_ERR_UNIMPLEMENTED
        return handler(args, options)
352
if __name__ == '__main__':
    # Log any unexpected failure, then let it propagate; sys.exit() is
    # therefore only reached when main() returned normally.
    try:
        exit_status = main()
    except Exception as err:
        logger.error('exception from main: %s' % err)
        raise
    sys.exit(exit_status)
Note: See TracBrowser for help on using the repository browser.