source: trunk/server/common/oursrc/hacron/hacron @ 1466

Last change on this file since 1466 was 1466, checked in by gdb, 14 years ago
Another pass over hacron
  • Property svn:executable set to *
File size: 10.6 KB
Line 
#!/usr/bin/env python
import fcntl
import glob
import logging.handlers
import optparse
import os
import shutil
import socket
import subprocess
import sys
import time
from os import path
13
# Exit codes returned by the command handlers in this script.
OCF_SUCCESS = 0
OCF_ERR_GENERIC = 1
OCF_ERR_ARGS = 2
OCF_ERR_UNIMPLEMENTED = 3
OCF_ERR_PERM = 4
OCF_ERR_INSTALLED = 5
OCF_ERR_CONFIGURED = 6
OCF_NOT_RUNNING = 7

# Logger shared by the functions in this script.
logger = logging.getLogger('cron')

# True only when the HA_LOGD environment variable is exactly 'yes'; used to
# pick the ha_logger handler over syslog.
HA_LOGD = os.environ.get('HA_LOGD') == 'yes'
26
class HacronError(Exception):
    """Error that carries an exit code together with a readable message.

    errno -- exit status to hand back to the caller of the script.
    msg   -- human-readable description; also what str() returns.
    """

    def __init__(self, errno, msg='Something went wrong'):
        # Initialise the base class so args is always (errno, msg), even
        # when msg falls back to its default.
        Exception.__init__(self, errno, msg)
        self.errno = errno
        self.msg = msg

    def __str__(self):
        # Without this, formatting the exception with %s shows the raw args
        # tuple (or just the numeric code) instead of the message.
        return str(self.msg)
31   
class HaLogHandler(logging.Handler):
    """
    A handler class which writes to ha_logger.
    """
    def __init__(self, ha_tag):
        """
        Initialize the handler.  ha_tag is the name of this resource; it is
        passed to ha_logger with -t on every call.
        """
        logging.Handler.__init__(self)
        self.ha_tag = ha_tag

    def emit(self, record):
        """
        Emit a record by formatting it and handing the text to
        /usr/sbin/ha_logger.

        Failures (including a missing ha_logger binary) are routed through
        handleError() rather than raised.
        """
        try:
            msg = self.format(record)
            subprocess.call(['/usr/sbin/ha_logger', '-t', self.ha_tag, msg])
        except Exception:
            # KeyboardInterrupt and SystemExit do not derive from Exception,
            # so they still propagate without an explicit re-raise clause.
            self.handleError(record)
56
class lock(object):
    """Context manager that holds an exclusive flock() on a lock file.

    The lock file is created when the object is constructed.  The lock is
    taken on entering the with-block and held until the block exits.
    """

    def __init__(self, filename):
        """Remember filename and make sure the lock file exists.

        Raises IOError/OSError if the file cannot be created or opened.
        """
        self.filename = filename
        self._handle = None
        # Create the file directly instead of raising a bare `raise` with no
        # active exception; this way the real reason reaches the caller.
        open(filename, 'a').close()

    def __enter__(self):
        """Block until the exclusive lock is acquired; returns self."""
        handle = open(self.filename)
        try:
            fcntl.flock(handle, fcntl.LOCK_EX)
        except Exception:
            handle.close()
            raise
        # The open file owns the lock: keep a reference, otherwise the file
        # is closed as soon as this method returns and the lock is dropped.
        self._handle = handle
        return self

    def __exit__(self, type, value, traceback):
        """Release the lock taken in __enter__ and close the file."""
        handle, self._handle = self._handle, None
        if handle is not None:
            try:
                fcntl.flock(handle, fcntl.LOCK_UN)
            finally:
                handle.close()
        # Never suppress an exception raised inside the with-block.
        return False
70       
71def _touch(path):
72    """Effectively touches a file.  Returns true if successful, false
73    otherwise"""
74    try:
75        open(path, 'a').close()
76    except IOError:
77        return False
78    else:
79        return True
80
81def _remove(dest):
82    try:
83        if path.isdir(dest):
84            os.rmdir(dest)
85        else:
86            os.remove(dest)
87    except OSError, e:
88        logging.error('Could not remove %s: %s' % (dest, e))
89        return False
90    else:
91        return True
92
93def _mkdir(dir):
94    try:
95        os.mkdir(dir)
96    except OSError, e:
97        logging.error('Could not mkdir %s: %s' % (dir, e))
98        return False
99    else:
100        return True
101   
102def _strip(name):
103    """Strip off the file extension, and leading /'s, if they exist"""
104    return path.splitext(path.basename(name))[0]
105
106def _suffix(name, suffix):
107    return '%s.%s' % (name, suffix)
108
def _crondir(server):
    """Return the path of server's entry inside CRONSPOOL_DIR, named
    '<server>.cronspool'."""
    entry_name = _suffix(server, 'cronspool')
    return path.join(CRONSPOOL_DIR, entry_name)
111
def _serverfile(server):
    """Return the path of the file named after server inside SERVER_DIR."""
    server_path = path.join(SERVER_DIR, server)
    return server_path
114
def _servers():
    """Get a list of the servers: one name per entry found in SERVER_DIR,
    each passed through _strip()."""
    pattern = path.join(SERVER_DIR, '*')
    names = []
    for entry in glob.glob(pattern):
        names.append(_strip(entry))
    return names
118
def _is_master(server):
    """Return True when server's cronspool entry is a symlink, which is
    how the master is marked."""
    # Reuse _crondir() so the path is built in exactly one place.
    return path.islink(_crondir(server))
122
123def _restart_crond(args, options):
124    # TODO: insert correct cmd here.  Also, should we capture and log
125    # stdout?
126    if options.development:
127        cmd = ['echo', 'called crond reset']
128    else:
129        cmd = ['service', 'crond', 'reload']
130    subprocess.check_call(cmd)
131
def start_cron(args, options):
    """Make HOSTNAME the master and reload crond.

    The master's cronspool entry becomes a symlink to '../cronspool'; every
    other known server gets a plain directory instead.

    Returns OCF_ERR_CONFIGURED if this host's server file cannot be touched,
    OCF_ERR_GENERIC if the existing entry cannot be replaced or crond cannot
    be reloaded, and OCF_SUCCESS otherwise (including when this host is
    already the master).
    """
    if not _touch(_serverfile(HOSTNAME)):
        return OCF_ERR_CONFIGURED
    elif _is_master(HOSTNAME):
        logger.error('%s is already the master!' % HOSTNAME)
        return OCF_SUCCESS

    logger.info('Starting %s' % HOSTNAME)
    for server in _servers():
        crondir = _crondir(server)
        if server == HOSTNAME:
            # Get rid of current crondir, and leave if that fails.  When
            # nothing exists yet (first start) there is nothing to remove.
            if path.lexists(crondir) and not _remove(crondir):
                logger.error("Could not remove dummy cronspool dir %s" % crondir)
                return OCF_ERR_GENERIC
            os.symlink('../cronspool', crondir)
            logger.info('Created master symlink %s' % crondir)
        else:
            if path.islink(crondir):
                # Only report success when the removal really happened.
                if _remove(crondir):
                    logger.info('Removed old master symlink: %s' % crondir)
                else:
                    logger.error('Could not remove old master symlink: %s' % crondir)
            if not path.exists(crondir):
                if _mkdir(crondir):
                    logger.info('Created slave dummy directory %s' % crondir)
                else:
                    logger.error('Could not create slave dummy directory %s' % crondir)
    try:
        _restart_crond(args, options)
    except (OSError, subprocess.CalledProcessError) as e:
        # check_call reports a non-zero exit as CalledProcessError; OSError
        # covers a command that could not be run at all.
        logger.error('Cron restart failed: %s' % e)
        return OCF_ERR_GENERIC
    else:
        logger.info('Restarted crond')
    return OCF_SUCCESS
164
def stop_cron(args, options):
    """Stop cron: demote HOSTNAME from master and reload crond.

    Returns OCF_SUCCESS when this host is not (or no longer) the master,
    and OCF_ERR_GENERIC when the master symlink cannot be removed or crond
    cannot be reloaded.
    """
    if not _is_master(HOSTNAME):
        # Stopping something that is already stopped must count as success
        # for an OCF resource agent.
        logger.info('I am not the master!')
        return OCF_SUCCESS

    crondir = _crondir(HOSTNAME)
    logger.info('Removing symlink %s' % crondir)
    if not _remove(crondir):
        # Still the master: do not claim the resource was stopped.
        logger.error('Could not remove master symlink %s' % crondir)
        return OCF_ERR_GENERIC
    if not _mkdir(crondir):
        # The symlink is gone, so this host is demoted; only the
        # placeholder directory is missing.
        logger.error('Could not create dummy directory %s' % crondir)
    # TODO: should we do something else here?
    try:
        _restart_crond(args, options)
    except (OSError, subprocess.CalledProcessError) as e:
        # check_call reports a non-zero exit as CalledProcessError; OSError
        # covers a command that could not be run at all.
        logger.error('Cron restart failed: %s' % e)
        return OCF_ERR_GENERIC
    else:
        logger.info('Restarted crond')
    return OCF_SUCCESS
184
def monitor_cron(args, options):
    """Report whether cron is running here.  Currently this only checks
    that HOSTNAME is the master; crond itself is not inspected."""
    return OCF_SUCCESS if _is_master(HOSTNAME) else OCF_NOT_RUNNING
192
def validate_all_cron(args, options):
    """Check that this host's server file can be touched and that
    CRONSPOOL_DIR exists.

    Returns OCF_SUCCESS when both hold, OCF_ERR_GENERIC otherwise.
    """
    serverfile = _serverfile(HOSTNAME)
    if not _touch(serverfile):
        logger.error('Could not touch %s' % serverfile)
        # The constant is OCF_ERR_GENERIC; OCF_GENERIC_ERR does not exist.
        return OCF_ERR_GENERIC
    elif not path.exists(CRONSPOOL_DIR):
        logger.error('Cronspool directory %s does not exist' % CRONSPOOL_DIR)
        return OCF_ERR_GENERIC
    else:
        return OCF_SUCCESS
201
def setup(args, options):
    """Create CRONSPOOL_DIR and SERVER_DIR (with any missing parents) and
    log, for each one, whether it was created or already existed."""
    for directory in (CRONSPOOL_DIR, SERVER_DIR):
        if path.exists(directory):
            logger.info('Already exists: %s' % directory)
            continue
        os.makedirs(directory)
        logger.info('Created %s' % directory)
209
def remove_servers(servers, options):
    """Remove servers from the list of available ones by deleting each
    one's file in SERVER_DIR."""
    for name in servers:
        server_path = _serverfile(name)
        os.unlink(server_path)
214
def meta_data_cron(args, options):
    """Print the resource agent's XML meta data to stdout and return
    OCF_SUCCESS.  args and options are unused."""
    # print(...) with a single argument emits the same bytes on Python 2
    # and Python 3.
    print("""<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="hacron" version="0.1">
<version>1.0</version>

<longdesc lang="en">
This is the high-availability cron manager.  It uses an extremely overpowered
clustering solution to make it so that people can have their crontabs.  Yay.
</longdesc>
<shortdesc lang="en">HA Cron</shortdesc>

<parameters>
<parameter name="cron_root" required="1">
<longdesc lang="en">
Base directory for storage of crontabs and server information.
</longdesc>
<shortdesc lang="en">Cron base directory</shortdesc>
<content type="string" />
</parameter>
</parameters>

<actions>
<action name="start"        timeout="90" />
<action name="stop"         timeout="100" />
<action name="monitor"      timeout="20" interval="10" depth="0" start-delay="0" />
<action name="reload"       timeout="90" />
<action name="meta-data"    timeout="5" />
<action name="validate-all"   timeout="30" />
</actions>
</resource-agent>
""")
    return OCF_SUCCESS
248
def usage(parser):
    """Print the parser's help text and return exit status 1."""
    status = 1
    parser.print_help()
    return status
252
def _set_globals(args, options):
    """Configure logging and set the module-level settings.

    Sets HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, HA_RSCTMP and
    OCF_RESOURCE_INSTANCE from the command-line options, falling back to
    environment variables and then to defaults.

    Returns OCF_SUCCESS.  Raises HacronError(OCF_ERR_CONFIGURED) when no
    cron root is given by either the option or OCF_RESKEY_cron_root.
    """
    global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, \
        HA_RSCTMP, OCF_RESOURCE_INSTANCE
    if options.development:
        logging.basicConfig(level=logging.DEBUG)
    else:
        if HA_LOGD:
            handler = HaLogHandler('hacron')
        else:
            handler = logging.handlers.SysLogHandler('/dev/log')
        formatter = logging.Formatter("%(module)s: %(levelname)s %(message)s")
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # The handler level alone is not enough: without a level of its own
        # the logger inherits the root logger's WARNING threshold and drops
        # every info message before it reaches the handler.
        logger.setLevel(logging.INFO)
    HOSTNAME = options.server or os.environ.get('HA_CURHOST') or socket.gethostname()
    CRONROOT = options.cronroot or os.environ.get('OCF_RESKEY_cron_root')
    if not CRONROOT:
        raise HacronError(OCF_ERR_CONFIGURED, 'No cron_root specified.')
    CRONSPOOL_DIR = path.join(CRONROOT, 'server-cronspools')
    SERVER_DIR = path.join(CRONROOT, 'servers')
    HA_RSCTMP = os.environ.get('HA_RSCTMP', '/tmp')
    OCF_RESOURCE_INSTANCE = os.environ.get('OCF_RESOURCE_INSTANCE', 'default')
    return OCF_SUCCESS
276
def main():
    """Parse the command line, run the requested command and return its
    exit status.

    'meta-data' is answered before any configuration is read.  Every other
    known command runs while holding the per-resource lock file.  An
    unknown command prints the help text and returns
    OCF_ERR_UNIMPLEMENTED; a missing command prints the help text and
    returns usage()'s status.
    """
    # optparse substitutes %prog itself; the string is never %-formatted,
    # so the marker must not be escaped as %%prog.
    usage_str = """usage: %prog [-s server] [-c cronroot] [-d] cmd

Script for starting and stopping cron in a multiserver environment.
One server is designated the master.

== HA available commands: ==
start: Make this server into the master and reload crond.
reload: Same as start.
stop: Demote this server to a spare and reload crond.
monitor: Indicate whether this server is successfully the master.
validate-all: Make sure that things look right and this server is
  ready to be promoted to master.
meta-data: Print out the XML meta data for this service

== User-only commands: ==
setup: Create the folders, etc. necessary for running hacron.
remove-servers server1 server2 ...: Take a list of servers out of the
  list of available ones.
    """
    parser = optparse.OptionParser(usage=usage_str)
    parser.add_option("-s", "--server",
                      action="store", dest="server",
                      default=None,
                      help="choose which server to run script as")
    parser.add_option("-c", "--cronroot",
                      action="store", dest="cronroot",
                      default=None,
                      help="pick root of cron dir")
    parser.add_option("-d", "--development",
                      action="store_true", dest="development",
                      default=False,
                      help="run in development mode")
    (options, args) = parser.parse_args()
    if not args:
        return usage(parser)
    command = args[0]
    args = args[1:]

    if command == 'meta-data':
        return meta_data_cron(args, options)

    try:
        _set_globals(args, options)
    except HacronError as e:
        logger.error(e.msg)
        return e.errno

    # Commands that run under the lock; 'reload' is the same as 'start'.
    handlers = {
        'start': start_cron,
        'reload': start_cron,
        'stop': stop_cron,
        'monitor': monitor_cron,
        'validate-all': validate_all_cron,
        'setup': setup,
        'remove-servers': remove_servers,
    }
    handler = handlers.get(command)
    if handler is None:
        # Nothing to protect for an unknown command, so skip the lock.
        usage(parser)
        return OCF_ERR_UNIMPLEMENTED

    with lock('%s/hacron-%s.lock' % (HA_RSCTMP, OCF_RESOURCE_INSTANCE)):
        return handler(args, options)
343
if __name__ == '__main__':
    try:
        ret = main()
    except Exception as e:
        # Log the failure, then re-raise so the traceback is printed.  An
        # uncaught exception makes the interpreter exit with status 1,
        # which is the value of OCF_ERR_GENERIC.
        logger.error('exception from main: %s' % e)
        raise
    sys.exit(ret)
Note: See TracBrowser for help on using the repository browser.