#!/usr/bin/env python from __future__ import with_statement import glob import logging.handlers import fcntl import optparse import os import socket import shutil import subprocess import sys import time from os import path OCF_SUCCESS=0 OCF_ERR_GENERIC=1 OCF_ERR_ARGS=2 OCF_ERR_UNIMPLEMENTED=3 OCF_ERR_PERM=4 OCF_ERR_INSTALLED=5 OCF_ERR_CONFIGURED=6 OCF_NOT_RUNNING=7 logger = logging.getLogger('cron') HA_LOGD = os.environ.get('HA_LOGD') == 'yes' class HacronError(Exception): def __init__(self, ocf_errno, msg='Something went wrong'): self.ocf_errno = ocf_errno self.msg = msg logger.error(msg) class HaLogHandler(logging.Handler): """ A handler class which writes to ha_logger. """ def __init__(self, ha_tag): """ Initialize the handler. ha_tag is the name of this resource. """ logging.Handler.__init__(self) self.ha_tag = ha_tag def emit(self, record): """ Emit a record. """ print 'Passed', record try: levelname = record.levelname msg = self.format(record) subprocess.call(['/usr/sbin/ha_logger', '-t', self.ha_tag, msg]) except (KeyboardInterrupt, SystemExit): raise except: self.handleError(record) class lock(object): def __init__(self, filename): self.filename = filename if not _touch(filename): raise def __enter__(self): f = open(self.filename) fcntl.flock(f, fcntl.LOCK_EX) def __exit__(self, type, value, traceback): f = open(self.filename) fcntl.flock(f, fcntl.LOCK_UN) def _touch(path): """Effectively touches a file. Returns true if successful, false otherwise""" try: open(path, 'a').close() except IOError: return False else: return True def _remove(dest): if not path.exists(dest) and not path.islink(dest): logger.error('Tried to remove nonexistant path %s' % dest) return True try: if path.isdir(dest): os.rmdir(dest) else: os.remove(dest) except OSError, e: logging.error('Could not remove %s: %s' % (dest, e)) return False else: return True def _mkdir(dir): try: os.mkdir(dir) except OSError, e: logging.error('Could not mkdir %s: %s' % (dir, e)) return False else: return True def _strip(name): """Strip off the file extension, and leading /'s, if they exist""" return path.splitext(path.basename(name))[0] def _suffix(name, suffix): return '%s.%s' % (name, suffix) def _crondir(server): return path.join(CRONSPOOL_DIR, _suffix(server, 'cronspool')) def _serverfile(server): return path.join(SERVER_DIR, server) def _servers(): """Get a list of the servers.""" return [_strip(f) for f in glob.glob(path.join(SERVER_DIR, '*'))] def _is_master(server): crondir = path.join(CRONSPOOL_DIR, _suffix(server, 'cronspool')) return path.islink(crondir) def _restart_crond(args, options): # TODO: insert correct cmd here. Also, should we capture and log # stdout? if options.development: cmd = ['echo', 'called crond reset'] else: cmd = ['service', 'crond', 'reload'] try: subprocess.check_call(cmd) except OSError, e: raise HacronError(OCF_ERR_GENERIC, 'Cron restart exited with return code %d' % e.errno) else: logger.info('Restarted crond') def start_cron(args, options): serverfile = _serverfile(HOSTNAME) if not _touch(serverfile): logger.error('Could not touch %s' % serverfile) return OCF_ERR_CONFIGURED elif _is_master(HOSTNAME): logger.error('%s is already the master!' % HOSTNAME) return OCF_SUCCESS logger.info('Starting %s' % HOSTNAME) for server in _servers(): crondir = _crondir(server) if server == HOSTNAME: # Get rid of current crondir, and leave if that fails. if not _remove(crondir): logger.error("Could not remove dummy cronspool dir %s" % crondir) return OCF_ERR_GENERIC os.symlink('../cronspool', crondir) logger.info('Created master symlink %s' % crondir) else: if path.islink(crondir): _remove(crondir) logger.info('Removed old master symlink: %s' % crondir) if not path.exists(crondir): _mkdir(crondir) logger.info('Created slave dummy directory %s' % crondir) try: _restart_crond(args, options) except HacronException, e: return e.ocf_errno return OCF_SUCCESS def stop_cron(args, options): """Stop cron.""" if not _is_master(HOSTNAME): logger.error('I am not the master!') return OCF_NOT_RUNNING else: crondir = _crondir(HOSTNAME) logger.info('Removing symlink %s' % crondir) _remove(crondir) _mkdir(crondir) # TODO: should we do something else here? try: _restart_crond(args, options) except HacronException, e: return e.ocf_errno return OCF_SUCCESS def monitor_cron(args, options): """Check whether cron is running. For now just makes sure that the current machine is the master, although this should likely be fixed.""" if _is_master(HOSTNAME): return OCF_SUCCESS else: return OCF_NOT_RUNNING def validate_all_cron(args, options): if not _touch(_serverfile(HOSTNAME)): logger.error('Could not touch %s' % _serverfile(HOSTNAME)) return OCF_ERR_GENERIC elif not path.exists(CRONSPOOL_DIR): return OCF_ERR_GENERIC else: return OCF_SUCCESS def setup(args, options): for d in [CRONSPOOL_DIR, SERVER_DIR]: if not path.exists(d): os.makedirs(d) logger.info('Created %s' % d) else: logger.info('Already exists: %s' % d) def remove_servers(servers, options): """Remove servers from the list of available ones.""" for server in servers: _remove(_serverfile(server)) _remove(_crondir(server)) logger.info('Removed %s from list of available ones' % server) def meta_data_cron(args, options): print """ 1.0 This is the high-availability cron manager. It uses an extremely overpowered clustering solution to make it so that people can have their crontabs. Yay. HA Cron Base directory for storage of crontabs and server information. Cron base directory """ return OCF_SUCCESS def usage(parser): parser.print_help() return 1 def _set_globals(args, options): global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, \ HA_RSCTMP, OCF_RESOURCE_INSTANCE if options.development: logging.basicConfig(level=logging.DEBUG) else: if HA_LOGD: handler = HaLogHandler('hacron') else: handler = logging.handlers.SysLogHandler('/dev/log') formatter = logging.Formatter("%(module)s: %(levelname)s %(message)s") handler.setLevel(logging.INFO) handler.setFormatter(formatter) logger.addHandler(handler) HOSTNAME = options.server or os.environ.get('HA_CURHOST') or socket.gethostname() CRONROOT = options.cronroot or os.environ.get('OCF_RESKEY_cron_root') if not CRONROOT: raise HacronError(OCF_ERR_CONFIGURED, 'No cron_root specified.') CRONSPOOL_DIR = path.join(CRONROOT, 'server-cronspools') SERVER_DIR = path.join(CRONROOT, 'servers') HA_RSCTMP = os.environ.get('HA_RSCTMP', '/tmp') OCF_RESOURCE_INSTANCE = os.environ.get('OCF_RESOURCE_INSTANCE', 'default') return OCF_SUCCESS def main(): usage_str = """usage: %prog [-s server] [-c cronroot] [-d] cmd Script for starting and stopping cron in a multiserver environment. One server is designated the master. == HA available commands: == start: Make this server into the master and reload crond. reload: Same as start. stop: Demote this server to a spare and reload crond. monitor: Indicate whether this server is successfully the master. validate-all: Make sure that things look right and this server is ready to be promoted to master. meta-data: Print out the XML meta data for this service == User-only commands: == setup: Create the folders, etc. necessary for running hacron. remove-servers server1 server2 ...: Take a list of servers out of the list of available ones. """ parser = optparse.OptionParser(usage=usage_str) parser.add_option("-s", "--server", action="store", dest="server", default=None, help="choose which server to run script as") parser.add_option("-c", "--cronroot", action="store", dest="cronroot", default=None, help="pick root of cron dir") parser.add_option("-d", "--development", action="store_true", dest="development", default=False, help="run in development mode") (options, args) = parser.parse_args() if len(args) < 1: return usage(parser) command = args[0] args = args[1:] if command == 'meta-data': return meta_data_cron(args, options) try: _set_globals(args, options) except HacronError, e: return e.ocf_errno with lock('%s/hacron-%s.lock' % (HA_RSCTMP, OCF_RESOURCE_INSTANCE)): if command == 'start': return start_cron(args, options) elif command == 'reload': return start_cron(args, options) elif command == 'stop': return stop_cron(args, options) elif command == 'monitor': return monitor_cron(args, options) elif command == 'validate-all': return validate_all_cron(args, options) elif command == 'setup': return setup(args, options) elif command == 'remove-servers': return remove_servers(args, options) else: usage(parser) return OCF_ERR_UNIMPLEMENTED if __name__ == '__main__': try: ret = main() except Exception, e: logger.error('exception from main: %s' % e) ret = OCF_ERR_GENERIC raise sys.exit(ret)