Ignore:
Timestamp:
Feb 13, 2010, 5:21:34 PM (15 years ago)
Author:
gdb
Message:
Another pass over hacron
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/server/common/oursrc/hacron/hacron

    r1457 r1466  
    11#!/usr/bin/env python
    2 from __future__ import with_statement
    3 
    42import glob
    53import logging.handlers
     4import fcntl
    65import optparse
    76import os
     
    2423logger = logging.getLogger('cron')
    2524
    26 import os
    27 import subprocess
    28 
    29 HA_LOGD = os.environ.get('HA_LOGD') == 'xyes'
    30 
     25HA_LOGD = os.environ.get('HA_LOGD') == 'yes'
     26
     27class HacronError(Exception):
     28    def __init__(self, errno, msg='Something went wrong'):
     29        self.errno = errno
     30        self.msg = msg
     31   
    3132class HaLogHandler(logging.Handler):
    3233    """
     
    5556
    5657class lock(object):
    57     def __init__(self, name):
    58         self.name = name
     58    def __init__(self, filename):
     59        self.filename = filename
     60        if not _touch(filename):
     61            raise
    5962
    6063    def __enter__(self):
    61         tries = 0
    62         while True:
    63             try:
    64                 self.lock = os.open(self.name, os.O_RDWR | os.O_CREAT | os.O_EXCL)
    65             except OSError:
    66                 logger.error('Could not acquire lock %s.  Sleeping...' % self.name)
    67                 time.sleep(0.5)
    68                 tries += 1
    69                 if not tries % 60:
    70                     logger.error("Waited too long; got bored.  Clearing lock %s." % self.name)
    71                     _remove(self.name)
    72             else:
    73                 break
     64        f = open(self.filename)
     65        fcntl.flock(f, fcntl.LOCK_EX)
    7466           
    7567    def __exit__(self, type, value, traceback):
    76         os.close(self.lock)
    77         _remove(self.name)
     68        f = open(self.filename)
     69        fcntl.flock(f, fcntl.LOCK_UN)
    7870       
    7971def _touch(path):
     
    118110    return path.join(CRONSPOOL_DIR, _suffix(server, 'cronspool'))
    119111
    120 def _server_exists(server):
    121     return path.exists(path.join(SERVER_DIR, server))
    122 
    123112def _serverfile(server):
    124113    return path.join(SERVER_DIR, server)
     
    132121    return path.islink(crondir)
    133122
     123def _restart_crond(args, options):
     124    # TODO: insert correct cmd here.  Also, should we capture and log
     125    # stdout?
     126    if options.development:
     127        cmd = ['echo', 'called crond reset']
     128    else:
     129        cmd = ['service', 'crond', 'reload']
     130    subprocess.check_call(cmd)
     131
    134132def start_cron(args, options):
    135133    if not _touch(_serverfile(HOSTNAME)):
    136134        return OCF_ERR_CONFIGURED
     135    elif _is_master(HOSTNAME):
     136        logger.error('%s is already the master!' % HOSTNAME)
     137        return OCF_SUCCESS
    137138
    138139    logger.info('Starting %s' % HOSTNAME)
    139     if _is_master(HOSTNAME):
    140         logger.error('%s is already the master!' % HOSTNAME)
    141140    for server in _servers():
    142141        crondir = _crondir(server)
    143142        if server == HOSTNAME:
    144             _remove(crondir)
     143            # Get rid of current crondir, and leave if that fails.
     144            if not _remove(crondir):
     145                logger.error("Could not remove dummy cronspool dir %s" % crondir)
     146                return OCF_ERR_GENERIC
    145147            os.symlink('../cronspool', crondir)
    146148            logger.info('Created master symlink %s' % crondir)
     
    152154                _mkdir(crondir)
    153155                logger.info('Created slave dummy directory %s' % crondir)
    154 
    155     if CRON_RESTART_COMMAND:
    156         ret = subprocess.call(CRON_RESTART_COMMAND)
    157         if ret:
    158             logger.error('Cron restart exited with return code %d' % ret)
    159             return OCF_ERR_GENERIC
    160         else:
    161             logger.info('Restarted crond')
     156    try:
     157        _restart_crond()
     158    except OSError, e:
     159        logger.error('Cron restart exited with return code %d' % e.errno)
     160        return OCF_ERR_GENERIC
     161    else:
     162        logger.info('Restarted crond')
    162163    return OCF_SUCCESS
    163164
    164165def stop_cron(args, options):
     166    """Stop cron."""
    165167    if not _is_master(HOSTNAME):
    166168        logger.error('I am not the master!')
     169        return OCF_NOT_RUNNING
    167170    else:
    168171        crondir = _crondir(HOSTNAME)
     
    170173        _remove(crondir)
    171174        _mkdir(crondir)
    172     return OCF_SUCCESS
     175        # TODO: should we do something else here?
     176        try:
     177            _restart_crond()
     178        except OSError, e:
     179            logger.error('Cron restart exited with return code %d' % e.errno)
     180            return OCF_ERR_GENERIC
     181        else:
     182            logger.info('Restarted crond')
     183        return OCF_SUCCESS
    173184
    174185def monitor_cron(args, options):
     186    """Check whether cron is running.  For now just makes sure that the
     187    current machine is the master, although this should likely be fixed."""
    175188    if _is_master(HOSTNAME):
    176189        return OCF_SUCCESS
     
    182195        logger.error('Could not touch %s' % _serverfile(HOSTNAME))
    183196        return OCF_GENERIC_ERR
    184     if not path.exists(CRONSPOOL_DIR):
     197    elif not path.exists(CRONSPOOL_DIR):
    185198        return OCF_GENERIC_ERR
     199    else:
     200        return OCF_SUCCESS
    186201
    187202def setup(args, options):
     
    193208            logger.info('Already exists: %s' % d)
    194209
    195 def add_servers(servers, options):
    196     for server in servers:
    197         _touch(_serverfile(server))
    198 
    199210def remove_servers(servers, options):
     211    """Remove servers from the list of available ones."""
    200212    for server in servers:
    201213        os.unlink(_serverfile(server))
     
    219231</longdesc>
    220232<shortdesc lang="en">Cron base directory</shortdesc>
    221 <content type="string" />
    222 </parameter>
    223 
    224 <parameter name="cron_restart_cmd">
    225 <longdesc lang="en">
    226 Command to restart cron.
    227 </longdesc>
    228 <shortdesc lang="en">Restart cron cmd</shortdesc>
    229233<content type="string" />
    230234</parameter>
     
    248252
    249253def _set_globals(args, options):
    250     global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, CRON_RESTART_COMMAND, \
     254    global HOSTNAME, CRONROOT, CRONSPOOL_DIR, SERVER_DIR, \
    251255        HA_RSCTMP, OCF_RESOURCE_INSTANCE
    252256    if options.development:
     
    264268    CRONROOT = options.cronroot or os.environ.get('OCF_RESKEY_cron_root')
    265269    if not CRONROOT:
    266         logging.error('No cron_root specified.')
    267         return OCF_ERR_CONFIGURED
     270        raise HacronError(OCF_ERR_CONFIGURED, 'No cron_root specified.')
    268271    CRONSPOOL_DIR = path.join(CRONROOT, 'server-cronspools')
    269272    SERVER_DIR = path.join(CRONROOT, 'servers')
    270     CRON_RESTART_COMMAND = options.cron_restart or os.environ.get('OCF_RESKEY_cron_restart_cmd')
    271 
    272273    HA_RSCTMP = os.environ.get('HA_RSCTMP', '/tmp')
    273274    OCF_RESOURCE_INSTANCE = os.environ.get('OCF_RESOURCE_INSTANCE', 'default')
     
    275276
    276277def main():
    277     cmds = ['start', 'reload', 'stop', 'monitor', 'validate-all', 'setup',
    278             'remove-servers', 'meta-data']
    279     usage_str = "usage: %%prog [%s]" % '|'.join(cmds)
     278    usage_str = """usage: %%prog [-s server] [-c cronroot] [-d] cmd
     279
     280Script for starting and stopping cron in a multiserver environment.
     281One server is designated the master.
     282
     283== HA available commands: ==
     284start: Make this server into the master and reload crond.
     285reload: Same as start.
     286stop: Demote this server to a spare and reload crond.
     287monitor: Indicate whether this server is successfully the master.
     288validate-all: Make sure that things look right and this server is
     289  ready to be promoted to master.
     290meta-data: Print out the XML meta data for this service
     291
     292== User-only commands: ==
     293setup: Create the folders, etc. necessary for running hacron.
     294remove-servers server1 server2 ...: Take a list of servers out of the
     295  list of available ones.
     296    """
    280297    parser = optparse.OptionParser(usage=usage_str)
    281298    parser.add_option("-s", "--server",
     
    290307                      action="store_true", dest="development",
    291308                      default=False,
    292                       help="run in production")
    293     parser.add_option("-r", "--cron-restart",
    294                       action="store", dest="cron_restart",
    295                       default=None,
    296                       help="run in production")
     309                      help="run in development mode")
    297310    (options, args) = parser.parse_args()
    298311    if len(args) < 1:
     
    303316    if command == 'meta-data':
    304317        return meta_data_cron(args, options)
    305     globals_status = _set_globals(args, options)
     318
     319    try:
     320        _set_globals(args, options)
     321    except HacronError, e:
     322        logger.error(e.msg)
     323        return e.errno
     324
    306325    with lock('%s/hacron-%s.lock' % (HA_RSCTMP, OCF_RESOURCE_INSTANCE)):
    307         if globals_status:
    308             return globals_status
    309326        if command == 'start':
    310327            return start_cron(args, options)
     
    319336        elif command == 'setup':
    320337            return setup(args, options)
    321         elif command == 'add-servers':
     338        elif command == 'remove-servers':
    322339            return remove_servers(args, options)
    323340        else:
Note: See TracChangeset for help on using the changeset viewer.