From: Edward Z. Yang Date: Sat, 27 Jun 2009 08:05:01 +0000 (-0400) Subject: Knock off a whole bunch of TODO items. X-Git-Url: https://scripts.mit.edu/gitweb/wizard.git/commitdiff_plain/53b2bb2bcfedf9c8cb4e94c7ce8cbeb40bf033da Knock off a whole bunch of TODO items. Signed-off-by: Edward Z. Yang --- diff --git a/TODO b/TODO index 4b2efbc..078286b 100644 --- a/TODO +++ b/TODO @@ -2,22 +2,13 @@ The Git Autoinstaller TODO NOW: -- Something needs to be done if disk quota is exceeded: - - Catch the OSError and throw a domain-specific error - so massmigrate can deal gracefully - - Perform an added memory calculation, check this against - remaining quotai, and bail out if it's within some - percentage of their remaining quota - - Checks should also be performed against the partition - X with the new --shared flag this may not be necessary - as repos weighs less than 200K +- Not quite sure what I'm going to do to deal with the fact that + there will be lower priviledged jobs trying to write to the logs. + Perhaps the best way to do this is stick it in tmp and chmod + it liberally. - Should write to a "processed" file to make resuming with unexpected failure faster (possibly should be database if parallelized). Remember that each subprocess will be a fork -- Figure out IPC logging. -- Make version directory also work with a location (with no - colon) format; this will make making fake "testbed" directories - easier to do. Also let it accept a file, instead of a directory. - Check how many autoinstalls are missing w bits for daemon.scripts (this would need pyafs) - Run parallel-find.pl @@ -44,6 +35,28 @@ NOTES: means they have basically ~no space footprint. However, it also means that /mit/scripts/wizard/srv MUST NOT lose revs. +- Full fledged logging options. Namely: + x all loggers (delay implementing this until we actually have debug stmts) + - default is WARNING + - debug => loglevel = DEBUG + x stdout logger + - default is WARNING (see below for exception) + - verbose => loglevel = INFO + x file logger (only allowed for serial processing) + - default is OFF + - log-file => loglevel = INFO + x database logger (necessary for parallel processing, not implemented) + - default is OFF + - log-db => loglevel = INFO + +- More on the database logger: it will be very simple with one + table named `logs` in SQLite, with columns: `job`, `level`, + `message`. Job identifies the subprocess/thread that emitted + the log, so things can be correlated together. We will then + have `wizard dump` which takes a database like this and dumps + it into a file logger type file. The database may also store + a queue like structure which can be used to coordinate jobs. + OVERALL PLAN: * Some parts of the infrastructure will not be touched, although I plan diff --git a/bin/wizard b/bin/wizard index 8b1597f..7170c7a 100755 --- a/bin/wizard +++ b/bin/wizard @@ -9,7 +9,7 @@ sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../../lib'))) import wizard.command def main(): - usage = """usage: %prog [-d|--version-dir] COMMAND [ARGS] + usage = """usage: %prog [-s|--versions] COMMAND [ARGS] Wizard is a Git-based autoinstall management system for scripts. @@ -22,22 +22,13 @@ Its commands are: See '%prog help COMMAND' for more information on a specific command.""" parser = optparse.OptionParser(usage) - parser.add_option("-d", "--version-dir", dest="version_dir", + parser.disable_interspersed_args() + parser.add_option("-s", "--versions", dest="versions", default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions", - help="Location of parallel-find output") + help="Location of parallel-find output directory, or a file containing a newline separated list of 'all autoinstalls' (for testing).") # Find the end of the "global" options - i = 1 - try: - while not sys.argv[i] or sys.argv[i][0] == '-': - if sys.argv[i] == "-h" or sys.argv[i] == "--help": - parser.print_help() - raise SystemExit(-1) - i += 1 - except IndexError: - parser.print_help() - raise SystemExit(-1) - options, args = parser.parse_args(sys.argv[1:i+1]) - rest_argv = sys.argv[i+1:] + options, args = parser.parse_args() + rest_argv = args[1:] command = args[0] # shouldn't fail if command == "help": try: diff --git a/lib/wizard/command/_base.py b/lib/wizard/command/_base.py index 2ba6e2f..9303269 100644 --- a/lib/wizard/command/_base.py +++ b/lib/wizard/command/_base.py @@ -12,14 +12,40 @@ def makeLogger(options): logger = logging.getLogger("main") logger.setLevel(logging.INFO) stdout = logging.StreamHandler(sys.stdout) + stdout.setFormatter(logging.Formatter(" " * int(options.indent) + '%(message)s')) logger.addHandler(stdout) - if options.verbose: - logger.verbose = True + if options.log_file: + file = logging.FileHandler(options.log_file) + logger.addHandler(file) + if options.debug: + logger.setLevel(logging.DEBUG) else: - if not options.debug: stdout.setLevel(logging.ERROR) - if options.debug: logger.setLevel(logging.DEBUG) + stdout.setLevel(logging.WARNING) + if options.verbose or options.dry_run: + stdout.setLevel(logging.INFO) + if options.log_file: + file.setLevel(logging.INFO) return logger +def makeBaseArgs(options, **grab): + """Takes parsed options, and breaks them back into a command + line string that we can pass into a subcommand""" + args = [] + grab["log_file"] = "--log-file" + grab["debug"] = "--debug" + grab["verbose"] = "--verbose" + grab["indent"] = "--indent" + #grab["log_db"] = "--log-db" + for k,flag in grab.items(): + value = getattr(options, k) + if not value and k != "indent": continue + args.append(flag) + if type(value) is not bool: + if k == "indent": + value += 4 + args.append(str(value)) + return args + class NullLogHandler(logging.Handler): """Log handler that doesn't do anything""" def emit(self, record): @@ -33,6 +59,10 @@ class WizardOptionParser(optparse.OptionParser): default=False, help="Turns on verbose output") self.add_option("--debug", dest="debug", action="store_true", default=False, help="Turns on debugging output") + self.add_option("--log-file", dest="log_file", + default=None, help="Logs verbose output to file") + self.add_option("--indent", dest="indent", + default=0, help="Indents stdout, useful for nested calls") def parse_all(self, argv, logger): options, numeric_args = self.parse_args(argv) return options, numeric_args, logger and logger or makeLogger(options) diff --git a/lib/wizard/command/massmigrate.py b/lib/wizard/command/massmigrate.py index a796df2..a318069 100644 --- a/lib/wizard/command/massmigrate.py +++ b/lib/wizard/command/massmigrate.py @@ -1,7 +1,10 @@ import optparse +import os import wizard from wizard import deploy +from wizard import util +from wizard import shell from wizard.command import _base from wizard.command import migrate @@ -12,9 +15,7 @@ Mass migrates an application to the new repository format. Essentially equivalent to running '%prog migrate' on all autoinstalls for a particular application found by parallel-find, but with advanced reporting. - -NOTE: --verbose implies --no-parallelize, as it results in -output going to stdout/stderr.""" +""" parser = _base.WizardOptionParser(usage) parser.add_option("--no-parallelize", dest="no_parallelize", action="store_true", default=False, help="Turn off parallelization") @@ -28,32 +29,38 @@ output going to stdout/stderr.""" if options.verbose or options.dry_run: options.no_parallelize = True app = args[0] - base_args = [] - if options.verbose: base_args.append("--verbose") - if options.dry_run: base_args.append("--dry-run") - deploys = [] + errors = {} + sh = shell.Shell(logger) # dry run happens to lower down... + base_args = _base.makeBaseArgs(options, dry_run="--dry-run") + # check if we have root + uid = os.getuid() + user = None for line in deploy.getInstallLines(global_options): + # validate and filter the deployments try: d = deploy.Deployment.parse(line) except deploy.DeploymentParseError, deploy.NoSuchApplication: continue name = d.getApplication().name if name != app: continue - deploys.append(d) - # parallelization code would go here - errors = {} - for d in deploys: - sub_argv = base_args + [d.location] - logger.info("$ wizard migrate " + " ".join(sub_argv)) + # actual meat try: - migrate.main(sub_argv, global_options, logger) - except migrate.AlreadyMigratedError as e: - logger.info("Skipped already migrated %s" % d.location) - except wizard.Error as e: - name = e.__module__ + "." + e.__class__.__name__ - if name not in errors: errors[name] = [] - errors[name].append((d, e)) - logger.error("ERROR [%s] in %s" % (name, d.location)) - logger.info("Backtrace:\n" + str(e)) + if not uid: + # only attempt to change user if we're root. If we're + # not root, assume a permission friendly test environment. + user = util.get_dir_user(d.location) + sh.callAsUser(shell.wizard, "migrate", d.location, *base_args, + user = user) + except shell.PythonCallError as e: + if e.name == "wizard.command.migrate.AlreadyMigratedError": + logger.info("Skipped already migrated %s" % d.location) + elif e.name.startswith("wizard."): + name = e.name + if name not in errors: errors[name] = [] + errors[name].append(d) + logger.error("ERROR [%s] in %s" % (name, d.location)) + logger.info(e.stderr) + else: + raise e for name, deploys in errors.items(): logger.warning("ERROR [%s] from %d installs" % (name, len(deploys))) diff --git a/lib/wizard/command/migrate.py b/lib/wizard/command/migrate.py index f9bc487..70c2e3f 100644 --- a/lib/wizard/command/migrate.py +++ b/lib/wizard/command/migrate.py @@ -136,7 +136,7 @@ what repository and tag to use.""" try: tag = "v%s-scripts" % version.version sh.call("git", "--git-dir", repo, "rev-parse", tag) - except shell.CalledProcessError: + except shell.CallError: raise NoTagError(version) did_git_init = False did_git_checkout_scripts = False @@ -145,9 +145,14 @@ what repository and tag to use.""" sh.call("git", "--git-dir=.git", "init") did_git_init = True # configure our alternates (to save space and make this quick) - alternates = open(".git/objects/info/alternates", "w") - alternates.write(os.path.join(repo, "objects")) - alternates.close() + data = os.path.join(repo, "objects") + file = ".git/objects/info/alternates" + if not options.dry_run: + alternates = open(file, "w") + alternates.write(data) + alternates.close() + else: + logger.info("# create %s containing \"%s\"" % (file, data)) # configure our remote sh.call("git", "remote", "add", "origin", repo) # configure what would normally be set up on a 'git clone' for consistency @@ -165,11 +170,11 @@ what repository and tag to use.""" if options.verbose: try: sh.call("git", "status") - except shell.CalledProcessError: + except shell.CallError: pass try: sh.call("git", "diff") - except shell.CalledProcessError: + except shell.CallError: pass except: # this... is pretty bad diff --git a/lib/wizard/command/summary.py b/lib/wizard/command/summary.py index 11fc581..dc0cde9 100644 --- a/lib/wizard/command/summary.py +++ b/lib/wizard/command/summary.py @@ -90,6 +90,7 @@ Examples: if r: printer.chat("Found " + options.count_exists + " in " + d.location) printer.write() + print show for app in deploy.applications.values(): if app.name not in show: continue printer.write(app.report()) diff --git a/lib/wizard/deploy.py b/lib/wizard/deploy.py index a2c3069..7140c90 100644 --- a/lib/wizard/deploy.py +++ b/lib/wizard/deploy.py @@ -42,8 +42,10 @@ didn't contain enough fields. def getInstallLines(global_options): """Retrieves a list of lines from the version directory that can be passed to Deployment.parse()""" - vd = global_options.version_dir - return fileinput.input([vd + "/" + f for f in os.listdir(vd)]) + vs = global_options.versions + if os.path.isfile(vs): + return fileinput.input([vs]) + return fileinput.input([vs + "/" + f for f in os.listdir(vs)]) class Deployment(object): """Represents a deployment of an autoinstall; i.e. a concrete @@ -60,10 +62,11 @@ class Deployment(object): """Parses a line from the results of parallel-find.pl. This will work out of the box with fileinput, see getInstallLines()""" + line = line.rstrip() try: - location, deploydir = line.rstrip().split(":") + location, deploydir = line.split(":") except ValueError: - raise DeploymentParseError(line) + return Deployment(line) # lazy loaded version return Deployment(location, version=ApplicationVersion.parse(deploydir)) @staticmethod def fromDir(dir): diff --git a/lib/wizard/shell.py b/lib/wizard/shell.py index 3484163..9df5583 100644 --- a/lib/wizard/shell.py +++ b/lib/wizard/shell.py @@ -1,32 +1,54 @@ import subprocess import sys -class CalledProcessError(subprocess.CalledProcessError): - pass +import wizard as _wizard +from wizard import util + +wizard = sys.argv[0] + +class CallError(_wizard.Error): + def __init__(self, code, args, stdout, stderr): + self.code = code + self.args = args + self.stdout = stdout + self.stderr = stderr + def __str__(self): + return "CallError [%d]" % self.code + +class PythonCallError(CallError): + def __init__(self, code, args, stdout, stderr): + self.name = util.get_exception_name(stderr) + CallError.__init__(self, code, args, stdout, stderr) + def __str__(self): + return "PythonCallError [%s]" % self.name + +def is_python(args): + return args[0] == "python" or args[0] == wizard class Shell(object): """An advanced shell, with the ability to do dry-run and log commands""" def __init__(self, logger = False, dry = False): """ `logger` The logger - `dry` Whether or not to not run any commands, and just print""" + `dry` Don't run any commands, just print them""" self.logger = logger self.dry = dry - def call(self, *args): + def call(self, *args, **kwargs): + (python,) = ("python" in kwargs) and [kwargs["python"]] or [None] if self.dry or self.logger: self.logger.info("$ " + ' '.join(args)) if self.dry: return - proc = None - if self.logger: - if hasattr(self.logger, "verbose"): - # this is a special short-circuit to make redrawing - # output from Git work - proc = subprocess.Popen(args, stdout=sys.stdout, stderr=sys.stderr) - else: - proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - else: - proc = subprocess.Popen(args) - stdout, _ = proc.communicate() + if python is None and is_python(args): + python = True + proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() if self.logger and stdout: self.logger.info(stdout) if proc.returncode: - raise CalledProcessError(proc.returncode, args) + if python: eclass = PythonCallError + else: eclass = CallError + raise eclass(proc.returncode, args, stdout, stderr) + return (stdout, stderr) + def callAsUser(self, *args, **kwargs): + user = ("user" in kwargs) and kwargs["user"] or None + if not user: return self.call(*args) + return self.call("sudo", "-u", user, *args, python=is_python(args)) diff --git a/lib/wizard/tests/deploy_test.py b/lib/wizard/tests/deploy_test.py index 79c8245..a1e4a37 100644 --- a/lib/wizard/tests/deploy_test.py +++ b/lib/wizard/tests/deploy_test.py @@ -13,13 +13,6 @@ def test_deployment_parse(): assert result.getVersion() == Version("1.11.0") assert result.getApplication().name == "mediawiki" -def test_deployment_parse_parseerror(): - try: - Deployment.parse("foo") - assert False - except DeploymentParseError: - pass - def test_deployment_parse_nosuchapplication(): try: Deployment.parse("a:/foo/obviouslybogus-1.11.0\n") diff --git a/lib/wizard/tests/util_test.py b/lib/wizard/tests/util_test.py index 9f765e2..ad77c88 100644 --- a/lib/wizard/tests/util_test.py +++ b/lib/wizard/tests/util_test.py @@ -1,5 +1,14 @@ +import traceback + from wizard.util import * +class MyError(Exception): + def __str__(self): + return """ + +ERROR: Foo +""" + def test_get_dir_user(): assert get_dir_user("/mit/ezyang/web_scripts/test-wiki") == "ezyang" @@ -7,5 +16,17 @@ def test_get_dir_user_locker(): assert get_dir_user("/mit/apo/web_scripts/") == "apo" def test_get_dir_url(): - assert get_dir_url("/mit/ezyang/web_scripts/foo",) == "http://ezyang.scripts.mit.edu/foo" + assert get_dir_url("/mit/ezyang/web_scripts/foo") == "http://ezyang.scripts.mit.edu/foo" + +def test_get_exception_name(): + try: + raise NotImplementedError + except NotImplementedError: + assert get_exception_name(traceback.format_exc()) == "NotImplementedError" + +def test_get_exception_name_withstr(): + try: + raise MyError + except MyError: + assert get_exception_name(traceback.format_exc()) == "MyError" diff --git a/lib/wizard/util.py b/lib/wizard/util.py index 97e2dde..dddbb34 100644 --- a/lib/wizard/util.py +++ b/lib/wizard/util.py @@ -1,21 +1,24 @@ -import pwd -import grp import os.path import ldap -def switch_user(name): - """Switches the process to the run level of a Scripts user. Once - you do this, you can't go back, so be sure to fork beforehand!""" - _, _, uid, gid, _, _, _ = pwd.getpwnam('ezyang') - nssgid = grp.getgrnam('nss-nonlocal-users')[2] - os.setgid(gid) - os.setgroups([gid, nssgid]) - os.setuid(uid) +def get_exception_name(output): + """Reads the stderr output of another Python command and grabs the + fully qualified exception name""" + lines = output.split("\n") + for line in lines[1:]: # skip the "traceback" line + line = line.rstrip() + if line[0] == ' ': continue + if line[-1] == ":": + return line[:-1] + else: + return line def get_dir_user(dir): """Finds the username of the person who owns this directory, via LDAP. Only works for directories under web_scripts""" - homedir, _, _ = os.path.realpath(dir).partition("/web_scripts") + dir = os.path.realpath(dir) + homedir, _, _ = dir.partition("/web_scripts") + if homedir == dir: return None con = ldap.initialize('ldap://scripts.mit.edu') return con.search_s( "ou=People,dc=scripts,dc=mit,dc=edu", # base