From: Edward Z. Yang <ezyang@mit.edu>
Date: Sat, 27 Jun 2009 08:05:01 +0000 (-0400)
Subject: Knock off a whole bunch of TODO items.
X-Git-Url: https://scripts.mit.edu/gitweb/wizard.git/commitdiff_plain/53b2bb2bcfedf9c8cb4e94c7ce8cbeb40bf033da

Knock off a whole bunch of TODO items.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
---

diff --git a/TODO b/TODO
index 4b2efbc..078286b 100644
--- a/TODO
+++ b/TODO
@@ -2,22 +2,13 @@ The Git Autoinstaller
 
 TODO NOW:
 
-- Something needs to be done if disk quota is exceeded:
-    - Catch the OSError and throw a domain-specific error
-      so massmigrate can deal gracefully
-    - Perform an added memory calculation, check this against
-      remaining quotai, and bail out if it's within some
-      percentage of their remaining quota
-    - Checks should also be performed against the partition
-    X with the new --shared flag this may not be necessary
-      as repos weighs less than 200K
+- Not quite sure what I'm going to do to deal with the fact that
+  there will be lower priviledged jobs trying to write to the logs.
+  Perhaps the best way to do this is stick it in tmp and chmod
+  it liberally.
 - Should write to a "processed" file to make resuming with
   unexpected failure faster (possibly should be database if
   parallelized).  Remember that each subprocess will be a fork
-- Figure out IPC logging.
-- Make version directory also work with a location (with no
-  colon) format; this will make making fake "testbed" directories
-  easier to do.  Also let it accept a file, instead of a directory.
 - Check how many autoinstalls are missing w bits for
   daemon.scripts (this would need pyafs)
 - Run parallel-find.pl
@@ -44,6 +35,28 @@ NOTES:
   means they have basically ~no space footprint.  However, it
   also means that /mit/scripts/wizard/srv MUST NOT lose revs.
 
+- Full fledged logging options. Namely:
+  x all loggers (delay implementing this until we actually have debug stmts)
+    - default is WARNING
+    - debug     => loglevel = DEBUG
+  x stdout logger
+    - default is WARNING (see below for exception)
+    - verbose   => loglevel = INFO
+  x file logger (only allowed for serial processing)
+    - default is OFF
+    - log-file   => loglevel = INFO
+  x database logger (necessary for parallel processing, not implemented)
+    - default is OFF
+    - log-db    => loglevel = INFO
+
+- More on the database logger: it will be very simple with one
+  table named `logs` in SQLite, with columns: `job`, `level`,
+  `message`.  Job identifies the subprocess/thread that emitted
+  the log, so things can be correlated together.  We will then
+  have `wizard dump` which takes a database like this and dumps
+  it into a file logger type file.  The database may also store
+  a queue like structure which can be used to coordinate jobs.
+
 OVERALL PLAN:
 
 * Some parts of the infrastructure will not be touched, although I plan
diff --git a/bin/wizard b/bin/wizard
index 8b1597f..7170c7a 100755
--- a/bin/wizard
+++ b/bin/wizard
@@ -9,7 +9,7 @@ sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../../lib')))
 import wizard.command
 
 def main():
-    usage = """usage: %prog [-d|--version-dir] COMMAND [ARGS]
+    usage = """usage: %prog [-s|--versions] COMMAND [ARGS]
 
 Wizard is a Git-based autoinstall management system for scripts.
 
@@ -22,22 +22,13 @@ Its commands are:
 See '%prog help COMMAND' for more information on a specific command."""
 
     parser = optparse.OptionParser(usage)
-    parser.add_option("-d", "--version-dir", dest="version_dir",
+    parser.disable_interspersed_args()
+    parser.add_option("-s", "--versions", dest="versions",
             default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
-            help="Location of parallel-find output")
+            help="Location of parallel-find output directory, or a file containing a newline separated list of 'all autoinstalls' (for testing).")
     # Find the end of the "global" options
-    i = 1
-    try:
-        while not sys.argv[i] or sys.argv[i][0] == '-':
-            if sys.argv[i] == "-h" or sys.argv[i] == "--help":
-                parser.print_help()
-                raise SystemExit(-1)
-            i += 1
-    except IndexError:
-        parser.print_help()
-        raise SystemExit(-1)
-    options, args = parser.parse_args(sys.argv[1:i+1])
-    rest_argv = sys.argv[i+1:]
+    options, args = parser.parse_args()
+    rest_argv = args[1:]
     command = args[0] # shouldn't fail
     if command == "help":
         try:
diff --git a/lib/wizard/command/_base.py b/lib/wizard/command/_base.py
index 2ba6e2f..9303269 100644
--- a/lib/wizard/command/_base.py
+++ b/lib/wizard/command/_base.py
@@ -12,14 +12,40 @@ def makeLogger(options):
     logger = logging.getLogger("main")
     logger.setLevel(logging.INFO)
     stdout = logging.StreamHandler(sys.stdout)
+    stdout.setFormatter(logging.Formatter(" " * int(options.indent) + '%(message)s'))
     logger.addHandler(stdout)
-    if options.verbose:
-        logger.verbose = True
+    if options.log_file:
+        file = logging.FileHandler(options.log_file)
+        logger.addHandler(file)
+    if options.debug:
+        logger.setLevel(logging.DEBUG)
     else:
-        if not options.debug: stdout.setLevel(logging.ERROR)
-    if options.debug: logger.setLevel(logging.DEBUG)
+        stdout.setLevel(logging.WARNING)
+        if options.verbose or options.dry_run:
+            stdout.setLevel(logging.INFO)
+        if options.log_file:
+            file.setLevel(logging.INFO)
     return logger
 
+def makeBaseArgs(options, **grab):
+    """Takes parsed options, and breaks them back into a command
+    line string that we can pass into a subcommand"""
+    args = []
+    grab["log_file"] = "--log-file"
+    grab["debug"] = "--debug"
+    grab["verbose"] = "--verbose"
+    grab["indent"] = "--indent"
+    #grab["log_db"] = "--log-db"
+    for k,flag in grab.items():
+        value = getattr(options, k)
+        if not value and k != "indent": continue
+        args.append(flag)
+        if type(value) is not bool:
+            if k == "indent":
+                value += 4
+            args.append(str(value))
+    return args
+
 class NullLogHandler(logging.Handler):
     """Log handler that doesn't do anything"""
     def emit(self, record):
@@ -33,6 +59,10 @@ class WizardOptionParser(optparse.OptionParser):
                 default=False, help="Turns on verbose output")
         self.add_option("--debug", dest="debug", action="store_true",
                 default=False, help="Turns on debugging output")
+        self.add_option("--log-file", dest="log_file",
+                default=None, help="Logs verbose output to file")
+        self.add_option("--indent", dest="indent",
+                default=0, help="Indents stdout, useful for nested calls")
     def parse_all(self, argv, logger):
         options, numeric_args = self.parse_args(argv)
         return options, numeric_args, logger and logger or makeLogger(options)
diff --git a/lib/wizard/command/massmigrate.py b/lib/wizard/command/massmigrate.py
index a796df2..a318069 100644
--- a/lib/wizard/command/massmigrate.py
+++ b/lib/wizard/command/massmigrate.py
@@ -1,7 +1,10 @@
 import optparse
+import os
 
 import wizard
 from wizard import deploy
+from wizard import util
+from wizard import shell
 from wizard.command import _base
 from wizard.command import migrate
 
@@ -12,9 +15,7 @@ Mass migrates an application to the new repository format.
 Essentially equivalent to running '%prog migrate' on all
 autoinstalls for a particular application found by parallel-find,
 but with advanced reporting.
-
-NOTE: --verbose implies --no-parallelize, as it results in
-output going to stdout/stderr."""
+"""
     parser = _base.WizardOptionParser(usage)
     parser.add_option("--no-parallelize", dest="no_parallelize", action="store_true",
             default=False, help="Turn off parallelization")
@@ -28,32 +29,38 @@ output going to stdout/stderr."""
     if options.verbose or options.dry_run:
         options.no_parallelize = True
     app = args[0]
-    base_args = []
-    if options.verbose: base_args.append("--verbose")
-    if options.dry_run: base_args.append("--dry-run")
-    deploys = []
+    errors = {}
+    sh = shell.Shell(logger) # dry run happens to lower down...
+    base_args = _base.makeBaseArgs(options, dry_run="--dry-run")
+    # check if we have root
+    uid = os.getuid()
+    user = None
     for line in deploy.getInstallLines(global_options):
+        # validate and filter the deployments
         try:
             d = deploy.Deployment.parse(line)
         except deploy.DeploymentParseError, deploy.NoSuchApplication:
             continue
         name = d.getApplication().name
         if name != app: continue
-        deploys.append(d)
-    # parallelization code would go here
-    errors = {}
-    for d in deploys:
-        sub_argv = base_args + [d.location]
-        logger.info("$ wizard migrate " + " ".join(sub_argv))
+        # actual meat
         try:
-            migrate.main(sub_argv, global_options, logger)
-        except migrate.AlreadyMigratedError as e:
-            logger.info("Skipped already migrated %s" % d.location)
-        except wizard.Error as e:
-            name = e.__module__ + "." + e.__class__.__name__
-            if name not in errors: errors[name] = []
-            errors[name].append((d, e))
-            logger.error("ERROR [%s] in %s" % (name, d.location))
-            logger.info("Backtrace:\n" + str(e))
+            if not uid:
+                # only attempt to change user if we're root. If we're
+                # not root, assume a permission friendly test environment.
+                user = util.get_dir_user(d.location)
+            sh.callAsUser(shell.wizard, "migrate", d.location, *base_args,
+                          user = user)
+        except shell.PythonCallError as e:
+            if e.name == "wizard.command.migrate.AlreadyMigratedError":
+                logger.info("Skipped already migrated %s" % d.location)
+            elif e.name.startswith("wizard."):
+                name = e.name
+                if name not in errors: errors[name] = []
+                errors[name].append(d)
+                logger.error("ERROR [%s] in %s" % (name, d.location))
+                logger.info(e.stderr)
+            else:
+                raise e
     for name, deploys in errors.items():
         logger.warning("ERROR [%s] from %d installs" % (name, len(deploys)))
diff --git a/lib/wizard/command/migrate.py b/lib/wizard/command/migrate.py
index f9bc487..70c2e3f 100644
--- a/lib/wizard/command/migrate.py
+++ b/lib/wizard/command/migrate.py
@@ -136,7 +136,7 @@ what repository and tag to use."""
     try:
         tag = "v%s-scripts" % version.version
         sh.call("git", "--git-dir", repo, "rev-parse", tag)
-    except shell.CalledProcessError:
+    except shell.CallError:
         raise NoTagError(version)
     did_git_init = False
     did_git_checkout_scripts = False
@@ -145,9 +145,14 @@ what repository and tag to use."""
         sh.call("git", "--git-dir=.git", "init")
         did_git_init = True
         # configure our alternates (to save space and make this quick)
-        alternates = open(".git/objects/info/alternates", "w")
-        alternates.write(os.path.join(repo, "objects"))
-        alternates.close()
+        data = os.path.join(repo, "objects")
+        file = ".git/objects/info/alternates"
+        if not options.dry_run:
+            alternates = open(file, "w")
+            alternates.write(data)
+            alternates.close()
+        else:
+            logger.info("# create %s containing \"%s\"" % (file, data))
         # configure our remote
         sh.call("git", "remote", "add", "origin", repo)
         # configure what would normally be set up on a 'git clone' for consistency
@@ -165,11 +170,11 @@ what repository and tag to use."""
         if options.verbose:
             try:
                 sh.call("git", "status")
-            except shell.CalledProcessError:
+            except shell.CallError:
                 pass
             try:
                 sh.call("git", "diff")
-            except shell.CalledProcessError:
+            except shell.CallError:
                 pass
     except:
         # this... is pretty bad
diff --git a/lib/wizard/command/summary.py b/lib/wizard/command/summary.py
index 11fc581..dc0cde9 100644
--- a/lib/wizard/command/summary.py
+++ b/lib/wizard/command/summary.py
@@ -90,6 +90,7 @@ Examples:
             if r:
                 printer.chat("Found " + options.count_exists + " in " + d.location)
     printer.write()
+    print show
     for app in deploy.applications.values():
         if app.name not in show: continue
         printer.write(app.report())
diff --git a/lib/wizard/deploy.py b/lib/wizard/deploy.py
index a2c3069..7140c90 100644
--- a/lib/wizard/deploy.py
+++ b/lib/wizard/deploy.py
@@ -42,8 +42,10 @@ didn't contain enough fields.
 def getInstallLines(global_options):
     """Retrieves a list of lines from the version directory that
     can be passed to Deployment.parse()"""
-    vd = global_options.version_dir
-    return fileinput.input([vd + "/" + f for f in os.listdir(vd)])
+    vs = global_options.versions
+    if os.path.isfile(vs):
+        return fileinput.input([vs])
+    return fileinput.input([vs + "/" + f for f in os.listdir(vs)])
 
 class Deployment(object):
     """Represents a deployment of an autoinstall; i.e. a concrete
@@ -60,10 +62,11 @@ class Deployment(object):
         """Parses a line from the results of parallel-find.pl.
         This will work out of the box with fileinput, see
         getInstallLines()"""
+        line = line.rstrip()
         try:
-            location, deploydir = line.rstrip().split(":")
+            location, deploydir = line.split(":")
         except ValueError:
-            raise DeploymentParseError(line)
+            return Deployment(line) # lazy loaded version
         return Deployment(location, version=ApplicationVersion.parse(deploydir))
     @staticmethod
     def fromDir(dir):
diff --git a/lib/wizard/shell.py b/lib/wizard/shell.py
index 3484163..9df5583 100644
--- a/lib/wizard/shell.py
+++ b/lib/wizard/shell.py
@@ -1,32 +1,54 @@
 import subprocess
 import sys
 
-class CalledProcessError(subprocess.CalledProcessError):
-    pass
+import wizard as _wizard
+from wizard import util
+
+wizard = sys.argv[0]
+
+class CallError(_wizard.Error):
+    def __init__(self, code, args, stdout, stderr):
+        self.code = code
+        self.args = args
+        self.stdout = stdout
+        self.stderr = stderr
+    def __str__(self):
+        return "CallError [%d]" % self.code
+
+class PythonCallError(CallError):
+    def __init__(self, code, args, stdout, stderr):
+        self.name = util.get_exception_name(stderr)
+        CallError.__init__(self, code, args, stdout, stderr)
+    def __str__(self):
+        return "PythonCallError [%s]" % self.name
+
+def is_python(args):
+    return args[0] == "python" or args[0] == wizard
 
 class Shell(object):
     """An advanced shell, with the ability to do dry-run and log commands"""
     def __init__(self, logger = False, dry = False):
         """ `logger`    The logger
-            `dry`       Whether or not to not run any commands, and just print"""
+            `dry`       Don't run any commands, just print them"""
         self.logger = logger
         self.dry = dry
-    def call(self, *args):
+    def call(self, *args, **kwargs):
+        (python,) = ("python" in kwargs) and [kwargs["python"]] or [None]
         if self.dry or self.logger:
             self.logger.info("$ " + ' '.join(args))
         if self.dry: return
-        proc = None
-        if self.logger:
-            if hasattr(self.logger, "verbose"):
-                # this is a special short-circuit to make redrawing
-                # output from Git work
-                proc = subprocess.Popen(args, stdout=sys.stdout, stderr=sys.stderr)
-            else:
-                proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-        else:
-            proc = subprocess.Popen(args)
-        stdout, _ = proc.communicate()
+        if python is None and is_python(args):
+            python = True
+        proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = proc.communicate()
         if self.logger and stdout: self.logger.info(stdout)
         if proc.returncode:
-            raise CalledProcessError(proc.returncode, args)
+            if python: eclass = PythonCallError
+            else: eclass = CallError
+            raise eclass(proc.returncode, args, stdout, stderr)
+        return (stdout, stderr)
+    def callAsUser(self, *args, **kwargs):
+        user = ("user" in kwargs) and kwargs["user"] or None
+        if not user: return self.call(*args)
+        return self.call("sudo", "-u", user, *args, python=is_python(args))
 
diff --git a/lib/wizard/tests/deploy_test.py b/lib/wizard/tests/deploy_test.py
index 79c8245..a1e4a37 100644
--- a/lib/wizard/tests/deploy_test.py
+++ b/lib/wizard/tests/deploy_test.py
@@ -13,13 +13,6 @@ def test_deployment_parse():
     assert result.getVersion() == Version("1.11.0")
     assert result.getApplication().name == "mediawiki"
 
-def test_deployment_parse_parseerror():
-    try:
-        Deployment.parse("foo")
-        assert False
-    except DeploymentParseError:
-        pass
-
 def test_deployment_parse_nosuchapplication():
     try:
         Deployment.parse("a:/foo/obviouslybogus-1.11.0\n")
diff --git a/lib/wizard/tests/util_test.py b/lib/wizard/tests/util_test.py
index 9f765e2..ad77c88 100644
--- a/lib/wizard/tests/util_test.py
+++ b/lib/wizard/tests/util_test.py
@@ -1,5 +1,14 @@
+import traceback
+
 from wizard.util import *
 
+class MyError(Exception):
+    def __str__(self):
+        return """
+
+ERROR: Foo
+"""
+
 def test_get_dir_user():
     assert get_dir_user("/mit/ezyang/web_scripts/test-wiki") == "ezyang"
 
@@ -7,5 +16,17 @@ def test_get_dir_user_locker():
     assert get_dir_user("/mit/apo/web_scripts/") == "apo"
 
 def test_get_dir_url():
-    assert get_dir_url("/mit/ezyang/web_scripts/foo",) == "http://ezyang.scripts.mit.edu/foo"
+    assert get_dir_url("/mit/ezyang/web_scripts/foo") == "http://ezyang.scripts.mit.edu/foo"
+
+def test_get_exception_name():
+    try:
+        raise NotImplementedError
+    except NotImplementedError:
+        assert get_exception_name(traceback.format_exc()) == "NotImplementedError"
+
+def test_get_exception_name_withstr():
+    try:
+        raise MyError
+    except MyError:
+        assert get_exception_name(traceback.format_exc()) == "MyError"
 
diff --git a/lib/wizard/util.py b/lib/wizard/util.py
index 97e2dde..dddbb34 100644
--- a/lib/wizard/util.py
+++ b/lib/wizard/util.py
@@ -1,21 +1,24 @@
-import pwd
-import grp
 import os.path
 import ldap
 
-def switch_user(name):
-    """Switches the process to the run level of a Scripts user. Once
-    you do this, you can't go back, so be sure to fork beforehand!"""
-    _, _, uid, gid, _, _, _ = pwd.getpwnam('ezyang')
-    nssgid = grp.getgrnam('nss-nonlocal-users')[2]
-    os.setgid(gid)
-    os.setgroups([gid, nssgid])
-    os.setuid(uid)
+def get_exception_name(output):
+    """Reads the stderr output of another Python command and grabs the
+    fully qualified exception name"""
+    lines = output.split("\n")
+    for line in lines[1:]: # skip the "traceback" line
+        line = line.rstrip()
+        if line[0] == ' ': continue
+        if line[-1] == ":":
+            return line[:-1]
+        else:
+            return line
 
 def get_dir_user(dir):
     """Finds the username of the person who owns this directory, via LDAP.
     Only works for directories under web_scripts"""
-    homedir, _, _ = os.path.realpath(dir).partition("/web_scripts")
+    dir = os.path.realpath(dir)
+    homedir, _, _ = dir.partition("/web_scripts")
+    if homedir == dir: return None
     con = ldap.initialize('ldap://scripts.mit.edu')
     return con.search_s(
             "ou=People,dc=scripts,dc=mit,dc=edu", # base