Knock off a whole bunch of TODO items.

author Edward Z. Yang <ezyang@mit.edu>

Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)

committer Edward Z. Yang <ezyang@mit.edu>

Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)
author Edward Z. Yang <ezyang@mit.edu>
Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)
committer Edward Z. Yang <ezyang@mit.edu>
Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)
diff --git a/TODO b/TODO

index 4b2efbca7bc66d17efe367ae87f19b39db32fdc2..078286b27365bae84aec46ca213a3d7226352cc3 100644 (file)
--- a/TODO
+++ b/TODO
@@ -2,22 +2,13 @@ The Git Autoinstaller
  
  TODO NOW:
  
-- Something needs to be done if disk quota is exceeded:
-    - Catch the OSError and throw a domain-specific error
-      so massmigrate can deal gracefully
-    - Perform an added memory calculation, check this against
-      remaining quotai, and bail out if it's within some
-      percentage of their remaining quota
-    - Checks should also be performed against the partition
-    X with the new --shared flag this may not be necessary
-      as repos weighs less than 200K
+- Not quite sure what I'm going to do to deal with the fact that
+  there will be lower-privileged jobs trying to write to the logs.
+  Perhaps the best way to do this is stick it in tmp and chmod
+  it liberally.
  - Should write to a "processed" file to make resuming with
    unexpected failure faster (possibly should be database if
    parallelized).  Remember that each subprocess will be a fork
-- Figure out IPC logging.
-- Make version directory also work with a location (with no
-  colon) format; this will make making fake "testbed" directories
-  easier to do.  Also let it accept a file, instead of a directory.
  - Check how many autoinstalls are missing w bits for
    daemon.scripts (this would need pyafs)
  - Run parallel-find.pl
@@ -44,6 +35,28 @@ NOTES:
    means they have basically ~no space footprint.  However, it
    also means that /mit/scripts/wizard/srv MUST NOT lose revs.
  
+- Full fledged logging options. Namely:
+  x all loggers (delay implementing this until we actually have debug stmts)
+    - default is WARNING
+    - debug     => loglevel = DEBUG
+  x stdout logger
+    - default is WARNING (see below for exception)
+    - verbose   => loglevel = INFO
+  x file logger (only allowed for serial processing)
+    - default is OFF
+    - log-file   => loglevel = INFO
+  x database logger (necessary for parallel processing, not implemented)
+    - default is OFF
+    - log-db    => loglevel = INFO
+
+- More on the database logger: it will be very simple with one
+  table named `logs` in SQLite, with columns: `job`, `level`,
+  `message`.  Job identifies the subprocess/thread that emitted
+  the log, so things can be correlated together.  We will then
+  have `wizard dump` which takes a database like this and dumps
+  it into a file in the file logger's format.  The database may also
+  store a queue-like structure which can be used to coordinate jobs.
+
  OVERALL PLAN:
  
  * Some parts of the infrastructure will not be touched, although I plan
diff --git a/bin/wizard b/bin/wizard

index 8b1597ffc272122b6843fc061f3dd36e0ec607e3..7170c7aa1146e303944ec23b53172933f4cc2041 100755 (executable)
--- a/bin/wizard
+++ b/bin/wizard
@@ -9,7 +9,7 @@ sys.path.insert(0,os.path.abspath(os.path.join(__file__,'../../lib')))
  import wizard.command
  
  def main():
-    usage = """usage: %prog [-d|--version-dir] COMMAND [ARGS]
+    usage = """usage: %prog [-s|--versions] COMMAND [ARGS]
  
  Wizard is a Git-based autoinstall management system for scripts.
  
@@ -22,22 +22,13 @@ Its commands are:
  See '%prog help COMMAND' for more information on a specific command."""
  
      parser = optparse.OptionParser(usage)
-    parser.add_option("-d", "--version-dir", dest="version_dir",
+    parser.disable_interspersed_args()
+    parser.add_option("-s", "--versions", dest="versions",
              default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
-            help="Location of parallel-find output")
+            help="Location of parallel-find output directory, or a file containing a newline separated list of 'all autoinstalls' (for testing).")
      # Find the end of the "global" options
-    i = 1
-    try:
-        while not sys.argv[i] or sys.argv[i][0] == '-':
-            if sys.argv[i] == "-h" or sys.argv[i] == "--help":
-                parser.print_help()
-                raise SystemExit(-1)
-            i += 1
-    except IndexError:
-        parser.print_help()
-        raise SystemExit(-1)
-    options, args = parser.parse_args(sys.argv[1:i+1])
-    rest_argv = sys.argv[i+1:]
+    options, args = parser.parse_args()
+    rest_argv = args[1:]
      command = args[0] # shouldn't fail
      if command == "help":
          try:
diff --git a/lib/wizard/command/_base.py b/lib/wizard/command/_base.py

index 2ba6e2f83700a6e9a33d68816e2528da70211ea4..930326924dc55c545cce5e875bdc29eb936ed239 100644 (file)
--- a/lib/wizard/command/_base.py
+++ b/lib/wizard/command/_base.py
@@ -12,14 +12,40 @@ def makeLogger(options):
      logger = logging.getLogger("main")
      logger.setLevel(logging.INFO)
      stdout = logging.StreamHandler(sys.stdout)
+    stdout.setFormatter(logging.Formatter(" " * int(options.indent) + '%(message)s'))
      logger.addHandler(stdout)
-    if options.verbose:
-        logger.verbose = True
+    if options.log_file:
+        file = logging.FileHandler(options.log_file)
+        logger.addHandler(file)
+    if options.debug:
+        logger.setLevel(logging.DEBUG)
      else:
-        if not options.debug: stdout.setLevel(logging.ERROR)
-    if options.debug: logger.setLevel(logging.DEBUG)
+        stdout.setLevel(logging.WARNING)
+        if options.verbose or options.dry_run:
+            stdout.setLevel(logging.INFO)
+        if options.log_file:
+            file.setLevel(logging.INFO)
      return logger
  
+def makeBaseArgs(options, **grab):
+    """Takes parsed options, and breaks them back into a command
+    line string that we can pass into a subcommand"""
+    args = []
+    grab["log_file"] = "--log-file"
+    grab["debug"] = "--debug"
+    grab["verbose"] = "--verbose"
+    grab["indent"] = "--indent"
+    #grab["log_db"] = "--log-db"
+    for k,flag in grab.items():
+        value = getattr(options, k)
+        if not value and k != "indent": continue
+        args.append(flag)
+        if type(value) is not bool:
+            if k == "indent":
+                value += 4
+            args.append(str(value))
+    return args
+
  class NullLogHandler(logging.Handler):
      """Log handler that doesn't do anything"""
      def emit(self, record):
@@ -33,6 +59,10 @@ class WizardOptionParser(optparse.OptionParser):
                  default=False, help="Turns on verbose output")
          self.add_option("--debug", dest="debug", action="store_true",
                  default=False, help="Turns on debugging output")
+        self.add_option("--log-file", dest="log_file",
+                default=None, help="Logs verbose output to file")
+        self.add_option("--indent", dest="indent",
+                default=0, help="Indents stdout, useful for nested calls")
      def parse_all(self, argv, logger):
          options, numeric_args = self.parse_args(argv)
          return options, numeric_args, logger and logger or makeLogger(options)
diff --git a/lib/wizard/command/massmigrate.py b/lib/wizard/command/massmigrate.py

index a796df24988f8cc41a84a2323ab7fbe84653d79d..a31806959ddeab8222b102eb9317c3c218cc0ec6 100644 (file)
--- a/lib/wizard/command/massmigrate.py
+++ b/lib/wizard/command/massmigrate.py
@@ -1,7 +1,10 @@
  import optparse
+import os
  
  import wizard
  from wizard import deploy
+from wizard import util
+from wizard import shell
  from wizard.command import _base
  from wizard.command import migrate
  
@@ -12,9 +15,7 @@ Mass migrates an application to the new repository format.
  Essentially equivalent to running '%prog migrate' on all
  autoinstalls for a particular application found by parallel-find,
  but with advanced reporting.
-
-NOTE: --verbose implies --no-parallelize, as it results in
-output going to stdout/stderr."""
+"""
      parser = _base.WizardOptionParser(usage)
      parser.add_option("--no-parallelize", dest="no_parallelize", action="store_true",
              default=False, help="Turn off parallelization")
@@ -28,32 +29,38 @@ output going to stdout/stderr."""
      if options.verbose or options.dry_run:
          options.no_parallelize = True
      app = args[0]
-    base_args = []
-    if options.verbose: base_args.append("--verbose")
-    if options.dry_run: base_args.append("--dry-run")
-    deploys = []
+    errors = {}
+    sh = shell.Shell(logger) # dry run happens to lower down...
+    base_args = _base.makeBaseArgs(options, dry_run="--dry-run")
+    # check if we have root
+    uid = os.getuid()
+    user = None
      for line in deploy.getInstallLines(global_options):
+        # validate and filter the deployments
          try:
              d = deploy.Deployment.parse(line)
          except deploy.DeploymentParseError, deploy.NoSuchApplication:
              continue
          name = d.getApplication().name
          if name != app: continue
-        deploys.append(d)
-    # parallelization code would go here
-    errors = {}
-    for d in deploys:
-        sub_argv = base_args + [d.location]
-        logger.info("$ wizard migrate " + " ".join(sub_argv))
+        # actual meat
          try:
-            migrate.main(sub_argv, global_options, logger)
-        except migrate.AlreadyMigratedError as e:
-            logger.info("Skipped already migrated %s" % d.location)
-        except wizard.Error as e:
-            name = e.__module__ + "." + e.__class__.__name__
-            if name not in errors: errors[name] = []
-            errors[name].append((d, e))
-            logger.error("ERROR [%s] in %s" % (name, d.location))
-            logger.info("Backtrace:\n" + str(e))
+            if not uid:
+                # only attempt to change user if we're root. If we're
+                # not root, assume a permission friendly test environment.
+                user = util.get_dir_user(d.location)
+            sh.callAsUser(shell.wizard, "migrate", d.location, *base_args,
+                          user = user)
+        except shell.PythonCallError as e:
+            if e.name == "wizard.command.migrate.AlreadyMigratedError":
+                logger.info("Skipped already migrated %s" % d.location)
+            elif e.name.startswith("wizard."):
+                name = e.name
+                if name not in errors: errors[name] = []
+                errors[name].append(d)
+                logger.error("ERROR [%s] in %s" % (name, d.location))
+                logger.info(e.stderr)
+            else:
+                raise e
      for name, deploys in errors.items():
          logger.warning("ERROR [%s] from %d installs" % (name, len(deploys)))
diff --git a/lib/wizard/command/migrate.py b/lib/wizard/command/migrate.py

index f9bc487be7b6aad35e0ce6e8048b44386ece721d..70c2e3fd7f7132c55c34b98faeef3161232b9c47 100644 (file)
--- a/lib/wizard/command/migrate.py
+++ b/lib/wizard/command/migrate.py
@@ -136,7 +136,7 @@ what repository and tag to use."""
      try:
          tag = "v%s-scripts" % version.version
          sh.call("git", "--git-dir", repo, "rev-parse", tag)
-    except shell.CalledProcessError:
+    except shell.CallError:
          raise NoTagError(version)
      did_git_init = False
      did_git_checkout_scripts = False
@@ -145,9 +145,14 @@ what repository and tag to use."""
          sh.call("git", "--git-dir=.git", "init")
          did_git_init = True
          # configure our alternates (to save space and make this quick)
-        alternates = open(".git/objects/info/alternates", "w")
-        alternates.write(os.path.join(repo, "objects"))
-        alternates.close()
+        data = os.path.join(repo, "objects")
+        file = ".git/objects/info/alternates"
+        if not options.dry_run:
+            alternates = open(file, "w")
+            alternates.write(data)
+            alternates.close()
+        else:
+            logger.info("# create %s containing \"%s\"" % (file, data))
          # configure our remote
          sh.call("git", "remote", "add", "origin", repo)
          # configure what would normally be set up on a 'git clone' for consistency
@@ -165,11 +170,11 @@ what repository and tag to use."""
          if options.verbose:
              try:
                  sh.call("git", "status")
-            except shell.CalledProcessError:
+            except shell.CallError:
                  pass
              try:
                  sh.call("git", "diff")
-            except shell.CalledProcessError:
+            except shell.CallError:
                  pass
      except:
          # this... is pretty bad
diff --git a/lib/wizard/command/summary.py b/lib/wizard/command/summary.py

index 11fc5818126d02c80973254b8968a4cb230d2c45..dc0cde9621d1b3fbdc52504c664d6fce21c30c8b 100644 (file)
--- a/lib/wizard/command/summary.py
+++ b/lib/wizard/command/summary.py
@@ -90,6 +90,7 @@ Examples:
              if r:
                  printer.chat("Found " + options.count_exists + " in " + d.location)
      printer.write()
+    print show
      for app in deploy.applications.values():
          if app.name not in show: continue
          printer.write(app.report())
diff --git a/lib/wizard/deploy.py b/lib/wizard/deploy.py

index a2c306959905a2d254a089e91a03e66023e2c361..7140c90bd28047cdaa8366244f0c144ef3487e6d 100644 (file)
--- a/lib/wizard/deploy.py
+++ b/lib/wizard/deploy.py
@@ -42,8 +42,10 @@ didn't contain enough fields.
  def getInstallLines(global_options):
      """Retrieves a list of lines from the version directory that
      can be passed to Deployment.parse()"""
-    vd = global_options.version_dir
-    return fileinput.input([vd + "/" + f for f in os.listdir(vd)])
+    vs = global_options.versions
+    if os.path.isfile(vs):
+        return fileinput.input([vs])
+    return fileinput.input([vs + "/" + f for f in os.listdir(vs)])
  
  class Deployment(object):
      """Represents a deployment of an autoinstall; i.e. a concrete
@@ -60,10 +62,11 @@ class Deployment(object):
          """Parses a line from the results of parallel-find.pl.
          This will work out of the box with fileinput, see
          getInstallLines()"""
+        line = line.rstrip()
          try:
-            location, deploydir = line.rstrip().split(":")
+            location, deploydir = line.split(":")
          except ValueError:
-            raise DeploymentParseError(line)
+            return Deployment(line) # lazy loaded version
          return Deployment(location, version=ApplicationVersion.parse(deploydir))
      @staticmethod
      def fromDir(dir):
diff --git a/lib/wizard/shell.py b/lib/wizard/shell.py

index 3484163f2001b3359ee191ee31ff70cb76620beb..9df5583275d01b46c9d49ac6f8f310bc11780c01 100644 (file)
--- a/lib/wizard/shell.py
+++ b/lib/wizard/shell.py
@@ -1,32 +1,54 @@
  import subprocess
  import sys
  
-class CalledProcessError(subprocess.CalledProcessError):
-    pass
+import wizard as _wizard
+from wizard import util
+
+wizard = sys.argv[0]
+
+class CallError(_wizard.Error):
+    def __init__(self, code, args, stdout, stderr):
+        self.code = code
+        self.args = args
+        self.stdout = stdout
+        self.stderr = stderr
+    def __str__(self):
+        return "CallError [%d]" % self.code
+
+class PythonCallError(CallError):
+    def __init__(self, code, args, stdout, stderr):
+        self.name = util.get_exception_name(stderr)
+        CallError.__init__(self, code, args, stdout, stderr)
+    def __str__(self):
+        return "PythonCallError [%s]" % self.name
+
+def is_python(args):
+    return args[0] == "python" or args[0] == wizard
  
  class Shell(object):
      """An advanced shell, with the ability to do dry-run and log commands"""
      def __init__(self, logger = False, dry = False):
          """ `logger`    The logger
-            `dry`       Whether or not to not run any commands, and just print"""
+            `dry`       Don't run any commands, just print them"""
          self.logger = logger
          self.dry = dry
-    def call(self, *args):
+    def call(self, *args, **kwargs):
+        (python,) = ("python" in kwargs) and [kwargs["python"]] or [None]
          if self.dry or self.logger:
              self.logger.info("$ " + ' '.join(args))
          if self.dry: return
-        proc = None
-        if self.logger:
-            if hasattr(self.logger, "verbose"):
-                # this is a special short-circuit to make redrawing
-                # output from Git work
-                proc = subprocess.Popen(args, stdout=sys.stdout, stderr=sys.stderr)
-            else:
-                proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-        else:
-            proc = subprocess.Popen(args)
-        stdout, _ = proc.communicate()
+        if python is None and is_python(args):
+            python = True
+        proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        stdout, stderr = proc.communicate()
          if self.logger and stdout: self.logger.info(stdout)
          if proc.returncode:
-            raise CalledProcessError(proc.returncode, args)
+            if python: eclass = PythonCallError
+            else: eclass = CallError
+            raise eclass(proc.returncode, args, stdout, stderr)
+        return (stdout, stderr)
+    def callAsUser(self, *args, **kwargs):
+        user = ("user" in kwargs) and kwargs["user"] or None
+        if not user: return self.call(*args)
+        return self.call("sudo", "-u", user, *args, python=is_python(args))
  
diff --git a/lib/wizard/tests/deploy_test.py b/lib/wizard/tests/deploy_test.py

index 79c8245a619e6e2106c8ba1c9e04514b665e9b2c..a1e4a37c2edd517220ba50119e1b58b8f9641de4 100644 (file)
--- a/lib/wizard/tests/deploy_test.py
+++ b/lib/wizard/tests/deploy_test.py
@@ -13,13 +13,6 @@ def test_deployment_parse():
      assert result.getVersion() == Version("1.11.0")
      assert result.getApplication().name == "mediawiki"
  
-def test_deployment_parse_parseerror():
-    try:
-        Deployment.parse("foo")
-        assert False
-    except DeploymentParseError:
-        pass
-
  def test_deployment_parse_nosuchapplication():
      try:
          Deployment.parse("a:/foo/obviouslybogus-1.11.0\n")
diff --git a/lib/wizard/tests/util_test.py b/lib/wizard/tests/util_test.py

index 9f765e2d3391f3b9e2609fc922bcf90e3799350c..ad77c8838bc8671daf3120c702087d0acb1a2b3b 100644 (file)
--- a/lib/wizard/tests/util_test.py
+++ b/lib/wizard/tests/util_test.py
@@ -1,5 +1,14 @@
+import traceback
+
  from wizard.util import *
  
+class MyError(Exception):
+    def __str__(self):
+        return """
+
+ERROR: Foo
+"""
+
  def test_get_dir_user():
      assert get_dir_user("/mit/ezyang/web_scripts/test-wiki") == "ezyang"
  
@@ -7,5 +16,17 @@ def test_get_dir_user_locker():
      assert get_dir_user("/mit/apo/web_scripts/") == "apo"
  
  def test_get_dir_url():
-    assert get_dir_url("/mit/ezyang/web_scripts/foo",) == "http://ezyang.scripts.mit.edu/foo"
+    assert get_dir_url("/mit/ezyang/web_scripts/foo") == "http://ezyang.scripts.mit.edu/foo"
+
+def test_get_exception_name():
+    try:
+        raise NotImplementedError
+    except NotImplementedError:
+        assert get_exception_name(traceback.format_exc()) == "NotImplementedError"
+
+def test_get_exception_name_withstr():
+    try:
+        raise MyError
+    except MyError:
+        assert get_exception_name(traceback.format_exc()) == "MyError"
  
diff --git a/lib/wizard/util.py b/lib/wizard/util.py

index 97e2dded357cbcb05191d5114fc2a344ae5f7f48..dddbb3451b76fbd10141205d44a429e5d679d72f 100644 (file)
--- a/lib/wizard/util.py
+++ b/lib/wizard/util.py
@@ -1,21 +1,24 @@
-import pwd
-import grp
  import os.path
  import ldap
  
-def switch_user(name):
-    """Switches the process to the run level of a Scripts user. Once
-    you do this, you can't go back, so be sure to fork beforehand!"""
-    _, _, uid, gid, _, _, _ = pwd.getpwnam('ezyang')
-    nssgid = grp.getgrnam('nss-nonlocal-users')[2]
-    os.setgid(gid)
-    os.setgroups([gid, nssgid])
-    os.setuid(uid)
+def get_exception_name(output):
+    """Reads the stderr output of another Python command and grabs the
+    fully qualified exception name"""
+    lines = output.split("\n")
+    for line in lines[1:]: # skip the "traceback" line
+        line = line.rstrip()
+        if line[0] == ' ': continue
+        if line[-1] == ":":
+            return line[:-1]
+        else:
+            return line
  
  def get_dir_user(dir):
      """Finds the username of the person who owns this directory, via LDAP.
      Only works for directories under web_scripts"""
-    homedir, _, _ = os.path.realpath(dir).partition("/web_scripts")
+    dir = os.path.realpath(dir)
+    homedir, _, _ = dir.partition("/web_scripts")
+    if homedir == dir: return None
      con = ldap.initialize('ldap://scripts.mit.edu')
      return con.search_s(
              "ou=People,dc=scripts,dc=mit,dc=edu", # base
author	Edward Z. Yang <ezyang@mit.edu>
	Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)
committer	Edward Z. Yang <ezyang@mit.edu>
	Sat, 27 Jun 2009 08:05:01 +0000 (04:05 -0400)
TODO		patch \| blob \| history
bin/wizard		patch \| blob \| history
lib/wizard/command/_base.py		patch \| blob \| history
lib/wizard/command/massmigrate.py		patch \| blob \| history
lib/wizard/command/migrate.py		patch \| blob \| history
lib/wizard/command/summary.py		patch \| blob \| history
lib/wizard/deploy.py		patch \| blob \| history
lib/wizard/shell.py		patch \| blob \| history
lib/wizard/tests/deploy_test.py		patch \| blob \| history
lib/wizard/tests/util_test.py		patch \| blob \| history
lib/wizard/util.py		patch \| blob \| history