Document wizard.shell, and fix bug in massmigrate.

[wizard.git] / wizard / shell.py
diff --git a/wizard/shell.py b/wizard/shell.py

index bf7e172fb1132711ea675156c5da18a4065e73ca..21ad36971849fa48c92a8a7c6f54e53bcef53df9 100644 (file)
--- a/wizard/shell.py
+++ b/wizard/shell.py
@@ -1,3 +1,11 @@
+"""
+Wrappers around subprocess functionality that simulate an actual shell.
+
+.. testsetup:: *
+
+    from wizard.shell import *
+"""
+
  import subprocess
  import logging
  import sys
@@ -6,19 +14,50 @@ import os
  import wizard
  from wizard import util
  
-"""This is the path to the wizard executable as specified
-by the caller; it lets us recursively invoke wizard"""
-wizard = sys.argv[0]
+wizard_bin = sys.argv[0]
+"""
+This is the path to the wizard executable as specified
+by the caller; it lets us recursively invoke wizard.  Example::
+
+    from wizard import shell
+    sh = shell.Shell()
+    sh.call(shell.wizard_bin, "list")
+"""
  
  def is_python(args):
-    return args[0] == "python" or args[0] == wizard
+    """Detects whether or not an argument list invokes a Python program."""
+    return args[0] == "python" or args[0] == wizard_bin
  
  class Shell(object):
-    """An advanced shell, with the ability to do dry-run and log commands"""
+    """
+    An advanced shell that performs logging.  If ``dry`` is ``True``,
+    no commands are actually run.
+    """
      def __init__(self, dry = False):
-        """ `dry`       Don't run any commands, just print them"""
          self.dry = dry
      def call(self, *args, **kwargs):
+        """
+        Performs a system call.  The actual executable and options should
+        be passed as arguments to this function.  Several keyword arguments
+        are also supported:
+
+        :param python: explicitly marks the subprocess as Python or not Python
+            for improved error reporting.  By default, we use
+            :func:`is_python` to autodetect this.
+        :param input: input to feed the subprocess on standard input.
+        :returns: a tuple of strings ``(stdout, stderr)``
+
+        >>> sh = Shell()
+        >>> sh.call("echo", "Foobar")
+        ('Foobar\\n', '')
+
+        .. note::
+
+            This function does not munge trailing whitespace.  A common
+            idiom for dealing with this is::
+
+                sh.call("echo", "Foobar")[0].rstrip()
+        """
          kwargs.setdefault("python", None)
          logging.info("Running `" + ' '.join(args) + "`")
          if self.dry:
@@ -34,23 +73,39 @@ class Shell(object):
          # ourself, and then setting up a
          # SIGCHILD handler to write a single byte to the pipe to get
          # us out of select() when a subprocess exits.
-        proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        if hasattr(self, "async"):
-            self.async(proc, args, **kwargs)
+        proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+        if hasattr(self, "_async"):
+            self._async(proc, args, **kwargs)
              return proc
-        stdout, stderr = proc.communicate()
-        self.log(stdout, stderr)
+        kwargs.setdefault("input", None)
+        stdout, stderr = proc.communicate(kwargs["input"])
+        self._log(stdout, stderr)
          if proc.returncode:
              if kwargs["python"]: eclass = PythonCallError
              else: eclass = CallError
              raise eclass(proc.returncode, args, stdout, stderr)
          return (stdout, stderr)
-    def log(self, stdout, stderr):
+    def _log(self, stdout, stderr):
+        """Logs the standard output and standard error from a command."""
          if stdout:
              logging.debug("STDOUT:\n" + stdout)
          if stderr:
              logging.debug("STDERR:\n" + stderr)
      def callAsUser(self, *args, **kwargs):
+        """
+        Performs a system call as a different user.  This is only possible
+        if you are running as root.  Keyword arguments
+        are the same as :meth:`call` with the following additions:
+
+        :param user: name of the user to run command as.
+        :param uid: uid of the user to run command as.
+
+        .. note::
+
+            The resulting system call internally uses :command:`sudo`,
+            and as such environment variables will get scrubbed.  We
+            manually preserve :envvar:`SSH_GSSAPI_NAME`.
+        """
          user = kwargs.pop("user", None)
          uid = kwargs.pop("uid", None)
          kwargs.setdefault("python", is_python(args))
@@ -62,18 +117,64 @@ class Shell(object):
          if user: return self.call("sudo", "-u", user, *args, **kwargs)
  
  class ParallelShell(Shell):
-    """Commands are queued here, and executed in parallel (with
-    threading) in accordance with the maximum number of allowed
-    subprocesses, and result in callback execution when they finish."""
+    """
+    Modifies the semantics of :class:`Shell` so that
+    commands are queued here, and executed in parallel using waitpid
+    with ``max`` subprocesses, and result in callback execution
+    when they finish.
+
+    Before enqueueing a system call with :meth:`call` or :meth:`callAsUser`,
+    you should wait for an open slot using :meth:`wait`; otherwise,
+    ``max`` rate limiting will have no effect.  For example::
+
+        sh = ParallelShell()
+        for command in commands_to_execute_in_parallel:
+            sh.wait()
+            sh.call(*command)
+        sh.join()
+
+    .. method:: call(*args, **kwargs)
+
+        Enqueues a system call for parallel processing.  Keyword arguments
+        are the same as :meth:`Shell.call` with the following additions:
+
+        :param on_success: Callback function for success (zero exit status).
+            The callback function should accept two arguments,
+            ``stdout`` and ``stderr``.
+        :param on_error: Callback function for failure (nonzero exit status).
+            The callback function should accept one argument, the
+            exception that would have been thrown by the synchronous
+            version.
+        :return: The :class:`subprocess.Popen` object that was opened.
+
+    .. method:: callAsUser(*args, **kwargs)
+
+        Enqueues a system call under a different user for parallel
+        processing.  Keyword arguments are the same as
+        :meth:`Shell.callAsUser` with the additions of keyword
+        arguments from :meth:`call`.
+    """
      def __init__(self, dry = False, max = 10):
          super(ParallelShell, self).__init__(dry=dry)
          self.running = {}
          self.max = max # maximum of commands to run in parallel
-    def async(self, proc, args, python, on_success, on_error):
-        """Gets handed a subprocess.Proc object from our deferred
-        execution"""
+    def _async(self, proc, args, python, on_success, on_error):
+        """
+        Gets handed a :class:`subprocess.Popen` object from our deferred
+        execution.  See :meth:`Shell.call` source code for details.
+        """
          self.running[proc.pid] = (proc, args, python, on_success, on_error)
      def wait(self):
+        """
+        Blocking call that waits for an open subprocess slot.  You should
+        call this before enqueuing.
+
+        .. note::
+
+            This method may become unnecessary in the future.
+        """
+        # XXX: This API sucks; the actual call/callAsUser call should
+        # probably block automatically (unless I have a good reason not to)
          # bail out immediately on initial ramp up
          if len(self.running) < self.max: return
          # now, wait for open pids.
@@ -88,7 +189,7 @@ class ParallelShell(Shell):
          # temporary files
          stdout = proc.stdout.read()
          stderr = proc.stderr.read()
-        self.log(stdout, stderr)
+        self._log(stdout, stderr)
          if status:
              if python: eclass = PythonCallError
              else: eclass = CallError
@@ -105,12 +206,25 @@ class ParallelShell(Shell):
              raise e
  
  class DummyParallelShell(ParallelShell):
-    """Same API as ParallelShell, but doesn't actually parallelize (by
-    using only one thread)"""
+    """Same API as :class:`ParallelShell`, but doesn't actually
+    parallelize (i.e. all calls to :meth:`wait` block.)"""
      def __init__(self, dry = False):
          super(DummyParallelShell, self).__init__(dry=dry, max=1)
  
-class CallError(wizard.Error):
+class Error(wizard.Error):
+    """Base exception for this module"""
+    pass
+
+class CallError(Error):
+    """Indicates that a subprocess call returned a nonzero exit status."""
+    #: The exit code of the failed subprocess.
+    code = None
+    #: List of the program and arguments that failed.
+    args = None
+    #: The stdout of the program.
+    stdout = None
+    #: The stderr of the program.
+    stderr = None
      def __init__(self, code, args, stdout, stderr):
          self.code = code
          self.args = args
@@ -120,6 +234,12 @@ class CallError(wizard.Error):
          return "CallError [%d]" % self.code
  
  class PythonCallError(CallError):
+    """
+    Indicates that a Python subprocess call had an uncaught exception.
+    This exception also contains the attributes of :class:`CallError`.
+    """
+    #: Name of the uncaught exception.
+    name = None
      def __init__(self, code, args, stdout, stderr):
          self.name = util.get_exception_name(stderr)
          CallError.__init__(self, code, args, stdout, stderr)