Refactor summary to be more function-oriented.

author Edward Z. Yang <ezyang@mit.edu>

Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)

committer Edward Z. Yang <ezyang@mit.edu>

Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)
author Edward Z. Yang <ezyang@mit.edu>
Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)
committer Edward Z. Yang <ezyang@mit.edu>
Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)
diff --git a/TODO b/TODO

index 79de13564d6fc7b5d3b569a1db577b2ebc132374..b726963f7d5cb12946cb97ed405d284477cbe33f 100644 (file)
--- a/TODO
+++ b/TODO
@@ -37,10 +37,8 @@ TODO NOW:
  
  PULLING OUT CONFIGURATION FILES IN AN AUTOMATED MANNER
  
-advancedbook: Two template files to fill out
  advancedpoll: Template file to fill out
  django: Noodles of template files
-e107: Multistage install process
  gallery2: Multistage install process
  joomla: Template file
  mediawiki: One-step install process
@@ -50,6 +48,37 @@ trac: NFC
  turbogears: NFC
  wordpress: Multistage install process
  
+PHILOSOPHY ABOUT LOGGING
+
+Logging is most useful when performing a mass run.  This
+includes things such as mass-migration as well as when running
+summary reports.  An interesting property about mass-migration
+or mass-upgrade, however, is that if they fail, they are
+idempotent, so an individual case can be debugged simply by running
+the single-install equivalent with --debug on.  (This, indeed,
+may be easier to do than sifting through a logfile.)
+
+It is a different story when you are running a summary report:
+you are primarily bound by your AFS cache and how quickly you can
+iterate through all of the autoinstalls.  Checking if a file
+exists on a cold AFS cache may
+take several minutes to perform; on a hot cache the same report
+may take a mere 3 seconds.  When you get to more computationally
+expensive calculations, however, even having a hot AFS cache
+is not enough to cut down your runtime.
+
+There are certain calculations that someone may want to be
+able to perform on manipulated data.  As such, this data should
+be cached on disk, if the process for extracting this data takes
+a long time.  Also, for usability's sake, Wizard should generate
+the common case reports.
+
+Ensuring that machine parseable reports are made, and then making
+the machinery to reframe this data, increases complexity.  Therefore,
+the recommendation is to assume that if you need to run iteratively,
+you'll have a hot AFS cache at your fingertips, and if that's not
+fast enough, then cache the data.
+
  COMMIT MESSAGE FIELDS:
  
  Installed-by: username@hostname
diff --git a/wizard/command/summary.py b/wizard/command/summary.py

index 376a5bd420ce2907011d7f6b5226e9a1e65f7330..bcf9148e32c76fbb48da7d00de0404d84f769541 100644 (file)
--- a/wizard/command/summary.py
+++ b/wizard/command/summary.py
@@ -1,4 +1,7 @@
  import logging
+import optparse
+import math
+import os
  
  from wizard import deploy
  from wizard.command import _base
@@ -6,11 +9,7 @@ from wizard.command import _base
  def main(argv, global_options):
      options, show = parse_args(argv)
      if not show: show = deploy.applications.keys()
-    errors, unrecognized = parse_install_lines(show, options, global_options)
-    if any(x.find("-") == -1 for x in show):
-        # only print summary when specificity is low
-        print
-        print_summary(show, errors, unrecognized)
+    options.dispatch(parse_install_lines(show, options, global_options))
  
  def parse_args(argv):
      usage = """usage: %prog summary [ARGS] APPS
@@ -25,43 +24,50 @@ Examples:
      %prog summary mediawiki
          Displays only MediaWiki statistics"""
      parser = _base.WizardOptionParser(usage)
-    parser.add_option("--count-exists", dest="count_exists",
-            default=False, help="Count deployments that contain a file")
+    def addDispatch(flag, function, type, help):
+        parser.add_option(flag, action="callback",
+            callback=make_callback(function), type=type, help=help)
+    addDispatch("--count-exists", summary_count_exists, "string",
+            "Count deployments that contain a file")
+    addDispatch("--list", summary_list, None,
+            "Print locations of deployments that match APPS")
      parser.add_option("--warn-unrecognized", dest="warn_unrecognized", action="store_true",
              default=False, help="Emit warnings when unrecognized applications are found")
-    return parser.parse_all(argv)
+    options, show = parser.parse_all(argv)
+    if not hasattr(options, "dispatch"):
+        # default
+        options.dispatch = summary_version
+    return options, show
+
+def make_callback(f):
+    def callback(option, opt_str, value, parser):
+        if hasattr(parser.values, "dispatch"):
+            # maybe make this more flexible
+            raise optparse.OptionValueError("Cannot use multiple summary modes")
+        parser.values.dispatch = lambda x: f(x, value)
+    return callback
  
  def parse_install_lines(show, options, global_options):
      show = frozenset(show)
-    errors = 0
-    unrecognized = 0
-    processed = 0
      for line in deploy.getInstallLines(global_options):
+        # construction
          try:
              d = deploy.Deployment.parse(line)
              name = d.getApplication().name
          except deploy.NoSuchApplication:
-            unrecognized += 1
              if options.warn_unrecognized:
                  logging.warning("Unrecognized application with '%s'" % line.rstrip())
              continue
          except deploy.Error:
-            errors += 1
              logging.warning("Error with '%s'" % line.rstrip())
              continue
-        logging.info("Found " + d.location)
-        if name + "-" + str(d.getVersion()) in show:
-            print d.location
-        elif name in show:
+        # filter
+        if name + "-" + str(d.getVersion()) in show or name in show:
              pass
          else:
              continue
-        d.count()
-        if options.count_exists:
-            r = d.count_exists(options.count_exists)
-            if r:
-                print "Found " + options.count_exists + " in " + d.location
-    return errors, unrecognized
+        # yield
+        yield d
  
  def print_summary(show, errors, unrecognized):
      for app in deploy.applications.values():
@@ -70,3 +76,35 @@ def print_summary(show, errors, unrecognized):
          print
      print "With %d errors and %d unrecognized applications" % (errors, unrecognized)
  
+def summary_list(deploys, value):
+    for d in deploys:
+        print d.location
+
+def summary_version(deploys, value):
+    HISTOGRAM_WIDTH = 30
+    show = set()
+    version_counts = {}
+    application_counts = {}
+    for d in deploys:
+        version = d.getAppVersion()
+        version_counts.setdefault(version, 0)
+        application_counts.setdefault(version.application, 0)
+        version_counts[version] += 1
+        application_counts[version.application] += 1
+        show.add(version.application)
+    if not show:
+        print "No applications found"
+    for application in show:
+        print "%-16s %3d installs" % (application.name, application_counts[application])
+        vmax = max(version_counts[x] for x in application.versions.values())
+        for version in sorted(application.versions.values()):
+            v = version_counts[version]
+            graph = '+' * int(math.ceil(float(v)/vmax * HISTOGRAM_WIDTH))
+            print "    %-12s %3d  %s" % (version.version, v, graph)
+        print
+
+def summary_count_exists(deploys, value):
+    for d in deploys:
+        if os.path.exists(d.location + "/" + value):
+            print d.location
+
diff --git a/wizard/deploy.py b/wizard/deploy.py

index 8612457d35df3100e2493a7f18aff604609fd673..f372fae61bdff6b1e03717f8a601704719cc1142 100644 (file)
--- a/wizard/deploy.py
+++ b/wizard/deploy.py
@@ -1,5 +1,4 @@
  import os.path
-import math
  import fileinput
  import dateutil.parser
  import distutils.version
@@ -54,17 +53,6 @@ class Deployment(object):
          """Returns the ApplicationVersion of the deployment"""
          if self._version and not force: return self._version
          else: return self.getLog()[-1].version
-    def count(self):
-        """Simple method which registers the deployment as a +1 on the
-        appropriate version. No further inspection is done."""
-        self.getAppVersion().count(self)
-        return True
-    def count_exists(self, file):
-        """Checks if the codebase has a certain file/directory in it."""
-        if os.path.exists(self.location + "/" + file):
-            self.getAppVersion().count_exists(self, file)
-            return True
-        return False
  
  class Application(object):
      """Represents the generic notion of an application, i.e.
@@ -72,10 +60,6 @@ class Application(object):
      def __init__(self, name):
          self.name = name
          self.versions = {}
-        # This is 'wizard summary' specific code
-        self._total = 0
-        self._max   = 0
-        self._c_exists = {}
      def getRepository(self):
          """Returns the Git repository that would contain this application."""
          repo = os.path.join("/afs/athena.mit.edu/contrib/scripts/git/autoinstalls", self.name + ".git")
@@ -86,19 +70,6 @@ class Application(object):
          if version not in self.versions:
              self.versions[version] = ApplicationVersion(distutils.version.LooseVersion(version), self)
          return self.versions[version]
-    # XXX: This code should go in summary.py; maybe as a mixin, maybe as
-    # a visitor acceptor
-    HISTOGRAM_WIDTH = 30
-    def _graph(self, v):
-        return '+' * int(math.ceil(float(v)/self._max * self.HISTOGRAM_WIDTH))
-    def report(self):
-        if not self.versions: return "%-11s   no installs" % self.name
-        ret = \
-            ["%-16s %3d installs" % (self.name, self._total)] + \
-            [v.report() for v in sorted(self.versions.values())]
-        for f,c in self._c_exists.items():
-            ret.append("%d users have %s" % (c,f))
-        return "\n".join(ret)
  
  class DeployLog(list):
      # As per #python; if you decide to start overloading magic methods,
@@ -228,8 +199,6 @@ class ApplicationVersion(object):
          on the application you want, so that this version gets registered."""
          self.version = version
          self.application = application
-        self.c = 0
-        self.c_exists = {}
      def getScriptsTag(self):
          """Returns the name of the Git tag for this version"""
          # XXX: This assumes that there's only a -scripts version
@@ -264,20 +233,6 @@ class ApplicationVersion(object):
              return applookup[app].getVersion(version)
          except KeyError:
              raise NoSuchApplication()
-    # This is summary specific code
-    def count(self, deployment):
-        self.c += 1
-        self.application._total += 1
-        if self.c > self.application._max:
-            self.application._max = self.c
-    def count_exists(self, deployment, n):
-        if n in self.c_exists: self.c_exists[n] += 1
-        else: self.c_exists[n] = 1
-        if n in self.application._c_exists: self.application._c_exists[n] += 1
-        else: self.application._c_exists[n] = 1
-    def report(self):
-        return "    %-12s %3d  %s" \
-            % (self.version, self.c, self.application._graph(self.c))
  
  class Error(wizard.Error):
      """Base error class for deploy errors"""
author	Edward Z. Yang <ezyang@mit.edu>
	Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)
committer	Edward Z. Yang <ezyang@mit.edu>
	Sun, 5 Jul 2009 00:01:42 +0000 (20:01 -0400)
TODO		patch \| blob \| history
wizard/command/summary.py		patch \| blob \| history
wizard/deploy.py		patch \| blob \| history