scripts.mit.edu Git - wizard.git/commitdiff
Object-orient the code, and add --count-exists functionality.
author Edward Z. Yang <edwardzyang@thewritingpot.com>
Tue, 19 May 2009 03:30:54 +0000 (23:30 -0400)
committer Edward Z. Yang <edwardzyang@thewritingpot.com>
Tue, 19 May 2009 03:30:54 +0000 (23:30 -0400)
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
bin/install-statistics

index 6fd67b099f7bdc9ef20e47ee4f9d014b3c6e2aa5..62f47d829f4d5e10d453ffd92afe590fb7db8ed5 100755 (executable)
@@ -8,64 +8,169 @@ import os
 import optparse
 import fileinput
 import math
+import sys
 from distutils.version import LooseVersion as Version
 
-class ApplicationStatistic(object):
-    MAXBAR = 18
+class NoSuchApplication(Exception):
+    pass
+
+class DeploymentParseError(Exception):
+    pass
+
+class Deployment(object):
+    def __init__(self, location, version):
+        self.location = location
+        self.version = version
+    @staticmethod
+    def parse(line):
+        """Parses a line from the results of parallel-find.pl.
+        This will work out of the box with fileinput"""
+        try:
+            location, deploydir = line.rstrip().split(":")
+        except ValueError:
+            raise DeploymentParseError
+        name = deploydir.split("/")[-1]
+        if name.find("-") != -1:
+            app, version = name.split("-")
+        elif name == "deploy":
+            # Assume that it's django, since those were botched
+            app = "django"
+            version = "0.1-scripts"
+        else:
+            raise DeploymentParseError
+        try:
+            return Deployment(location, applications[app].getVersion(version))
+        except KeyError:
+            raise NoSuchApplication
+    def count(self):
+        """Simple method which registers the deployment as a +1 on the
+        appropriate version. No further inspection is done."""
+        self.version.count(self)
+        return True
+    def count_exists(self, file):
+        """Checks if the codebase has a certain file/directory in it."""
+        if os.path.exists(self.location + "/" + file):
+            self.version.count_exists(self, file)
+            return True
+        return False
+
+class Application(object):
+    HISTOGRAM_WIDTH = 30
     def __init__(self, name):
         self.name = name
-        self.data = {}
-        self.total = 0
-    def count(self, version):
-        self.total += 1
-        if version in self.data:
-            self.data[version] += 1
-        else:
-            self.data[version] = 1
+        self.versions = {}
+        # Some cache variables for fast access of calculated data
+        self._total = 0
+        self._max   = 0
+        self._c_exists = {}
+    def getVersion(self, version):
+        if version not in self.versions:
+            self.versions[version] = ApplicationVersion(Version(version), self)
+        return self.versions[version]
     def _graph(self, v):
-        return '+' * int(math.ceil(float(v) / self.total * self.MAXBAR))
+        return '+' * int(math.ceil(float(v)/self._max * self.HISTOGRAM_WIDTH))
     def __str__(self):
-        if not self.data: return self.name + " (no installs)"
-        ret = [self.name] + \
-            ["    %-8s %3d  %s" % (v,c,self._graph(c)) for v,c in
-                    sorted(
-                        self.data.items(),
-                        lambda x, y: cmp(Version(x[0]), Version(y[0])))]
+        if not self.versions: return "%-11s   no installs" % self.name
+        ret = \
+            ["%-16s %3d installs" % (self.name, self._total)] + \
+            [str(v) for v in sorted(self.versions.values())]
+        for f,c in self._c_exists.items():
+            ret.append("%d users have %s" % (c,f))
         return "\n".join(ret)
 
+class ApplicationVersion(object):
+    def __init__(self, version, application):
+        self.version = version
+        self.application = application
+        self.c = 0
+        self.c_exists = {}
+    def __cmp__(x, y):
+        return cmp(x.version, y.version)
+    def count(self, deployment):
+        self.c += 1
+        self.application._total += 1
+        if self.c > self.application._max:
+            self.application._max = self.c
+    def count_exists(self, deployment, n):
+        if n in self.c_exists: self.c_exists[n] += 1
+        else: self.c_exists[n] = 1
+        if n in self.application._c_exists: self.application._c_exists[n] += 1
+        else: self.application._c_exists[n] = 1
+    def __str__(self):
+        return "    %-12s %3d  %s" \
+            % (self.version, self.c, self.application._graph(self.c))
+
+application_list = [
+    "mediawiki", "wordpress", "joomla", "e107", "gallery2",
+    "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django",
+    # these are technically deprecated
+    "advancedpoll", "gallery",
+]
+
+"""Hash table for looking up string application name to instance"""
+applications = dict([(n,Application(n)) for n in application_list ])
+
 def main():
-    usage = "usage: %prog [options] [application]"
+    usage = """usage: %prog [options] [application]
+
+Scans all of the collected data from parallel-find.pl, and
+determines version histograms for our applications.  You may
+optionally pass application parameters to filter the installs."""
     parser = optparse.OptionParser(usage)
-    parser.add_option("-v", "--version-dir", dest="version_dir",
+    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
+            default=False, help="Print interesting directories")
+    parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
+            default=False, help="Suppresses progress output")
+    parser.add_option("-d", "--version-dir", dest="version_dir",
             default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
             help="Location of parallel-find output")
-    options, applications = parser.parse_args()
-    if not applications:
-        # This is hard-coded: it might be better to have a central
-        # list of these somewhere and read it out here
-        applications = ["mediawiki", "wordpress", "joomla", "e107", "gallery2",
-                "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django"]
-    appHash = dict([(n,ApplicationStatistic(n)) for n in applications ])
+    parser.add_option("--count-exists", dest="count_exists",
+            default=False, help="Count deployments that contain a file")
+    # There should be machine friendly output
+    options, show_applications = parser.parse_args()
+    if not show_applications: show_applications = applications.keys()
+    show_applications = frozenset(show_applications)
     vd = options.version_dir
     try:
         fi = fileinput.input([vd + "/" + f for f in os.listdir(vd)])
     except OSError:
         print "No permissions; check if AFS is mounted"
         raise SystemExit(-1)
+    errors = 0
+    unrecognized = 0
+    processed = 0
+    hanging = False
+    if not options.quiet: print "Processing",
     for line in fi:
-        print line
+        processed += 1
+        if not options.quiet and processed % 10 == 0:
+            sys.stdout.write(".")
+            sys.stdout.flush()
+            hanging = True
         try:
-            location, deploydir = line.rstrip().split(":")
-            application, version = deploydir.split("/")[-1].split("-")
-        except ValueError:
-            # old style .scripts-version
-            # not going to bother for now
+            deploy = Deployment.parse(line)
+        except DeploymentParseError:
+            errors += 1
+            continue
+        except NoSuchApplication:
+            unrecognized += 1
             continue
-        if application not in appHash: continue
-        appHash[application].count(version)
-    for stat in appHash.values():
-        print stat
+        if deploy.version.application.name not in show_applications: continue
+        deploy.count()
+        if options.count_exists:
+            r = deploy.count_exists(options.count_exists)
+            if r:
+                if hanging:
+                    hanging = False
+                    print
+                print "Found " + options.count_exists + " in " + deploy.location
+    if hanging: print
+    print
+    for app in applications.values():
+        if app.name not in show_applications: continue
+        print app
         print
+    print "With %d errors and %d unrecognized applications" % (errors, unrecognized)
 
 if __name__ == "__main__":
     main()