#!/usr/bin/env python

"""
This script generates basic statistics about our autoinstalls.
"""

import os
import optparse
import fileinput
import math
import re
import sys

try:
    from distutils.version import LooseVersion as Version
except ImportError:
    # distutils is not available on newer interpreters (removed in Python
    # 3.12), so provide the small subset of LooseVersion this script uses.
    class Version(object):
        """Minimal stand-in for LooseVersion.

        Keeps the original string in ``vstring`` (returned by ``str()``)
        and a list of int/str components in ``version``.
        """

        _component_re = re.compile(r"(\d+|[a-z]+|\.)")

        def __init__(self, vstring):
            self.vstring = vstring
            self.version = []
            for part in self._component_re.split(vstring):
                if not part or part == ".":
                    continue
                try:
                    self.version.append(int(part))
                except ValueError:
                    self.version.append(part)

        def __str__(self):
            return self.vstring

        def __repr__(self):
            return "Version(%r)" % (self.vstring,)


class NoSuchApplication(Exception):
    """Raised by Deployment.parse when the application name is unknown."""


class DeploymentParseError(Exception):
    """Raised by Deployment.parse when a line cannot be understood."""


class Deployment(object):
    """A single installed copy of an application at some location."""

    def __init__(self, location, version):
        self.location = location
        self.version = version
        self.application = version.application

    @staticmethod
    def parse(line):
        """Parses a line from the results of parallel-find.pl.

        This will work out of the box with fileinput.  A line has the
        form ``<location>:<deploydir>``; the last path component of
        ``<deploydir>`` is either ``<app>-<version>`` or the literal
        ``deploy``.

        Raises DeploymentParseError if the line does not have exactly
        one colon or the directory name is not recognized, and
        NoSuchApplication if the application is not in ``applications``.
        """
        try:
            location, deploydir = line.rstrip().split(":")
        except ValueError:
            raise DeploymentParseError
        name = deploydir.split("/")[-1]
        if "-" in name:
            # Split on the first dash only: versions such as
            # "0.1-scripts" contain dashes themselves.
            app, version = name.split("-", 1)
        elif name == "deploy":
            # Assume that it's django, since those were botched
            app = "django"
            version = "0.1-scripts"
        else:
            raise DeploymentParseError
        try:
            application = applications[app]
        except KeyError:
            raise NoSuchApplication
        return Deployment(location, application.getVersion(version))

    def count(self):
        """Registers the deployment as a +1 on the appropriate version.

        No further inspection is done.  Always returns True.
        """
        self.version.count(self)
        return True

    def count_exists(self, file):
        """Checks if the codebase has a certain file/directory in it.

        If ``<location>/<file>`` exists, the hit is recorded on the
        version and True is returned; otherwise returns False.
        """
        # NOTE: the parameter name shadows a Python 2 builtin, but is kept
        # so that keyword callers continue to work.
        if os.path.exists(self.location + "/" + file):
            self.version.count_exists(self, file)
            return True
        return False


class Application(object):
    """An application together with the versions seen so far."""

    HISTOGRAM_WIDTH = 30

    def __init__(self, name):
        self.name = name
        self.versions = {}
        # Some cache variables for fast access of calculated data;
        # they are maintained by ApplicationVersion.
        self._total = 0
        self._max = 0
        self._c_exists = {}

    def getVersion(self, version):
        """Returns the ApplicationVersion for a version string, creating
        and remembering it on first use."""
        if version not in self.versions:
            self.versions[version] = ApplicationVersion(Version(version), self)
        return self.versions[version]

    def _graph(self, v):
        """Returns a bar of '+' characters for count ``v``, scaled so the
        most common version fills HISTOGRAM_WIDTH columns."""
        if self._max <= 0:
            # Nothing has been counted yet; avoid dividing by zero.
            return ""
        return "+" * int(math.ceil(float(v) / self._max * self.HISTOGRAM_WIDTH))

    def __str__(self):
        if not self.versions:
            return "%-11s no installs" % self.name
        ret = ["%-16s %3d installs" % (self.name, self._total)]
        ret.extend(str(v) for v in sorted(self.versions.values()))
        # Sorted so that the report does not depend on dict ordering.
        for f, c in sorted(self._c_exists.items()):
            ret.append("%d users have %s" % (c, f))
        return "\n".join(ret)


class ApplicationVersion(object):
    """Counters for one version of an Application.

    ``version`` is expected to expose a ``version`` list of int/str
    components, as LooseVersion does.
    """

    def __init__(self, version, application):
        self.version = version
        self.application = application
        self.c = 0
        self.c_exists = {}

    def _sort_key(self):
        """Returns the version components tagged by type, so numeric
        parts sort before textual ones and ints are never compared
        against strings directly."""
        return [(1 if isinstance(part, str) else 0, part)
                for part in self.version.version]

    def __cmp__(self, other):
        # Only consulted by Python 2; written without the cmp() builtin.
        mine, theirs = self._sort_key(), other._sort_key()
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        # sorted() only needs "<", on both Python 2 and Python 3.
        return self._sort_key() < other._sort_key()

    def count(self, deployment):
        """Adds one install to this version and updates the owning
        application's total and maximum."""
        self.c += 1
        self.application._total += 1
        if self.c > self.application._max:
            self.application._max = self.c

    def count_exists(self, deployment, n):
        """Records that a deployment of this version contains ``n``,
        both on this version and on the owning application."""
        self.c_exists[n] = self.c_exists.get(n, 0) + 1
        app_counts = self.application._c_exists
        app_counts[n] = app_counts.get(n, 0) + 1

    def __str__(self):
        return " %-12s %3d %s" \
            % (self.version, self.c, self.application._graph(self.c))


class Printer(object):
    """Writes messages to stdout while keeping track of whether a
    progress line (dots) is still open and must be ended first."""

    # NOTE: the ``str`` parameter name on the methods below shadows the
    # builtin; it is kept so that keyword callers continue to work.

    def __init__(self, quiet, verbose):
        self.i = 0
        self.quiet = quiet
        self.verbose = verbose
        self.hanging = False  # True while the current line is unfinished

    def tick(self):
        """Counts one unit of work; unless quiet, prints a dot for every
        tenth tick."""
        self.i += 1
        if not self.quiet and self.i % 10 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
            self.hanging = True

    def _hang(self):
        """Ends the current line if one is still open."""
        if self.hanging:
            self.hanging = False
            sys.stdout.write("\n")

    def _line(self, text):
        """Ends any open line, then writes ``text`` on a line of its own."""
        self._hang()
        sys.stdout.write("%s\n" % (text,))

    def write(self, str=""):
        """Always prints the message."""
        self._line(str)

    def qwrite(self, str=""):
        """Prints the message unless quiet."""
        if not self.quiet:
            self._line(str)

    def tweet(self, str=""):
        """Unless quiet, prints the message without a trailing newline,
        leaving the line open for progress dots."""
        if not self.quiet:
            self._hang()
            sys.stdout.write("%s" % (str,))
            sys.stdout.flush()
            # The line is unfinished, so the next message must start a
            # new one rather than be glued onto this text.
            self.hanging = True

    def chat(self, str=""):
        """Prints the message only when verbose."""
        if self.verbose:
            self._line(str)


application_list = [
    "mediawiki", "wordpress", "joomla", "e107", "gallery2",
    "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django",
    # these are technically deprecated
    "advancedpoll", "gallery",
]

# Hash table for looking up string application name to instance
applications = dict((n, Application(n)) for n in application_list)


def _scan(lines, show, printer, count_exists):
    """Parses and counts every line; returns (errors, unrecognized).

    ``show`` is the set of application names and/or ``app-version``
    names to include.  ``count_exists`` is a file name to look for in
    each counted deployment, or a false value to skip that check.
    """
    errors = 0
    unrecognized = 0
    for line in lines:
        printer.tick()
        try:
            deploy = Deployment.parse(line)
        except DeploymentParseError:
            errors += 1
            continue
        except NoSuchApplication:
            unrecognized += 1
            continue
        app_name = deploy.application.name
        full_name = "%s-%s" % (app_name, deploy.version.version)
        if full_name in show:
            printer.write("%s deployment at %s" % (full_name, deploy.location))
        elif app_name not in show:
            continue
        deploy.count()
        if count_exists and deploy.count_exists(count_exists):
            printer.chat("Found " + count_exists + " in " + deploy.location)
    return errors, unrecognized


def main():
    """Command-line entry point: scans the version directory and prints
    a version histogram for each selected application."""
    usage = """usage: %prog [options] [application]

Scans all of the collected data from parallel-find.pl, and
determines version histograms for our applications.  You may
optionally pass application parameters to filter the installs.

Examples:
    %prog
        Basic usage
    %prog mediawiki
        Displays only MediaWiki statistics
    %prog -v -q mediawiki-1.2.3
        Displays all deployments of this version"""
    parser = optparse.OptionParser(usage)
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
            default=False, help="Print interesting directories")
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
            default=False, help="Suppresses progress output")
    parser.add_option("-d", "--version-dir", dest="version_dir",
            default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
            help="Location of parallel-find output")
    parser.add_option("--count-exists", dest="count_exists",
            default=False, help="Count deployments that contain a file")
    # There should be machine friendly output
    options, show = parser.parse_args()
    if not show:
        show = applications.keys()
    show = frozenset(show)
    vd = options.version_dir
    try:
        files = [vd + "/" + f for f in os.listdir(vd)]
    except OSError:
        sys.stdout.write("No permissions; check if AFS is mounted\n")
        raise SystemExit(-1)
    printer = Printer(options.quiet, options.verbose)
    printer.tweet("Processing")
    errors = 0
    unrecognized = 0
    # fileinput falls back to reading stdin when given an empty list,
    # which would make the script block on an empty directory.
    if files:
        fi = fileinput.input(files)
        try:
            errors, unrecognized = _scan(fi, show, printer,
                                         options.count_exists)
        finally:
            fi.close()
    printer.write()
    # Follow application_list so the report order is stable.
    for name in application_list:
        if name not in show:
            continue
        printer.write(applications[name])
        printer.write()
    printer.write("With %d errors and %d unrecognized applications"
                  % (errors, unrecognized))


if __name__ == "__main__":
    main()