From 954caf42e8ef08b2fe02f3b27760f99a1ed934a6 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Mon, 18 May 2009 23:30:54 -0400
Subject: [PATCH] Object-orient the code, and add --count-exists functionality.

Signed-off-by: Edward Z. Yang
---
Review notes (this section is not part of the commit message):

 - The line structure of this patch was restored from a copy in which
   all whitespace had been collapsed.  The hunk below has 141 added,
   36 removed and 28 context lines, matching the hunk header and the
   diffstat.  Runs of spaces inside string literals (the histogram
   format strings, the usage text) and the indentation of continuation
   lines could not be recovered; compare them with the original commit
   before applying.  Applying may need `git apply --ignore-whitespace`.

 - Deployment.parse() now splits the deployment directory name on the
   first "-" only.  With a plain split("-"), a name containing more than
   one dash raised a ValueError from the tuple unpacking; that exception
   is not one of the two that main() handles, so a single odd directory
   name aborted the whole scan.  Splitting once also matches the
   hard-coded "0.1-scripts" version used for botched django installs.
   The post-image blob id on the "index" line predates this one-line
   change.

 - The new -v/--verbose flag is parsed but nothing reads
   options.verbose yet.

 bin/install-statistics | 177 ++++++++++++++++++++++++++++++++---------
 1 file changed, 141 insertions(+), 36 deletions(-)

diff --git a/bin/install-statistics b/bin/install-statistics
index 6fd67b0..62f47d8 100755
--- a/bin/install-statistics
+++ b/bin/install-statistics
@@ -8,64 +8,169 @@ import os
 import optparse
 import fileinput
 import math
+import sys
 from distutils.version import LooseVersion as Version
 
-class ApplicationStatistic(object):
-    MAXBAR = 18
+class NoSuchApplication(Exception):
+    pass
+
+class DeploymentParseError(Exception):
+    pass
+
+class Deployment(object):
+    def __init__(self, location, version):
+        self.location = location
+        self.version = version
+    @staticmethod
+    def parse(line):
+        """Parses a line from the results of parallel-find.pl.
+        This will work out of the box with fileinput"""
+        try:
+            location, deploydir = line.rstrip().split(":")
+        except ValueError:
+            raise DeploymentParseError
+        name = deploydir.split("/")[-1]
+        if name.find("-") != -1:
+            app, version = name.split("-", 1)
+        elif name == "deploy":
+            # Assume that it's django, since those were botched
+            app = "django"
+            version = "0.1-scripts"
+        else:
+            raise DeploymentParseError
+        try:
+            return Deployment(location, applications[app].getVersion(version))
+        except KeyError:
+            raise NoSuchApplication
+    def count(self):
+        """Simple method which registers the deployment as a +1 on the
+        appropriate version. No further inspection is done."""
+        self.version.count(self)
+        return True
+    def count_exists(self, file):
+        """Checks if the codebase has a certain file/directory in it."""
+        if os.path.exists(self.location + "/" + file):
+            self.version.count_exists(self, file)
+            return True
+        return False
+
+class Application(object):
+    HISTOGRAM_WIDTH = 30
     def __init__(self, name):
         self.name = name
-        self.data = {}
-        self.total = 0
-    def count(self, version):
-        self.total += 1
-        if version in self.data:
-            self.data[version] += 1
-        else:
-            self.data[version] = 1
+        self.versions = {}
+        # Some cache variables for fast access of calculated data
+        self._total = 0
+        self._max = 0
+        self._c_exists = {}
+    def getVersion(self, version):
+        if version not in self.versions:
+            self.versions[version] = ApplicationVersion(Version(version), self)
+        return self.versions[version]
     def _graph(self, v):
-        return '+' * int(math.ceil(float(v) / self.total * self.MAXBAR))
+        return '+' * int(math.ceil(float(v)/self._max * self.HISTOGRAM_WIDTH))
     def __str__(self):
-        if not self.data: return self.name + " (no installs)"
-        ret = [self.name] + \
-            [" %-8s %3d %s" % (v,c,self._graph(c)) for v,c in
-                sorted(
-                    self.data.items(),
-                    lambda x, y: cmp(Version(x[0]), Version(y[0])))]
+        if not self.versions: return "%-11s no installs" % self.name
+        ret = \
+            ["%-16s %3d installs" % (self.name, self._total)] + \
+            [str(v) for v in sorted(self.versions.values())]
+        for f,c in self._c_exists.items():
+            ret.append("%d users have %s" % (c,f))
         return "\n".join(ret)
 
+class ApplicationVersion(object):
+    def __init__(self, version, application):
+        self.version = version
+        self.application = application
+        self.c = 0
+        self.c_exists = {}
+    def __cmp__(x, y):
+        return cmp(x.version, y.version)
+    def count(self, deployment):
+        self.c += 1
+        self.application._total += 1
+        if self.c > self.application._max:
+            self.application._max = self.c
+    def count_exists(self, deployment, n):
+        if n in self.c_exists: self.c_exists[n] += 1
+        else: self.c_exists[n] = 1
+        if n in self.application._c_exists: self.application._c_exists[n] += 1
+        else: self.application._c_exists[n] = 1
+    def __str__(self):
+        return " %-12s %3d %s" \
+            % (self.version, self.c, self.application._graph(self.c))
+
+application_list = [
+    "mediawiki", "wordpress", "joomla", "e107", "gallery2",
+    "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django",
+    # these are technically deprecated
+    "advancedpoll", "gallery",
+]
+
+"""Hash table for looking up string application name to instance"""
+applications = dict([(n,Application(n)) for n in application_list ])
+
 def main():
-    usage = "usage: %prog [options] [application]"
+    usage = """usage: %prog [options] [application]
+
+Scans all of the collected data from parallel-find.pl, and
+determines version histograms for our applications. You may
+optionally pass application parameters to filter the installs."""
     parser = optparse.OptionParser(usage)
-    parser.add_option("-v", "--version-dir", dest="version_dir",
+    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
+            default=False, help="Print interesting directories")
+    parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
+            default=False, help="Suppresses progress output")
+    parser.add_option("-d", "--version-dir", dest="version_dir",
             default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
             help="Location of parallel-find output")
-    options, applications = parser.parse_args()
-    if not applications:
-        # This is hard-coded: it might be better to have a central
-        # list of these somewhere and read it out here
-        applications = ["mediawiki", "wordpress", "joomla", "e107", "gallery2",
-                "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django"]
-    appHash = dict([(n,ApplicationStatistic(n)) for n in applications ])
+    parser.add_option("--count-exists", dest="count_exists",
+            default=False, help="Count deployments that contain a file")
+    # There should be machine friendly output
+    options, show_applications = parser.parse_args()
+    if not show_applications: show_applications = applications.keys()
+    show_applications = frozenset(show_applications)
     vd = options.version_dir
     try:
         fi = fileinput.input([vd + "/" + f for f in os.listdir(vd)])
     except OSError:
         print "No permissions; check if AFS is mounted"
         raise SystemExit(-1)
+    errors = 0
+    unrecognized = 0
+    processed = 0
+    hanging = False
+    if not options.quiet: print "Processing",
     for line in fi:
-        print line
+        processed += 1
+        if not options.quiet and processed % 10 == 0:
+            sys.stdout.write(".")
+            sys.stdout.flush()
+            hanging = True
         try:
-            location, deploydir = line.rstrip().split(":")
-            application, version = deploydir.split("/")[-1].split("-")
-        except ValueError:
-            # old style .scripts-version
-            # not going to bother for now
+            deploy = Deployment.parse(line)
+        except DeploymentParseError:
+            errors += 1
+            continue
+        except NoSuchApplication:
+            unrecognized += 1
             continue
-        if application not in appHash: continue
-        appHash[application].count(version)
-    for stat in appHash.values():
-        print stat
+        if deploy.version.application.name not in show_applications: continue
+        deploy.count()
+        if options.count_exists:
+            r = deploy.count_exists(options.count_exists)
+            if r:
+                if hanging:
+                    hanging = False
+                    print
+                print "Found " + options.count_exists + " in " + deploy.location
+    if hanging: print
+    print
+    for app in applications.values():
+        if app.name not in show_applications: continue
+        print app
         print
+    print "With %d errors and %d unrecognized applications" % (errors, unrecognized)
 
 if __name__ == "__main__":
     main()
-- 
2.45.2