import optparse
import fileinput
import math
+import sys
from distutils.version import LooseVersion as Version
-class ApplicationStatistic(object):
- MAXBAR = 18
+class NoSuchApplication(Exception):
+    """Raised by Deployment.parse when the application name found on a
+    line is not a key of the module-level applications table."""
+
+class DeploymentParseError(Exception):
+    """Raised by Deployment.parse when a line is not of the form
+    "location:deploydir" or the deploy directory name is not understood."""
+
+class Deployment(object):
+    """One deployed copy of an application, as listed by parallel-find.pl.
+
+    Attributes:
+        location: the first colon-separated field of the parsed line
+        version: the ApplicationVersion this deployment is counted under
+    """
+    def __init__(self, location, version):
+        self.location = location
+        self.version = version
+    @staticmethod
+    def parse(line):
+        """Parses a line from the results of parallel-find.pl.
+        This will work out of the box with fileinput.
+
+        The line must look like "location:deploydir", where the last
+        path component of deploydir is either "application-version" or
+        the bare name "deploy".
+
+        Raises DeploymentParseError if the line does not have that
+        shape, and NoSuchApplication if the application name is not a
+        key of the module-level applications table."""
+        try:
+            location, deploydir = line.rstrip().split(":")
+        except ValueError:
+            raise DeploymentParseError
+        name = deploydir.split("/")[-1]
+        if "-" in name:
+            # Split on the first dash only: version strings can contain
+            # dashes themselves (see "0.1-scripts" below), and a plain
+            # split("-") would raise an uncaught ValueError on them.
+            app, version = name.split("-", 1)
+        elif name == "deploy":
+            # Assume that it's django, since those were botched
+            app = "django"
+            version = "0.1-scripts"
+        else:
+            raise DeploymentParseError
+        # Only the table lookup is guarded, so that a KeyError coming from
+        # anywhere else is not misreported as an unknown application.
+        try:
+            application = applications[app]
+        except KeyError:
+            raise NoSuchApplication
+        return Deployment(location, application.getVersion(version))
+    def count(self):
+        """Simple method which registers the deployment as a +1 on the
+        appropriate version. No further inspection is done.
+
+        Always returns True."""
+        self.version.count(self)
+        return True
+    def count_exists(self, file):
+        """Checks if the codebase has a certain file/directory in it.
+
+        If location/file exists, the hit is registered on the version
+        and True is returned; otherwise nothing is recorded and False
+        is returned."""
+        if os.path.exists(self.location + "/" + file):
+            self.version.count_exists(self, file)
+            return True
+        return False
+
+class Application(object):
+ """Install statistics for one application, aggregated over all of its versions."""
+ # _graph scales bars so that the most-installed version is this many characters wide
+ HISTOGRAM_WIDTH = 30
def __init__(self, name):
self.name = name
- self.data = {}
- self.total = 0
- def count(self, version):
- self.total += 1
- if version in self.data:
- self.data[version] += 1
- else:
- self.data[version] = 1
+ # Maps version string -> ApplicationVersion; filled on demand by getVersion
+ self.versions = {}
+ # Some cache variables for fast access of calculated data
+ # _total: deployments counted so far (updated by ApplicationVersion.count)
+ # _max: largest per-version count seen (updated by ApplicationVersion.count)
+ # _c_exists: file name -> hits (updated by ApplicationVersion.count_exists)
+ self._total = 0
+ self._max = 0
+ self._c_exists = {}
+ def getVersion(self, version):
+ """Returns the ApplicationVersion for a version string, creating and
+ remembering it the first time that string is seen."""
+ if version not in self.versions:
+ self.versions[version] = ApplicationVersion(Version(version), self)
+ return self.versions[version]
def _graph(self, v):
+ """Returns a bar of '+' characters for count v, rounded up, relative to
+ the largest per-version count. Divides by _max, so at least one
+ deployment must have been counted before this is called."""
- return '+' * int(math.ceil(float(v) / self.total * self.MAXBAR))
+ return '+' * int(math.ceil(float(v)/self._max * self.HISTOGRAM_WIDTH))
def __str__(self):
+ """Returns the report for this application: a header line with the
+ total, one line per version in sorted order, then one line per file
+ name recorded in _c_exists. With no versions, a single line is returned."""
- if not self.data: return self.name + " (no installs)"
- ret = [self.name] + \
- [" %-8s %3d %s" % (v,c,self._graph(c)) for v,c in
- sorted(
- self.data.items(),
- lambda x, y: cmp(Version(x[0]), Version(y[0])))]
+ if not self.versions: return "%-11s no installs" % self.name
+ ret = \
+ ["%-16s %3d installs" % (self.name, self._total)] + \
+ [str(v) for v in sorted(self.versions.values())]
+ for f,c in self._c_exists.items():
+ ret.append("%d users have %s" % (c,f))
return "\n".join(ret)
+class ApplicationVersion(object):
+    """Install counters for a single version of an application."""
+    def __init__(self, version, application):
+        self.application = application
+        self.version = version
+        # c: deployments counted; c_exists: file name -> hits
+        self.c = 0
+        self.c_exists = {}
+    def __cmp__(self, other):
+        """Orders instances by their version attribute."""
+        return cmp(self.version, other.version)
+    def count(self, deployment):
+        """Registers one deployment and refreshes the application's cached
+        total and maximum."""
+        owner = self.application
+        self.c += 1
+        owner._total += 1
+        owner._max = max(owner._max, self.c)
+    def count_exists(self, deployment, n):
+        """Registers one hit for the file name n, both on this version and
+        on the owning application."""
+        for tally in (self.c_exists, self.application._c_exists):
+            tally[n] = tally.get(n, 0) + 1
+    def __str__(self):
+        """Returns this version's line of the histogram."""
+        bar = self.application._graph(self.c)
+        return " %-12s %3d %s" % (self.version, self.c, bar)
+
+application_list = [
+    "mediawiki",
+    "wordpress",
+    "joomla",
+    "e107",
+    "gallery2",
+    "phpBB",
+    "advancedbook",
+    "phpical",
+    "trac",
+    "turbogears",
+    "django",
+    # these are technically deprecated
+    "advancedpoll",
+    "gallery",
+]
+
+# Hash table for looking up string application name to instance
+applications = dict((app_name, Application(app_name)) for app_name in application_list)
+
def main():
+ """Command-line entry point: reads every file in the version directory,
+ tallies each deployment line against its application version, and prints
+ one histogram per selected application plus error counts."""
- usage = "usage: %prog [options] [application]"
+ usage = """usage: %prog [options] [application]
+
+Scans all of the collected data from parallel-find.pl, and
+determines version histograms for our applications. You may
+optionally pass application parameters to filter the installs."""
parser = optparse.OptionParser(usage)
- parser.add_option("-v", "--version-dir", dest="version_dir",
+ # NOTE(review): options.verbose is never read in this function; the
+ # "Found ..." message below is printed whether or not -v is given.
+ parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
+ default=False, help="Print interesting directories")
+ parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
+ default=False, help="Suppresses progress output")
+ parser.add_option("-d", "--version-dir", dest="version_dir",
default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
help="Location of parallel-find output")
- options, applications = parser.parse_args()
- if not applications:
- # This is hard-coded: it might be better to have a central
- # list of these somewhere and read it out here
- applications = ["mediawiki", "wordpress", "joomla", "e107", "gallery2",
- "phpBB", "advancedbook", "phpical", "trac", "turbogears", "django"]
- appHash = dict([(n,ApplicationStatistic(n)) for n in applications ])
+ # No action is given, so this option takes an argument: the file or
+ # directory name that is later passed to Deployment.count_exists.
+ parser.add_option("--count-exists", dest="count_exists",
+ default=False, help="Count deployments that contain a file")
+ # There should be machine friendly output
+ options, show_applications = parser.parse_args()
+ # With no positional arguments, report on every known application
+ if not show_applications: show_applications = applications.keys()
+ show_applications = frozenset(show_applications)
vd = options.version_dir
try:
fi = fileinput.input([vd + "/" + f for f in os.listdir(vd)])
except OSError:
+ # Any OSError from os.listdir ends up here, not only a permission problem
print "No permissions; check if AFS is mounted"
raise SystemExit(-1)
+ # errors: lines that could not be parsed
+ # unrecognized: lines naming an application missing from the table
+ # processed: input lines read so far
+ # hanging: True while progress dots sit on an unterminated output line
+ errors = 0
+ unrecognized = 0
+ processed = 0
+ hanging = False
+ if not options.quiet: print "Processing",
for line in fi:
- print line
+ processed += 1
+ # One progress dot for every ten input lines
+ if not options.quiet and processed % 10 == 0:
+ sys.stdout.write(".")
+ sys.stdout.flush()
+ hanging = True
try:
- location, deploydir = line.rstrip().split(":")
- application, version = deploydir.split("/")[-1].split("-")
- except ValueError:
- # old style .scripts-version
- # not going to bother for now
+ deploy = Deployment.parse(line)
+ except DeploymentParseError:
+ errors += 1
+ continue
+ except NoSuchApplication:
+ unrecognized += 1
continue
- if application not in appHash: continue
- appHash[application].count(version)
- for stat in appHash.values():
- print stat
+ # Deployments of applications that were not asked for are not counted
+ if deploy.version.application.name not in show_applications: continue
+ deploy.count()
+ if options.count_exists:
+ r = deploy.count_exists(options.count_exists)
+ if r:
+ # End the line of progress dots before printing the message
+ if hanging:
+ hanging = False
+ print
+ print "Found " + options.count_exists + " in " + deploy.location
+ if hanging: print
+ print
+ for app in applications.values():
+ if app.name not in show_applications: continue
+ print app
print
+ print "With %d errors and %d unrecognized applications" % (errors, unrecognized)
if __name__ == "__main__":
main()