scripts.mit.edu Git - wizard.git/blob - bin/install-statistics
Object-orient the code, and add --count-exists functionality.
[wizard.git] / bin / install-statistics
1 #!/usr/bin/env python
2
3 """
4 This script generates basic statistics about our autoinstalls.
5 """
6
7 import os
8 import optparse
9 import fileinput
10 import math
11 import sys
12 from distutils.version import LooseVersion as Version
13
class NoSuchApplication(Exception):
    """Raised by Deployment.parse when the application named on a line
    has no entry in the applications lookup table."""
16
class DeploymentParseError(Exception):
    """Raised by Deployment.parse when a line cannot be broken into a
    location and a recognizable deployment directory name."""
19
class Deployment(object):
    """One installed copy of an application, as reported by parallel-find.pl.

    Attributes:
        location: the part of the input line before the colon.
        version: the per-version counter object returned by the matching
            application's getVersion(); count() and count_exists() on this
            object are forwarded to it.
    """

    def __init__(self, location, version):
        self.location = location
        self.version = version

    @staticmethod
    def parse(line):
        """Parses a line from the results of parallel-find.pl.
        This will work out of the box with fileinput.

        The line must look like "location:deploydir".  The last path
        component of deploydir is either "app-version" or the bare name
        "deploy".

        Returns a new Deployment.

        Raises DeploymentParseError if the line does not contain exactly
        one colon or the directory name has neither form, and
        NoSuchApplication if the application name is not a key of the
        module-level applications table."""
        try:
            location, deploydir = line.rstrip().split(":")
        except ValueError:
            raise DeploymentParseError
        name = deploydir.split("/")[-1]
        if "-" in name:
            # Split on the first hyphen only: version strings such as
            # "0.1-scripts" contain hyphens themselves, and an unbounded
            # split would fail to unpack into two names.
            app, version = name.split("-", 1)
        elif name == "deploy":
            # Assume that it's django, since those were botched
            app = "django"
            version = "0.1-scripts"
        else:
            raise DeploymentParseError
        # Only the table lookup is guarded, so a KeyError from anywhere
        # else is not mistaken for an unknown application.
        try:
            application = applications[app]
        except KeyError:
            raise NoSuchApplication
        return Deployment(location, application.getVersion(version))

    def count(self):
        """Simple method which registers the deployment as a +1 on the
        appropriate version. No further inspection is done.

        Always returns True."""
        self.version.count(self)
        return True

    def count_exists(self, file):
        """Checks if the codebase has a certain file/directory in it.

        file is joined to the deployment's location with a "/".  When the
        resulting path exists the hit is registered on the version and
        True is returned; otherwise nothing is recorded and False is
        returned."""
        if os.path.exists(self.location + "/" + file):
            self.version.count_exists(self, file)
            return True
        return False
56
class Application(object):
    """Aggregated install statistics for one application.

    Attributes:
        name: the application's name.
        versions: maps a version string to its per-version counter object.
        _total, _max, _c_exists: running totals kept up to date by the
            per-version objects as deployments are counted (total number
            of installs, largest per-version count, and per-file hit
            counts respectively).
    """

    # Number of '+' characters used for the most popular version.
    HISTOGRAM_WIDTH = 30

    def __init__(self, name):
        self.name = name
        self.versions = {}
        # Some cache variables for fast access of calculated data
        self._total = 0
        self._max = 0
        self._c_exists = {}

    def getVersion(self, version):
        """Return the counter object for the given version string,
        creating and remembering it on first use."""
        if version not in self.versions:
            self.versions[version] = ApplicationVersion(Version(version), self)
        return self.versions[version]

    def _graph(self, v):
        """Return a bar of '+' characters for a count of v, scaled so that
        the largest per-version count spans HISTOGRAM_WIDTH characters."""
        if not self._max:
            # Nothing has been counted yet; avoid dividing by zero.
            return ''
        scaled = float(v) / self._max * self.HISTOGRAM_WIDTH
        return '+' * int(math.ceil(scaled))

    def __str__(self):
        """Render a header line, one line per version in sorted order, and
        one line per file recorded in _c_exists."""
        if not self.versions:
            return "%-11s   no installs" % self.name
        ret = ["%-16s %3d installs" % (self.name, self._total)]
        ret.extend(str(v) for v in sorted(self.versions.values()))
        for f, c in self._c_exists.items():
            ret.append("%d users have %s" % (c, f))
        return "\n".join(ret)
80
class ApplicationVersion(object):
    """Install counter for one version of one application.

    Attributes:
        version: the comparable version value used for ordering.
        application: the owning application object, whose _total, _max and
            _c_exists aggregates are updated as deployments are counted.
        c: number of deployments counted for this version.
        c_exists: maps a file name to the number of deployments of this
            version in which it was found.
    """

    def __init__(self, version, application):
        self.version = version
        self.application = application
        self.c = 0
        self.c_exists = {}

    def __cmp__(self, other):
        """Order by version (consulted by Python 2 only)."""
        return cmp(self.version, other.version)

    def __lt__(self, other):
        """Order by version.

        sorted() only needs "<", and Python 3 ignores __cmp__, so this
        keeps version lists sortable on both interpreters."""
        return self.version < other.version

    def count(self, deployment):
        """Record one more deployment of this version and refresh the
        application's total and maximum."""
        self.c += 1
        self.application._total += 1
        if self.c > self.application._max:
            self.application._max = self.c

    def count_exists(self, deployment, n):
        """Record that file n was found, both on this version and on the
        owning application."""
        self.c_exists[n] = self.c_exists.get(n, 0) + 1
        totals = self.application._c_exists
        totals[n] = totals.get(n, 0) + 1

    def __str__(self):
        """Render one histogram row: version, count, and bar."""
        return "    %-12s %3d  %s" \
            % (self.version, self.c, self.application._graph(self.c))
102
# Names of every application this script recognizes.
application_list = [
    "mediawiki",
    "wordpress",
    "joomla",
    "e107",
    "gallery2",
    "phpBB",
    "advancedbook",
    "phpical",
    "trac",
    "turbogears",
    "django",
    # these are technically deprecated
    "advancedpoll",
    "gallery",
]

# Hash table for looking up string application name to instance
applications = {}
for n in application_list:
    applications[n] = Application(n)
112
113 def main():
114     usage = """usage: %prog [options] [application]
115
116 Scans all of the collected data from parallel-find.pl, and
117 determines version histograms for our applications.  You may
118 optionally pass application parameters to filter the installs."""
119     parser = optparse.OptionParser(usage)
120     parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
121             default=False, help="Print interesting directories")
122     parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
123             default=False, help="Suppresses progress output")
124     parser.add_option("-d", "--version-dir", dest="version_dir",
125             default="/afs/athena.mit.edu/contrib/scripts/sec-tools/store/versions",
126             help="Location of parallel-find output")
127     parser.add_option("--count-exists", dest="count_exists",
128             default=False, help="Count deployments that contain a file")
129     # There should be machine friendly output
130     options, show_applications = parser.parse_args()
131     if not show_applications: show_applications = applications.keys()
132     show_applications = frozenset(show_applications)
133     vd = options.version_dir
134     try:
135         fi = fileinput.input([vd + "/" + f for f in os.listdir(vd)])
136     except OSError:
137         print "No permissions; check if AFS is mounted"
138         raise SystemExit(-1)
139     errors = 0
140     unrecognized = 0
141     processed = 0
142     hanging = False
143     if not options.quiet: print "Processing",
144     for line in fi:
145         processed += 1
146         if not options.quiet and processed % 10 == 0:
147             sys.stdout.write(".")
148             sys.stdout.flush()
149             hanging = True
150         try:
151             deploy = Deployment.parse(line)
152         except DeploymentParseError:
153             errors += 1
154             continue
155         except NoSuchApplication:
156             unrecognized += 1
157             continue
158         if deploy.version.application.name not in show_applications: continue
159         deploy.count()
160         if options.count_exists:
161             r = deploy.count_exists(options.count_exists)
162             if r:
163                 if hanging:
164                     hanging = False
165                     print
166                 print "Found " + options.count_exists + " in " + deploy.location
167     if hanging: print
168     print
169     for app in applications.values():
170         if app.name not in show_applications: continue
171         print app
172         print
173     print "With %d errors and %d unrecognized applications" % (errors, unrecognized)
174
175 if __name__ == "__main__":
176     main()
177