X-Git-Url: https://scripts.mit.edu/gitweb/wizard.git/blobdiff_plain/87328ee0cd3b89539e58fb90e661f586e79dc279..1157f400fa6912757953582cda2967ec07c82614:/wizard/command/mass_upgrade.py diff --git a/wizard/command/mass_upgrade.py b/wizard/command/mass_upgrade.py index 649ac0f..f9aa8bb 100644 --- a/wizard/command/mass_upgrade.py +++ b/wizard/command/mass_upgrade.py @@ -6,28 +6,43 @@ import sys import shutil import errno -from wizard import deploy, scripts, shell, sset, command +from wizard import deploy, report, shell, sset, command from wizard.command import upgrade def main(argv, baton): options, args = parse_args(argv, baton) app = args[0] - base_args = calculate_base_args(options) - base_args.append("--non-interactive") sh = shell.ParallelShell.make(options.no_parallelize, options.max_processes) command.create_logdir(options.log_dir) - seen = sset.make(options.seen) - is_root = not os.getuid() - report = command.open_reports(options.log_dir, ('lookup', 'warnings', 'errors', 'success'), - options.redo, ('not_migrated', 'merge', 'verify', 'backup_failure', 'blacklisted')) + # setup reports + human_status = { + 'up_to_date': 'are now up-to-date', + 'not_migrated': 'were not migrated', + 'merge': 'had merge failures', + 'verify': 'had web verification errors', + 'backup_failure': 'had a backup failure', + 'blacklisted': 'were blacklisted', + 'db': 'had database errors', + 'quota': 'had too low quota', + 'permissions': 'had insufficient permissions for upgrade' + } + if options.remerge: + os.unlink(os.path.join(options.log_dir, 'merge.txt')) + status = (report.make_fresh if options.redo else report.make)(options.log_dir, *human_status.keys()) + runtime = report.make_fresh(options.log_dir, 'success', 'lookup', 'warnings', 'errors') + # setup rr_cache rr_cache = os.path.join(options.log_dir, "rr-cache") - base_args.append("--rr-cache=" + rr_cache) try: os.mkdir(rr_cache) except OSError as e: if e.errno != errno.EEXIST: raise - # loop stuff + os.chmod(rr_cache, 0o777) + # setup base arguments + base_args = calculate_base_args(options) + base_args.append("--non-interactive") + base_args.append("--rr-cache=" + rr_cache) + # loop variables errors = {} i = 0 deploys = deploy.parse_install_lines(app, options.versions_path, user=options.user) @@ -39,20 +54,22 @@ def main(argv, baton): os.chmod("/dev/shm/wizard", 0o777) try: for i, d in enumerate(requested_deploys, 1): - report.lookup.write("%04d %s\n" % (i, d.location)) # pylint: disable-msg=E1101 - report.flush() - # check if we want to punt due to --limit - if d.location in seen: - continue - if i in report.skip: - continue - if is_root and not command.security_check_homedir(d.location): + runtime.write("lookup", i, d.location) + if not os.getuid() and not command.security_check_homedir(d.location): continue + if not options.redo: + found = False + for r in status.reports.values(): + if i in r.values: + found = True + break + if found: + continue # XXX: we may be able to punt based on detected versions from d, which # would be faster than spinning up a new process. On the other hand, - # `seen` makes this mostly not a problem - logging.info("[%04d] Processing %s" % (i, d.location)) - child_args = list(base_args) + # our aggressive caching strategies using reports make this mostly not a problem + logging.info("[%04d] Processing %s", i, d.location) + child_args = list(base_args) # copy # calculate the log file, if a log dir was specified if options.log_dir: log_file = command.calculate_log_name(options.log_dir, i) @@ -62,53 +79,48 @@ def main(argv, baton): # we need to make another stack frame so that d and i get specific bindings. def on_success(stdout, stderr): if stderr: - report.lookup.write("[%04d] %s\n" % (i, d.location)) # pylint: disable-msg=E1101 - logging.warning("[%04d] Warnings at [%s]:\n%s" % (i, d.location, stderr)) - seen.add(d.location) - report.success.write("%s\n" % d.location) # pylint: disable-msg=E1101 - report.successes += 1 - report.flush() + runtime.write("warnings", i, d.location) + logging.warning("[%04d] Warnings at [%s]:\n%s", i, d.location, stderr) + runtime.write("success", i, d.location) + status.write("up_to_date", i, d.location) def on_error(e): if e.name == "AlreadyUpgraded": - seen.add(d.location) logging.info("[%04d] Skipped already upgraded %s" % (i, d.location)) + status.write("up_to_date", i, d.location) elif e.name == "MergeFailed": - seen.add(d.location) conflicts, _, tmpdir = e.stdout.rstrip().partition(" ") - logging.warning("[%04d] Conflicts in %d files: resolve at [%s], source at [%s]" % (i, int(conflicts), tmpdir, d.location)) - report.merge.write("[%04d] %s %d %s\n" % (i, tmpdir, int(conflicts), d.location)) # pylint: disable-msg=E1101 - report.fails['merge'] += 1 + logging.warning("[%04d] Conflicts in %s files: resolve at [%s], source at [%s]", + i, conflicts, tmpdir, d.location) + status.write("merge", i, tmpdir, conflicts, d.location) elif e.name == "BlacklistedError": - reason = e.stdout.rstrip() - reason = reason.replace("\n", " ") - shortmsg = "[%04d] %s %s\n" % (i, d.location, reason) - report.blacklisted.write(shortmsg) # pylint: disable-msg=E1101 - report.fails['blacklisted'] += 1 - logging.warning("[%04d] Blacklisted because of '%s' at %s" % (i, reason, d.location)) + reason = e.stdout.rstrip().replace("\n", " ") + logging.warning("[%04d] Blacklisted because of '%s' at %s", i, reason, d.location) + status.write("blacklisted", i, d.location, reason) + elif e.name == "WebVerificationError": + url = d.url.geturl() + # This should actually be a warning, but it's a really common error + logging.info("[%04d] Could not verify application at %s", i, url) + status.write("verify", i, url) + elif e.name == "DatabaseVerificationError": + logging.info("[%04d] Could not verify database ast %s", i, d.location) + status.write("db", i, d.location) + elif e.name == "NotMigratedError": + logging.info("[%04d] Application not migrated at %s", i, d.location) + status.write("not_migrated", i, d.location) + elif e.name == "BackupFailure": + logging.info("[%04d] Failed backups at %s", i, d.location) + status.write("backup_failure", i, d.location) + elif e.name == "QuotaTooLow": + logging.info("[%04d] Quota too low at %s", i, d.location) + status.write("quota", i, d.location) + elif e.name == "PermissionsError": + logging.info("[%04d] Insufficient permissions to upgrade %s", i, d.location) + status.write("permissions", i, d.location) else: - name = e.name - if name == "WebVerificationError": - url = d.url.geturl() - # This should actually be a warning, but - # it's a really common error - logging.info("[%04d] Could not verify application at %s" % (i, url)) - report.verify.write("[%04d] %s\n" % (i, url)) # pylint: disable-msg=E1101 - report.fails['verify'] += 1 - elif e.name == "NotMigratedError": - logging.info("[%04d] Application not migrated at %s" % (i, d.location)) - report.not_migrated.write("[%04d] %s\n" % (i, d.location)) # pylint: disable-msg=E1101 - report.fails['not_migrated'] += 1 - else: - if name not in errors: errors[name] = [] - errors[name].append(d) - msg = "[%04d] %s in %s" % (i, name, d.location) - logging.error(msg) - report.errors.write(msg + "\n") # pylint: disable-msg=E1101 - shortmsg = "[%04d] %s\n" % (i, d.location) - if name == "BackupFailure": - report.backup_failure.write(shortmsg) # pylint: disable-msg=E1101 - report.fails['backup_failure'] += 1 - report.flush() + errors.setdefault(e.name, []).append(d) + logging.error("[%04d] %s in %s", i, e.name, d.location) + runtime.write("errors", i, e.name, d.location) + # lack of status write means that we'll always retry return (on_success, on_error) on_success, on_error = make_on_pair(d, i) sh.call("wizard", "upgrade", d.location, *child_args, @@ -117,29 +129,33 @@ def main(argv, baton): finally: sys.stderr.write("\n") for name, deploys in errors.items(): - logging.warning("%s from %d installs" % (name, len(deploys))) + logging.warning("%s from %d installs", name, len(deploys)) print - def printPercent(description, number, total): - print "%d out of %d installs (%.1f%%) had %s" % (number, total, float(number)/total*100, description) - if report.fails['merge']: - printPercent("merge conflicts", report.fails['merge'], i) - if report.fails['verify']: - printPercent("web verification failure", report.fails['verify'], i) - printPercent("successful upgrades", report.successes, i) + total = sum(len(x.values) for x in status.reports.values()) + def printPercent(description, number): + print "% 4d out of % 4d installs (% 5.1f%%) %s" % (number, total, float(number)/total*100, description) + error_count = sum(len(e) for e in errors.values()) + if error_count: + printPercent("had unusual errors", error_count) + for name, description in human_status.items(): + values = status.reports[name].values + if values: + printPercent(description, len(values)) + sys.stderr.write("\n") + print "%d installs were upgraded this run" % len(runtime.reports["success"].values) def parse_args(argv, baton): usage = """usage: %prog mass-upgrade [ARGS] APPLICATION -Mass upgrades an application to the latest scripts version. -Essentially equivalent to running '%prog upgrade' on all -autoinstalls for a particular application found by parallel-find, -but with advanced reporting. +Mass upgrades an application to the latest version. Essentially +equivalent to running '%prog upgrade' on all autoinstalls for a +particular application found by parallel-find, but with advanced +reporting. This command is intended to be run as root on a server with the scripts AFS patch.""" parser = command.WizardOptionParser(usage) baton.push(parser, "log_dir") - baton.push(parser, "seen") baton.push(parser, "no_parallelize") baton.push(parser, "dry_run") baton.push(parser, "max_processes") @@ -150,7 +166,9 @@ the scripts AFS patch.""" parser.add_option("--force", dest="force", action="store_true", default=False, help="Force running upgrade even if it's already at latest version.") parser.add_option("--redo", dest="redo", action="store_true", - default=False, help="Redo failed upgrades; use this if you updated Wizard's code.") + default=False, help="Redo all upgrades; use this if you updated Wizard's code.") + parser.add_option("--remerge", dest="remerge", action="store_true", + default=False, help="Redo all merges.") options, args, = parser.parse_all(argv) if len(args) > 1: parser.error("too many arguments") @@ -159,5 +177,7 @@ the scripts AFS patch.""" return options, args def calculate_base_args(options): - return command.make_base_args(options, dry_run="--dry-run", srv_path="--srv-path", force="--force") + # Do not pass --debug to subprocesses, since it will trigger the OS + # kernel buffer issue + return command.make_base_args(options, dry_run="--dry-run", srv_path="--srv-path", force="--force", debug=None)