# Reconstructed from the patch "Initial commit of 'wizard remaster'"
# (commit 88d41c5ef7b2775f876864607b6867df9b47ee3a by Edward Z. Yang,
# Sun, 17 Jul 2011 01:03:08 -0400), which creates
# wizard/command/remaster.py.

import logging
import tempfile
import os
import sys

from wizard import command, shell


def main(argv, baton):
    """Rewrite local branches so their parents point at the new upstream.

    Steps:
      1. Collect every commit id advertised by ``origin``; these are
         treated as upstream history and excluded from rewriting.
      2. List the remaining (local) commits together with their parents.
      3. Any parent that is not itself a local commit is an old upstream
         commit; find its replacement among the tags by commit summary.
      4. Run ``git filter-branch --parent-filter`` with a sed script that
         substitutes old parent ids with their replacements.

    Args:
        argv: command-line arguments, forwarded to parse_args().
        baton: forwarded to parse_args(); not otherwise used here.

    Raises:
        LookupError: if an old upstream commit does not correspond to
            exactly one commit reachable from the tags.
    """
    # Called for its validation of the command line; the parsed --force
    # flag is not consulted by this function.
    options, args = parse_args(argv, baton)

    # Determine upstream commit history
    # XXX This is a little sloppy (since it also pulls in remotes and
    # originals), but if the upstream repo has a clean set of remotes,
    # that shouldn't be a problem.
    excludes = _remote_commit_ids(shell.eval("git", "ls-remote", "origin"))

    # Determine local commits and their parents
    local_commits, all_parents = _parse_rev_list(
        shell.eval("git", "rev-list", "--parents", "--branches",
                   "--not", *excludes))

    # Parents that are not local commits are the old upstream commits
    # whose ids have to be translated to the new history.
    needs_map = all_parents - local_commits
    mapping = {}
    for old_commit in sorted(needs_map):
        mapping[old_commit] = _find_replacement(old_commit)

    # XXX Make this more robust: given our pre-processing, there is a
    # very specific set of parent IDs we expect to see (not necessarily
    # the ones in our mapping so far: those are precisely the IDs that
    # may change, but some will stay the same.)  Consider nops.  This
    # might be hard since git-filter-branch manufactures hashes as it
    # goes along.

    # Prepare the parent filter script.  delete=False because the file
    # has to outlive the handle: git runs it by name after we close it.
    script = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with script:
            script.write(_sed_script(mapping))
        # The script is handed to filter-branch as a command, so it must
        # be executable (equivalent of "chmod a+x").
        os.chmod(script.name, os.stat(script.name).st_mode | 0o111)
        logging.info("Sed script %s", script.name)
        # Do the rewrite
        shell.call("git", "filter-branch", "--parent-filter", script.name,
                   "--", "--branches", "--not", *excludes,
                   stdout=sys.stdout, stderr=sys.stderr)
    finally:
        # Cleanup
        os.unlink(script.name)


def _remote_commit_ids(ls_remote_output):
    """Return the distinct commit ids in 'git ls-remote' output, in order.

    Each non-empty line is expected to be "<id>\\t<refname>"; only the id
    is kept.  A list (not an iterator) is returned because the caller
    unpacks it into more than one git invocation.
    """
    ids = []
    seen = set()
    for line in ls_remote_output.splitlines():
        commit_id = line.partition("\t")[0]
        if commit_id and commit_id not in seen:
            seen.add(commit_id)
            ids.append(commit_id)
    return ids


def _parse_rev_list(rev_list_output):
    """Parse 'git rev-list --parents' output.

    Each non-blank line is "<commit> [<parent> ...]".  Blank lines (for
    example, the whole output being empty) are ignored.

    Returns:
        A (commits, parents) pair of sets: the commits listed, and the
        union of all their parents.
    """
    commits = set()
    parents = set()
    for line in rev_list_output.splitlines():
        fields = line.split()
        if not fields:
            continue
        commits.add(fields[0])
        parents.update(fields[1:])
    return commits, parents


def _find_replacement(old_commit):
    """Return the id of the commit in the new history matching old_commit.

    The match is made by grepping for the old commit's summary line among
    commits reachable from "live" tags (which should have been updated to
    the new history we are remastering to).  -F == fixed string (no
    regexing).

    Raises:
        LookupError: if the number of matching commits is not exactly one.
    """
    summary = shell.eval("git", "log", "-1", "--pretty=format:%s", old_commit)
    candidates = shell.eval("git", "rev-list", "-F", "--grep=" + summary,
                            "--tags").splitlines()
    if len(candidates) != 1:
        raise LookupError(
            "Failed looking for %s: expected exactly one tagged commit "
            "with summary %r, found %d"
            % (old_commit, summary, len(candidates)))
    return candidates[0]


def _sed_script(mapping):
    """Return the text of a sed script replacing each key with its value.

    Entries are emitted in sorted order so the script is deterministic.
    """
    lines = ["#!/bin/sed -f\n"]
    for search, replace in sorted(mapping.items()):
        lines.append("s/%s/%s/g\n" % (search, replace))
    return "".join(lines)


def parse_args(argv, baton):
    """Parse the command line of 'wizard remaster'.

    Args:
        argv: command-line arguments to parse.
        baton: accepted for interface uniformity; not used here.

    Returns:
        The (options, args) pair produced by the parser.  Any positional
        argument is reported through parser.error(), so args is empty
        whenever this function returns normally (assuming parser.error()
        does not return, as with optparse -- confirm against
        WizardOptionParser).
    """
    usage = """usage: %prog remaster [ARGS]

Reconciles divergent commit histories by rewriting all parent links to
point to the new commits. This only works if we are able to construct a
one-to-one correspondence between the old and new commits. This should
be automatically invoked by 'wizard upgrade' if a remastering is
necessary.

Consider this history:

      D-----E   heads/master
     /     /
    A--B--C     remotes/origin/master

Suppose on a fetch, we discover that origin/master has been rebased, and
replaced with 'old-master':

      D-----E   heads/master
     /     /
    A--B--C     remotes/origin/old-master

    A'-B'-C'    remotes/origin/master

We would like to construct a new tree as follows:

      D'----E'  heads/master
     /     /
    A'-B'-C'    remotes/origin/master

Where D/D' and E/E' have identical trees, just different parent commit
pointers. This is what 'wizard remaster' does.

In order to do this, we need to know two things: (1) which commits in
the old history were not provided by the user (the ones to rewrite are
'git log master ^origin/master', in the old history before the force
update of branch locations), and (2) what the correspondence between
the old commits and the new commits are. (1) is determined by looking
at all references in the remote repository. (2) is determined by
comparing commit messages; a user can also manually add extra mappings
if this heuristic fails (not implemented yet).
"""
    parser = command.WizardOptionParser(usage)
    parser.add_option("-f", "--force", dest="force", action="store_true",
                      default=False, help="Force overwriting.")
    options, args = parser.parse_all(argv)
    if args:
        parser.error("too many arguments")
    return options, args