"""Implementation of the 'remaster' command.

Rewrites the parent links of local commits so that they point at the
commits of a rewritten upstream history (see the usage text in
parse_args() for a worked example).
"""

import logging
import os
import sys
import tempfile

from wizard import command, shell


def main(argv, baton):
    """Entry point: rewrite local history on top of the new upstream.

    argv and baton are passed straight through to parse_args().

    Raises RuntimeError if an old parent commit cannot be matched to
    exactly one commit reachable from the tags.
    """
    options, args = parse_args(argv, baton)
    excludes = _upstream_commits()
    needs_map = _commits_needing_map(excludes)
    mapping = _build_mapping(needs_map)
    # XXX Make this more robust: given our pre-processing, there is a
    # very specific set of parent IDs we expect to see (not necessarily
    # the ones in our mapping so far: those are precisely the IDs that
    # may change, but some will stay the same.)  Consider nops.  This
    # might be hard since git-filter-branch manufactures hashes as it
    # goes along.
    _rewrite_parents(mapping, excludes)


def _upstream_commits():
    """Return the commit IDs of every ref advertised by 'origin'."""
    # XXX This is a little sloppy (since it also pulls in remotes and
    # originals), but if the upstream repo has a clean set of remotes,
    # that shouldn't be a problem.
    #
    # Each ls-remote line is "<hash>\t<ref>"; we only want the hash.
    # This must be a real list (not a lazy map object): it is expanded
    # into argument lists more than once.
    output = shell.eval("git", "ls-remote", "origin")
    return [line.partition("\t")[0] for line in output.splitlines()]


def _commits_needing_map(excludes):
    """Return parents of local commits that are not themselves local."""
    local_commits = set()
    all_parents = set()
    output = shell.eval("git", "rev-list", "--parents", "--branches",
                        "--not", *excludes)
    for line in output.splitlines():
        if not line:
            # Empty output or a stray blank line is not a commit.
            continue
        # With --parents each line is "<commit> <parent> <parent> ...".
        commit, _, parent_string = line.partition(" ")
        local_commits.add(commit)
        all_parents.update(parent_string.split())
    return all_parents - local_commits


def _build_mapping(needs_map):
    """Map each old commit ID to its replacement in the new history."""
    mapping = {}
    for old_commit in needs_map:
        summary = shell.eval("git", "log", "-1", "--pretty=format:%s",
                             old_commit)
        # Find the corresponding commit by grepping for the summary from
        # "live" tags (which should have been updated to the new history
        # we are remastering to.)  -F == fixed string (no regexing).
        candidates = shell.eval("git", "rev-list", "-F",
                                "--grep=" + summary, "--tags").splitlines()
        if len(candidates) != 1:
            # Zero or several matches means there is no one-to-one
            # correspondence, so we must not guess.
            raise RuntimeError(
                "Failed looking for %s (found %d candidate commits)"
                % (old_commit, len(candidates)))
        mapping[old_commit] = candidates[0]
    return mapping


def _rewrite_parents(mapping, excludes):
    """Run git filter-branch with a sed script applying mapping."""
    # delete=False: the file has to outlive close() so that git can run
    # it; we remove it ourselves below.  Text mode so that str can be
    # written regardless of Python version.
    script = tempfile.NamedTemporaryFile(mode="w", delete=False)
    try:
        with script:
            script.write("#!/bin/sed -f\n")
            for search, replace in mapping.items():
                script.write("s/%s/%s/g\n" % (search, replace))
        # The script is handed to filter-branch by path, so it is made
        # executable here.
        shell.call("chmod", "a+x", script.name)
        logging.info("Sed script %s", script.name)
        # Do the rewrite
        shell.call("git", "filter-branch", "--parent-filter", script.name,
                   "--", "--branches", "--not", *excludes,
                   stdout=sys.stdout, stderr=sys.stderr)
    finally:
        # Cleanup
        os.unlink(script.name)


def parse_args(argv, baton):
    """Parse the command line of 'remaster'.

    Returns the (options, args) pair produced by the option parser; a
    parser error is reported if any positional arguments are left over.
    """
    usage = """usage: %prog remaster [ARGS]

Reconciles divergent commit histories by rewriting all parent links to
point to the new commits. This only works if we are able to construct a
one-to-one correspondence between the old and new commits. This should
be automatically invoked by 'wizard upgrade' if a remastering is
necessary.

Consider this history:

      D-----E   heads/master
     /     /
A--B--C         remotes/origin/master

Suppose on a fetch, we discover that origin/master has been rebased, and
replaced with 'old-master':

      D-----E   heads/master
     /     /
A--B--C         remotes/origin/old-master

A'-B'-C'        remotes/origin/master

We would like to construct a new tree as follows:

      D'----E'  heads/master
     /     /
A'-B'-C'        remotes/origin/master

Where D/D' and E/E' have identical trees, just different parent commit
pointers. This is what 'wizard remaster' does.

In order to do this, we need to know two things: (1) which commits in
the old history were not provided by the user (the ones to rewrite are
'git log master ^origin/master', in the old history before the force
update of branch locations), and (2) what the correspondence between the
old commits and the new commits are. (1) is determined by looking at
all references in the remote repository. (2) is determined by comparing
commit messages; a user can also manually add extra mappings if this
heuristic fails (not implemented yet).
"""
    parser = command.WizardOptionParser(usage)
    parser.add_option("-f", "--force", dest="force", action="store_true",
                      default=False, help="Force overwriting.")
    options, args = parser.parse_all(argv)
    if len(args) > 0:
        parser.error("too many arguments")
    return options, args