]> scripts.mit.edu Git - wizard.git/commitdiff
Rewrite merge functionality into its own module.
authorEdward Z. Yang <ezyang@mit.edu>
Mon, 11 Jan 2010 00:15:18 +0000 (19:15 -0500)
committerEdward Z. Yang <ezyang@mit.edu>
Mon, 11 Jan 2010 00:15:18 +0000 (19:15 -0500)
Previously, Wizard had a gigantic merge routine inside of
the upgrade command, and interweaved automated resolution
before and after the merge took place.

This commit reorganizes the merge functionality into
the wizard.merge module, and also pushes as much pre-merge
resolution to before merge, so that we can generate more
meaningful rerere post-images.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
TODO
doc/index.rst
doc/module/wizard.merge.rst [new file with mode: 0644]
wizard/app/__init__.py
wizard/command/upgrade.py
wizard/deploy.py
wizard/merge.py [new file with mode: 0644]
wizard/tests/merge_test.py [new file with mode: 0644]

diff --git a/TODO b/TODO
index 884d042f3e0294a9e0735b366d7624fbd92e2fff..8fec3bb64fb917c7c8c96527aaea271726bcf493 100644 (file)
--- a/TODO
+++ b/TODO
@@ -44,15 +44,7 @@ TODO NOW:
           A very small sqlite database might be a good idea here, although
           the type of information we're interested in a somewhat unnatural
           query.  Alternatively, we just have a very simple text file.
-- Create 'wizard merge' command
-    - Uses application specific hinting to prematurely resolve
-      conflicts.
-    - Newline resolution gets done prior-merge (presently is done
-      post merge).
-    - Classes of disappeared files made ok.
-    - Make this general utility(?)
-- Need to make script to tell us about all installs that we don't support
-  versions of (i.e. this mismatches)
+- Make it possible to say certain classes of missing files are ok
 
 - Wizard needs a correct arch/ setup
 - The wizard command, when not on scripts, should automatically SSH to
index 8006b56562080802caf71b68de6c3960f808d928..a2da3b5ad0105512788ac3accdf74ec2863d6b9d 100644 (file)
@@ -73,6 +73,7 @@ Modules
     module/wizard.cache
     module/wizard.deploy
     module/wizard.install
+    module/wizard.merge
     module/wizard.shell
     module/wizard.util
     module/wizard.sset
diff --git a/doc/module/wizard.merge.rst b/doc/module/wizard.merge.rst
new file mode 100644 (file)
index 0000000..383e708
--- /dev/null
@@ -0,0 +1,22 @@
+:mod:`wizard.merge`
+===================
+
+.. automodule:: wizard.merge
+
+Functions
+---------
+.. autofunction:: merge
+
+Utility functions
+-----------------
+.. autofunction:: git_commit_tree
+.. autofunction:: git_newline_style
+.. autofunction:: git_diff_text
+.. autofunction:: get_newline
+.. autofunction:: convert_newline
+
+Exceptions
+----------
+.. autoexception:: Error
+.. autoexception:: MergeError
+    :show-inheritance:
index 5f318dcde9a86a8884bc7f79bfeef088cfcedbe2..391f8d8956aeb0f1192d04e7fa07cc2e51c0aeda 100644 (file)
@@ -244,55 +244,6 @@ class Application(object):
         for status in shell.eval("git", "ls-files", "--unmerged").splitlines():
             files.add(status.split()[-1])
         for file in files:
-            # check for newline mismatch
-            # HACK: using git diff to tell if files are binary or not
-            if not len(shell.eval("git", "diff", file).splitlines()) == 1 and util.mixed_newlines(file):
-                # this code only works on Unix
-                def get_newline(filename):
-                    f = open(filename, "U")
-                    # for some reason I need two
-                    s = f.readline()
-                    if s != "" and f.newlines is None:
-                        f.readline()
-                    if not isinstance(f.newlines, str):
-                        raise Exception("Assert: expected newlines to be string, instead was %s in %s" % (repr(f.newlines), file))
-                    return f.newlines
-                def create_reference(id):
-                    f = tempfile.NamedTemporaryFile(prefix="wizardResolve", delete=False)
-                    shell.call("git", "cat-file", "blob", ":%d:%s" % (id, file), stdout=f)
-                    f.close()
-                    return get_newline(f.name), f.name
-                def convert(filename, dest_nl):
-                    contents = open(filename, "U").read().replace("\n", dest_nl)
-                    open(filename, "wb").write(contents)
-                logging.info("Mixed newlines detected in %s", file)
-                common_nl, common_file = create_reference(1)
-                our_nl,    our_file    = create_reference(2)
-                their_nl,  their_file  = create_reference(3)
-                remerge = False
-                if common_nl != their_nl:
-                    # upstream can't keep their newlines straight
-                    logging.info("Converting common file (1) from %s to %s newlines", repr(common_nl), repr(their_nl))
-                    convert(common_file, their_nl)
-                    remerge = True
-                if our_nl != their_nl:
-                    # common case
-                    logging.info("Converting our file (2) from %s to %s newlines", repr(our_nl), repr(their_nl))
-                    convert(our_file, their_nl)
-                    remerge = True
-                if remerge:
-                    logging.info("Remerging %s", file)
-                    with open(file, "wb") as f:
-                        try:
-                            shell.call("git", "merge-file", "--stdout", our_file, common_file, their_file, stdout=f)
-                            logging.info("New merge was clean")
-                            shell.call("git", "add", file)
-                            continue
-                        except shell.CallError:
-                            pass
-                    logging.info("Merge was still unclean")
-                else:
-                    logging.warning("Mixed newlines detected in %s, but no remerge possible", file)
             # manual resolutions
             if file in self.resolutions:
                 contents = open(file, "r").read()
index 0672700771397b8e2cf89c52fd62be6922a32a19..388f00aeae8bba988483e4f368fd7f631d9d8fab 100644 (file)
@@ -8,7 +8,7 @@ import itertools
 import time
 import errno
 
-from wizard import app, command, deploy, scripts, shell, util
+from wizard import app, command, deploy, merge, scripts, shell, util
 
 kib_buffer = 1024 * 30 # 30 MiB we will always leave available
 errno_blacklisted = 64
@@ -140,6 +140,9 @@ class Upgrade(object):
             raise LocalChangesError()
         except shell.CallError:
             pass
+        # Working copy is not anchored anywhere useful for git describe,
+        # so we need to give it a hint.
+        self.wc.setAppVersion(self.prod.app_version)
 
     def preflight(self):
         """
@@ -233,103 +236,70 @@ class Upgrade(object):
         if self.options.log_file:
             open(".git/WIZARD_LOG_FILE", "w").write(self.options.log_file)
     def mergePerform(self):
-        def make_virtual_commit(rev, parents = []):
-            """
-            Takes a revision and generates a "virtual" commit with
-            user-specific variables instantiated for a smooth, easy
-            merge.
-
-            .. warning::
-
-                Changes the state of the working copy.
-            """
-            shell.call("git", "checkout", "-q", rev, "--")
-            self.wc.parametrize(self.prod)
-            for file in self.wc.application.parametrized_files:
-                try:
-                    shell.call("git", "add", "--", file)
-                except shell.CallError:
-                    pass
-            virtual_tree = shell.eval("git", "write-tree", log=True)
-            parent_args = itertools.chain(*(["-p", p] for p in parents))
-            virtual_commit = shell.eval("git", "commit-tree", virtual_tree,
-                    *parent_args, input="", log=True)
-            shell.call("git", "reset", "--hard")
-            return virtual_commit
-        user_tree = shell.eval("git", "rev-parse", "HEAD^{tree}")
-        base_virtual_commit = make_virtual_commit(self.wc.app_version.scripts_tag)
-        next_virtual_commit = make_virtual_commit(self.version, [base_virtual_commit])
-        user_virtual_commit = shell.eval("git", "commit-tree", user_tree,
-                "-p", base_virtual_commit, input="", log=True)
-        shell.call("git", "checkout", user_virtual_commit, "--")
-        self.wc.prepareMerge()
-        try:
-            shell.call("git", "commit", "--amend", "-a", "-m", "amendment")
-        except shell.CallError as e:
-            pass
+        def prepare_config():
+            self.wc.prepareConfig()
+            shell.call("git", "add", ".")
+        def resolve_conflicts():
+            return self.wc.resolveConflicts()
         shell.call("git", "config", "merge.conflictstyle", "diff3")
         shell.call("git", "config", "rerere.enabled", "true")
         try:
-            shell.call("git", "merge", next_virtual_commit)
-        except shell.CallError as e:
-            logging.info("Merge failed with these messages:\n\n" + e.stderr)
-            # Run the application's specific merge resolution algorithms
-            # and see if we can salvage it
-            if self.wc.resolveConflicts():
-                logging.info("Resolved conflicts with application specific knowledge")
-                shell.call("git", "commit", "-a", "-m", "merge")
-                return
-            files = set()
-            for line in shell.eval("git", "ls-files", "--unmerged").splitlines():
-                files.add(line.split(None, 3)[-1])
-            conflicts = len(files)
-            # XXX: this is kind of fiddly; note that temp_dir still points at the OLD
-            # location after this code.
-            self.temp_wc_dir = mv_shm_to_tmp(os.getcwd(), self.use_shm)
-            self.wc.location = self.temp_wc_dir
-            os.chdir(self.temp_wc_dir)
-            open(os.path.join(self.prod.location, ".scripts/pending"), "w").write(self.temp_wc_dir)
-            if self.options.non_interactive:
-                print "%d %s" % (conflicts, self.temp_wc_dir)
-                raise MergeFailed
-            else:
-                user_shell = os.getenv("SHELL")
-                if not user_shell: user_shell = "/bin/bash"
-                # XXX: scripts specific hack, since mbash doesn't respect the current working directory
-                # When the revolution comes (i.e. $ATHENA_HOMEDIR/Scripts is your Scripts home
-                # directory) this isn't strictly necessary, but we'll probably need to support
-                # web_scripts directories ad infinitum.
-                if user_shell == "/usr/local/bin/mbash": user_shell = "/bin/bash"
-                while 1:
-                    print
-                    print "ERROR: The merge failed with %d conflicts in these files:" % conflicts
-                    print
-                    for file in sorted(files):
-                        print "  * %s" % file
+            merge.merge(self.wc.app_version.scripts_tag, self.version,
+                        prepare_config, resolve_conflicts)
+        except merge.MergeError:
+            self.mergeFail()
+    def mergeFail(self):
+        files = set()
+        for line in shell.eval("git", "ls-files", "--unmerged").splitlines():
+            files.add(line.split(None, 3)[-1])
+        conflicts = len(files)
+        # XXX: this is kind of fiddly; note that temp_dir still points at the OLD
+        # location after this code.
+        self.temp_wc_dir = mv_shm_to_tmp(os.getcwd(), self.use_shm)
+        self.wc.location = self.temp_wc_dir
+        os.chdir(self.temp_wc_dir)
+        open(os.path.join(self.prod.location, ".scripts/pending"), "w").write(self.temp_wc_dir)
+        if self.options.non_interactive:
+            print "%d %s" % (conflicts, self.temp_wc_dir)
+            raise MergeFailed
+        else:
+            user_shell = os.getenv("SHELL")
+            if not user_shell: user_shell = "/bin/bash"
+            # XXX: scripts specific hack, since mbash doesn't respect the current working directory
+            # When the revolution comes (i.e. $ATHENA_HOMEDIR/Scripts is your Scripts home
+            # directory) this isn't strictly necessary, but we'll probably need to support
+            # web_scripts directories ad infinitum.
+            if user_shell == "/usr/local/bin/mbash": user_shell = "/bin/bash"
+            while 1:
+                print
+                print "ERROR: The merge failed with %d conflicts in these files:" % conflicts
+                print
+                for file in sorted(files):
+                    print "  * %s" % file
+                print
+                print "Please resolve these conflicts (edit and then `git add`), and"
+                print "then type 'exit'.  You will now be dropped into a shell whose working"
+                print "directory is %s" % self.temp_wc_dir
+                try:
+                    shell.call(user_shell, "-i", interactive=True)
+                except shell.CallError as e:
+                    logging.warning("Shell returned non-zero exit code %d" % e.code)
+                if shell.eval("git", "ls-files", "--unmerged").strip():
                     print
-                    print "Please resolve these conflicts (edit and then `git add`), and"
-                    print "then type 'exit'.  You will now be dropped into a shell whose working"
-                    print "directory is %s" % self.temp_wc_dir
-                    try:
-                        shell.call(user_shell, "-i", interactive=True)
-                    except shell.CallError as e:
-                        logging.warning("Shell returned non-zero exit code %d" % e.code)
-                    if shell.eval("git", "ls-files", "--unmerged").strip():
+                    print "WARNING: There are still unmerged files."
+                    out = raw_input("Continue editing? [y/N]: ")
+                    if out == "y" or out == "Y":
+                        continue
+                    else:
+                        print "Aborting.  The conflicted working copy can be found at:"
                         print
-                        print "WARNING: There are still unmerged files."
-                        out = raw_input("Continue editing? [y/N]: ")
-                        if out == "y" or out == "Y":
-                            continue
-                        else:
-                            print "Aborting.  The conflicted working copy can be found at:"
-                            print
-                            print "    %s" % self.temp_wc_dir
-                            print
-                            print "and you can resume the upgrade process by running in that directory:"
-                            print
-                            print "    wizard upgrade --continue"
-                            sys.exit(1)
-                    break
+                        print "    %s" % self.temp_wc_dir
+                        print
+                        print "and you can resume the upgrade process by running in that directory:"
+                        print
+                        print "    wizard upgrade --continue"
+                        sys.exit(1)
+                break
 
     def postflight(self):
         with util.ChangeDirectory(self.temp_wc_dir):
@@ -339,8 +309,10 @@ class Upgrade(object):
                 pass
             else:
                 shell.call("git", "commit", "--allow-empty", "-am", "throw-away commit")
+            self.wc.parametrize(self.prod)
+            shell.call("git", "add", ".")
             message = self.postflightCommitMessage()
-            new_tree = shell.eval("git", "rev-parse", "HEAD^{tree}")
+            new_tree = shell.eval("git", "write-tree")
             final_commit = shell.eval("git", "commit-tree", new_tree,
                     "-p", self.user_commit, "-p", self.next_commit, input=message, log=True)
             # a master branch may not necessarily exist if the user
index 613e095a22d3d658a5a727c94a834b39bd12647b..500f89d60b1a511cd8b7a76c2c9ded1def4c9804 100644 (file)
@@ -415,6 +415,13 @@ class WorkingCopy(Deployment):
     modifications to without fear of interfering with a production
     deployment.  More operations are permitted on these copies.
     """
+    def setAppVersion(self, app_version):
+        """
+        Manually resets the application version; useful if the working
+        copy is off in space (i.e. not anchored to something we can
+        git describe off of.)
+        """
+        self._app_version = app_version
     @chdir_to_location
     def parametrize(self, deployment):
         """
diff --git a/wizard/merge.py b/wizard/merge.py
new file mode 100644 (file)
index 0000000..94cc13c
--- /dev/null
@@ -0,0 +1,203 @@
+"""
+Advanced merge tools for git rerere, sloppy commits and parametrization.
+
+Wizard requires infrastructure for reusing merge resolutions across
+many repositories, due to the number of user installs and highly
+repetitive conflict resolution process.  This environment results
+in a number of unique challenges:
+
+    1. Users are commonly very sloppy with their text editors and
+       will frequently change the line-ending style of their file.
+       Because Git respects newline choice when core.autocrlf is
+       off, a file that flips newline style will result in a full
+       merge conflict.
+
+    2. We version user configuration files, which means that there
+       will always be a set of changes between upstream and ours.
+       Since Git refuses to automatically merge changes that are
+       too close to each other, these frequently result in spurious
+       merge commits.
+
+Furthermore, both of these make it difficult to reuse rerere resolutions
+across installations.  Thus, an advanced Wizard merge has the following
+properties:
+
+    1. Wizard will perform a full scan of all files that were
+       different between common and ours, filter out those that
+       are binary (using as close to the Git heuristic as possible)
+       and then check among common, ours and theirs if the newlines
+       match.  The newline style of theirs always takes precedence.
+
+    2. Wizard will genericize the ours copy so that it matches
+       common and theirs, and reparametrize it once the merge
+       is finished.  Consumers of this function should be careful
+       to appropriately reparametrize if there are conflicts
+       (we can't do it any earlier, because we want clean rerere
+       postimages).
+
+Usage of this functionality is primarily through the :func:`merge` function;
+see that function more usage information.  While the ``git`` and ``newline``
+functions published by this module are public, use of these functions outside
+of this module is discouraged.
+"""
+
+import logging
+import itertools
+import tempfile
+import os
+
+import wizard
+from wizard import shell
+
+def git_commit_tree(tree, *parents):
+    """
+    Convenience wrapper for ``git commit-tree``.  Writes an empty log.
+    """
+    parent_args = itertools.chain(*(["-p", p] for p in parents))
+    commit = shell.eval("git", "commit-tree", tree,
+            *parent_args, input="", log=True)
+    return commit
+
+def git_diff_text(a, b):
+    """
+    Returns a list of files that are text and are different between
+    commits ``a`` and ``b``.
+    """
+    lines = shell.eval("git", "diff", "--numstat", a, b).strip().split("\n")
+    files = []
+    for line in lines:
+        added, deleted, name = line.split()
+        if added != "-" and deleted != "-":
+            files.append(name)
+    return files
+
+def git_newline_style(rev, name):
+    """
+    Returns the newline style for a blob, identified by Git revision
+    ``rev`` and filename ``name``.
+    """
+    f = tempfile.NamedTemporaryFile(prefix="wizardResolve", delete=False)
+    shell.call("git", "cat-file", "blob", "%s:%s" % (rev, name), stdout=f)
+    f.close()
+    nl = get_newline(f.name)
+    os.unlink(f.name)
+    return nl
+
+# only works on Unix
+def get_newline(filename):
+    """
+    Determines the newline style of ``filename``.  This will be a
+    string if only one newline style was find, or a tuple of newline
+    types found.
+    """
+    f = open(filename, 'U')
+    f.read()
+    return f.newlines
+
+def convert_newline(filename, dest_nl):
+    """
+    Replaces the detected newline style of ``filename`` with
+    ``dest_nl`` type newlines.
+    """
+    contents = open(filename, "U").read().replace("\n", dest_nl)
+    open(filename, "wb").write(contents)
+
+def merge(common_id, theirs_id,
+          prepare_config=None,
+          resolve_conflicts=None):
+    """
+    Performs a merge.  Throws a :class:`MergeError` if the merge fails
+    (and leaves the current working directory in a state amenable
+    to manual conflict resolution), or returns a tree id of the successful
+    merge (the directory state is undefined and should not be relied upon).
+    This function is not responsible for actually coming
+    up with the real merge commit, because it can fail.
+
+    If ``prepare_config`` is used, you are expected to reverse the effects
+    of this on whatever the final tree is; otherwise you will lose
+    those changes.
+
+    Arguments:
+
+        * ``common_id``: base commit to calculate merge off of
+        * ``theirs_id``: changes to merge in from commit
+        * ``prepare_config``: function that removes any user-specific
+          values from files.  This function is expected to ``git add``
+          any files it changes.
+        * ``resolve_conflicts``: this function attempts to resolve
+          conflicts automatically.  Returns ``True`` if all conflicts
+          are resolved, and ``False`` otherwise.  It is permitted
+          to resolve some but not all conflicts.
+
+    .. note::
+
+        Wizard has a strange idea of repository topology (due to lack of
+        rebases, see documentation about doctoring retroactive commits),
+        so we require the common and theirs commits, instead of
+        using the normal Git algorithm.
+    """
+    if prepare_config is None:
+        prepare_config = lambda: None
+    if resolve_conflicts is None:
+        resolve_conflicts = lambda: False
+    ours_id = shell.eval("git", "rev-parse", "HEAD")
+    theirs_newline_cache = {}
+    def get_theirs_newline(file):
+        if file not in theirs_newline_cache:
+            nl = git_newline_style(theirs_id, file)
+            if not isinstance(nl, str):
+                # A case of incompetent upstream, unfortunately
+                logging.warning("Canonical version (theirs) of %s has mixed newline style, forced to \\n", file)
+                nl = "\n"
+            theirs_newline_cache[file] = nl
+        return theirs_newline_cache[file]
+    theirs_tree = shell.eval("git", "rev-parse", "%s^{tree}" % theirs_id)
+    # operations on the ours tree
+    for file in git_diff_text(ours_id, theirs_id):
+        theirs_nl = get_theirs_newline(file)
+        ours_nl = get_newline(file) # current checkout is ours_id
+        if theirs_nl != ours_nl:
+            logging.info("Converting our file (3) from %s to %s newlines", repr(ours_nl), repr(theirs_nl))
+            convert_newline(file, theirs_nl)
+            shell.eval("git", "add", file)
+    prepare_config() # for Wizard, this usually genericizes config files
+    ours_tree = shell.eval("git", "write-tree")
+    logging.info("Merge wrote virtual tree for ours: %s", ours_tree)
+    # operations on the common tree
+    shell.call("git", "reset", "--hard", common_id)
+    for file in git_diff_text(common_id, theirs_id):
+        theirs_nl = get_theirs_newline(file)
+        common_nl = get_newline(file) # current checkout is common_id
+        if theirs_nl != common_nl:
+            logging.info("Converting common file (1) from %s to %s newlines", repr(common_nl), repr(theirs_nl))
+            convert_newline(file, theirs_nl)
+            shell.eval("git", "add", file)
+    common_tree = shell.eval("git", "write-tree")
+    logging.info("Merge wrote virtual tree for common: %s", common_tree)
+    # construct merge commit graph
+    common_virtual_id = git_commit_tree(common_tree)
+    ours_virtual_id   = git_commit_tree(ours_tree, common_virtual_id)
+    theirs_virtual_id = git_commit_tree(theirs_tree, common_virtual_id)
+    # perform merge
+    shell.call("git", "reset", "--hard", ours_virtual_id)
+    try:
+        shell.call("git", "merge", theirs_virtual_id)
+    except shell.CallError as e:
+        logging.info("Merge failed with these message:\n\n" + e.stderr)
+        if resolve_conflicts():
+            logging.info("Resolved conflicts automatically")
+            shell.call("git", "commit", "-a", "-m", "merge")
+        else:
+            raise MergeError
+    # post-merge operations
+    result_tree = shell.eval("git", "write-tree")
+    logging.info("Resolution tree is %s", result_tree)
+    return result_tree
+
+class Error(wizard.Error):
+    """Base error class for merge"""
+    pass
+
+class MergeError(Error):
+    """Merge terminated with a conflict, oh no!"""
+    pass
diff --git a/wizard/tests/merge_test.py b/wizard/tests/merge_test.py
new file mode 100644 (file)
index 0000000..099dcd1
--- /dev/null
@@ -0,0 +1,22 @@
+import unittest
+
+from wizard import tests
+from wizard.merge import *
+
+sampleFile = tests.getTestFile("merge_test.sample")
+
+class NewlineTest(unittest.TestCase):
+    def testGetNewlineUnix(self):
+        open(sampleFile, "wb").write("\n\n\n")
+        self.assertEqual(get_newline(sampleFile), "\n")
+        os.unlink(sampleFile)
+    def testGetNewlineMixed(self):
+        open(sampleFile, "wb").write("\n\n\n\n\n\n\r\n")
+        self.assertEqual(get_newline(sampleFile), ("\n", "\r\n"))
+        os.unlink(sampleFile)
+    def testConvertNewline(self):
+        open(sampleFile, "wb").write("\r\n\r\n\n")
+        convert_newline(sampleFile, "\r")
+        self.assertEqual(open(sampleFile, "rb").read(), "\r\r\r")
+        os.unlink(sampleFile)
+