Implement backend for automatic conflict resolution.

author Edward Z. Yang <ezyang@mit.edu>

Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)

committer Edward Z. Yang <ezyang@mit.edu>

Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)
author Edward Z. Yang <ezyang@mit.edu>
Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)
committer Edward Z. Yang <ezyang@mit.edu>
Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)
diff --git a/TODO b/TODO

index ea006ba2eb489c2ddec1621b1ad39d5a7f68f4f7..68aee94a4a62487f26d59dab06fde9f2e8d18a99 100644 (file)
--- a/TODO
+++ b/TODO
@@ -30,43 +30,6 @@ TODO NOW:
             it means we get a little more flexibility.  Try to
             minimize wildcarding: those things need to be put into
             subpatterns and then reconstituted into the output.
-      Example:
-            Input:
-                <<<<<<<
-                ***1***
-                =======
-                upstream
-                >>>>>>>
-            Output:
-                [1] # discard system string
-            Input:
-                <<<<<<<
-                old upstream
-                =======
-                new upstream
-                >>>>>>>
-            Output:
-                ['R'] # keep the new upstream string
-                # This would be useful if a particular upstream change
-                # is really close to where user changes are, so that
-                # the conflict pops up a lot and it's actually spurious
-            Input:
-                <<<<<<<
-                ***1***
-                old upstream
-                ***2***
-                old upstream
-                ***3***
-                =======
-                new upstream
-                >>>>>>>
-            Output:
-                ['R', 1, 2, 3] # should be evident
-                # it's not actually clear to me if this is useful
-        To resolve: do we need the power of regexes?  This might suck
-        because it means we need to implement escaping.  We might want
-        simple globbing to the end of line since that's common in
-        configuration files.
  
  - Distinguish from logging and reporting (so we can easily send mail
    to users)
diff --git a/wizard/resolve.py b/wizard/resolve.py

new file mode 100644 (file)

index 0000000..e2de1f2
--- /dev/null
+++ b/wizard/resolve.py
@@ -0,0 +1,103 @@
+"""
+.. highlight:: diff
+
+This module contains algorithms for performing conflict
+resolution after Git performs its recursive merge.  It
+defines a simple domain specific language (that, at
+its simplest form, merely involves copying conflict markers
+and writing in the form that they should be resolved as) for
+specifying how to resolve conflicts.  These are mostly relevant
+for resolving conflicts in configuration files.
+
+The conflict resolution DSL is described here:
+
+Resolutions are specified as input-output pairs.  An input
+is a string with the conflict resolution markers ("<" * 7,
+"=" * 7 and ">" * 7), with the HEAD content above the equals
+divider, and the upstream content below the equals divider.
+Lines can also be marked as "***N***" where N is a natural
+number greater than 0 (i.e. 1 or more), which means that
+an arbitrary number of lines may be matched and available for output.
+
+Output is a list of integers and strings.  Integers expand
+to lines that were specified earlier; -1 and 0 are special integers
+that correspond to the entire HEAD text, and the entire upstream
+text, respectively.  Strings can be used to insert custom lines.
+
+The DSL does not currently claim to support character level granularity.
+It also does not claim to support contiguous conflicts.
+Our hope is that this simple syntax will be sufficient to cover
+most common merge failures.
+
+Here are some examples::
+
+    <<<<<<<
+    downstream
+    =======
+    upstream
+    >>>>>>>
+
+A result of ``[-1]`` would discard all upstream changes, whereas ``[0]``
+would discard downstream changes (you would probably want to be
+careful about wildcarding in the upstream string).
+
+Pattern matching in action::
+
+    <<<<<<<
+    ***1***
+    old upstream
+    ***2***
+    old upstream
+    ***3***
+    =======
+    new upstream
+    >>>>>>>
+
+A result of ``[0, 1, 2, 3]`` would resolve with the new upstream text,
+followed by the user-matched globs.
+"""
+
+import re
+import itertools
+
+# Matches one complete "***N***" wildcard line of a spec (including its
+# newline) and captures the digits N.
+re_var = re.compile("^\*\*\*(\d+)\*\*\*\\\n", re.MULTILINE)
+
+def spec_to_regex(spec):
+    """
+    Translates a specification string into a regular expression tuple.
+
+    Returns a ``(pattern, mappings)`` pair.  ``pattern`` is the source
+    text of a regular expression (it is not compiled here).  ``mappings``
+    is a dict from the integers usable in a result list (-1 for the
+    whole HEAD side, 0 for the whole upstream side, and N for each
+    ``***N***`` wildcard) to the number of the capturing group that
+    holds the corresponding text.  Wildcards may appear in the spec in
+    any order, which is why this lookup table is needed.
+    Requires re.DOTALL for correct operation.
+    """
+    # Drop the first and last lines of the stripped spec (the "<<<<<<<"
+    # and ">>>>>>>" markers) and split what is left at the divider line.
+    ours, _, theirs = "".join(spec.strip().splitlines(True)[1:-1]).partition("=======\n")
+    def regexify(text, fullmatch, matchno):
+        # Builds the pattern for one side of the conflict.  ``fullmatch``
+        # is the result integer naming the whole side and ``matchno`` is
+        # the group number that the enclosing group will receive.
+        # re_var has a capturing group, so the split alternates between
+        # literal text and the captured wildcard number.
+        text_split = re.split(re_var, text)
+        ret = ""
+        mappings = {fullmatch: matchno}
+        for is_var, line in zip(itertools.cycle([False, True]), text_split):
+            if is_var:
+                # A wildcard becomes its own group; under re.DOTALL the
+                # ".*" may span several lines.
+                ret += "(.*\\\n)"
+                matchno += 1
+                mappings[int(line)] = matchno
+            else:
+                ret += re.escape(line)
+        return ("(" + ret + ")", mappings)
+    ours_regex, ours_mappings = regexify(ours, -1, 1)
+    # The upstream side is numbered after every group of the HEAD side.
+    # len(ours_mappings) equals that group count only while the wildcard
+    # numbers used on the HEAD side are distinct.
+    theirs_regex, theirs_mappings = regexify(theirs, 0, len(ours_mappings) + 1)
+    ours_mappings.update(theirs_mappings)
+    # "[^\n]*" tolerates a label after the conflict markers, and the
+    # trailing "(\\\n|$)" lets a conflict end the text without a newline.
+    # That trailing group is also capturing but is not listed in mappings.
+    return ("<<<<<<<[^\n]*\\\n" + ours_regex + "=======\\\n" + theirs_regex + ">>>>>>>[^\n]*(\\\n|$)", ours_mappings)
+
+def result_to_repl(result, mappings):
+    """
+    Converts a result list into a replacement template string.
+
+    Each integer in ``result`` is looked up in ``mappings`` and becomes
+    a numeric backreference to that group; each string becomes that
+    string followed by a newline.  The pieces are concatenated in order.
+    An integer missing from ``mappings`` raises KeyError.
+    """
+    def ritem_to_string(r):
+        if type(r) is int:
+            # NOTE(review): if the next item is a string starting with a
+            # digit, the digits run on from this backreference and change
+            # the group number -- "\g<N>" would avoid that; confirm.
+            return "\\%d" % mappings[r]
+        else:
+            # NOTE(review): backslashes in a custom string are presumably
+            # interpreted by the replacement template -- verify.
+            return r + "\n"
+    return "".join(map(ritem_to_string, result))
+
+def resolve(contents, spec, result):
+    """
+    Resolves conflicts in ``contents`` that match ``spec``.
+
+    ``spec`` is translated into a regular expression and ``result`` into
+    a replacement template; every match of the expression in
+    ``contents`` is substituted and the resulting string is returned.
+    """
+    rstring, mappings = spec_to_regex(spec)
+    # NOTE(review): looks like leftover debug output (Python 2 print
+    # statement) -- confirm nothing depends on it before removing.
+    print rstring
+    # re.DOTALL is needed so that wildcard groups can span lines.
+    regex = re.compile(rstring, re.DOTALL)
+    repl = result_to_repl(result, mappings)
+    return regex.sub(repl, contents)
diff --git a/wizard/tests/resolve_test.py b/wizard/tests/resolve_test.py

new file mode 100644 (file)

index 0000000..39bd32c
--- /dev/null
+++ b/wizard/tests/resolve_test.py
@@ -0,0 +1,83 @@
+from wizard import resolve
+
+def test_resolve_simple():
+    """A spec without wildcards and result ``[0]`` keeps the upstream text."""
+    contents = """
+foo
+bar
+<<<<<<< HEAD
+baz
+=======
+boo
+>>>>>>> upstream
+bing
+"""
+    spec = """
+<<<<<<<
+baz
+=======
+boo
+>>>>>>>
+"""
+    result = [0]
+    assert resolve.resolve(contents, spec, result) == """
+foo
+bar
+boo
+bing
+"""
+
+def test_resolve_wildcard():
+    """
+    A ``***1***`` wildcard on the HEAD side captures several lines, and
+    result ``[0, 1]`` emits the upstream text followed by those lines.
+    """
+    contents = """
+foo
+bar
+<<<<<<< HEAD
+common
+uncommon
+still uncommon
+
+=======
+transformed common
+>>>>>>> 456ef127bf8531bb363b1195172c71bce3747ae7
+baz
+"""
+
+    spec = """
+<<<<<<<
+common
+***1***
+=======
+transformed common
+>>>>>>>
+"""
+
+    result = [0, 1]
+    assert resolve.resolve(contents, spec, result) == """
+foo
+bar
+transformed common
+uncommon
+still uncommon
+
+baz
+"""
+
+def test_resolve_user():
+    """
+    Result ``[-1]`` keeps the whole HEAD side; the conflict here ends the
+    contents without a trailing newline.
+    """
+    contents = """
+top
+<<<<<<<
+the user is right
+=======
+blah blah
+>>>>>>>"""
+    spec = """
+<<<<<<<
+***1***
+=======
+blah blah
+>>>>>>>
+"""
+    result = [-1]
+    assert resolve.resolve(contents, spec, result) == """
+top
+the user is right
+"""
author	Edward Z. Yang <ezyang@mit.edu>
	Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)
committer	Edward Z. Yang <ezyang@mit.edu>
	Sat, 10 Oct 2009 04:40:17 +0000 (00:40 -0400)
TODO		patch \| blob \| history
wizard/resolve.py	[new file with mode: 0644]	patch \| blob
wizard/tests/resolve_test.py	[new file with mode: 0644]	patch \| blob