A very small sqlite database might be a good idea here, although
the type of information we're interested in a somewhat unnatural
query. Alternatively, we just have a very simple text file.
-- Create 'wizard merge' command
- - Uses application specific hinting to prematurely resolve
- conflicts.
- - Newline resolution gets done prior-merge (presently is done
- post merge).
- - Classes of disappeared files made ok.
- - Make this general utility(?)
-- Need to make script to tell us about all installs that we don't support
- versions of (i.e. this mismatches)
+- Make it possible to say certain classes of missing files are ok
- Wizard needs a correct arch/ setup
- The wizard command, when not on scripts, should automatically SSH to
module/wizard.cache
module/wizard.deploy
module/wizard.install
+ module/wizard.merge
module/wizard.shell
module/wizard.util
module/wizard.sset
--- /dev/null
+:mod:`wizard.merge`
+===================
+
+.. automodule:: wizard.merge
+
+Functions
+---------
+.. autofunction:: merge
+
+Utility functions
+-----------------
+.. autofunction:: git_commit_tree
+.. autofunction:: git_newline_style
+.. autofunction:: git_diff_text
+.. autofunction:: get_newline
+.. autofunction:: convert_newline
+
+Exceptions
+----------
+.. autoexception:: Error
+.. autoexception:: MergeError
+ :show-inheritance:
for status in shell.eval("git", "ls-files", "--unmerged").splitlines():
files.add(status.split()[-1])
for file in files:
- # check for newline mismatch
- # HACK: using git diff to tell if files are binary or not
- if not len(shell.eval("git", "diff", file).splitlines()) == 1 and util.mixed_newlines(file):
- # this code only works on Unix
- def get_newline(filename):
- f = open(filename, "U")
- # for some reason I need two
- s = f.readline()
- if s != "" and f.newlines is None:
- f.readline()
- if not isinstance(f.newlines, str):
- raise Exception("Assert: expected newlines to be string, instead was %s in %s" % (repr(f.newlines), file))
- return f.newlines
- def create_reference(id):
- f = tempfile.NamedTemporaryFile(prefix="wizardResolve", delete=False)
- shell.call("git", "cat-file", "blob", ":%d:%s" % (id, file), stdout=f)
- f.close()
- return get_newline(f.name), f.name
- def convert(filename, dest_nl):
- contents = open(filename, "U").read().replace("\n", dest_nl)
- open(filename, "wb").write(contents)
- logging.info("Mixed newlines detected in %s", file)
- common_nl, common_file = create_reference(1)
- our_nl, our_file = create_reference(2)
- their_nl, their_file = create_reference(3)
- remerge = False
- if common_nl != their_nl:
- # upstream can't keep their newlines straight
- logging.info("Converting common file (1) from %s to %s newlines", repr(common_nl), repr(their_nl))
- convert(common_file, their_nl)
- remerge = True
- if our_nl != their_nl:
- # common case
- logging.info("Converting our file (2) from %s to %s newlines", repr(our_nl), repr(their_nl))
- convert(our_file, their_nl)
- remerge = True
- if remerge:
- logging.info("Remerging %s", file)
- with open(file, "wb") as f:
- try:
- shell.call("git", "merge-file", "--stdout", our_file, common_file, their_file, stdout=f)
- logging.info("New merge was clean")
- shell.call("git", "add", file)
- continue
- except shell.CallError:
- pass
- logging.info("Merge was still unclean")
- else:
- logging.warning("Mixed newlines detected in %s, but no remerge possible", file)
# manual resolutions
if file in self.resolutions:
contents = open(file, "r").read()
import time
import errno
-from wizard import app, command, deploy, scripts, shell, util
+from wizard import app, command, deploy, merge, scripts, shell, util
kib_buffer = 1024 * 30 # 30 MiB we will always leave available
errno_blacklisted = 64
raise LocalChangesError()
except shell.CallError:
pass
+ # Working copy is not anchored anywhere useful for git describe,
+ # so we need to give it a hint.
+ self.wc.setAppVersion(self.prod.app_version)
def preflight(self):
"""
if self.options.log_file:
open(".git/WIZARD_LOG_FILE", "w").write(self.options.log_file)
def mergePerform(self):
- def make_virtual_commit(rev, parents = []):
- """
- Takes a revision and generates a "virtual" commit with
- user-specific variables instantiated for a smooth, easy
- merge.
-
- .. warning::
-
- Changes the state of the working copy.
- """
- shell.call("git", "checkout", "-q", rev, "--")
- self.wc.parametrize(self.prod)
- for file in self.wc.application.parametrized_files:
- try:
- shell.call("git", "add", "--", file)
- except shell.CallError:
- pass
- virtual_tree = shell.eval("git", "write-tree", log=True)
- parent_args = itertools.chain(*(["-p", p] for p in parents))
- virtual_commit = shell.eval("git", "commit-tree", virtual_tree,
- *parent_args, input="", log=True)
- shell.call("git", "reset", "--hard")
- return virtual_commit
- user_tree = shell.eval("git", "rev-parse", "HEAD^{tree}")
- base_virtual_commit = make_virtual_commit(self.wc.app_version.scripts_tag)
- next_virtual_commit = make_virtual_commit(self.version, [base_virtual_commit])
- user_virtual_commit = shell.eval("git", "commit-tree", user_tree,
- "-p", base_virtual_commit, input="", log=True)
- shell.call("git", "checkout", user_virtual_commit, "--")
- self.wc.prepareMerge()
- try:
- shell.call("git", "commit", "--amend", "-a", "-m", "amendment")
- except shell.CallError as e:
- pass
+ def prepare_config():
+ self.wc.prepareConfig()
+ shell.call("git", "add", ".")
+ def resolve_conflicts():
+ return self.wc.resolveConflicts()
shell.call("git", "config", "merge.conflictstyle", "diff3")
shell.call("git", "config", "rerere.enabled", "true")
try:
- shell.call("git", "merge", next_virtual_commit)
- except shell.CallError as e:
- logging.info("Merge failed with these messages:\n\n" + e.stderr)
- # Run the application's specific merge resolution algorithms
- # and see if we can salvage it
- if self.wc.resolveConflicts():
- logging.info("Resolved conflicts with application specific knowledge")
- shell.call("git", "commit", "-a", "-m", "merge")
- return
- files = set()
- for line in shell.eval("git", "ls-files", "--unmerged").splitlines():
- files.add(line.split(None, 3)[-1])
- conflicts = len(files)
- # XXX: this is kind of fiddly; note that temp_dir still points at the OLD
- # location after this code.
- self.temp_wc_dir = mv_shm_to_tmp(os.getcwd(), self.use_shm)
- self.wc.location = self.temp_wc_dir
- os.chdir(self.temp_wc_dir)
- open(os.path.join(self.prod.location, ".scripts/pending"), "w").write(self.temp_wc_dir)
- if self.options.non_interactive:
- print "%d %s" % (conflicts, self.temp_wc_dir)
- raise MergeFailed
- else:
- user_shell = os.getenv("SHELL")
- if not user_shell: user_shell = "/bin/bash"
- # XXX: scripts specific hack, since mbash doesn't respect the current working directory
- # When the revolution comes (i.e. $ATHENA_HOMEDIR/Scripts is your Scripts home
- # directory) this isn't strictly necessary, but we'll probably need to support
- # web_scripts directories ad infinitum.
- if user_shell == "/usr/local/bin/mbash": user_shell = "/bin/bash"
- while 1:
- print
- print "ERROR: The merge failed with %d conflicts in these files:" % conflicts
- print
- for file in sorted(files):
- print " * %s" % file
+ merge.merge(self.wc.app_version.scripts_tag, self.version,
+ prepare_config, resolve_conflicts)
+ except merge.MergeError:
+ self.mergeFail()
+ def mergeFail(self):
+ files = set()
+ for line in shell.eval("git", "ls-files", "--unmerged").splitlines():
+ files.add(line.split(None, 3)[-1])
+ conflicts = len(files)
+ # XXX: this is kind of fiddly; note that temp_dir still points at the OLD
+ # location after this code.
+ self.temp_wc_dir = mv_shm_to_tmp(os.getcwd(), self.use_shm)
+ self.wc.location = self.temp_wc_dir
+ os.chdir(self.temp_wc_dir)
+ open(os.path.join(self.prod.location, ".scripts/pending"), "w").write(self.temp_wc_dir)
+ if self.options.non_interactive:
+ print "%d %s" % (conflicts, self.temp_wc_dir)
+ raise MergeFailed
+ else:
+ user_shell = os.getenv("SHELL")
+ if not user_shell: user_shell = "/bin/bash"
+ # XXX: scripts specific hack, since mbash doesn't respect the current working directory
+ # When the revolution comes (i.e. $ATHENA_HOMEDIR/Scripts is your Scripts home
+ # directory) this isn't strictly necessary, but we'll probably need to support
+ # web_scripts directories ad infinitum.
+ if user_shell == "/usr/local/bin/mbash": user_shell = "/bin/bash"
+ while 1:
+ print
+ print "ERROR: The merge failed with %d conflicts in these files:" % conflicts
+ print
+ for file in sorted(files):
+ print " * %s" % file
+ print
+ print "Please resolve these conflicts (edit and then `git add`), and"
+ print "then type 'exit'. You will now be dropped into a shell whose working"
+ print "directory is %s" % self.temp_wc_dir
+ try:
+ shell.call(user_shell, "-i", interactive=True)
+ except shell.CallError as e:
+ logging.warning("Shell returned non-zero exit code %d" % e.code)
+ if shell.eval("git", "ls-files", "--unmerged").strip():
print
- print "Please resolve these conflicts (edit and then `git add`), and"
- print "then type 'exit'. You will now be dropped into a shell whose working"
- print "directory is %s" % self.temp_wc_dir
- try:
- shell.call(user_shell, "-i", interactive=True)
- except shell.CallError as e:
- logging.warning("Shell returned non-zero exit code %d" % e.code)
- if shell.eval("git", "ls-files", "--unmerged").strip():
+ print "WARNING: There are still unmerged files."
+ out = raw_input("Continue editing? [y/N]: ")
+ if out == "y" or out == "Y":
+ continue
+ else:
+ print "Aborting. The conflicted working copy can be found at:"
print
- print "WARNING: There are still unmerged files."
- out = raw_input("Continue editing? [y/N]: ")
- if out == "y" or out == "Y":
- continue
- else:
- print "Aborting. The conflicted working copy can be found at:"
- print
- print " %s" % self.temp_wc_dir
- print
- print "and you can resume the upgrade process by running in that directory:"
- print
- print " wizard upgrade --continue"
- sys.exit(1)
- break
+ print " %s" % self.temp_wc_dir
+ print
+ print "and you can resume the upgrade process by running in that directory:"
+ print
+ print " wizard upgrade --continue"
+ sys.exit(1)
+ break
def postflight(self):
with util.ChangeDirectory(self.temp_wc_dir):
pass
else:
shell.call("git", "commit", "--allow-empty", "-am", "throw-away commit")
+ self.wc.parametrize(self.prod)
+ shell.call("git", "add", ".")
message = self.postflightCommitMessage()
- new_tree = shell.eval("git", "rev-parse", "HEAD^{tree}")
+ new_tree = shell.eval("git", "write-tree")
final_commit = shell.eval("git", "commit-tree", new_tree,
"-p", self.user_commit, "-p", self.next_commit, input=message, log=True)
# a master branch may not necessarily exist if the user
modifications to without fear of interfering with a production
deployment. More operations are permitted on these copies.
"""
+ def setAppVersion(self, app_version):
+ """
+ Manually resets the application version; useful if the working
+ copy is off in space (i.e. not anchored to something we can
+ git describe off of.)
+ """
+ self._app_version = app_version
@chdir_to_location
def parametrize(self, deployment):
"""
--- /dev/null
+"""
+Advanced merge tools for git rerere, sloppy commits and parametrization.
+
+Wizard requires infrastructure for reusing merge resolutions across
+many repositories, due to the number of user installs and highly
+repetitive conflict resolution process. This environment results
+in a number of unique challenges:
+
+ 1. Users are commonly very sloppy with their text editors and
+ will frequently change the line-ending style of their file.
+ Because Git respects newline choice when core.autocrlf is
+ off, a file that flips newline style will result in a full
+ merge conflict.
+
+ 2. We version user configuration files, which means that there
+ will always be a set of changes between upstream and ours.
+ Since Git refuses to automatically merge changes that are
+ too close to each other, these frequently result in spurious
+ merge commits.
+
+Furthermore, both of these make it difficult to reuse rerere resolutions
+across installations. Thus, an advanced Wizard merge has the following
+properties:
+
+ 1. Wizard will perform a full scan of all files that were
+ different between common and ours, filter out those that
+ are binary (using as close to the Git heuristic as possible)
+ and then check among common, ours and theirs if the newlines
+ match. The newline style of theirs always takes precedence.
+
+ 2. Wizard will genericize the ours copy so that it matches
+ common and theirs, and reparametrize it once the merge
+ is finished. Consumers of this function should be careful
+ to appropriately reparametrize if there are conflicts
+ (we can't do it any earlier, because we want clean rerere
+ postimages).
+
+Usage of this functionality is primarily through the :func:`merge` function;
+see that function more usage information. While the ``git`` and ``newline``
+functions published by this module are public, use of these functions outside
+of this module is discouraged.
+"""
+
+import logging
+import itertools
+import tempfile
+import os
+
+import wizard
+from wizard import shell
+
+def git_commit_tree(tree, *parents):
+ """
+ Convenience wrapper for ``git commit-tree``. Writes an empty log.
+ """
+ parent_args = itertools.chain(*(["-p", p] for p in parents))
+ commit = shell.eval("git", "commit-tree", tree,
+ *parent_args, input="", log=True)
+ return commit
+
+def git_diff_text(a, b):
+ """
+ Returns a list of files that are text and are different between
+ commits ``a`` and ``b``.
+ """
+ lines = shell.eval("git", "diff", "--numstat", a, b).strip().split("\n")
+ files = []
+ for line in lines:
+ added, deleted, name = line.split()
+ if added != "-" and deleted != "-":
+ files.append(name)
+ return files
+
+def git_newline_style(rev, name):
+ """
+ Returns the newline style for a blob, identified by Git revision
+ ``rev`` and filename ``name``.
+ """
+ f = tempfile.NamedTemporaryFile(prefix="wizardResolve", delete=False)
+ shell.call("git", "cat-file", "blob", "%s:%s" % (rev, name), stdout=f)
+ f.close()
+ nl = get_newline(f.name)
+ os.unlink(f.name)
+ return nl
+
+# only works on Unix
+def get_newline(filename):
+ """
+ Determines the newline style of ``filename``. This will be a
+ string if only one newline style was find, or a tuple of newline
+ types found.
+ """
+ f = open(filename, 'U')
+ f.read()
+ return f.newlines
+
+def convert_newline(filename, dest_nl):
+ """
+ Replaces the detected newline style of ``filename`` with
+ ``dest_nl`` type newlines.
+ """
+ contents = open(filename, "U").read().replace("\n", dest_nl)
+ open(filename, "wb").write(contents)
+
+def merge(common_id, theirs_id,
+ prepare_config=None,
+ resolve_conflicts=None):
+ """
+ Performs a merge. Throws a :class:`MergeError` if the merge fails
+ (and leaves the current working directory in a state amenable
+ to manual conflict resolution), or returns a tree id of the successful
+ merge (the directory state is undefined and should not be relied upon).
+ This function is not responsible for actually coming
+ up with the real merge commit, because it can fail.
+
+ If ``prepare_config`` is used, you are expected to reverse the effects
+ of this on whatever the final tree is; otherwise you will lose
+ those changes.
+
+ Arguments:
+
+ * ``common_id``: base commit to calculate merge off of
+ * ``theirs_id``: changes to merge in from commit
+ * ``prepare_config``: function that removes any user-specific
+ values from files. This function is expected to ``git add``
+ any files it changes.
+ * ``resolve_conflicts``: this function attempts to resolve
+ conflicts automatically. Returns ``True`` if all conflicts
+ are resolved, and ``False`` otherwise. It is permitted
+ to resolve some but not all conflicts.
+
+ .. note::
+
+ Wizard has a strange idea of repository topology (due to lack of
+ rebases, see documentation about doctoring retroactive commits),
+ so we require the common and theirs commits, instead of
+ using the normal Git algorithm.
+ """
+ if prepare_config is None:
+ prepare_config = lambda: None
+ if resolve_conflicts is None:
+ resolve_conflicts = lambda: False
+ ours_id = shell.eval("git", "rev-parse", "HEAD")
+ theirs_newline_cache = {}
+ def get_theirs_newline(file):
+ if file not in theirs_newline_cache:
+ nl = git_newline_style(theirs_id, file)
+ if not isinstance(nl, str):
+ # A case of incompetent upstream, unfortunately
+ logging.warning("Canonical version (theirs) of %s has mixed newline style, forced to \\n", file)
+ nl = "\n"
+ theirs_newline_cache[file] = nl
+ return theirs_newline_cache[file]
+ theirs_tree = shell.eval("git", "rev-parse", "%s^{tree}" % theirs_id)
+ # operations on the ours tree
+ for file in git_diff_text(ours_id, theirs_id):
+ theirs_nl = get_theirs_newline(file)
+ ours_nl = get_newline(file) # current checkout is ours_id
+ if theirs_nl != ours_nl:
+ logging.info("Converting our file (3) from %s to %s newlines", repr(ours_nl), repr(theirs_nl))
+ convert_newline(file, theirs_nl)
+ shell.eval("git", "add", file)
+ prepare_config() # for Wizard, this usually genericizes config files
+ ours_tree = shell.eval("git", "write-tree")
+ logging.info("Merge wrote virtual tree for ours: %s", ours_tree)
+ # operations on the common tree
+ shell.call("git", "reset", "--hard", common_id)
+ for file in git_diff_text(common_id, theirs_id):
+ theirs_nl = get_theirs_newline(file)
+ common_nl = get_newline(file) # current checkout is common_id
+ if theirs_nl != common_nl:
+ logging.info("Converting common file (1) from %s to %s newlines", repr(common_nl), repr(theirs_nl))
+ convert_newline(file, theirs_nl)
+ shell.eval("git", "add", file)
+ common_tree = shell.eval("git", "write-tree")
+ logging.info("Merge wrote virtual tree for common: %s", common_tree)
+ # construct merge commit graph
+ common_virtual_id = git_commit_tree(common_tree)
+ ours_virtual_id = git_commit_tree(ours_tree, common_virtual_id)
+ theirs_virtual_id = git_commit_tree(theirs_tree, common_virtual_id)
+ # perform merge
+ shell.call("git", "reset", "--hard", ours_virtual_id)
+ try:
+ shell.call("git", "merge", theirs_virtual_id)
+ except shell.CallError as e:
+ logging.info("Merge failed with these message:\n\n" + e.stderr)
+ if resolve_conflicts():
+ logging.info("Resolved conflicts automatically")
+ shell.call("git", "commit", "-a", "-m", "merge")
+ else:
+ raise MergeError
+ # post-merge operations
+ result_tree = shell.eval("git", "write-tree")
+ logging.info("Resolution tree is %s", result_tree)
+ return result_tree
+
+class Error(wizard.Error):
+ """Base error class for merge"""
+ pass
+
+class MergeError(Error):
+ """Merge terminated with a conflict, oh no!"""
+ pass
--- /dev/null
+import unittest
+
+from wizard import tests
+from wizard.merge import *
+
+sampleFile = tests.getTestFile("merge_test.sample")
+
+class NewlineTest(unittest.TestCase):
+ def testGetNewlineUnix(self):
+ open(sampleFile, "wb").write("\n\n\n")
+ self.assertEqual(get_newline(sampleFile), "\n")
+ os.unlink(sampleFile)
+ def testGetNewlineMixed(self):
+ open(sampleFile, "wb").write("\n\n\n\n\n\n\r\n")
+ self.assertEqual(get_newline(sampleFile), ("\n", "\r\n"))
+ os.unlink(sampleFile)
+ def testConvertNewline(self):
+ open(sampleFile, "wb").write("\r\n\r\n\n")
+ convert_newline(sampleFile, "\r")
+ self.assertEqual(open(sampleFile, "rb").read(), "\r\r\r")
+ os.unlink(sampleFile)
+