changeset 18944:a9c443b3b240

unionrepo: read-only operations on a union of two localrepos unionrepo is just like bundlerepo without bundles. The implementation is very similar to bundlerepo, but I don't see any obvious way to generalize it. Some most obvious use cases for this would be log and diff across local repos, as a kind of preview of pulls, for instance: $ hg -R union:repo1+repo2 heads $ hg -R union:repo1+repo2 log -r REPO1REV -r REPO2REV $ hg -R union:repo1+repo2 log -r '::REPO1REV-::REPO2REV' $ hg -R union:repo1+repo2 log -r 'ancestor(REPO1REV,REPO2REV)' $ hg -R union:repo1+repo2 diff -r REPO1REV -r REPO2REV This is going to be used in RhodeCode, and Bitbucket already uses something similar. Having a core implementation would be beneficial.
author Mads Kiilerich <madski@unity3d.com>
date Fri, 18 Jan 2013 15:54:09 +0100
parents 27e8dfc2c338
children e75b72fffdfe
files mercurial/hg.py mercurial/unionrepo.py tests/test-unionrepo.t
diffstat 3 files changed, 359 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/hg.py	Mon Apr 15 23:52:57 2013 +0900
+++ b/mercurial/hg.py	Fri Jan 18 15:54:09 2013 +0100
@@ -9,8 +9,8 @@
 from i18n import _
 from lock import release
 from node import hex, nullid
-import localrepo, bundlerepo, httppeer, sshpeer, statichttprepo, bookmarks
-import lock, util, extensions, error, node, scmutil, phases, url
+import localrepo, bundlerepo, unionrepo, httppeer, sshpeer, statichttprepo
+import bookmarks, lock, util, extensions, error, node, scmutil, phases, url
 import cmdutil, discovery
 import merge as mergemod
 import verify as verifymod
@@ -64,6 +64,7 @@
 
 schemes = {
     'bundle': bundlerepo,
+    'union': unionrepo,
     'file': _local,
     'http': httppeer,
     'https': httppeer,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/unionrepo.py	Fri Jan 18 15:54:09 2013 +0100
@@ -0,0 +1,208 @@
+# unionrepo.py - repository class for viewing union of repository changesets
+#
+# Derived from bundlerepo.py
+# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
+# Copyright 2013 Unity Technologies, Mads Kiilerich <madski@unity3d.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Repository class for "in-memory pull" of one local repository to another,
+allowing operations like diff and log with revsets.
+"""
+
+from node import nullid
+from i18n import _
+import os
+import util, mdiff, cmdutil, scmutil
+import localrepo, changelog, manifest, filelog, revlog
+
+class unionrevlog(revlog.revlog):
+    def __init__(self, opener, indexfile, revlog2, linkmapper):
+        # How it works:
+        # To retrieve a revision, we just need to know the node id so we can
+        # look it up in revlog2.
+        #
+        # To differentiate a rev in the second revlog from a rev in the revlog,
+        # we check revision against repotiprev.
+        opener = scmutil.readonlyvfs(opener)
+        revlog.revlog.__init__(self, opener, indexfile)
+        self.revlog2 = revlog2
+
+        n = len(self)
+        self.repotiprev = n - 1
+        self.bundlerevs = set() # used by 'bundle()' revset expression
+        for rev2 in self.revlog2:
+            rev = self.revlog2.index[rev2]
+            # rev numbers - in revlog2, very different from self.rev
+            _start, _csize, _rsize, _base, linkrev, p1rev, p2rev, node = rev
+
+            if linkmapper is None: # link is to same revlog
+                assert linkrev == rev2 # we never link back
+                link = n
+            else: # rev must be mapped from repo2 cl to unified cl by linkmapper
+                link = linkmapper(linkrev)
+
+            if node in self.nodemap:
+                # this happens for the common revlog revisions
+                self.bundlerevs.add(self.nodemap[node])
+                continue
+
+            p1node = self.revlog2.node(p1rev)
+            p2node = self.revlog2.node(p2rev)
+
+            e = (None, None, None, None,
+                 link, self.rev(p1node), self.rev(p2node), node)
+            self.index.insert(-1, e)
+            self.nodemap[node] = n
+            self.bundlerevs.add(n)
+            n += 1
+
+    def _chunk(self, rev):
+        if rev <= self.repotiprev:
+            return revlog.revlog._chunk(self, rev)
+        return self.revlog2._chunk(self.node(rev))
+
+    def revdiff(self, rev1, rev2):
+        """return or calculate a delta between two revisions"""
+        if rev1 > self.repotiprev and rev2 > self.repotiprev:
+            return self.revlog2.revdiff(
+                self.revlog2.rev(self.node(rev1)),
+                self.revlog2.rev(self.node(rev2)))
+        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
+            return revlog.revlog.revdiff(self, rev1, rev2)
+
+        return mdiff.textdiff(self.revision(self.node(rev1)),
+                              self.revision(self.node(rev2)))
+
+    def revision(self, nodeorrev):
+        """return an uncompressed revision of a given node or revision
+        number.
+        """
+        if isinstance(nodeorrev, int):
+            rev = nodeorrev
+            node = self.node(rev)
+        else:
+            node = nodeorrev
+            rev = self.rev(node)
+
+        if node == nullid:
+            return ""
+
+        if rev > self.repotiprev:
+            text = self.revlog2.revision(node)
+            self._cache = (node, rev, text)
+        else:
+            text = revlog.revlog.revision(self, rev)
+            # already cached
+        return text
+
+    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
+        raise NotImplementedError
+    def addgroup(self, revs, linkmapper, transaction):
+        raise NotImplementedError
+    def strip(self, rev, minlink):
+        raise NotImplementedError
+    def checksize(self):
+        raise NotImplementedError
+
+class unionchangelog(unionrevlog, changelog.changelog):
+    def __init__(self, opener, opener2):
+        changelog.changelog.__init__(self, opener)
+        linkmapper = None
+        changelog2 = changelog.changelog(opener2)
+        unionrevlog.__init__(self, opener, self.indexfile, changelog2,
+                             linkmapper)
+
+class unionmanifest(unionrevlog, manifest.manifest):
+    def __init__(self, opener, opener2, linkmapper):
+        manifest.manifest.__init__(self, opener)
+        manifest2 = manifest.manifest(opener2)
+        unionrevlog.__init__(self, opener, self.indexfile, manifest2,
+                             linkmapper)
+
+class unionfilelog(unionrevlog, filelog.filelog):
+    def __init__(self, opener, path, opener2, linkmapper, repo):
+        filelog.filelog.__init__(self, opener, path)
+        filelog2 = filelog.filelog(opener2, path)
+        unionrevlog.__init__(self, opener, self.indexfile, filelog2,
+                             linkmapper)
+        self._repo = repo
+
+    def _file(self, f):
+        self._repo.file(f)
+
+class unionpeer(localrepo.localpeer):
+    def canpush(self):
+        return False
+
+class unionrepository(localrepo.localrepository):
+    def __init__(self, ui, path, path2):
+        localrepo.localrepository.__init__(self, ui, path)
+        self.ui.setconfig('phases', 'publish', False)
+
+        self._url = 'union:%s+%s' % (util.expandpath(path),
+                                     util.expandpath(path2))
+        self.repo2 = localrepo.localrepository(ui, path2)
+
+    @localrepo.unfilteredpropertycache
+    def changelog(self):
+        return unionchangelog(self.sopener, self.repo2.sopener)
+
+    def _clrev(self, rev2):
+        """map from repo2 changelog rev to temporary rev in self.changelog"""
+        node = self.repo2.changelog.node(rev2)
+        return self.changelog.rev(node)
+
+    @localrepo.unfilteredpropertycache
+    def manifest(self):
+        return unionmanifest(self.sopener, self.repo2.sopener,
+                             self._clrev)
+
+    def url(self):
+        return self._url
+
+    def file(self, f):
+        return unionfilelog(self.sopener, f, self.repo2.sopener,
+                            self._clrev, self)
+
+    def close(self):
+        self.repo2.close()
+
+    def cancopy(self):
+        return False
+
+    def peer(self):
+        return unionpeer(self)
+
+    def getcwd(self):
+        return os.getcwd() # always outside the repo
+
+def instance(ui, path, create):
+    if create:
+        raise util.Abort(_('cannot create new union repository'))
+    parentpath = ui.config("bundle", "mainreporoot", "")
+    if not parentpath:
+        # try to find the correct path to the working directory repo
+        parentpath = cmdutil.findrepo(os.getcwd())
+        if parentpath is None:
+            parentpath = ''
+    if parentpath:
+        # Try to make the full path relative so we get a nice, short URL.
+        # In particular, we don't want temp dir names in test outputs.
+        cwd = os.getcwd()
+        if parentpath == cwd:
+            parentpath = ''
+        else:
+            cwd = os.path.join(cwd,'')
+            if parentpath.startswith(cwd):
+                parentpath = parentpath[len(cwd):]
+    if path.startswith('union:'):
+        s = path.split(":", 1)[1].split("+", 1)
+        if len(s) == 1:
+            repopath, repopath2 = parentpath, s[0]
+        else:
+            repopath, repopath2 = s
+    else:
+        repopath, repopath2 = parentpath, path
+    return unionrepository(ui, repopath, repopath2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-unionrepo.t	Fri Jan 18 15:54:09 2013 +0100
@@ -0,0 +1,148 @@
+Test unionrepo functionality
+
+Create one repository
+
+  $ hg init repo1
+  $ cd repo1
+  $ touch repo1-0
+  $ echo repo1-0 > f
+  $ hg ci -Aqmrepo1-0
+  $ touch repo1-1
+  $ echo repo1-1 >> f
+  $ hg ci -Aqmrepo1-1
+  $ touch repo1-2
+  $ echo repo1-2 >> f
+  $ hg ci -Aqmrepo1-2
+  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
+  2:68c0685446a3  repo1-2
+  1:8a58db72e69d  repo1-1
+  0:f093fec0529b  repo1-0
+  $ tip1=`hg id -q`
+  $ cd ..
+
+- and a clone with a not-completely-trivial history
+
+  $ hg clone -q repo1 --rev 0 repo2
+  $ cd repo2
+  $ touch repo2-1
+  $ sed '1irepo2-1 at top' f > f.tmp
+  $ mv f.tmp f
+  $ hg ci -Aqmrepo2-1
+  $ touch repo2-2
+  $ hg pull -q ../repo1 -r 1
+  $ hg merge -q
+  $ hg ci -Aqmrepo2-2-merge
+  $ touch repo2-3
+  $ echo repo2-3 >> f
+  $ hg ci -mrepo2-3
+  $ hg log --template '{rev}:{node|short}  {desc|firstline}\n'
+  4:2f0d178c469c  repo2-3
+  3:9e6fb3e0b9da  repo2-2-merge
+  2:8a58db72e69d  repo1-1
+  1:c337dba826e7  repo2-1
+  0:f093fec0529b  repo1-0
+  $ cd ..
+
+revisions from repo2 appear as appended / pulled to repo1
+
+  $ hg -R union:repo1+repo2 log --template '{rev}:{node|short}  {desc|firstline}\n'
+  5:2f0d178c469c  repo2-3
+  4:9e6fb3e0b9da  repo2-2-merge
+  3:c337dba826e7  repo2-1
+  2:68c0685446a3  repo1-2
+  1:8a58db72e69d  repo1-1
+  0:f093fec0529b  repo1-0
+
+manifest can be retrieved for revisions in both repos
+
+  $ hg -R union:repo1+repo2 mani -r $tip1
+  f
+  repo1-0
+  repo1-1
+  repo1-2
+  $ hg -R union:repo1+repo2 mani -r 4
+  f
+  repo1-0
+  repo1-1
+  repo2-1
+  repo2-2
+
+files can be retrieved form both repos
+
+  $ hg -R repo1 cat repo1/f -r2
+  repo1-0
+  repo1-1
+  repo1-2
+
+  $ hg -R union:repo1+repo2 cat -r$tip1 repo1/f
+  repo1-0
+  repo1-1
+  repo1-2
+
+  $ hg -R union:repo1+repo2 cat -r4 $TESTTMP/repo1/f
+  repo2-1 at top
+  repo1-0
+  repo1-1
+
+files can be compared across repos
+
+  $ hg -R union:repo1+repo2 diff -r$tip1 -rtip
+  diff -r 68c0685446a3 -r 2f0d178c469c f
+  --- a/f	Thu Jan 01 00:00:00 1970 +0000
+  +++ b/f	Thu Jan 01 00:00:00 1970 +0000
+  @@ -1,3 +1,4 @@
+  +repo2-1 at top
+   repo1-0
+   repo1-1
+  -repo1-2
+  +repo2-3
+
+heads from both repos are found correctly
+
+  $ hg -R union:repo1+repo2 heads --template '{rev}:{node|short}  {desc|firstline}\n'
+  5:2f0d178c469c  repo2-3
+  2:68c0685446a3  repo1-2
+
+revsets works across repos
+
+  $ hg -R union:repo1+repo2 id -r "ancestor($tip1, 5)"
+  8a58db72e69d
+
+annotate works - an indication that linkrevs works
+
+  $ hg --cwd repo1 -R union:../repo2 annotate $TESTTMP/repo1/f -r tip
+  3: repo2-1 at top
+  0: repo1-0
+  1: repo1-1
+  5: repo2-3
+
+union repos can be cloned ... and clones works correctly
+
+  $ hg clone -U union:repo1+repo2 repo3
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 6 changesets with 11 changes to 6 files (+1 heads)
+
+  $ hg -R repo3 paths
+  default = union:repo1+repo2
+
+  $ hg -R repo3 verify
+  checking changesets
+  checking manifests
+  crosschecking files in changesets and manifests
+  checking files
+  6 files, 6 changesets, 11 total revisions
+
+  $ hg -R repo3 heads --template '{rev}:{node|short}  {desc|firstline}\n'
+  5:2f0d178c469c  repo2-3
+  2:68c0685446a3  repo1-2
+
+  $ hg -R repo3 log --template '{rev}:{node|short}  {desc|firstline}\n'
+  5:2f0d178c469c  repo2-3
+  4:9e6fb3e0b9da  repo2-2-merge
+  3:c337dba826e7  repo2-1
+  2:68c0685446a3  repo1-2
+  1:8a58db72e69d  repo1-1
+  0:f093fec0529b  repo1-0