mercurial/manifest.py
branchstable
changeset 25855 96a38d44ba09
parent 25276 c436ba9d6ac0
child 26199 5411059d93f8
--- a/mercurial/manifest.py	Fri Jul 03 18:10:58 2015 +0100
+++ b/mercurial/manifest.py	Sat Jul 18 17:32:38 2015 -0500
@@ -219,7 +219,7 @@
         files instead of over manifest files.'''
         files = match.files()
         return (len(files) < 100 and (match.isexact() or
-            (not match.anypats() and util.all(fn in self for fn in files))))
+            (match.prefix() and all(fn in self for fn in files))))
 
     def walk(self, match):
         '''Generates matching file names.
@@ -441,32 +441,64 @@
     else:
         return '', f
 
+_noop = lambda: None
+
 class treemanifest(object):
     def __init__(self, dir='', text=''):
         self._dir = dir
+        self._node = revlog.nullid
+        self._load = _noop
+        self._dirty = False
         self._dirs = {}
         # Using _lazymanifest here is a little slower than plain old dicts
         self._files = {}
         self._flags = {}
-        self.parse(text)
+        if text:
+            def readsubtree(subdir, subm):
+                raise AssertionError('treemanifest constructor only accepts '
+                                     'flat manifests')
+            self.parse(text, readsubtree)
+            self._dirty = True # Mark flat manifest dirty after parsing
 
     def _subpath(self, path):
         return self._dir + path
 
     def __len__(self):
+        self._load()
         size = len(self._files)
         for m in self._dirs.values():
             size += m.__len__()
         return size
 
     def _isempty(self):
+        self._load() # for consistency; already loaded by all callers
         return (not self._files and (not self._dirs or
-                util.all(m._isempty() for m in self._dirs.values())))
+                all(m._isempty() for m in self._dirs.values())))
 
     def __str__(self):
-        return '<treemanifest dir=%s>' % self._dir
+        return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s>' %
+                (self._dir, revlog.hex(self._node),
+                 bool(self._load is _noop),
+                 self._dirty))
+
+    def dir(self):
+        '''The directory that this tree manifest represents, including a
+        trailing '/'. Empty string for the repo root directory.'''
+        return self._dir
+
+    def node(self):
+        '''This node of this instance. nullid for unsaved instances. Should
+        be updated when the instance is read or written from a revlog.
+        '''
+        assert not self._dirty
+        return self._node
+
+    def setnode(self, node):
+        self._node = node
+        self._dirty = False
 
     def iteritems(self):
+        self._load()
         for p, n in sorted(self._dirs.items() + self._files.items()):
             if p in self._files:
                 yield self._subpath(p), n
@@ -475,6 +507,7 @@
                     yield f, sn
 
     def iterkeys(self):
+        self._load()
         for p in sorted(self._dirs.keys() + self._files.keys()):
             if p in self._files:
                 yield self._subpath(p)
@@ -491,6 +524,7 @@
     def __contains__(self, f):
         if f is None:
             return False
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -500,6 +534,7 @@
             return f in self._files
 
     def get(self, f, default=None):
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -509,6 +544,7 @@
             return self._files.get(f, default)
 
     def __getitem__(self, f):
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             return self._dirs[dir].__getitem__(subpath)
@@ -516,6 +552,7 @@
             return self._files[f]
 
     def flags(self, f):
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -527,6 +564,7 @@
             return self._flags.get(f, '')
 
     def find(self, f):
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             return self._dirs[dir].find(subpath)
@@ -534,6 +572,7 @@
             return self._files[f], self._flags.get(f, '')
 
     def __delitem__(self, f):
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             self._dirs[dir].__delitem__(subpath)
@@ -544,9 +583,11 @@
             del self._files[f]
             if f in self._flags:
                 del self._flags[f]
+        self._dirty = True
 
     def __setitem__(self, f, n):
         assert n is not None
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -554,9 +595,12 @@
             self._dirs[dir].__setitem__(subpath, n)
         else:
             self._files[f] = n[:21] # to match manifestdict's behavior
+        self._dirty = True
 
     def setflag(self, f, flags):
         """Set the flags (symlink, executable) for path f."""
+        assert 'd' not in flags
+        self._load()
         dir, subpath = _splittopdir(f)
         if dir:
             if dir not in self._dirs:
@@ -564,19 +608,35 @@
             self._dirs[dir].setflag(subpath, flags)
         else:
             self._flags[f] = flags
+        self._dirty = True
 
     def copy(self):
         copy = treemanifest(self._dir)
-        for d in self._dirs:
-            copy._dirs[d] = self._dirs[d].copy()
-        copy._files = dict.copy(self._files)
-        copy._flags = dict.copy(self._flags)
+        copy._node = self._node
+        copy._dirty = self._dirty
+        def _load():
+            self._load()
+            for d in self._dirs:
+                copy._dirs[d] = self._dirs[d].copy()
+            copy._files = dict.copy(self._files)
+            copy._flags = dict.copy(self._flags)
+            copy._load = _noop
+        copy._load = _load
+        if self._load == _noop:
+            # Chaining _load if it's _noop is functionally correct, but the
+            # chain may end up excessively long (stack overflow), and
+            # will prevent garbage collection of 'self'.
+            copy._load()
         return copy
 
     def filesnotin(self, m2):
         '''Set of files in this manifest that are not in the other'''
         files = set()
         def _filesnotin(t1, t2):
+            if t1._node == t2._node and not t1._dirty and not t2._dirty:
+                return
+            t1._load()
+            t2._load()
             for d, m1 in t1._dirs.iteritems():
                 if d in t2._dirs:
                     m2 = t2._dirs[d]
@@ -599,6 +659,7 @@
         return self._alldirs
 
     def hasdir(self, dir):
+        self._load()
         topdir, subdir = _splittopdir(dir)
         if topdir:
             if topdir in self._dirs:
@@ -635,27 +696,20 @@
             if not self.hasdir(fn):
                 match.bad(fn, None)
 
-    def _walk(self, match, alldirs=False):
-        '''Recursively generates matching file names for walk().
-
-        Will visit all subdirectories if alldirs is True, otherwise it will
-        only visit subdirectories for which match.visitdir is True.'''
-
-        if not alldirs:
-            # substring to strip trailing slash
-            visit = match.visitdir(self._dir[:-1] or '.')
-            if not visit:
-                return
-            alldirs = (visit == 'all')
+    def _walk(self, match):
+        '''Recursively generates matching file names for walk().'''
+        if not match.visitdir(self._dir[:-1] or '.'):
+            return
 
         # yield this dir's files and walk its submanifests
+        self._load()
         for p in sorted(self._dirs.keys() + self._files.keys()):
             if p in self._files:
                 fullp = self._subpath(p)
                 if match(fullp):
                     yield fullp
             else:
-                for f in self._dirs[p]._walk(match, alldirs):
+                for f in self._dirs[p]._walk(match):
                     yield f
 
     def matches(self, match):
@@ -665,20 +719,15 @@
 
         return self._matches(match)
 
-    def _matches(self, match, alldirs=False):
+    def _matches(self, match):
         '''recursively generate a new manifest filtered by the match argument.
-
-        Will visit all subdirectories if alldirs is True, otherwise it will
-        only visit subdirectories for which match.visitdir is True.'''
-
+        '''
         ret = treemanifest(self._dir)
-        if not alldirs:
-            # substring to strip trailing slash
-            visit = match.visitdir(self._dir[:-1] or '.')
-            if not visit:
-                return ret
-            alldirs = (visit == 'all')
 
+        if not match.visitdir(self._dir[:-1] or '.'):
+            return ret
+
+        self._load()
         for fn in self._files:
             fullp = self._subpath(fn)
             if not match(fullp):
@@ -688,10 +737,12 @@
                 ret._flags[fn] = self._flags[fn]
 
         for dir, subm in self._dirs.iteritems():
-            m = subm._matches(match, alldirs)
+            m = subm._matches(match)
             if not m._isempty():
                 ret._dirs[dir] = m
 
+        if not ret._isempty():
+            ret._dirty = True
         return ret
 
     def diff(self, m2, clean=False):
@@ -712,6 +763,10 @@
         result = {}
         emptytree = treemanifest()
         def _diff(t1, t2):
+            if t1._node == t2._node and not t1._dirty and not t2._dirty:
+                return
+            t1._load()
+            t2._load()
             for d, m1 in t1._dirs.iteritems():
                 m2 = t2._dirs.get(d, emptytree)
                 _diff(m1, m2)
@@ -737,20 +792,71 @@
         _diff(self, m2)
         return result
 
-    def parse(self, text):
+    def unmodifiedsince(self, m2):
+        return not self._dirty and not m2._dirty and self._node == m2._node
+
+    def parse(self, text, readsubtree):
         for f, n, fl in _parse(text):
-            self[f] = n
-            if fl:
-                self.setflag(f, fl)
+            if fl == 'd':
+                f = f + '/'
+                self._dirs[f] = readsubtree(self._subpath(f), n)
+            elif '/' in f:
+                # This is a flat manifest, so use __setitem__ and setflag rather
+                # than assigning directly to _files and _flags, so we can
+                # assign a path in a subdirectory, and to mark dirty (compared
+                # to nullid).
+                self[f] = n
+                if fl:
+                    self.setflag(f, fl)
+            else:
+                # Assigning to _files and _flags avoids marking as dirty,
+                # and should be a little faster.
+                self._files[f] = n
+                if fl:
+                    self._flags[f] = fl
 
     def text(self, usemanifestv2=False):
         """Get the full data of this manifest as a bytestring."""
+        self._load()
         flags = self.flags
         return _text(((f, self[f], flags(f)) for f in self.keys()),
                      usemanifestv2)
 
+    def dirtext(self, usemanifestv2=False):
+        """Get the full data of this directory as a bytestring. Make sure that
+        any submanifests have been written first, so their nodeids are correct.
+        """
+        self._load()
+        flags = self.flags
+        dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
+        files = [(f, self._files[f], flags(f)) for f in self._files]
+        return _text(sorted(dirs + files), usemanifestv2)
+
+    def read(self, gettext, readsubtree):
+        def _load():
+            # Mark as loaded already here, so __setitem__ and setflag() don't
+            # cause infinite loops when they try to load.
+            self._load = _noop
+            self.parse(gettext(), readsubtree)
+            self._dirty = False
+        self._load = _load
+
+    def writesubtrees(self, m1, m2, writesubtree):
+        self._load() # for consistency; should never have any effect here
+        emptytree = treemanifest()
+        for d, subm in self._dirs.iteritems():
+            subp1 = m1._dirs.get(d, emptytree)._node
+            subp2 = m2._dirs.get(d, emptytree)._node
+            if subp1 == revlog.nullid:
+                subp1, subp2 = subp2, subp1
+            writesubtree(subm, subp1, subp2)
+
 class manifest(revlog.revlog):
-    def __init__(self, opener):
+    def __init__(self, opener, dir='', dirlogcache=None):
+        '''The 'dir' and 'dirlogcache' arguments are for internal use by
+        manifest.manifest only. External users should create a root manifest
+        log with manifest.manifest(opener) and call dirlog() on it.
+        '''
         # During normal operations, we expect to deal with not more than four
         # revs at a time (such as during commit --amend). When rebasing large
         # stacks of commits, the number can go up, hence the config knob below.
@@ -760,19 +866,38 @@
         opts = getattr(opener, 'options', None)
         if opts is not None:
             cachesize = opts.get('manifestcachesize', cachesize)
-            usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
+            usetreemanifest = opts.get('treemanifest', usetreemanifest)
             usemanifestv2 = opts.get('manifestv2', usemanifestv2)
         self._mancache = util.lrucachedict(cachesize)
-        revlog.revlog.__init__(self, opener, "00manifest.i")
         self._treeinmem = usetreemanifest
         self._treeondisk = usetreemanifest
         self._usemanifestv2 = usemanifestv2
+        indexfile = "00manifest.i"
+        if dir:
+            assert self._treeondisk
+            if not dir.endswith('/'):
+                dir = dir + '/'
+            indexfile = "meta/" + dir + "00manifest.i"
+        revlog.revlog.__init__(self, opener, indexfile)
+        self._dir = dir
+        # The dirlogcache is kept on the root manifest log
+        if dir:
+            self._dirlogcache = dirlogcache
+        else:
+            self._dirlogcache = {'': self}
 
     def _newmanifest(self, data=''):
         if self._treeinmem:
-            return treemanifest('', data)
+            return treemanifest(self._dir, data)
         return manifestdict(data)
 
+    def dirlog(self, dir):
+        assert self._treeondisk
+        if dir not in self._dirlogcache:
+            self._dirlogcache[dir] = manifest(self.opener, dir,
+                                              self._dirlogcache)
+        return self._dirlogcache[dir]
+
     def _slowreaddelta(self, node):
         r0 = self.deltaparent(self.rev(node))
         m0 = self.read(self.node(r0))
@@ -793,7 +918,13 @@
         return self._newmanifest(d)
 
     def readfast(self, node):
-        '''use the faster of readdelta or read'''
+        '''use the faster of readdelta or read
+
+        This will return a manifest which is either only the files
+        added/modified relative to p1, or all files in the
+        manifest. Which one is returned depends on the codepath used
+        to retrieve the data.
+        '''
         r = self.rev(node)
         deltaparent = self.deltaparent(r)
         if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
@@ -805,9 +936,19 @@
             return self._newmanifest() # don't upset local cache
         if node in self._mancache:
             return self._mancache[node][0]
-        text = self.revision(node)
-        arraytext = array.array('c', text)
-        m = self._newmanifest(text)
+        if self._treeondisk:
+            def gettext():
+                return self.revision(node)
+            def readsubtree(dir, subm):
+                return self.dirlog(dir).read(subm)
+            m = self._newmanifest()
+            m.read(gettext, readsubtree)
+            m.setnode(node)
+            arraytext = None
+        else:
+            text = self.revision(node)
+            m = self._newmanifest(text)
+            arraytext = array.array('c', text)
         self._mancache[node] = (m, arraytext)
         return m
 
@@ -845,10 +986,37 @@
             # just encode a fulltext of the manifest and pass that
             # through to the revlog layer, and let it handle the delta
             # process.
-            text = m.text(self._usemanifestv2)
-            arraytext = array.array('c', text)
-            n = self.addrevision(text, transaction, link, p1, p2)
+            if self._treeondisk:
+                m1 = self.read(p1)
+                m2 = self.read(p2)
+                n = self._addtree(m, transaction, link, m1, m2)
+                arraytext = None
+            else:
+                text = m.text(self._usemanifestv2)
+                n = self.addrevision(text, transaction, link, p1, p2)
+                arraytext = array.array('c', text)
 
         self._mancache[n] = (m, arraytext)
 
         return n
+
+    def _addtree(self, m, transaction, link, m1, m2):
+        # If the manifest is unchanged compared to one parent,
+        # don't write a new revision
+        if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
+            return m.node()
+        def writesubtree(subm, subp1, subp2):
+            sublog = self.dirlog(subm.dir())
+            sublog.add(subm, transaction, link, subp1, subp2, None, None)
+        m.writesubtrees(m1, m2, writesubtree)
+        text = m.dirtext(self._usemanifestv2)
+        # Double-check whether contents are unchanged to one parent
+        if text == m1.dirtext(self._usemanifestv2):
+            n = m1.node()
+        elif text == m2.dirtext(self._usemanifestv2):
+            n = m2.node()
+        else:
+            n = self.addrevision(text, transaction, link, m1.node(), m2.node())
+        # Save nodeid so parent manifest can calculate its nodeid
+        m.setnode(n)
+        return n