--- a/mercurial/manifest.py Fri Jul 03 18:10:58 2015 +0100
+++ b/mercurial/manifest.py Sat Jul 18 17:32:38 2015 -0500
@@ -219,7 +219,7 @@
files instead of over manifest files.'''
files = match.files()
return (len(files) < 100 and (match.isexact() or
- (not match.anypats() and util.all(fn in self for fn in files))))
+ (match.prefix() and all(fn in self for fn in files))))
def walk(self, match):
'''Generates matching file names.
@@ -441,32 +441,64 @@
else:
return '', f
+_noop = lambda: None
+
class treemanifest(object):
def __init__(self, dir='', text=''):
self._dir = dir
+ self._node = revlog.nullid
+ self._load = _noop
+ self._dirty = False
self._dirs = {}
# Using _lazymanifest here is a little slower than plain old dicts
self._files = {}
self._flags = {}
- self.parse(text)
+ if text:
+ def readsubtree(subdir, subm):
+ raise AssertionError('treemanifest constructor only accepts '
+ 'flat manifests')
+ self.parse(text, readsubtree)
+ self._dirty = True # Mark flat manifest dirty after parsing
def _subpath(self, path):
return self._dir + path
def __len__(self):
+ self._load()
size = len(self._files)
for m in self._dirs.values():
size += m.__len__()
return size
def _isempty(self):
+ self._load() # for consistency; already loaded by all callers
return (not self._files and (not self._dirs or
- util.all(m._isempty() for m in self._dirs.values())))
+ all(m._isempty() for m in self._dirs.values())))
def __str__(self):
- return '<treemanifest dir=%s>' % self._dir
+ return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s>' %
+ (self._dir, revlog.hex(self._node),
+ bool(self._load is _noop),
+ self._dirty))
+
+ def dir(self):
+ '''The directory that this tree manifest represents, including a
+ trailing '/'. Empty string for the repo root directory.'''
+ return self._dir
+
+ def node(self):
+ '''This node of this instance. nullid for unsaved instances. Should
+ be updated when the instance is read or written from a revlog.
+ '''
+ assert not self._dirty
+ return self._node
+
+ def setnode(self, node):
+ self._node = node
+ self._dirty = False
def iteritems(self):
+ self._load()
for p, n in sorted(self._dirs.items() + self._files.items()):
if p in self._files:
yield self._subpath(p), n
@@ -475,6 +507,7 @@
yield f, sn
def iterkeys(self):
+ self._load()
for p in sorted(self._dirs.keys() + self._files.keys()):
if p in self._files:
yield self._subpath(p)
@@ -491,6 +524,7 @@
def __contains__(self, f):
if f is None:
return False
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
if dir not in self._dirs:
@@ -500,6 +534,7 @@
return f in self._files
def get(self, f, default=None):
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
if dir not in self._dirs:
@@ -509,6 +544,7 @@
return self._files.get(f, default)
def __getitem__(self, f):
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
return self._dirs[dir].__getitem__(subpath)
@@ -516,6 +552,7 @@
return self._files[f]
def flags(self, f):
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
if dir not in self._dirs:
@@ -527,6 +564,7 @@
return self._flags.get(f, '')
def find(self, f):
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
return self._dirs[dir].find(subpath)
@@ -534,6 +572,7 @@
return self._files[f], self._flags.get(f, '')
def __delitem__(self, f):
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
self._dirs[dir].__delitem__(subpath)
@@ -544,9 +583,11 @@
del self._files[f]
if f in self._flags:
del self._flags[f]
+ self._dirty = True
def __setitem__(self, f, n):
assert n is not None
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
if dir not in self._dirs:
@@ -554,9 +595,12 @@
self._dirs[dir].__setitem__(subpath, n)
else:
self._files[f] = n[:21] # to match manifestdict's behavior
+ self._dirty = True
def setflag(self, f, flags):
"""Set the flags (symlink, executable) for path f."""
+ assert 'd' not in flags
+ self._load()
dir, subpath = _splittopdir(f)
if dir:
if dir not in self._dirs:
@@ -564,19 +608,35 @@
self._dirs[dir].setflag(subpath, flags)
else:
self._flags[f] = flags
+ self._dirty = True
def copy(self):
copy = treemanifest(self._dir)
- for d in self._dirs:
- copy._dirs[d] = self._dirs[d].copy()
- copy._files = dict.copy(self._files)
- copy._flags = dict.copy(self._flags)
+ copy._node = self._node
+ copy._dirty = self._dirty
+ def _load():
+ self._load()
+ for d in self._dirs:
+ copy._dirs[d] = self._dirs[d].copy()
+ copy._files = dict.copy(self._files)
+ copy._flags = dict.copy(self._flags)
+ copy._load = _noop
+ copy._load = _load
+ if self._load == _noop:
+ # Chaining _load if it's _noop is functionally correct, but the
+ # chain may end up excessively long (stack overflow), and
+ # will prevent garbage collection of 'self'.
+ copy._load()
return copy
def filesnotin(self, m2):
'''Set of files in this manifest that are not in the other'''
files = set()
def _filesnotin(t1, t2):
+ if t1._node == t2._node and not t1._dirty and not t2._dirty:
+ return
+ t1._load()
+ t2._load()
for d, m1 in t1._dirs.iteritems():
if d in t2._dirs:
m2 = t2._dirs[d]
@@ -599,6 +659,7 @@
return self._alldirs
def hasdir(self, dir):
+ self._load()
topdir, subdir = _splittopdir(dir)
if topdir:
if topdir in self._dirs:
@@ -635,27 +696,20 @@
if not self.hasdir(fn):
match.bad(fn, None)
- def _walk(self, match, alldirs=False):
- '''Recursively generates matching file names for walk().
-
- Will visit all subdirectories if alldirs is True, otherwise it will
- only visit subdirectories for which match.visitdir is True.'''
-
- if not alldirs:
- # substring to strip trailing slash
- visit = match.visitdir(self._dir[:-1] or '.')
- if not visit:
- return
- alldirs = (visit == 'all')
+ def _walk(self, match):
+ '''Recursively generates matching file names for walk().'''
+ if not match.visitdir(self._dir[:-1] or '.'):
+ return
# yield this dir's files and walk its submanifests
+ self._load()
for p in sorted(self._dirs.keys() + self._files.keys()):
if p in self._files:
fullp = self._subpath(p)
if match(fullp):
yield fullp
else:
- for f in self._dirs[p]._walk(match, alldirs):
+ for f in self._dirs[p]._walk(match):
yield f
def matches(self, match):
@@ -665,20 +719,15 @@
return self._matches(match)
- def _matches(self, match, alldirs=False):
+ def _matches(self, match):
'''recursively generate a new manifest filtered by the match argument.
-
- Will visit all subdirectories if alldirs is True, otherwise it will
- only visit subdirectories for which match.visitdir is True.'''
-
+ '''
ret = treemanifest(self._dir)
- if not alldirs:
- # substring to strip trailing slash
- visit = match.visitdir(self._dir[:-1] or '.')
- if not visit:
- return ret
- alldirs = (visit == 'all')
+ if not match.visitdir(self._dir[:-1] or '.'):
+ return ret
+
+ self._load()
for fn in self._files:
fullp = self._subpath(fn)
if not match(fullp):
@@ -688,10 +737,12 @@
ret._flags[fn] = self._flags[fn]
for dir, subm in self._dirs.iteritems():
- m = subm._matches(match, alldirs)
+ m = subm._matches(match)
if not m._isempty():
ret._dirs[dir] = m
+ if not ret._isempty():
+ ret._dirty = True
return ret
def diff(self, m2, clean=False):
@@ -712,6 +763,10 @@
result = {}
emptytree = treemanifest()
def _diff(t1, t2):
+ if t1._node == t2._node and not t1._dirty and not t2._dirty:
+ return
+ t1._load()
+ t2._load()
for d, m1 in t1._dirs.iteritems():
m2 = t2._dirs.get(d, emptytree)
_diff(m1, m2)
@@ -737,20 +792,71 @@
_diff(self, m2)
return result
- def parse(self, text):
+ def unmodifiedsince(self, m2):
+ return not self._dirty and not m2._dirty and self._node == m2._node
+
+ def parse(self, text, readsubtree):
for f, n, fl in _parse(text):
- self[f] = n
- if fl:
- self.setflag(f, fl)
+ if fl == 'd':
+ f = f + '/'
+ self._dirs[f] = readsubtree(self._subpath(f), n)
+ elif '/' in f:
+ # This is a flat manifest, so use __setitem__ and setflag rather
+ # than assigning directly to _files and _flags, so we can
+ # assign a path in a subdirectory, and to mark dirty (compared
+ # to nullid).
+ self[f] = n
+ if fl:
+ self.setflag(f, fl)
+ else:
+ # Assigning to _files and _flags avoids marking as dirty,
+ # and should be a little faster.
+ self._files[f] = n
+ if fl:
+ self._flags[f] = fl
def text(self, usemanifestv2=False):
"""Get the full data of this manifest as a bytestring."""
+ self._load()
flags = self.flags
return _text(((f, self[f], flags(f)) for f in self.keys()),
usemanifestv2)
+ def dirtext(self, usemanifestv2=False):
+ """Get the full data of this directory as a bytestring. Make sure that
+ any submanifests have been written first, so their nodeids are correct.
+ """
+ self._load()
+ flags = self.flags
+ dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
+ files = [(f, self._files[f], flags(f)) for f in self._files]
+ return _text(sorted(dirs + files), usemanifestv2)
+
+ def read(self, gettext, readsubtree):
+ def _load():
+ # Mark as loaded already here, so __setitem__ and setflag() don't
+ # cause infinite loops when they try to load.
+ self._load = _noop
+ self.parse(gettext(), readsubtree)
+ self._dirty = False
+ self._load = _load
+
+ def writesubtrees(self, m1, m2, writesubtree):
+ self._load() # for consistency; should never have any effect here
+ emptytree = treemanifest()
+ for d, subm in self._dirs.iteritems():
+ subp1 = m1._dirs.get(d, emptytree)._node
+ subp2 = m2._dirs.get(d, emptytree)._node
+ if subp1 == revlog.nullid:
+ subp1, subp2 = subp2, subp1
+ writesubtree(subm, subp1, subp2)
+
class manifest(revlog.revlog):
- def __init__(self, opener):
+ def __init__(self, opener, dir='', dirlogcache=None):
+ '''The 'dir' and 'dirlogcache' arguments are for internal use by
+ manifest.manifest only. External users should create a root manifest
+ log with manifest.manifest(opener) and call dirlog() on it.
+ '''
# During normal operations, we expect to deal with not more than four
# revs at a time (such as during commit --amend). When rebasing large
# stacks of commits, the number can go up, hence the config knob below.
@@ -760,19 +866,38 @@
opts = getattr(opener, 'options', None)
if opts is not None:
cachesize = opts.get('manifestcachesize', cachesize)
- usetreemanifest = opts.get('usetreemanifest', usetreemanifest)
+ usetreemanifest = opts.get('treemanifest', usetreemanifest)
usemanifestv2 = opts.get('manifestv2', usemanifestv2)
self._mancache = util.lrucachedict(cachesize)
- revlog.revlog.__init__(self, opener, "00manifest.i")
self._treeinmem = usetreemanifest
self._treeondisk = usetreemanifest
self._usemanifestv2 = usemanifestv2
+ indexfile = "00manifest.i"
+ if dir:
+ assert self._treeondisk
+ if not dir.endswith('/'):
+ dir = dir + '/'
+ indexfile = "meta/" + dir + "00manifest.i"
+ revlog.revlog.__init__(self, opener, indexfile)
+ self._dir = dir
+ # The dirlogcache is kept on the root manifest log
+ if dir:
+ self._dirlogcache = dirlogcache
+ else:
+ self._dirlogcache = {'': self}
def _newmanifest(self, data=''):
if self._treeinmem:
- return treemanifest('', data)
+ return treemanifest(self._dir, data)
return manifestdict(data)
+ def dirlog(self, dir):
+ assert self._treeondisk
+ if dir not in self._dirlogcache:
+ self._dirlogcache[dir] = manifest(self.opener, dir,
+ self._dirlogcache)
+ return self._dirlogcache[dir]
+
def _slowreaddelta(self, node):
r0 = self.deltaparent(self.rev(node))
m0 = self.read(self.node(r0))
@@ -793,7 +918,13 @@
return self._newmanifest(d)
def readfast(self, node):
- '''use the faster of readdelta or read'''
+ '''use the faster of readdelta or read
+
+ This will return a manifest which is either only the files
+ added/modified relative to p1, or all files in the
+ manifest. Which one is returned depends on the codepath used
+ to retrieve the data.
+ '''
r = self.rev(node)
deltaparent = self.deltaparent(r)
if deltaparent != revlog.nullrev and deltaparent in self.parentrevs(r):
@@ -805,9 +936,19 @@
return self._newmanifest() # don't upset local cache
if node in self._mancache:
return self._mancache[node][0]
- text = self.revision(node)
- arraytext = array.array('c', text)
- m = self._newmanifest(text)
+ if self._treeondisk:
+ def gettext():
+ return self.revision(node)
+ def readsubtree(dir, subm):
+ return self.dirlog(dir).read(subm)
+ m = self._newmanifest()
+ m.read(gettext, readsubtree)
+ m.setnode(node)
+ arraytext = None
+ else:
+ text = self.revision(node)
+ m = self._newmanifest(text)
+ arraytext = array.array('c', text)
self._mancache[node] = (m, arraytext)
return m
@@ -845,10 +986,37 @@
# just encode a fulltext of the manifest and pass that
# through to the revlog layer, and let it handle the delta
# process.
- text = m.text(self._usemanifestv2)
- arraytext = array.array('c', text)
- n = self.addrevision(text, transaction, link, p1, p2)
+ if self._treeondisk:
+ m1 = self.read(p1)
+ m2 = self.read(p2)
+ n = self._addtree(m, transaction, link, m1, m2)
+ arraytext = None
+ else:
+ text = m.text(self._usemanifestv2)
+ n = self.addrevision(text, transaction, link, p1, p2)
+ arraytext = array.array('c', text)
self._mancache[n] = (m, arraytext)
return n
+
+ def _addtree(self, m, transaction, link, m1, m2):
+ # If the manifest is unchanged compared to one parent,
+ # don't write a new revision
+ if m.unmodifiedsince(m1) or m.unmodifiedsince(m2):
+ return m.node()
+ def writesubtree(subm, subp1, subp2):
+ sublog = self.dirlog(subm.dir())
+ sublog.add(subm, transaction, link, subp1, subp2, None, None)
+ m.writesubtrees(m1, m2, writesubtree)
+ text = m.dirtext(self._usemanifestv2)
+ # Double-check whether contents are unchanged to one parent
+ if text == m1.dirtext(self._usemanifestv2):
+ n = m1.node()
+ elif text == m2.dirtext(self._usemanifestv2):
+ n = m2.node()
+ else:
+ n = self.addrevision(text, transaction, link, m1.node(), m2.node())
+ # Save nodeid so parent manifest can calculate its nodeid
+ m.setnode(n)
+ return n