treemanifest: rework lazy-copying code (
issue4840)
The old lazy-copy code formed a chain of copied manifests with each
copy. Under typical operation, the stack never got more than a couple
of manifests deep and was fine. Under conditions like hgsubversion or
convert, the stack could get hundreds of manifests deep, and
eventually overflow the recursion limit for Python. I was able to
consistently reproduce this by converting an hgsubversion clone of
svn's history to treemanifests.
This may result in fewer manifests staying in memory during operations
like convert when treemanifests are in use, and should make those
operations faster since there will be significantly fewer noop
function calls going on.
A previous attempt (never mailed) of mine to fix this problem tried to
simply have all treemanifests only have a loadfunc - that caused
somewhat weird problems because the gettext() callable passed into
read() wasn't idempotent, so the easy solution is to have a loadfunc
and a copyfunc.
--- a/mercurial/manifest.py Fri Sep 25 17:18:28 2015 -0400
+++ b/mercurial/manifest.py Fri Sep 25 22:54:46 2015 -0400
@@ -442,13 +442,14 @@
else:
return '', f
-_noop = lambda: None
+_noop = lambda s: None
class treemanifest(object):
def __init__(self, dir='', text=''):
self._dir = dir
self._node = revlog.nullid
- self._load = _noop
+ self._loadfunc = _noop
+ self._copyfunc = _noop
self._dirty = False
self._dirs = {}
# Using _lazymanifest here is a little slower than plain old dicts
@@ -479,7 +480,7 @@
def __repr__(self):
return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s at 0x%x>' %
(self._dir, revlog.hex(self._node),
- bool(self._load is _noop),
+ bool(self._loadfunc is _noop),
self._dirty, id(self)))
def dir(self):
@@ -598,6 +599,14 @@
self._files[f] = n[:21] # to match manifestdict's behavior
self._dirty = True
+ def _load(self):
+ if self._loadfunc is not _noop:
+ lf, self._loadfunc = self._loadfunc, _noop
+ lf(self)
+ elif self._copyfunc is not _noop:
+ cf, self._copyfunc = self._copyfunc, _noop
+ cf(self)
+
def setflag(self, f, flags):
"""Set the flags (symlink, executable) for path f."""
assert 'd' not in flags
@@ -615,19 +624,19 @@
copy = treemanifest(self._dir)
copy._node = self._node
copy._dirty = self._dirty
- def _load_for_copy():
- self._load()
- for d in self._dirs:
- copy._dirs[d] = self._dirs[d].copy()
- copy._files = dict.copy(self._files)
- copy._flags = dict.copy(self._flags)
- copy._load = _noop
- copy._load = _load_for_copy
- if self._load == _noop:
- # Chaining _load if it's _noop is functionally correct, but the
- # chain may end up excessively long (stack overflow), and
- # will prevent garbage collection of 'self'.
- copy._load()
+ if self._copyfunc is _noop:
+ def _copyfunc(s):
+ self._load()
+ for d in self._dirs:
+ s._dirs[d] = self._dirs[d].copy()
+ s._files = dict.copy(self._files)
+ s._flags = dict.copy(self._flags)
+ if self._loadfunc is _noop:
+ _copyfunc(copy)
+ else:
+ copy._copyfunc = _copyfunc
+ else:
+ copy._copyfunc = self._copyfunc
return copy
def filesnotin(self, m2):
@@ -834,13 +843,10 @@
return _text(sorted(dirs + files), usemanifestv2)
def read(self, gettext, readsubtree):
- def _load_for_read():
- # Mark as loaded already here, so __setitem__ and setflag() don't
- # cause infinite loops when they try to load.
- self._load = _noop
- self.parse(gettext(), readsubtree)
- self._dirty = False
- self._load = _load_for_read
+ def _load_for_read(s):
+ s.parse(gettext(), readsubtree)
+ s._dirty = False
+ self._loadfunc = _load_for_read
def writesubtrees(self, m1, m2, writesubtree):
self._load() # for consistency; should never have any effect here