treemanifest: create treemanifest class
There are a number of problems with large and flat manifests. Copying
from http://mercurial.selenic.com/wiki/ManifestShardingPlan:
* manifest too large for RAM
* manifest resolution too much CPU (long delta chains)
* committing is slow because entire manifest has to be hashed
* impossible for narrow clone to leave out part of manifest as all is
needed to calculate new hash
* diffing two revisions involves traversing entire subdirectories
even if identical
This is a first step in a series introducing a manifest revlog per
directory.
This change adds a new manifest class: treemanifest, which is a tree
where each node has a dict of files (nodeids), a dict of flags, and a
dict of subdirectories (treemanifests). So far, it behaves just like
manifestdict, but it will later help us write one manifest revlog per
directory. The new class is still unused; it will be used after the
next change.
The code is not yet optimized. Running with it (see below) makes most
or all operations slower. Once we start storing manifest revlogs for
every directory, it should be possible to make many of these
operations much faster. The fastdelta() optimization has been
intentionally not implemented for the treemanifests. We can implement
it later if necessary.
All tests pass when run with the following patch (and without, of
couse):
--- a/mercurial/manifest.py Thu Mar 19 11:08:42 2015 -0700
+++ b/mercurial/manifest.py Thu Mar 19 11:15:50 2015 -0700
@@ -596,7 +596,7 @@ class manifest(revlog.revlog):
return None, None
def add(self, m, transaction, link, p1, p2, added, removed):
- if p1 in self._mancache:
+ if False and p1 in self._mancache:
# If our first parent is in the manifest cache, we can
# compute a delta here using properties we know about the
# manifest up-front, which may save time later for the
@@ -626,3 +626,5 @@ class manifest(revlog.revlog):
self._mancache[n] = (m, arraytext)
return n
+
+manifestdict = treemanifest
import sys
from _lsprof import Profiler, profiler_entry
__all__ = ['profile', 'Stats']
def profile(f, *args, **kwds):
"""XXX docstring"""
p = Profiler()
p.enable(subcalls=True, builtins=True)
try:
f(*args, **kwds)
finally:
p.disable()
return Stats(p.getstats())
class Stats(object):
"""XXX docstring"""
def __init__(self, data):
self.data = data
def sort(self, crit="inlinetime"):
"""XXX docstring"""
if crit not in profiler_entry.__dict__:
raise ValueError("Can't sort by %s" % crit)
self.data.sort(key=lambda x: getattr(x, crit), reverse=True)
for e in self.data:
if e.calls:
e.calls.sort(key=lambda x: getattr(x, crit), reverse=True)
def pprint(self, top=None, file=None, limit=None, climit=None):
"""XXX docstring"""
if file is None:
file = sys.stdout
d = self.data
if top is not None:
d = d[:top]
cols = "% 12s %12s %11.4f %11.4f %s\n"
hcols = "% 12s %12s %12s %12s %s\n"
file.write(hcols % ("CallCount", "Recursive", "Total(s)",
"Inline(s)", "module:lineno(function)"))
count = 0
for e in d:
file.write(cols % (e.callcount, e.reccallcount, e.totaltime,
e.inlinetime, label(e.code)))
count += 1
if limit is not None and count == limit:
return
ccount = 0
if climit and e.calls:
for se in e.calls:
file.write(cols % (se.callcount, se.reccallcount,
se.totaltime, se.inlinetime,
" %s" % label(se.code)))
count += 1
ccount += 1
if limit is not None and count == limit:
return
if climit is not None and ccount == climit:
break
def freeze(self):
"""Replace all references to code objects with string
descriptions; this makes it possible to pickle the instance."""
# this code is probably rather ickier than it needs to be!
for i in range(len(self.data)):
e = self.data[i]
if not isinstance(e.code, str):
self.data[i] = type(e)((label(e.code),) + e[1:])
if e.calls:
for j in range(len(e.calls)):
se = e.calls[j]
if not isinstance(se.code, str):
e.calls[j] = type(se)((label(se.code),) + se[1:])
_fn2mod = {}
def label(code):
if isinstance(code, str):
return code
try:
mname = _fn2mod[code.co_filename]
except KeyError:
for k, v in list(sys.modules.iteritems()):
if v is None:
continue
if not isinstance(getattr(v, '__file__', None), str):
continue
if v.__file__.startswith(code.co_filename):
mname = _fn2mod[code.co_filename] = k
break
else:
mname = _fn2mod[code.co_filename] = '<%s>' % code.co_filename
return '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name)
if __name__ == '__main__':
import os
sys.argv = sys.argv[1:]
if not sys.argv:
print >> sys.stderr, "usage: lsprof.py <script> <arguments...>"
sys.exit(2)
sys.path.insert(0, os.path.abspath(os.path.dirname(sys.argv[0])))
stats = profile(execfile, sys.argv[0], globals(), locals())
stats.sort()
stats.pprint()