manifest: split manifestdict into high-level and low-level logic
The low-level logic type (_lazymanifest) matches the behavior of the C
implementation introduced in
a5f1bccd. A future patch will use that
when available.
--- a/mercurial/manifest.py Sat Mar 07 11:43:12 2015 -0500
+++ b/mercurial/manifest.py Sat Mar 07 12:04:39 2015 -0500
@@ -9,53 +9,119 @@
import mdiff, parsers, error, revlog, util
import array, struct
-# Pure Python fallback
-def _parsemanifest(mfdict, fdict, lines):
- bin = revlog.bin
- for l in lines.splitlines():
- f, n = l.split('\0')
- if len(n) > 40:
- fdict[f] = n[40:]
- mfdict[f] = bin(n[:40])
- else:
- mfdict[f] = bin(n)
+
+class _lazymanifest(dict):
+ """This is the pure implementation of lazymanifest.
+
+ It has not been optimized *at all* and is not lazy.
+ """
-def _parse(lines, mfdict, flags):
- try:
- parsers.parse_manifest(mfdict, flags, lines)
- except AttributeError:
- _parsemanifest(mfdict, flags, lines)
- return mfdict
-
-class manifestdict(dict):
- def __init__(self, data=''):
- self._flags = {}
- _parse(data, self, self._flags)
+ def __init__(self, data):
+ # This init method does a little bit of excessive-looking
+ # precondition checking. This is so that the behavior of this
+ # class exactly matches its C counterpart to try and help
+ # prevent surprise breakage for anyone that develops against
+ # the pure version.
+ if data and data[-1] != '\n':
+ raise ValueError('Manifest did not end in a newline.')
+ dict.__init__(self)
+ prev = None
+ for l in data.splitlines():
+ if prev is not None and prev > l:
+ raise ValueError('Manifest lines not in sorted order.')
+ prev = l
+ f, n = l.split('\0')
+ if len(n) > 40:
+ self[f] = revlog.bin(n[:40]), n[40:]
+ else:
+ self[f] = revlog.bin(n), ''
def __setitem__(self, k, v):
- assert v is not None
- dict.__setitem__(self, k, v)
- def flags(self, f):
- return self._flags.get(f, "")
- def setflag(self, f, flags):
- """Set the flags (symlink, executable) for path f."""
- self._flags[f] = flags
+ node, flag = v
+ assert node is not None
+ if len(node) > 21:
+ node = node[:21] # match c implementation behavior
+ dict.__setitem__(self, k, (node, flag))
+
+ def __iter__(self):
+ return ((f, e[0], e[1]) for f, e in sorted(self.iteritems()))
+
def copy(self):
- copy = manifestdict()
- dict.__init__(copy, self)
- copy._flags = dict.copy(self._flags)
- return copy
+ c = _lazymanifest('')
+ c.update(self)
+ return c
+
+ def diff(self, m2, clean=False):
+ '''Finds changes between the current manifest and m2.'''
+ diff = {}
+
+ for fn, e1 in self.iteritems():
+ if fn not in m2:
+ diff[fn] = e1, (None, '')
+ else:
+ e2 = m2[fn]
+ if e1 != e2:
+ diff[fn] = e1, e2
+ elif clean:
+ diff[fn] = None
+
+ for fn, e2 in m2.iteritems():
+ if fn not in self:
+ diff[fn] = (None, ''), e2
+
+ return diff
+
+ def filtercopy(self, filterfn):
+ c = _lazymanifest('')
+ for f, n, fl in self:
+ if filterfn(f):
+ c[f] = n, fl
+ return c
+
+ def text(self):
+ """Get the full data of this manifest as a bytestring."""
+ fl = sorted(self)
+
+ _hex = revlog.hex
+ # if this is changed to support newlines in filenames,
+ # be sure to check the templates/ dir again (especially *-raw.tmpl)
+ return ''.join("%s\0%s%s\n" % (
+ f, _hex(n[:20]), flag) for f, n, flag in fl)
+
+class manifestdict(object):
+ def __init__(self, data=''):
+ self._lm = _lazymanifest(data)
+
+ def __getitem__(self, key):
+ return self._lm[key][0]
+
+ def __len__(self):
+ return len(self._lm)
+
+ def __setitem__(self, key, node):
+ self._lm[key] = node, self.flags(key, '')
+
+ def __contains__(self, key):
+ return key in self._lm
+
+ def __delitem__(self, key):
+ del self._lm[key]
+
+ def keys(self):
+ return [x[0] for x in self._lm]
+
+ def iterkeys(self):
+ return iter(self.keys())
+
def intersectfiles(self, files):
- '''make a new manifestdict with the intersection of self with files
+ '''make a new lazymanifest with the intersection of self with files
The algorithm assumes that files is much smaller than self.'''
ret = manifestdict()
+ lm = self._lm
for fn in files:
- if fn in self:
- ret[fn] = self[fn]
- flags = self._flags.get(fn, None)
- if flags:
- ret._flags[fn] = flags
+ if fn in lm:
+ ret._lm[fn] = self._lm[fn]
return ret
def filesnotin(self, m2):
@@ -74,11 +140,9 @@
(not match.anypats() and util.all(fn in self for fn in files))):
return self.intersectfiles(files)
- m = self.copy()
- for fn in m.keys():
- if not match(fn):
- del m[fn]
- return m
+ lm = manifestdict('')
+ lm._lm = self._lm.filtercopy(match)
+ return lm
def diff(self, m2, clean=False):
'''Finds changes between the current manifest and m2.
@@ -95,35 +159,36 @@
the nodeid will be None and the flags will be the empty
string.
'''
- diff = {}
+ return self._lm.diff(m2._lm, clean)
+
+ def setflag(self, key, flag):
+ self._lm[key] = self[key], flag
+
+ def get(self, key, default=None):
+ try:
+ return self._lm[key][0]
+ except KeyError:
+ return default
- for fn, n1 in self.iteritems():
- fl1 = self._flags.get(fn, '')
- n2 = m2.get(fn, None)
- fl2 = m2._flags.get(fn, '')
- if n2 is None:
- fl2 = ''
- if n1 != n2 or fl1 != fl2:
- diff[fn] = ((n1, fl1), (n2, fl2))
- elif clean:
- diff[fn] = None
+ def flags(self, key, default=''):
+ try:
+ return self._lm[key][1]
+ except KeyError:
+ return default
- for fn, n2 in m2.iteritems():
- if fn not in self:
- fl2 = m2._flags.get(fn, '')
- diff[fn] = ((None, ''), (n2, fl2))
+ def __iter__(self):
+ return (x[0] for x in self._lm)
- return diff
+ def copy(self):
+ c = manifestdict('')
+ c._lm = self._lm.copy()
+ return c
+
+ def iteritems(self):
+ return (x[:2] for x in self._lm)
def text(self):
- """Get the full data of this manifest as a bytestring."""
- fl = sorted(self)
- _checkforbidden(fl)
-
- hex, flags = revlog.hex, self.flags
- # if this is changed to support newlines in filenames,
- # be sure to check the templates/ dir again (especially *-raw.tmpl)
- return ''.join("%s\0%s%s\n" % (f, hex(self[f]), flags(f)) for f in fl)
+ return self._lm.text()
def fastdelta(self, base, changes):
"""Given a base manifest text as an array.array and a list of changes
@@ -143,7 +208,8 @@
# bs will either be the index of the item or the insert point
start, end = _msearch(addbuf, f, start)
if not todelete:
- l = "%s\0%s%s\n" % (f, revlog.hex(self[f]), self.flags(f))
+ h, fl = self._lm[f]
+ l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
else:
if start == end:
# item we want to delete was not found, error out
@@ -280,12 +346,10 @@
m = self._mancache[node][0]
return m.get(f), m.flags(f)
text = self.revision(node)
- start, end = _msearch(text, f)
- if start == end:
+ try:
+ return manifestdict(text)._lm[f]
+ except KeyError:
return None, None
- l = text[start:end]
- f, n = l.split('\0')
- return revlog.bin(n[:40]), n[40:-1]
def add(self, m, transaction, link, p1, p2, added, removed):
if p1 in self._mancache:
--- a/tests/test-manifest.py Sat Mar 07 11:43:12 2015 -0500
+++ b/tests/test-manifest.py Sat Mar 07 12:04:39 2015 -0500
@@ -4,7 +4,7 @@
import silenttestrunner
-from mercurial import parsers
+from mercurial import manifest as manifestmod
HASH_1 = '1' * 40
HASH_2 = 'f' * 40
@@ -38,12 +38,12 @@
self.assert_(thing in container, msg)
def testEmptyManifest(self):
- m = parsers.lazymanifest('')
+ m = manifestmod._lazymanifest('')
self.assertEqual(0, len(m))
self.assertEqual([], list(m))
def testManifest(self):
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
want = [
('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'),
('foo', binascii.unhexlify(HASH_1), ''),
@@ -58,13 +58,13 @@
def testSetItem(self):
want = binascii.unhexlify(HASH_1), ''
- m = parsers.lazymanifest('')
+ m = manifestmod._lazymanifest('')
m['a'] = want
self.assertIn('a', m)
self.assertEqual(want, m['a'])
self.assertEqual('a\0' + HASH_1 + '\n', m.text())
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
m['a'] = want
self.assertEqual(want, m['a'])
self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST,
@@ -76,7 +76,7 @@
def testCompaction(self):
unhex = binascii.unhexlify
h1, h2 = unhex(HASH_1), unhex(HASH_2)
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
m['alpha'] = h1, ''
m['beta'] = h2, ''
del m['foo']
@@ -91,8 +91,8 @@
self.assertEqual(w, list(m))
def testSetGetNodeSuffix(self):
- clean = parsers.lazymanifest(A_SHORT_MANIFEST)
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ clean = manifestmod._lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
h, f = m['foo']
want = h + 'a', f
# Merge code wants to set 21-byte fake hashes at times
@@ -120,7 +120,7 @@
self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m))
def testFilterCopyException(self):
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
def filt(path):
if path == 'foo':
assert False
@@ -128,7 +128,7 @@
self.assertRaises(AssertionError, m.filtercopy, filt)
def testRemoveItem(self):
- m = parsers.lazymanifest(A_SHORT_MANIFEST)
+ m = manifestmod._lazymanifest(A_SHORT_MANIFEST)
del m['foo']
self.assertRaises(KeyError, lambda : m['foo'])
self.assertEqual(1, len(m))
@@ -138,9 +138,9 @@
MISSING = (None, '')
addl = 'z-only-in-left\0' + HASH_1 + '\n'
addr = 'z-only-in-right\0' + HASH_2 + 'x\n'
- left = parsers.lazymanifest(
+ left = manifestmod._lazymanifest(
A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl)
- right = parsers.lazymanifest(A_SHORT_MANIFEST + addr)
+ right = manifestmod._lazymanifest(A_SHORT_MANIFEST + addr)
want = {
'foo': ((binascii.unhexlify(HASH_3), 'x'),
(binascii.unhexlify(HASH_1), '')),
@@ -154,14 +154,14 @@
'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')),
'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')),
}
- self.assertEqual(want, parsers.lazymanifest('').diff(left))
+ self.assertEqual(want, manifestmod._lazymanifest('').diff(left))
want = {
'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING),
'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING),
'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING),
}
- self.assertEqual(want, left.diff(parsers.lazymanifest('')))
+ self.assertEqual(want, left.diff(manifestmod._lazymanifest('')))
copy = right.copy()
del copy['z-only-in-right']
del right['foo']
@@ -171,7 +171,7 @@
}
self.assertEqual(want, right.diff(copy))
- short = parsers.lazymanifest(A_SHORT_MANIFEST)
+ short = manifestmod._lazymanifest(A_SHORT_MANIFEST)
pruned = short.copy()
del pruned['foo']
want = {
@@ -192,30 +192,37 @@
backwards = ''.join(
l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l)
try:
- parsers.lazymanifest(backwards)
+ manifestmod._lazymanifest(backwards)
self.fail('Should have raised ValueError')
except ValueError, v:
self.assertIn('Manifest lines not in sorted order.', str(v))
def testNoTerminalNewline(self):
try:
- parsers.lazymanifest(A_SHORT_MANIFEST + 'wat')
+ manifestmod._lazymanifest(A_SHORT_MANIFEST + 'wat')
self.fail('Should have raised ValueError')
except ValueError, v:
self.assertIn('Manifest did not end in a newline.', str(v))
def testNoNewLineAtAll(self):
try:
- parsers.lazymanifest('wat')
+ manifestmod._lazymanifest('wat')
self.fail('Should have raised ValueError')
except ValueError, v:
self.assertIn('Manifest did not end in a newline.', str(v))
def testHugeManifest(self):
- m = parsers.lazymanifest(A_HUGE_MANIFEST)
+ m = manifestmod._lazymanifest(A_HUGE_MANIFEST)
self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m))
self.assertEqual(len(m), len(list(m)))
+ def testIntersectFiles(self):
+ m = manifestmod.manifestdict(A_HUGE_MANIFEST)
+ m2 = m.intersectfiles(['file1', 'file200', 'file300'])
+ w = ('file1\0%sx\n'
+ 'file200\0%sl\n'
+ 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1)
+ self.assertEqual(w, m2.text())
if __name__ == '__main__':
silenttestrunner.main(__name__)