Mercurial > hg
view tests/test-manifest.py @ 24404:96cccf1e3257
treemanifest: make diff() faster
Containment checking is slower in treemanifest than it is in
manifestdict, making the current diff algorithm O(n log n). By
traversing both treemanifests in parallel, we can make it O(n). More
importantly, once we start lazily loading submanifests, we will be
able to easily skip entire submanifest if they have the same nodeid.
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Thu, 19 Feb 2015 17:13:35 -0800 |
parents | 49cd847fd69a |
children | bb8e2b1a0803 |
line wrap: on
line source
import binascii import unittest import itertools import silenttestrunner from mercurial import manifest as manifestmod HASH_1 = '1' * 40 HASH_2 = 'f' * 40 HASH_3 = '1234567890abcdef0987654321deadbeef0fcafe' A_SHORT_MANIFEST = ( 'bar/baz/qux.py\0%(hash2)s%(flag2)s\n' 'foo\0%(hash1)s%(flag1)s\n' ) % {'hash1': HASH_1, 'flag1': '', 'hash2': HASH_2, 'flag2': 'l', } HUGE_MANIFEST_ENTRIES = 200001 A_HUGE_MANIFEST = ''.join(sorted( 'file%d\0%s%s\n' % (i, h, f) for i, h, f in itertools.izip(xrange(200001), itertools.cycle((HASH_1, HASH_2)), itertools.cycle(('', 'x', 'l'))))) class testmanifest(unittest.TestCase): def assertIn(self, thing, container, msg=None): # assertIn new in 2.7, use it if available, otherwise polyfill sup = getattr(unittest.TestCase, 'assertIn', False) if sup: return sup(self, thing, container, msg=msg) if not msg: msg = 'Expected %r in %r' % (thing, container) self.assert_(thing in container, msg) def testEmptyManifest(self): m = manifestmod._lazymanifest('') self.assertEqual(0, len(m)) self.assertEqual([], list(m.iterentries())) def testManifest(self): m = manifestmod._lazymanifest(A_SHORT_MANIFEST) want = [ ('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'), ('foo', binascii.unhexlify(HASH_1), ''), ] self.assertEqual(len(want), len(m)) self.assertEqual(want, list(m.iterentries())) self.assertEqual((binascii.unhexlify(HASH_1), ''), m['foo']) self.assertRaises(KeyError, lambda : m['wat']) self.assertEqual((binascii.unhexlify(HASH_2), 'l'), m['bar/baz/qux.py']) def testSetItem(self): want = binascii.unhexlify(HASH_1), '' m = manifestmod._lazymanifest('') m['a'] = want self.assertIn('a', m) self.assertEqual(want, m['a']) self.assertEqual('a\0' + HASH_1 + '\n', m.text()) m = manifestmod._lazymanifest(A_SHORT_MANIFEST) m['a'] = want self.assertEqual(want, m['a']) self.assertEqual('a\0' + HASH_1 + '\n' + A_SHORT_MANIFEST, m.text()) m2 = m.copy() del m del m2 # make sure we don't double free() anything def testCompaction(self): unhex = binascii.unhexlify h1, h2 = unhex(HASH_1), unhex(HASH_2) m = manifestmod._lazymanifest(A_SHORT_MANIFEST) m['alpha'] = h1, '' m['beta'] = h2, '' del m['foo'] want = 'alpha\0%s\nbar/baz/qux.py\0%sl\nbeta\0%s\n' % ( HASH_1, HASH_2, HASH_2) self.assertEqual(want, m.text()) self.assertEqual(3, len(m)) self.assertEqual((h1, ''), m['alpha']) self.assertEqual((h2, ''), m['beta']) self.assertRaises(KeyError, lambda : m['foo']) w = [('alpha', h1, ''), ('bar/baz/qux.py', h2, 'l'), ('beta', h2, '')] self.assertEqual(w, list(m.iterentries())) def testSetGetNodeSuffix(self): clean = manifestmod._lazymanifest(A_SHORT_MANIFEST) m = manifestmod._lazymanifest(A_SHORT_MANIFEST) h, f = m['foo'] want = h + 'a', f # Merge code wants to set 21-byte fake hashes at times m['foo'] = want self.assertEqual(want, m['foo']) self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'), ('foo', binascii.unhexlify(HASH_1) + 'a', '')], list(m.iterentries())) # Sometimes it even tries a 22-byte fake hash, but we can # return 21 and it'll work out m['foo'] = want[0] + '+', f self.assertEqual(want, m['foo']) # make sure the suffix survives a copy m2 = m.filtercopy(lambda x: x == 'foo') self.assertEqual(want, m2['foo']) self.assertEqual(1, len(m2)) self.assertEqual(('foo\0%s\n' % HASH_1), m2.text()) m2 = m.copy() self.assertEqual(want, m2['foo']) # suffix with iteration self.assertEqual([('bar/baz/qux.py', binascii.unhexlify(HASH_2), 'l'), ('foo', want[0], '')], list(m.iterentries())) # shows up in diff self.assertEqual({'foo': (want, (h, ''))}, m.diff(clean)) self.assertEqual({'foo': ((h, ''), want)}, clean.diff(m)) def testFilterCopyException(self): m = manifestmod._lazymanifest(A_SHORT_MANIFEST) def filt(path): if path == 'foo': assert False return True self.assertRaises(AssertionError, m.filtercopy, filt) def testRemoveItem(self): m = manifestmod._lazymanifest(A_SHORT_MANIFEST) del m['foo'] self.assertRaises(KeyError, lambda : m['foo']) self.assertEqual(1, len(m)) self.assertEqual(1, len(list(m))) # now restore and make sure everything works right m['foo'] = 'a' * 20, '' self.assertEqual(2, len(m)) self.assertEqual(2, len(list(m))) def testManifestDiff(self): MISSING = (None, '') addl = 'z-only-in-left\0' + HASH_1 + '\n' addr = 'z-only-in-right\0' + HASH_2 + 'x\n' left = manifestmod._lazymanifest( A_SHORT_MANIFEST.replace(HASH_1, HASH_3 + 'x') + addl) right = manifestmod._lazymanifest(A_SHORT_MANIFEST + addr) want = { 'foo': ((binascii.unhexlify(HASH_3), 'x'), (binascii.unhexlify(HASH_1), '')), 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING), 'z-only-in-right': (MISSING, (binascii.unhexlify(HASH_2), 'x')), } self.assertEqual(want, left.diff(right)) want = { 'bar/baz/qux.py': (MISSING, (binascii.unhexlify(HASH_2), 'l')), 'foo': (MISSING, (binascii.unhexlify(HASH_3), 'x')), 'z-only-in-left': (MISSING, (binascii.unhexlify(HASH_1), '')), } self.assertEqual(want, manifestmod._lazymanifest('').diff(left)) want = { 'bar/baz/qux.py': ((binascii.unhexlify(HASH_2), 'l'), MISSING), 'foo': ((binascii.unhexlify(HASH_3), 'x'), MISSING), 'z-only-in-left': ((binascii.unhexlify(HASH_1), ''), MISSING), } self.assertEqual(want, left.diff(manifestmod._lazymanifest(''))) copy = right.copy() del copy['z-only-in-right'] del right['foo'] want = { 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')), 'z-only-in-right': ((binascii.unhexlify(HASH_2), 'x'), MISSING), } self.assertEqual(want, right.diff(copy)) short = manifestmod._lazymanifest(A_SHORT_MANIFEST) pruned = short.copy() del pruned['foo'] want = { 'foo': ((binascii.unhexlify(HASH_1), ''), MISSING), } self.assertEqual(want, short.diff(pruned)) want = { 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')), } self.assertEqual(want, pruned.diff(short)) want = { 'bar/baz/qux.py': None, 'foo': (MISSING, (binascii.unhexlify(HASH_1), '')), } self.assertEqual(want, pruned.diff(short, True)) def testReversedLines(self): backwards = ''.join( l + '\n' for l in reversed(A_SHORT_MANIFEST.split('\n')) if l) try: manifestmod._lazymanifest(backwards) self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest lines not in sorted order.', str(v)) def testNoTerminalNewline(self): try: manifestmod._lazymanifest(A_SHORT_MANIFEST + 'wat') self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest did not end in a newline.', str(v)) def testNoNewLineAtAll(self): try: manifestmod._lazymanifest('wat') self.fail('Should have raised ValueError') except ValueError, v: self.assertIn('Manifest did not end in a newline.', str(v)) def testHugeManifest(self): m = manifestmod._lazymanifest(A_HUGE_MANIFEST) self.assertEqual(HUGE_MANIFEST_ENTRIES, len(m)) self.assertEqual(len(m), len(list(m))) def testIntersectFiles(self): m = manifestmod.manifestdict(A_HUGE_MANIFEST) m2 = m.intersectfiles(['file1', 'file200', 'file300']) w = ('file1\0%sx\n' 'file200\0%sl\n' 'file300\0%s\n') % (HASH_2, HASH_1, HASH_1) self.assertEqual(w, m2.text()) if __name__ == '__main__': silenttestrunner.main(__name__)