# HG changeset patch # User Augie Fackler # Date 1519347882 18000 # Node ID 0147a4730420470fdaf5702dd732ca2b465e50b5 # Parent b8d0761a85c7421071750de23228415306852d69 cleanup: say goodbye to manifestv2 format This experiment was a bust: we'd hoped for smaller repository sizes, but things got larger. Google ended up rolling out tree manifests in a format that's compatible with the original manifest format, and I believe Facebook is doing the same. This code was never implemented as native speedups, so I'm pretty comfortable saying nobody is using the experimental feature. Let's rip it out. I noticed this code still kicking around because I was investigating a repo corruption issue for timeless. .. bc:: Support for the experimental manifestv2 format has been removed, as it was never completed and failed to meet expectations. Differential Revision: https://phab.mercurial-scm.org/D2393 diff -r b8d0761a85c7 -r 0147a4730420 mercurial/configitems.py --- a/mercurial/configitems.py Wed Feb 21 16:47:39 2018 -0800 +++ b/mercurial/configitems.py Thu Feb 22 20:04:42 2018 -0500 @@ -538,9 +538,6 @@ coreconfigitem('experimental', 'httppostargs', default=False, ) -coreconfigitem('experimental', 'manifestv2', - default=False, -) coreconfigitem('experimental', 'mergedriver', default=None, ) diff -r b8d0761a85c7 -r 0147a4730420 mercurial/help/internals/requirements.txt --- a/mercurial/help/internals/requirements.txt Wed Feb 21 16:47:39 2018 -0800 +++ b/mercurial/help/internals/requirements.txt Thu Feb 22 20:04:42 2018 -0500 @@ -1,4 +1,3 @@ - Repositories contain a file (``.hg/requires``) containing a list of features/capabilities that are *required* for clients to interface with the repository. This file has been present in Mercurial since @@ -105,8 +104,10 @@ Denotes that version 2 of manifests are being used. Support for this requirement was added in Mercurial 3.4 (released -May 2015). The requirement is currently experimental and is disabled -by default. +May 2015). The new format failed to meet expectations and support +for the format and requirement were removed in Mercurial 4.6 +(released May 2018) since the feature never graduated frome experiment +status. treemanifest ============ diff -r b8d0761a85c7 -r 0147a4730420 mercurial/localrepo.py --- a/mercurial/localrepo.py Wed Feb 21 16:47:39 2018 -0800 +++ b/mercurial/localrepo.py Thu Feb 22 20:04:42 2018 -0500 @@ -303,11 +303,15 @@ class localrepository(object): + # obsolete experimental requirements: + # - manifestv2: An experimental new manifest format that allowed + # for stem compression of long paths. Experiment ended up not + # being successful (repository sizes went up due to worse delta + # chains), and the code was deleted in 4.6. supportedformats = { 'revlogv1', 'generaldelta', 'treemanifest', - 'manifestv2', REVLOGV2_REQUIREMENT, } _basesupported = supportedformats | { @@ -322,7 +326,6 @@ 'revlogv1', 'generaldelta', 'treemanifest', - 'manifestv2', } # a list of (ui, featureset) functions. @@ -2261,8 +2264,6 @@ requirements.add('generaldelta') if ui.configbool('experimental', 'treemanifest'): requirements.add('treemanifest') - if ui.configbool('experimental', 'manifestv2'): - requirements.add('manifestv2') revlogv2 = ui.config('experimental', 'revlogv2') if revlogv2 == 'enable-unstable-format-and-corrupt-my-data': diff -r b8d0761a85c7 -r 0147a4730420 mercurial/manifest.py --- a/mercurial/manifest.py Wed Feb 21 16:47:39 2018 -0800 +++ b/mercurial/manifest.py Thu Feb 22 20:04:42 2018 -0500 @@ -9,7 +9,6 @@ import heapq import itertools -import os import struct from .i18n import _ @@ -28,7 +27,7 @@ parsers = policy.importmod(r'parsers') propertycache = util.propertycache -def _parsev1(data): +def _parse(data): # This method does a little bit of excessive-looking # precondition checking. This is so that the behavior of this # class exactly matches its C counterpart to try and help @@ -47,43 +46,7 @@ else: yield f, bin(n), '' -def _parsev2(data): - metadataend = data.find('\n') - # Just ignore metadata for now - pos = metadataend + 1 - prevf = '' - while pos < len(data): - end = data.find('\n', pos + 1) # +1 to skip stem length byte - if end == -1: - raise ValueError('Manifest ended with incomplete file entry.') - stemlen = ord(data[pos:pos + 1]) - items = data[pos + 1:end].split('\0') - f = prevf[:stemlen] + items[0] - if prevf > f: - raise ValueError('Manifest entries not in sorted order.') - fl = items[1] - # Just ignore metadata (items[2:] for now) - n = data[end + 1:end + 21] - yield f, n, fl - pos = end + 22 - prevf = f - -def _parse(data): - """Generates (path, node, flags) tuples from a manifest text""" - if data.startswith('\0'): - return iter(_parsev2(data)) - else: - return iter(_parsev1(data)) - -def _text(it, usemanifestv2): - """Given an iterator over (path, node, flags) tuples, returns a manifest - text""" - if usemanifestv2: - return _textv2(it) - else: - return _textv1(it) - -def _textv1(it): +def _text(it): files = [] lines = [] _hex = revlog.hex @@ -96,19 +59,6 @@ _checkforbidden(files) return ''.join(lines) -def _textv2(it): - files = [] - lines = ['\0\n'] - prevf = '' - for f, n, fl in it: - files.append(f) - stem = os.path.commonprefix([prevf, f]) - stemlen = min(len(stem), 255) - lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n)) - prevf = f - _checkforbidden(files) - return ''.join(lines) - class lazymanifestiter(object): def __init__(self, lm): self.pos = 0 @@ -414,13 +364,7 @@ class manifestdict(object): def __init__(self, data=''): - if data.startswith('\0'): - #_lazymanifest can not parse v2 - self._lm = _lazymanifest('') - for f, n, fl in _parsev2(data): - self._lm[f] = n, fl - else: - self._lm = _lazymanifest(data) + self._lm = _lazymanifest(data) def __getitem__(self, key): return self._lm[key][0] @@ -589,12 +533,9 @@ def iterentries(self): return self._lm.iterentries() - def text(self, usemanifestv2=False): - if usemanifestv2: - return _textv2(self._lm.iterentries()) - else: - # use (probably) native version for v1 - return self._lm.text() + def text(self): + # most likely uses native version + return self._lm.text() def fastdelta(self, base, changes): """Given a base manifest text as a bytearray and a list of changes @@ -1138,12 +1079,12 @@ if fl: self._flags[f] = fl - def text(self, usemanifestv2=False): + def text(self): """Get the full data of this manifest as a bytestring.""" self._load() - return _text(self.iterentries(), usemanifestv2) + return _text(self.iterentries()) - def dirtext(self, usemanifestv2=False): + def dirtext(self): """Get the full data of this directory as a bytestring. Make sure that any submanifests have been written first, so their nodeids are correct. """ @@ -1151,7 +1092,7 @@ flags = self.flags dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs] files = [(f, self._files[f], flags(f)) for f in self._files] - return _text(sorted(dirs + files), usemanifestv2) + return _text(sorted(dirs + files)) def read(self, gettext, readsubtree): def _load_for_read(s): @@ -1208,15 +1149,12 @@ # stacks of commits, the number can go up, hence the config knob below. cachesize = 4 optiontreemanifest = False - usemanifestv2 = False opts = getattr(opener, 'options', None) if opts is not None: cachesize = opts.get('manifestcachesize', cachesize) optiontreemanifest = opts.get('treemanifest', False) - usemanifestv2 = opts.get('manifestv2', usemanifestv2) self._treeondisk = optiontreemanifest or treemanifest - self._usemanifestv2 = usemanifestv2 self._fulltextcache = util.lrucachedict(cachesize) @@ -1262,8 +1200,7 @@ return self._dirlogcache[d] def add(self, m, transaction, link, p1, p2, added, removed, readtree=None): - if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta') - and not self._usemanifestv2): + if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'): # If our first parent is in the manifest cache, we can # compute a delta here using properties we know about the # manifest up-front, which may save time later for the @@ -1290,7 +1227,7 @@ n = self._addtree(m, transaction, link, m1, m2, readtree) arraytext = None else: - text = m.text(self._usemanifestv2) + text = m.text() n = self.addrevision(text, transaction, link, p1, p2) arraytext = bytearray(text) @@ -1309,13 +1246,13 @@ sublog.add(subm, transaction, link, subp1, subp2, None, None, readtree=readtree) m.writesubtrees(m1, m2, writesubtree) - text = m.dirtext(self._usemanifestv2) + text = m.dirtext() n = None if self._dir != '': # Double-check whether contents are unchanged to one parent - if text == m1.dirtext(self._usemanifestv2): + if text == m1.dirtext(): n = m1.node() - elif text == m2.dirtext(self._usemanifestv2): + elif text == m2.dirtext(): n = m2.node() if not n: @@ -1493,19 +1430,6 @@ Changing the value of `shallow` has no effect on flat manifests. ''' revlog = self._revlog() - if revlog._usemanifestv2: - # Need to perform a slow delta - r0 = revlog.deltaparent(revlog.rev(self._node)) - m0 = self._manifestlog[revlog.node(r0)].read() - m1 = self.read() - md = manifestdict() - for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems(): - if n1: - md[f] = n1 - if fl1: - md.setflag(f, fl1) - return md - r = revlog.rev(self._node) d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) return manifestdict(d) @@ -1608,7 +1532,7 @@ its 't' flag. ''' revlog = self._revlog() - if shallow and not revlog._usemanifestv2: + if shallow: r = revlog.rev(self._node) d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r)) return manifestdict(d) diff -r b8d0761a85c7 -r 0147a4730420 mercurial/upgrade.py --- a/mercurial/upgrade.py Wed Feb 21 16:47:39 2018 -0800 +++ b/mercurial/upgrade.py Thu Feb 22 20:04:42 2018 -0500 @@ -46,7 +46,6 @@ return { # The upgrade code does not yet support these experimental features. # This is an artificial limitation. - 'manifestv2', 'treemanifest', # This was a precursor to generaldelta and was never enabled by default. # It should (hopefully) not exist in the wild. diff -r b8d0761a85c7 -r 0147a4730420 tests/test-manifest.py --- a/tests/test-manifest.py Wed Feb 21 16:47:39 2018 -0800 +++ b/tests/test-manifest.py Thu Feb 22 20:04:42 2018 -0500 @@ -11,7 +11,6 @@ ) EMTPY_MANIFEST = b'' -EMTPY_MANIFEST_V2 = b'\0\n' HASH_1 = b'1' * 40 BIN_HASH_1 = binascii.unhexlify(HASH_1) @@ -28,42 +27,6 @@ b'flag2': b'l', } -# Same data as A_SHORT_MANIFEST -A_SHORT_MANIFEST_V2 = ( - b'\0\n' - b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n' - b'\x00foo\0%(flag1)s\n%(hash1)s\n' - ) % {b'hash1': BIN_HASH_1, - b'flag1': b'', - b'hash2': BIN_HASH_2, - b'flag2': b'l', - } - -# Same data as A_SHORT_MANIFEST -A_METADATA_MANIFEST = ( - b'\0foo\0bar\n' - b'\x00bar/baz/qux.py\0%(flag2)s\0foo\0bar\n%(hash2)s\n' # flag and metadata - b'\x00foo\0%(flag1)s\0foo\n%(hash1)s\n' # no flag, but metadata - ) % {b'hash1': BIN_HASH_1, - b'flag1': b'', - b'hash2': BIN_HASH_2, - b'flag2': b'l', - } - -A_STEM_COMPRESSED_MANIFEST = ( - b'\0\n' - b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n' - b'\x04qux/foo.py\0%(flag1)s\n%(hash1)s\n' # simple case of 4 stem chars - b'\x0az.py\0%(flag1)s\n%(hash1)s\n' # tricky newline = 10 stem characters - b'\x00%(verylongdir)sx/x\0\n%(hash1)s\n' - b'\xffx/y\0\n%(hash2)s\n' # more than 255 stem chars - ) % {b'hash1': BIN_HASH_1, - b'flag1': b'', - b'hash2': BIN_HASH_2, - b'flag2': b'l', - b'verylongdir': 255 * b'x', - } - A_DEEPER_MANIFEST = ( b'a/b/c/bar.py\0%(hash3)s%(flag1)s\n' b'a/b/c/bar.txt\0%(hash1)s%(flag1)s\n' @@ -111,11 +74,6 @@ self.assertEqual(0, len(m)) self.assertEqual([], list(m)) - def testEmptyManifestv2(self): - m = self.parsemanifest(EMTPY_MANIFEST_V2) - self.assertEqual(0, len(m)) - self.assertEqual([], list(m)) - def testManifest(self): m = self.parsemanifest(A_SHORT_MANIFEST) self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m)) @@ -126,31 +84,6 @@ with self.assertRaises(KeyError): m[b'wat'] - def testParseManifestV2(self): - m1 = self.parsemanifest(A_SHORT_MANIFEST) - m2 = self.parsemanifest(A_SHORT_MANIFEST_V2) - # Should have same content as A_SHORT_MANIFEST - self.assertEqual(m1.text(), m2.text()) - - def testParseManifestMetadata(self): - # Metadata is for future-proofing and should be accepted but ignored - m = self.parsemanifest(A_METADATA_MANIFEST) - self.assertEqual(A_SHORT_MANIFEST, m.text()) - - def testParseManifestStemCompression(self): - m = self.parsemanifest(A_STEM_COMPRESSED_MANIFEST) - self.assertIn(b'bar/baz/qux.py', m) - self.assertIn(b'bar/qux/foo.py', m) - self.assertIn(b'bar/qux/foz.py', m) - self.assertIn(256 * b'x' + b'/x', m) - self.assertIn(256 * b'x' + b'/y', m) - self.assertEqual(A_STEM_COMPRESSED_MANIFEST, m.text(usemanifestv2=True)) - - def testTextV2(self): - m1 = self.parsemanifest(A_SHORT_MANIFEST) - v2text = m1.text(usemanifestv2=True) - self.assertEqual(A_SHORT_MANIFEST_V2, v2text) - def testSetItem(self): want = BIN_HASH_1 diff -r b8d0761a85c7 -r 0147a4730420 tests/test-manifestv2.t --- a/tests/test-manifestv2.t Wed Feb 21 16:47:39 2018 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -Create repo with old manifest - - $ cat << EOF >> $HGRCPATH - > [format] - > usegeneraldelta=yes - > EOF - - $ hg init existing - $ cd existing - $ echo footext > foo - $ hg add foo - $ hg commit -m initial - -We're using v1, so no manifestv2 entry is in requires yet. - - $ grep manifestv2 .hg/requires - [1] - -Let's clone this with manifestv2 enabled to switch to the new format for -future commits. - - $ cd .. - $ hg clone --pull existing new --config experimental.manifestv2=1 - requesting all changes - adding changesets - adding manifests - adding file changes - added 1 changesets with 1 changes to 1 files - new changesets 0fc9a4fafa44 - updating to branch default - 1 files updated, 0 files merged, 0 files removed, 0 files unresolved - $ cd new - -Check that entry was added to .hg/requires. - - $ grep manifestv2 .hg/requires - manifestv2 - -Make a new commit. - - $ echo newfootext > foo - $ hg commit -m new - -Check that the manifest actually switched to v2. - - $ hg debugdata -m 0 - foo\x0021e958b1dca695a60ee2e9cf151753204ee0f9e9 (esc) - - $ hg debugdata -m 1 - \x00 (esc) - \x00foo\x00 (esc) - I\xab\x7f\xb8(\x83\xcas\x15\x9d\xc2\xd3\xd3:5\x08\xbad5_ (esc) - -Check that manifestv2 is used if the requirement is present, even if it's -disabled in the config. - - $ echo newerfootext > foo - $ hg --config experimental.manifestv2=False commit -m newer - - $ hg debugdata -m 2 - \x00 (esc) - \x00foo\x00 (esc) - \xa6\xb1\xfb\xef]\x91\xa1\x19`\xf3.#\x90S\xf8\x06 \xe2\x19\x00 (esc) - -Check that we can still read v1 manifests. - - $ hg files -r 0 - foo - - $ cd .. - -Check that entry is added to .hg/requires on repo creation - - $ hg --config experimental.manifestv2=True init repo - $ cd repo - $ grep manifestv2 .hg/requires - manifestv2 - -Set up simple repo - - $ echo a > file1 - $ echo b > file2 - $ echo c > file3 - $ hg ci -Aqm 'initial' - $ echo d > file2 - $ hg ci -m 'modify file2' - -Check that 'hg verify', which uses manifest.readdelta(), works - - $ hg verify - checking changesets - checking manifests - crosschecking files in changesets and manifests - checking files - 3 files, 2 changesets, 4 total revisions - -Check that manifest revlog is smaller than for v1 - - $ hg debugindex -m - rev offset length delta linkrev nodeid p1 p2 - 0 0 81 -1 0 57361477c778 000000000000 000000000000 - 1 81 33 0 1 aeaab5a2ef74 57361477c778 000000000000 diff -r b8d0761a85c7 -r 0147a4730420 tests/test-upgrade-repo.t --- a/tests/test-upgrade-repo.t Wed Feb 21 16:47:39 2018 -0800 +++ b/tests/test-upgrade-repo.t Thu Feb 22 20:04:42 2018 -0500 @@ -31,23 +31,18 @@ abort: cannot upgrade repository; unsupported source requirement: shared [255] -Do not yet support upgrading manifestv2 and treemanifest repos - - $ hg --config experimental.manifestv2=true init manifestv2 - $ hg -R manifestv2 debugupgraderepo - abort: cannot upgrade repository; unsupported source requirement: manifestv2 - [255] +Do not yet support upgrading treemanifest repos $ hg --config experimental.treemanifest=true init treemanifest $ hg -R treemanifest debugupgraderepo abort: cannot upgrade repository; unsupported source requirement: treemanifest [255] -Cannot add manifestv2 or treemanifest requirement during upgrade +Cannot add treemanifest requirement during upgrade $ hg init disallowaddedreq - $ hg -R disallowaddedreq --config experimental.manifestv2=true --config experimental.treemanifest=true debugupgraderepo - abort: cannot upgrade repository; do not support adding requirement: manifestv2, treemanifest + $ hg -R disallowaddedreq --config experimental.treemanifest=true debugupgraderepo + abort: cannot upgrade repository; do not support adding requirement: treemanifest [255] An upgrade of a repository created with recommended settings only suggests optimizations