cleanup: say goodbye to manifestv2 format
This experiment was a bust: we'd hoped for smaller repository sizes,
but things got larger. Google ended up rolling out tree manifests in a
format that's compatible with the original manifest format, and I
believe Facebook is doing the same. This code was never implemented as
native speedups, so I'm pretty comfortable saying nobody is using the
experimental feature. Let's rip it out.
I noticed this code still kicking around because I was investigating a
repo corruption issue for timeless.
.. bc::
Support for the experimental manifestv2 format has been removed, as
it was never completed and failed to meet expectations.
Differential Revision: https://phab.mercurial-scm.org/D2393
--- a/mercurial/configitems.py Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/configitems.py Thu Feb 22 20:04:42 2018 -0500
@@ -538,9 +538,6 @@
coreconfigitem('experimental', 'httppostargs',
default=False,
)
-coreconfigitem('experimental', 'manifestv2',
- default=False,
-)
coreconfigitem('experimental', 'mergedriver',
default=None,
)
--- a/mercurial/help/internals/requirements.txt Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/help/internals/requirements.txt Thu Feb 22 20:04:42 2018 -0500
@@ -1,4 +1,3 @@
-
Repositories contain a file (``.hg/requires``) containing a list of
features/capabilities that are *required* for clients to interface
with the repository. This file has been present in Mercurial since
@@ -105,8 +104,10 @@
Denotes that version 2 of manifests are being used.
Support for this requirement was added in Mercurial 3.4 (released
-May 2015). The requirement is currently experimental and is disabled
-by default.
+May 2015). The new format failed to meet expectations and support
+for the format and requirement were removed in Mercurial 4.6
+(released May 2018) since the feature never graduated frome experiment
+status.
treemanifest
============
--- a/mercurial/localrepo.py Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/localrepo.py Thu Feb 22 20:04:42 2018 -0500
@@ -303,11 +303,15 @@
class localrepository(object):
+ # obsolete experimental requirements:
+ # - manifestv2: An experimental new manifest format that allowed
+ # for stem compression of long paths. Experiment ended up not
+ # being successful (repository sizes went up due to worse delta
+ # chains), and the code was deleted in 4.6.
supportedformats = {
'revlogv1',
'generaldelta',
'treemanifest',
- 'manifestv2',
REVLOGV2_REQUIREMENT,
}
_basesupported = supportedformats | {
@@ -322,7 +326,6 @@
'revlogv1',
'generaldelta',
'treemanifest',
- 'manifestv2',
}
# a list of (ui, featureset) functions.
@@ -2261,8 +2264,6 @@
requirements.add('generaldelta')
if ui.configbool('experimental', 'treemanifest'):
requirements.add('treemanifest')
- if ui.configbool('experimental', 'manifestv2'):
- requirements.add('manifestv2')
revlogv2 = ui.config('experimental', 'revlogv2')
if revlogv2 == 'enable-unstable-format-and-corrupt-my-data':
--- a/mercurial/manifest.py Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/manifest.py Thu Feb 22 20:04:42 2018 -0500
@@ -9,7 +9,6 @@
import heapq
import itertools
-import os
import struct
from .i18n import _
@@ -28,7 +27,7 @@
parsers = policy.importmod(r'parsers')
propertycache = util.propertycache
-def _parsev1(data):
+def _parse(data):
# This method does a little bit of excessive-looking
# precondition checking. This is so that the behavior of this
# class exactly matches its C counterpart to try and help
@@ -47,43 +46,7 @@
else:
yield f, bin(n), ''
-def _parsev2(data):
- metadataend = data.find('\n')
- # Just ignore metadata for now
- pos = metadataend + 1
- prevf = ''
- while pos < len(data):
- end = data.find('\n', pos + 1) # +1 to skip stem length byte
- if end == -1:
- raise ValueError('Manifest ended with incomplete file entry.')
- stemlen = ord(data[pos:pos + 1])
- items = data[pos + 1:end].split('\0')
- f = prevf[:stemlen] + items[0]
- if prevf > f:
- raise ValueError('Manifest entries not in sorted order.')
- fl = items[1]
- # Just ignore metadata (items[2:] for now)
- n = data[end + 1:end + 21]
- yield f, n, fl
- pos = end + 22
- prevf = f
-
-def _parse(data):
- """Generates (path, node, flags) tuples from a manifest text"""
- if data.startswith('\0'):
- return iter(_parsev2(data))
- else:
- return iter(_parsev1(data))
-
-def _text(it, usemanifestv2):
- """Given an iterator over (path, node, flags) tuples, returns a manifest
- text"""
- if usemanifestv2:
- return _textv2(it)
- else:
- return _textv1(it)
-
-def _textv1(it):
+def _text(it):
files = []
lines = []
_hex = revlog.hex
@@ -96,19 +59,6 @@
_checkforbidden(files)
return ''.join(lines)
-def _textv2(it):
- files = []
- lines = ['\0\n']
- prevf = ''
- for f, n, fl in it:
- files.append(f)
- stem = os.path.commonprefix([prevf, f])
- stemlen = min(len(stem), 255)
- lines.append("%c%s\0%s\n%s\n" % (stemlen, f[stemlen:], fl, n))
- prevf = f
- _checkforbidden(files)
- return ''.join(lines)
-
class lazymanifestiter(object):
def __init__(self, lm):
self.pos = 0
@@ -414,13 +364,7 @@
class manifestdict(object):
def __init__(self, data=''):
- if data.startswith('\0'):
- #_lazymanifest can not parse v2
- self._lm = _lazymanifest('')
- for f, n, fl in _parsev2(data):
- self._lm[f] = n, fl
- else:
- self._lm = _lazymanifest(data)
+ self._lm = _lazymanifest(data)
def __getitem__(self, key):
return self._lm[key][0]
@@ -589,12 +533,9 @@
def iterentries(self):
return self._lm.iterentries()
- def text(self, usemanifestv2=False):
- if usemanifestv2:
- return _textv2(self._lm.iterentries())
- else:
- # use (probably) native version for v1
- return self._lm.text()
+ def text(self):
+ # most likely uses native version
+ return self._lm.text()
def fastdelta(self, base, changes):
"""Given a base manifest text as a bytearray and a list of changes
@@ -1138,12 +1079,12 @@
if fl:
self._flags[f] = fl
- def text(self, usemanifestv2=False):
+ def text(self):
"""Get the full data of this manifest as a bytestring."""
self._load()
- return _text(self.iterentries(), usemanifestv2)
+ return _text(self.iterentries())
- def dirtext(self, usemanifestv2=False):
+ def dirtext(self):
"""Get the full data of this directory as a bytestring. Make sure that
any submanifests have been written first, so their nodeids are correct.
"""
@@ -1151,7 +1092,7 @@
flags = self.flags
dirs = [(d[:-1], self._dirs[d]._node, 't') for d in self._dirs]
files = [(f, self._files[f], flags(f)) for f in self._files]
- return _text(sorted(dirs + files), usemanifestv2)
+ return _text(sorted(dirs + files))
def read(self, gettext, readsubtree):
def _load_for_read(s):
@@ -1208,15 +1149,12 @@
# stacks of commits, the number can go up, hence the config knob below.
cachesize = 4
optiontreemanifest = False
- usemanifestv2 = False
opts = getattr(opener, 'options', None)
if opts is not None:
cachesize = opts.get('manifestcachesize', cachesize)
optiontreemanifest = opts.get('treemanifest', False)
- usemanifestv2 = opts.get('manifestv2', usemanifestv2)
self._treeondisk = optiontreemanifest or treemanifest
- self._usemanifestv2 = usemanifestv2
self._fulltextcache = util.lrucachedict(cachesize)
@@ -1262,8 +1200,7 @@
return self._dirlogcache[d]
def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
- if (p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta')
- and not self._usemanifestv2):
+ if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
# If our first parent is in the manifest cache, we can
# compute a delta here using properties we know about the
# manifest up-front, which may save time later for the
@@ -1290,7 +1227,7 @@
n = self._addtree(m, transaction, link, m1, m2, readtree)
arraytext = None
else:
- text = m.text(self._usemanifestv2)
+ text = m.text()
n = self.addrevision(text, transaction, link, p1, p2)
arraytext = bytearray(text)
@@ -1309,13 +1246,13 @@
sublog.add(subm, transaction, link, subp1, subp2, None, None,
readtree=readtree)
m.writesubtrees(m1, m2, writesubtree)
- text = m.dirtext(self._usemanifestv2)
+ text = m.dirtext()
n = None
if self._dir != '':
# Double-check whether contents are unchanged to one parent
- if text == m1.dirtext(self._usemanifestv2):
+ if text == m1.dirtext():
n = m1.node()
- elif text == m2.dirtext(self._usemanifestv2):
+ elif text == m2.dirtext():
n = m2.node()
if not n:
@@ -1493,19 +1430,6 @@
Changing the value of `shallow` has no effect on flat manifests.
'''
revlog = self._revlog()
- if revlog._usemanifestv2:
- # Need to perform a slow delta
- r0 = revlog.deltaparent(revlog.rev(self._node))
- m0 = self._manifestlog[revlog.node(r0)].read()
- m1 = self.read()
- md = manifestdict()
- for f, ((n0, fl0), (n1, fl1)) in m0.diff(m1).iteritems():
- if n1:
- md[f] = n1
- if fl1:
- md.setflag(f, fl1)
- return md
-
r = revlog.rev(self._node)
d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
return manifestdict(d)
@@ -1608,7 +1532,7 @@
its 't' flag.
'''
revlog = self._revlog()
- if shallow and not revlog._usemanifestv2:
+ if shallow:
r = revlog.rev(self._node)
d = mdiff.patchtext(revlog.revdiff(revlog.deltaparent(r), r))
return manifestdict(d)
--- a/mercurial/upgrade.py Wed Feb 21 16:47:39 2018 -0800
+++ b/mercurial/upgrade.py Thu Feb 22 20:04:42 2018 -0500
@@ -46,7 +46,6 @@
return {
# The upgrade code does not yet support these experimental features.
# This is an artificial limitation.
- 'manifestv2',
'treemanifest',
# This was a precursor to generaldelta and was never enabled by default.
# It should (hopefully) not exist in the wild.
--- a/tests/test-manifest.py Wed Feb 21 16:47:39 2018 -0800
+++ b/tests/test-manifest.py Thu Feb 22 20:04:42 2018 -0500
@@ -11,7 +11,6 @@
)
EMTPY_MANIFEST = b''
-EMTPY_MANIFEST_V2 = b'\0\n'
HASH_1 = b'1' * 40
BIN_HASH_1 = binascii.unhexlify(HASH_1)
@@ -28,42 +27,6 @@
b'flag2': b'l',
}
-# Same data as A_SHORT_MANIFEST
-A_SHORT_MANIFEST_V2 = (
- b'\0\n'
- b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n'
- b'\x00foo\0%(flag1)s\n%(hash1)s\n'
- ) % {b'hash1': BIN_HASH_1,
- b'flag1': b'',
- b'hash2': BIN_HASH_2,
- b'flag2': b'l',
- }
-
-# Same data as A_SHORT_MANIFEST
-A_METADATA_MANIFEST = (
- b'\0foo\0bar\n'
- b'\x00bar/baz/qux.py\0%(flag2)s\0foo\0bar\n%(hash2)s\n' # flag and metadata
- b'\x00foo\0%(flag1)s\0foo\n%(hash1)s\n' # no flag, but metadata
- ) % {b'hash1': BIN_HASH_1,
- b'flag1': b'',
- b'hash2': BIN_HASH_2,
- b'flag2': b'l',
- }
-
-A_STEM_COMPRESSED_MANIFEST = (
- b'\0\n'
- b'\x00bar/baz/qux.py\0%(flag2)s\n%(hash2)s\n'
- b'\x04qux/foo.py\0%(flag1)s\n%(hash1)s\n' # simple case of 4 stem chars
- b'\x0az.py\0%(flag1)s\n%(hash1)s\n' # tricky newline = 10 stem characters
- b'\x00%(verylongdir)sx/x\0\n%(hash1)s\n'
- b'\xffx/y\0\n%(hash2)s\n' # more than 255 stem chars
- ) % {b'hash1': BIN_HASH_1,
- b'flag1': b'',
- b'hash2': BIN_HASH_2,
- b'flag2': b'l',
- b'verylongdir': 255 * b'x',
- }
-
A_DEEPER_MANIFEST = (
b'a/b/c/bar.py\0%(hash3)s%(flag1)s\n'
b'a/b/c/bar.txt\0%(hash1)s%(flag1)s\n'
@@ -111,11 +74,6 @@
self.assertEqual(0, len(m))
self.assertEqual([], list(m))
- def testEmptyManifestv2(self):
- m = self.parsemanifest(EMTPY_MANIFEST_V2)
- self.assertEqual(0, len(m))
- self.assertEqual([], list(m))
-
def testManifest(self):
m = self.parsemanifest(A_SHORT_MANIFEST)
self.assertEqual([b'bar/baz/qux.py', b'foo'], list(m))
@@ -126,31 +84,6 @@
with self.assertRaises(KeyError):
m[b'wat']
- def testParseManifestV2(self):
- m1 = self.parsemanifest(A_SHORT_MANIFEST)
- m2 = self.parsemanifest(A_SHORT_MANIFEST_V2)
- # Should have same content as A_SHORT_MANIFEST
- self.assertEqual(m1.text(), m2.text())
-
- def testParseManifestMetadata(self):
- # Metadata is for future-proofing and should be accepted but ignored
- m = self.parsemanifest(A_METADATA_MANIFEST)
- self.assertEqual(A_SHORT_MANIFEST, m.text())
-
- def testParseManifestStemCompression(self):
- m = self.parsemanifest(A_STEM_COMPRESSED_MANIFEST)
- self.assertIn(b'bar/baz/qux.py', m)
- self.assertIn(b'bar/qux/foo.py', m)
- self.assertIn(b'bar/qux/foz.py', m)
- self.assertIn(256 * b'x' + b'/x', m)
- self.assertIn(256 * b'x' + b'/y', m)
- self.assertEqual(A_STEM_COMPRESSED_MANIFEST, m.text(usemanifestv2=True))
-
- def testTextV2(self):
- m1 = self.parsemanifest(A_SHORT_MANIFEST)
- v2text = m1.text(usemanifestv2=True)
- self.assertEqual(A_SHORT_MANIFEST_V2, v2text)
-
def testSetItem(self):
want = BIN_HASH_1
--- a/tests/test-manifestv2.t Wed Feb 21 16:47:39 2018 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-Create repo with old manifest
-
- $ cat << EOF >> $HGRCPATH
- > [format]
- > usegeneraldelta=yes
- > EOF
-
- $ hg init existing
- $ cd existing
- $ echo footext > foo
- $ hg add foo
- $ hg commit -m initial
-
-We're using v1, so no manifestv2 entry is in requires yet.
-
- $ grep manifestv2 .hg/requires
- [1]
-
-Let's clone this with manifestv2 enabled to switch to the new format for
-future commits.
-
- $ cd ..
- $ hg clone --pull existing new --config experimental.manifestv2=1
- requesting all changes
- adding changesets
- adding manifests
- adding file changes
- added 1 changesets with 1 changes to 1 files
- new changesets 0fc9a4fafa44
- updating to branch default
- 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
- $ cd new
-
-Check that entry was added to .hg/requires.
-
- $ grep manifestv2 .hg/requires
- manifestv2
-
-Make a new commit.
-
- $ echo newfootext > foo
- $ hg commit -m new
-
-Check that the manifest actually switched to v2.
-
- $ hg debugdata -m 0
- foo\x0021e958b1dca695a60ee2e9cf151753204ee0f9e9 (esc)
-
- $ hg debugdata -m 1
- \x00 (esc)
- \x00foo\x00 (esc)
- I\xab\x7f\xb8(\x83\xcas\x15\x9d\xc2\xd3\xd3:5\x08\xbad5_ (esc)
-
-Check that manifestv2 is used if the requirement is present, even if it's
-disabled in the config.
-
- $ echo newerfootext > foo
- $ hg --config experimental.manifestv2=False commit -m newer
-
- $ hg debugdata -m 2
- \x00 (esc)
- \x00foo\x00 (esc)
- \xa6\xb1\xfb\xef]\x91\xa1\x19`\xf3.#\x90S\xf8\x06 \xe2\x19\x00 (esc)
-
-Check that we can still read v1 manifests.
-
- $ hg files -r 0
- foo
-
- $ cd ..
-
-Check that entry is added to .hg/requires on repo creation
-
- $ hg --config experimental.manifestv2=True init repo
- $ cd repo
- $ grep manifestv2 .hg/requires
- manifestv2
-
-Set up simple repo
-
- $ echo a > file1
- $ echo b > file2
- $ echo c > file3
- $ hg ci -Aqm 'initial'
- $ echo d > file2
- $ hg ci -m 'modify file2'
-
-Check that 'hg verify', which uses manifest.readdelta(), works
-
- $ hg verify
- checking changesets
- checking manifests
- crosschecking files in changesets and manifests
- checking files
- 3 files, 2 changesets, 4 total revisions
-
-Check that manifest revlog is smaller than for v1
-
- $ hg debugindex -m
- rev offset length delta linkrev nodeid p1 p2
- 0 0 81 -1 0 57361477c778 000000000000 000000000000
- 1 81 33 0 1 aeaab5a2ef74 57361477c778 000000000000
--- a/tests/test-upgrade-repo.t Wed Feb 21 16:47:39 2018 -0800
+++ b/tests/test-upgrade-repo.t Thu Feb 22 20:04:42 2018 -0500
@@ -31,23 +31,18 @@
abort: cannot upgrade repository; unsupported source requirement: shared
[255]
-Do not yet support upgrading manifestv2 and treemanifest repos
-
- $ hg --config experimental.manifestv2=true init manifestv2
- $ hg -R manifestv2 debugupgraderepo
- abort: cannot upgrade repository; unsupported source requirement: manifestv2
- [255]
+Do not yet support upgrading treemanifest repos
$ hg --config experimental.treemanifest=true init treemanifest
$ hg -R treemanifest debugupgraderepo
abort: cannot upgrade repository; unsupported source requirement: treemanifest
[255]
-Cannot add manifestv2 or treemanifest requirement during upgrade
+Cannot add treemanifest requirement during upgrade
$ hg init disallowaddedreq
- $ hg -R disallowaddedreq --config experimental.manifestv2=true --config experimental.treemanifest=true debugupgraderepo
- abort: cannot upgrade repository; do not support adding requirement: manifestv2, treemanifest
+ $ hg -R disallowaddedreq --config experimental.treemanifest=true debugupgraderepo
+ abort: cannot upgrade repository; do not support adding requirement: treemanifest
[255]
An upgrade of a repository created with recommended settings only suggests optimizations