# HG changeset patch # User Augie Fackler # Date 1559051873 14400 # Node ID 12bd4e2d4d067939a1467a2f492b8ecad1748ef6 # Parent e2e507573c7c9b5ca8982a81fa1bbd1ad6964520 # Parent c3484ddbdb9621256d597ed86b90d229c59c2af9 merge with stable diff -r e2e507573c7c -r 12bd4e2d4d06 mercurial/manifest.py --- a/mercurial/manifest.py Sat May 25 19:49:44 2019 +0300 +++ b/mercurial/manifest.py Tue May 28 09:57:53 2019 -0400 @@ -35,6 +35,9 @@ parsers = policy.importmod(r'parsers') propertycache = util.propertycache +# Allow tests to more easily test the alternate path in manifestdict.fastdelta() +FASTDELTA_TEXTDIFF_THRESHOLD = 1000 + def _parse(data): # This method does a little bit of excessive-looking # precondition checking. This is so that the behavior of this @@ -123,17 +126,36 @@ return (a > b) - (a < b) class _lazymanifest(object): - def __init__(self, data, positions=None, extrainfo=None, extradata=None): + """A pure python manifest backed by a byte string. It is supplemented with + internal lists as it is modified, until it is compacted back to a pure byte + string. + + ``data`` is the initial manifest data. + + ``positions`` is a list of offsets, one per manifest entry. Positive + values are offsets into ``data``, negative values are offsets into the + ``extradata`` list. When an entry is removed, its entry is dropped from + ``positions``. The values are encoded such that when walking the list and + indexing into ``data`` or ``extradata`` as appropriate, the entries are + sorted by filename. + + ``extradata`` is a list of (key, hash, flags) for entries that were added or + modified since the manifest was created or compacted. 
+ """ + def __init__(self, data, positions=None, extrainfo=None, extradata=None, + hasremovals=False): if positions is None: self.positions = self.findlines(data) self.extrainfo = [0] * len(self.positions) self.data = data self.extradata = [] + self.hasremovals = False else: self.positions = positions[:] self.extrainfo = extrainfo[:] self.extradata = extradata[:] self.data = data + self.hasremovals = hasremovals def findlines(self, data): if not data: @@ -240,7 +262,10 @@ self.positions = self.positions[:needle] + self.positions[needle + 1:] self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] if cur >= 0: + # This does NOT unsort the list as far as the search functions are + # concerned, as they only examine lines mapped by self.positions. self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] + self.hasremovals = True def __setitem__(self, key, value): if not isinstance(key, bytes): @@ -276,11 +301,11 @@ def copy(self): # XXX call _compact like in C? return _lazymanifest(self.data, self.positions, self.extrainfo, - self.extradata) + self.extradata, self.hasremovals) def _compact(self): # hopefully not called TOO often - if len(self.extradata) == 0: + if len(self.extradata) == 0 and not self.hasremovals: return l = [] i = 0 @@ -290,11 +315,25 @@ if self.positions[i] >= 0: cur = self.positions[i] last_cut = cur + + # Collect all contiguous entries in the buffer at the current + # offset, breaking out only for added/modified items held in + # extradata, or a deleted line prior to the next position. while True: self.positions[i] = offset i += 1 if i == len(self.positions) or self.positions[i] < 0: break + + # A removed file has no positions[] entry, but does have an + # overwritten first byte. Break out and find the end of the + # current good entry/entries if there is a removed file + # before the next position. 
+ if (self.hasremovals + and self.data.find('\n\x00', cur, + self.positions[i]) != -1): + break + offset += self.positions[i] - cur cur = self.positions[i] end_cut = self.data.find('\n', cur) @@ -313,6 +352,7 @@ offset += len(l[-1]) i += 1 self.data = ''.join(l) + self.hasremovals = False self.extradata = [] def _pack(self, d): @@ -558,7 +598,7 @@ addbuf = util.buffer(base) changes = list(changes) - if len(changes) < 1000: + if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD: # start with a readonly loop that finds the offset of # each line and creates the deltas for f, todelete in changes: diff -r e2e507573c7c -r 12bd4e2d4d06 tests/test-manifest.t --- a/tests/test-manifest.t Sat May 25 19:49:44 2019 +0300 +++ b/tests/test-manifest.t Tue May 28 09:57:53 2019 -0400 @@ -201,3 +201,78 @@ total cache data size 425 bytes, on-disk 425 bytes $ hg log -r '0' --debug | grep 'manifest:' manifest: 0:fce2a30dedad1eef4da95ca1dc0004157aa527cf + +Test file removal (especially with pure). The tests are crafted such that there +will be contiguous spans of existing entries to ensure that is handled properly. 
+(In this case, a.txt, aa.txt and c.txt, cc.txt, and ccc.txt) + + $ cat > $TESTTMP/manifest.py << EOF + > from mercurial import ( + > extensions, + > manifest, + > ) + > def extsetup(ui): + > manifest.FASTDELTA_TEXTDIFF_THRESHOLD = 0 + > EOF + $ cat >> $HGRCPATH << EOF + > [extensions] + > manifest = $TESTTMP/manifest.py + > EOF + +Pure removes should actually remove all dropped entries + + $ hg init repo + $ cd repo + $ echo a > a.txt + $ echo aa > aa.txt + $ echo b > b.txt + $ echo c > c.txt + $ echo c > cc.txt + $ echo c > ccc.txt + $ echo b > d.txt + $ echo c > e.txt + $ hg ci -Aqm 'a-e' + + $ hg rm b.txt d.txt + $ hg ci -m 'remove b and d' + + $ hg debugdata -m 1 + a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc) + aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc) + c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + + $ hg up -qC . + + $ hg verify + checking changesets + checking manifests + crosschecking files in changesets and manifests + checking files + checked 2 changesets with 8 changes to 8 files + + $ hg rollback -q --config ui.rollback=True + $ hg rm b.txt d.txt + $ echo bb > bb.txt + +A mix of adds and removes should remove all dropped entries. 
+ + $ hg ci -Aqm 'remove b and d; add bb' + + $ hg debugdata -m 1 + a.txt\x00b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (esc) + aa.txt\x00a4bdc161c8fbb523c9a60409603f8710ff49a571 (esc) + bb.txt\x0004c6faf8a9fdd848a5304dfc1704749a374dff44 (esc) + c.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + cc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + ccc.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + e.txt\x00149da44f2a4e14f488b7bd4157945a9837408c00 (esc) + + $ hg verify + checking changesets + checking manifests + crosschecking files in changesets and manifests + checking files + checked 2 changesets with 9 changes to 9 files