mercurial/manifest.py
changeset 42399 12bd4e2d4d06
parent 42369 6310180662f5
parent 42172 c3484ddbdb96
child 42478 bc4373babd04
--- a/mercurial/manifest.py	Sat May 25 19:49:44 2019 +0300
+++ b/mercurial/manifest.py	Tue May 28 09:57:53 2019 -0400
@@ -35,6 +35,9 @@
 parsers = policy.importmod(r'parsers')
 propertycache = util.propertycache
 
+# Allow tests to more easily test the alternate path in manifestdict.fastdelta()
+FASTDELTA_TEXTDIFF_THRESHOLD = 1000
+
 def _parse(data):
     # This method does a little bit of excessive-looking
     # precondition checking. This is so that the behavior of this
@@ -123,17 +126,36 @@
     return (a > b) - (a < b)
 
 class _lazymanifest(object):
-    def __init__(self, data, positions=None, extrainfo=None, extradata=None):
+    """A pure python manifest backed by a byte string.  It is supplimented with
+    internal lists as it is modified, until it is compacted back to a pure byte
+    string.
+
+    ``data`` is the initial manifest data.
+
+    ``positions`` is a list of offsets, one per manifest entry.  Positive
+    values are offsets into ``data``, negative values are offsets into the
+    ``extradata`` list.  When an entry is removed, its entry is dropped from
+    ``positions``.  The values are encoded such that when walking the list and
+    indexing into ``data`` or ``extradata`` as appropriate, the entries are
+    sorted by filename.
+
+    ``extradata`` is a list of (key, hash, flags) for entries that were added or
+    modified since the manifest was created or compacted.
+    """
+    def __init__(self, data, positions=None, extrainfo=None, extradata=None,
+                 hasremovals=False):
         if positions is None:
             self.positions = self.findlines(data)
             self.extrainfo = [0] * len(self.positions)
             self.data = data
             self.extradata = []
+            self.hasremovals = False
         else:
             self.positions = positions[:]
             self.extrainfo = extrainfo[:]
             self.extradata = extradata[:]
             self.data = data
+            self.hasremovals = hasremovals
 
     def findlines(self, data):
         if not data:
@@ -240,7 +262,10 @@
         self.positions = self.positions[:needle] + self.positions[needle + 1:]
         self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
         if cur >= 0:
+            # This does NOT unsort the list as far as the search functions are
+            # concerned, as they only examine lines mapped by self.positions.
             self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
+            self.hasremovals = True
 
     def __setitem__(self, key, value):
         if not isinstance(key, bytes):
@@ -276,11 +301,11 @@
     def copy(self):
         # XXX call _compact like in C?
         return _lazymanifest(self.data, self.positions, self.extrainfo,
-            self.extradata)
+            self.extradata, self.hasremovals)
 
     def _compact(self):
         # hopefully not called TOO often
-        if len(self.extradata) == 0:
+        if len(self.extradata) == 0 and not self.hasremovals:
             return
         l = []
         i = 0
@@ -290,11 +315,25 @@
             if self.positions[i] >= 0:
                 cur = self.positions[i]
                 last_cut = cur
+
+                # Collect all contiguous entries in the buffer at the current
+                # offset, breaking out only for added/modified items held in
+                # extradata, or a deleted line prior to the next position.
                 while True:
                     self.positions[i] = offset
                     i += 1
                     if i == len(self.positions) or self.positions[i] < 0:
                         break
+
+                    # A removed file has no positions[] entry, but does have an
+                    # overwritten first byte.  Break out and find the end of the
+                    # current good entry/entries if there is a removed file
+                    # before the next position.
+                    if (self.hasremovals
+                        and self.data.find('\n\x00', cur,
+                                           self.positions[i]) != -1):
+                        break
+
                     offset += self.positions[i] - cur
                     cur = self.positions[i]
                 end_cut = self.data.find('\n', cur)
@@ -313,6 +352,7 @@
                     offset += len(l[-1])
                     i += 1
         self.data = ''.join(l)
+        self.hasremovals = False
         self.extradata = []
 
     def _pack(self, d):
@@ -558,7 +598,7 @@
         addbuf = util.buffer(base)
 
         changes = list(changes)
-        if len(changes) < 1000:
+        if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
             # start with a readonly loop that finds the offset of
             # each line and creates the deltas
             for f, todelete in changes: