comparison mercurial/manifest.py @ 42380:12bd4e2d4d06

merge with stable
author Augie Fackler <augie@google.com>
date Tue, 28 May 2019 09:57:53 -0400
parents 6310180662f5 c3484ddbdb96
children bc4373babd04
comparison
equal deleted inserted replaced
42379:e2e507573c7c 42380:12bd4e2d4d06
33 ) 33 )
34 34
35 parsers = policy.importmod(r'parsers') 35 parsers = policy.importmod(r'parsers')
36 propertycache = util.propertycache 36 propertycache = util.propertycache
37 37
38 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
39 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
40
38 def _parse(data): 41 def _parse(data):
39 # This method does a little bit of excessive-looking 42 # This method does a little bit of excessive-looking
40 # precondition checking. This is so that the behavior of this 43 # precondition checking. This is so that the behavior of this
41 # class exactly matches its C counterpart to try and help 44 # class exactly matches its C counterpart to try and help
42 # prevent surprise breakage for anyone that develops against 45 # prevent surprise breakage for anyone that develops against
121 124
122 def _cmp(a, b): 125 def _cmp(a, b):
123 return (a > b) - (a < b) 126 return (a > b) - (a < b)
124 127
125 class _lazymanifest(object): 128 class _lazymanifest(object):
126 def __init__(self, data, positions=None, extrainfo=None, extradata=None): 129 """A pure python manifest backed by a byte string. It is supplimented with
130 internal lists as it is modified, until it is compacted back to a pure byte
131 string.
132
133 ``data`` is the initial manifest data.
134
135 ``positions`` is a list of offsets, one per manifest entry. Positive
136 values are offsets into ``data``, negative values are offsets into the
137 ``extradata`` list. When an entry is removed, its entry is dropped from
138 ``positions``. The values are encoded such that when walking the list and
139 indexing into ``data`` or ``extradata`` as appropriate, the entries are
140 sorted by filename.
141
142 ``extradata`` is a list of (key, hash, flags) for entries that were added or
143 modified since the manifest was created or compacted.
144 """
145 def __init__(self, data, positions=None, extrainfo=None, extradata=None,
146 hasremovals=False):
127 if positions is None: 147 if positions is None:
128 self.positions = self.findlines(data) 148 self.positions = self.findlines(data)
129 self.extrainfo = [0] * len(self.positions) 149 self.extrainfo = [0] * len(self.positions)
130 self.data = data 150 self.data = data
131 self.extradata = [] 151 self.extradata = []
152 self.hasremovals = False
132 else: 153 else:
133 self.positions = positions[:] 154 self.positions = positions[:]
134 self.extrainfo = extrainfo[:] 155 self.extrainfo = extrainfo[:]
135 self.extradata = extradata[:] 156 self.extradata = extradata[:]
136 self.data = data 157 self.data = data
158 self.hasremovals = hasremovals
137 159
138 def findlines(self, data): 160 def findlines(self, data):
139 if not data: 161 if not data:
140 return [] 162 return []
141 pos = data.find("\n") 163 pos = data.find("\n")
238 raise KeyError 260 raise KeyError
239 cur = self.positions[needle] 261 cur = self.positions[needle]
240 self.positions = self.positions[:needle] + self.positions[needle + 1:] 262 self.positions = self.positions[:needle] + self.positions[needle + 1:]
241 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:] 263 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1:]
242 if cur >= 0: 264 if cur >= 0:
265 # This does NOT unsort the list as far as the search functions are
266 # concerned, as they only examine lines mapped by self.positions.
243 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:] 267 self.data = self.data[:cur] + '\x00' + self.data[cur + 1:]
268 self.hasremovals = True
244 269
245 def __setitem__(self, key, value): 270 def __setitem__(self, key, value):
246 if not isinstance(key, bytes): 271 if not isinstance(key, bytes):
247 raise TypeError("setitem: manifest keys must be a byte string.") 272 raise TypeError("setitem: manifest keys must be a byte string.")
248 if not isinstance(value, tuple) or len(value) != 2: 273 if not isinstance(value, tuple) or len(value) != 2:
274 self.extrainfo[needle:]) 299 self.extrainfo[needle:])
275 300
276 def copy(self): 301 def copy(self):
277 # XXX call _compact like in C? 302 # XXX call _compact like in C?
278 return _lazymanifest(self.data, self.positions, self.extrainfo, 303 return _lazymanifest(self.data, self.positions, self.extrainfo,
279 self.extradata) 304 self.extradata, self.hasremovals)
280 305
281 def _compact(self): 306 def _compact(self):
282 # hopefully not called TOO often 307 # hopefully not called TOO often
283 if len(self.extradata) == 0: 308 if len(self.extradata) == 0 and not self.hasremovals:
284 return 309 return
285 l = [] 310 l = []
286 i = 0 311 i = 0
287 offset = 0 312 offset = 0
288 self.extrainfo = [0] * len(self.positions) 313 self.extrainfo = [0] * len(self.positions)
289 while i < len(self.positions): 314 while i < len(self.positions):
290 if self.positions[i] >= 0: 315 if self.positions[i] >= 0:
291 cur = self.positions[i] 316 cur = self.positions[i]
292 last_cut = cur 317 last_cut = cur
318
319 # Collect all contiguous entries in the buffer at the current
320 # offset, breaking out only for added/modified items held in
321 # extradata, or a deleted line prior to the next position.
293 while True: 322 while True:
294 self.positions[i] = offset 323 self.positions[i] = offset
295 i += 1 324 i += 1
296 if i == len(self.positions) or self.positions[i] < 0: 325 if i == len(self.positions) or self.positions[i] < 0:
297 break 326 break
327
328 # A removed file has no positions[] entry, but does have an
329 # overwritten first byte. Break out and find the end of the
330 # current good entry/entries if there is a removed file
331 # before the next position.
332 if (self.hasremovals
333 and self.data.find('\n\x00', cur,
334 self.positions[i]) != -1):
335 break
336
298 offset += self.positions[i] - cur 337 offset += self.positions[i] - cur
299 cur = self.positions[i] 338 cur = self.positions[i]
300 end_cut = self.data.find('\n', cur) 339 end_cut = self.data.find('\n', cur)
301 if end_cut != -1: 340 if end_cut != -1:
302 end_cut += 1 341 end_cut += 1
311 if len(t[1]) > 20: 350 if len(t[1]) > 20:
312 self.extrainfo[i] = ord(t[1][21]) 351 self.extrainfo[i] = ord(t[1][21])
313 offset += len(l[-1]) 352 offset += len(l[-1])
314 i += 1 353 i += 1
315 self.data = ''.join(l) 354 self.data = ''.join(l)
355 self.hasremovals = False
316 self.extradata = [] 356 self.extradata = []
317 357
318 def _pack(self, d): 358 def _pack(self, d):
319 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n' 359 return d[0] + '\x00' + hex(d[1][:20]) + d[2] + '\n'
320 360
556 start = 0 596 start = 0
557 # zero copy representation of base as a buffer 597 # zero copy representation of base as a buffer
558 addbuf = util.buffer(base) 598 addbuf = util.buffer(base)
559 599
560 changes = list(changes) 600 changes = list(changes)
561 if len(changes) < 1000: 601 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
562 # start with a readonly loop that finds the offset of 602 # start with a readonly loop that finds the offset of
563 # each line and creates the deltas 603 # each line and creates the deltas
564 for f, todelete in changes: 604 for f, todelete in changes:
565 # bs will either be the index of the item or the insert point 605 # bs will either be the index of the item or the insert point
566 start, end = _msearch(addbuf, f, start) 606 start, end = _msearch(addbuf, f, start)