comparison mercurial/revlog.py @ 38642:e59e27e52297

revlog: add function to slice chunk down to a given size It is possible to encounter situations where slicing based on density does not produce chunks smaller than the 4*textlength limit. To avoid extra memory consumption in those cases, we need to be able to break a chunk down to a given size. The actual caller comes in the next changesets.
author Boris Feld <boris.feld@octobus.net>
date Wed, 11 Jul 2018 00:35:01 -0700
parents feba6be0941b
children 967fee55e8d9
comparison
equal deleted inserted replaced
38641:feba6be0941b 38642:e59e27e52297
335 """ 335 """
336 for chunk in _slicechunktodensity(revlog, revs, 336 for chunk in _slicechunktodensity(revlog, revs,
337 revlog._srdensitythreshold, 337 revlog._srdensitythreshold,
338 revlog._srmingapsize): 338 revlog._srmingapsize):
339 yield chunk 339 yield chunk
340
def _slicechunktosize(revlog, revs, targetsize):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    ``revs`` is the sequence of revisions to slice. ``targetsize`` is the
    largest span (``revlog.end()`` of the last revision minus
    ``revlog.start()`` of the first one) a yielded chunk should cover;
    ``None`` disables slicing and ``revs`` is yielded untouched.

    This is a generator of chunks (slices of ``revs``). A single revision
    that is larger than ``targetsize`` on its own is still yielded, alone in
    its chunk, so that every yielded chunk is valid.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # Nothing to do when no limit is requested or the whole span already fits.
    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    # [startrevidx, endrevidx] is the window of indexes (into ``revs``) of the
    # chunk being accumulated; startdata is where that chunk's data begins.
    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # ``r`` still fits: extend the current chunk.
            endrevidx = idx
        else:
            # ``r`` would overflow the chunk: emit what was accumulated and
            # start a new chunk at ``r``.
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx

    # Flush the remaining revisions. Apply the same guard as inside the loop
    # so that a tail reduced to nothing by _trimchunk is not handed to the
    # caller as an empty chunk.
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
340 417
341 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0): 418 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
342 """slice revs to reduce the amount of unrelated data to be read from disk. 419 """slice revs to reduce the amount of unrelated data to be read from disk.
343 420
344 ``revs`` is sliced into groups that should be read in one time. 421 ``revs`` is sliced into groups that should be read in one time.