214 return self._data[rev] |
214 return self._data[rev] |
215 |
215 |
216 def length(self, rev): |
216 def length(self, rev): |
217 return self.end(rev) - self.start(rev) |
217 return self.end(rev) - self.start(rev) |
218 |
218 |
|
219 def __len__(self): |
|
220 return len(self._data) |
|
221 |
219 def _trimchunk(revlog, revs, startidx, endidx=None): |
222 def _trimchunk(revlog, revs, startidx, endidx=None): |
220 """returns revs[startidx:endidx] without empty trailing revs |
223 """returns revs[startidx:endidx] without empty trailing revs |
221 |
224 |
222 Doctest Setup |
225 Doctest Setup |
223 >>> revlog = _testrevlog([ |
226 >>> revlog = _testrevlog([ |
339 [[0], [11, 13, 15]] |
342 [[0], [11, 13, 15]] |
340 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) |
343 >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14])) |
341 [[1, 2], [5, 8, 10, 11], [14]] |
344 [[1, 2], [5, 8, 10, 11], [14]] |
342 |
345 |
343 Slicing with a maximum chunk size |
346 Slicing with a maximum chunk size |
344 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 15)) |
347 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15)) |
345 [[0], [11], [13], [15]] |
348 [[0], [11], [13], [15]] |
346 >>> list(_slicechunk(revlog, [0, 11, 13, 15], 20)) |
349 >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20)) |
347 [[0], [11], [13, 15]] |
350 [[0], [11], [13, 15]] |
348 """ |
351 """ |
349 if targetsize is not None: |
352 if targetsize is not None: |
350 targetsize = max(targetsize, revlog._srmingapsize) |
353 targetsize = max(targetsize, revlog._srmingapsize) |
|
354 # targetsize should not be specified when evaluating delta candidates: |
|
355 # * targetsize is used to ensure we stay within specification when reading, |
|
356 # * deltainfo is used to pick are good delta chain when writing. |
|
357 if not (deltainfo is None or targetsize is None): |
|
358 msg = 'cannot use `targetsize` with a `deltainfo`' |
|
359 raise error.ProgrammingError(msg) |
351 for chunk in _slicechunktodensity(revlog, revs, |
360 for chunk in _slicechunktodensity(revlog, revs, |
|
361 deltainfo, |
352 revlog._srdensitythreshold, |
362 revlog._srdensitythreshold, |
353 revlog._srmingapsize): |
363 revlog._srmingapsize): |
354 for subchunk in _slicechunktosize(revlog, chunk, targetsize): |
364 for subchunk in _slicechunktosize(revlog, chunk, targetsize): |
355 yield subchunk |
365 yield subchunk |
356 |
366 |
357 def _slicechunktosize(revlog, revs, targetsize): |
367 def _slicechunktosize(revlog, revs, targetsize=None): |
358 """slice revs to match the target size |
368 """slice revs to match the target size |
359 |
369 |
360 This is intended to be used on chunk that density slicing selected by that |
370 This is intended to be used on chunk that density slicing selected by that |
361 are still too large compared to the read garantee of revlog. This might |
371 are still too large compared to the read garantee of revlog. This might |
362 happens when "minimal gap size" interrupted the slicing or when chain are |
372 happens when "minimal gap size" interrupted the slicing or when chain are |
429 startrevidx = idx |
439 startrevidx = idx |
430 startdata = revlog.start(r) |
440 startdata = revlog.start(r) |
431 endrevidx = idx |
441 endrevidx = idx |
432 yield _trimchunk(revlog, revs, startrevidx) |
442 yield _trimchunk(revlog, revs, startrevidx) |
433 |
443 |
434 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0): |
444 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5, |
|
445 mingapsize=0): |
435 """slice revs to reduce the amount of unrelated data to be read from disk. |
446 """slice revs to reduce the amount of unrelated data to be read from disk. |
436 |
447 |
437 ``revs`` is sliced into groups that should be read in one time. |
448 ``revs`` is sliced into groups that should be read in one time. |
438 Assume that revs are sorted. |
449 Assume that revs are sorted. |
|
450 |
|
451 ``deltainfo`` is a _deltainfo instance of a revision that we would append |
|
452 to the top of the revlog. |
439 |
453 |
440 The initial chunk is sliced until the overall density (payload/chunks-span |
454 The initial chunk is sliced until the overall density (payload/chunks-span |
441 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is |
455 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is |
442 skipped. |
456 skipped. |
443 |
457 |
485 |
499 |
486 if len(revs) <= 1: |
500 if len(revs) <= 1: |
487 yield revs |
501 yield revs |
488 return |
502 return |
489 |
503 |
490 readdata = deltachainspan = _segmentspan(revlog, revs) |
504 nextrev = len(revlog) |
|
505 nextoffset = revlog.end(nextrev - 1) |
|
506 |
|
507 if deltainfo is None: |
|
508 deltachainspan = _segmentspan(revlog, revs) |
|
509 chainpayload = sum(length(r) for r in revs) |
|
510 else: |
|
511 deltachainspan = deltainfo.distance |
|
512 chainpayload = deltainfo.compresseddeltalen |
491 |
513 |
492 if deltachainspan < mingapsize: |
514 if deltachainspan < mingapsize: |
493 yield revs |
515 yield revs |
494 return |
516 return |
495 |
517 |
496 chainpayload = sum(length(r) for r in revs) |
518 readdata = deltachainspan |
497 |
519 |
498 if deltachainspan: |
520 if deltachainspan: |
499 density = chainpayload / float(deltachainspan) |
521 density = chainpayload / float(deltachainspan) |
500 else: |
522 else: |
501 density = 1.0 |
523 density = 1.0 |
502 |
524 |
503 if density >= targetdensity: |
525 if density >= targetdensity: |
504 yield revs |
526 yield revs |
505 return |
527 return |
|
528 |
|
529 if deltainfo is not None: |
|
530 revs = list(revs) |
|
531 revs.append(nextrev) |
506 |
532 |
507 # Store the gaps in a heap to have them sorted by decreasing size |
533 # Store the gaps in a heap to have them sorted by decreasing size |
508 gapsheap = [] |
534 gapsheap = [] |
509 heapq.heapify(gapsheap) |
535 heapq.heapify(gapsheap) |
510 prevend = None |
536 prevend = None |
511 for i, rev in enumerate(revs): |
537 for i, rev in enumerate(revs): |
512 revstart = start(rev) |
538 if rev < nextrev: |
513 revlen = length(rev) |
539 revstart = start(rev) |
|
540 revlen = length(rev) |
|
541 else: |
|
542 revstart = nextoffset |
|
543 revlen = deltainfo.deltalen |
514 |
544 |
515 # Skip empty revisions to form larger holes |
545 # Skip empty revisions to form larger holes |
516 if revlen == 0: |
546 if revlen == 0: |
517 continue |
547 continue |
518 |
548 |
2400 # bounding it limits the amount of I/O we need to do. |
2430 # bounding it limits the amount of I/O we need to do. |
2401 # - 'deltainfo.compresseddeltalen' is the sum of the total size of |
2431 # - 'deltainfo.compresseddeltalen' is the sum of the total size of |
2402 # deltas we need to apply -- bounding it limits the amount of CPU |
2432 # deltas we need to apply -- bounding it limits the amount of CPU |
2403 # we consume. |
2433 # we consume. |
2404 |
2434 |
2405 distance = deltainfo.distance |
2435 if self._sparserevlog: |
|
2436 # As sparse-read will be used, we can consider that the distance, |
|
2437 # instead of being the span of the whole chunk, |
|
2438 # is the span of the largest read chunk |
|
2439 base = deltainfo.base |
|
2440 |
|
2441 if base != nullrev: |
|
2442 deltachain = self._deltachain(base)[0] |
|
2443 else: |
|
2444 deltachain = [] |
|
2445 |
|
2446 chunks = _slicechunk(self, deltachain, deltainfo) |
|
2447 distance = max(map(lambda revs:_segmentspan(self, revs), chunks)) |
|
2448 else: |
|
2449 distance = deltainfo.distance |
|
2450 |
2406 textlen = revinfo.textlen |
2451 textlen = revinfo.textlen |
2407 defaultmax = textlen * 4 |
2452 defaultmax = textlen * 4 |
2408 maxdist = self._maxdeltachainspan |
2453 maxdist = self._maxdeltachainspan |
2409 if not maxdist: |
2454 if not maxdist: |
2410 maxdist = distance # ensure the conditional pass |
2455 maxdist = distance # ensure the conditional pass |
2411 maxdist = max(maxdist, defaultmax) |
2456 maxdist = max(maxdist, defaultmax) |
|
2457 if self._sparserevlog and maxdist < self._srmingapsize: |
|
2458 # In multiple place, we are ignoring irrelevant data range below a |
|
2459 # certain size. Be also apply this tradeoff here and relax span |
|
2460 # constraint for small enought content. |
|
2461 maxdist = self._srmingapsize |
2412 if (distance > maxdist or deltainfo.deltalen > textlen or |
2462 if (distance > maxdist or deltainfo.deltalen > textlen or |
2413 deltainfo.compresseddeltalen > textlen * 2 or |
2463 deltainfo.compresseddeltalen > textlen * 2 or |
2414 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)): |
2464 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)): |
2415 return False |
2465 return False |
2416 |
2466 |