comparison mercurial/utils/storageutil.py @ 49672:e92de86cf4f8

emitrevision: consider ancestors of the revisions to emit as available bases This should make more delta bases valid. This notably affects: * cases where we skipped some parent with an empty delta to directly delta against an ancestor * cases where an intermediate snapshot is stored. This change means we could send largish intermediate snapshots over the wire. However, this is actually a sub-goal here. Sending snapshots over the wire means the client has a high chance of simply storing the pre-computed delta instead of doing a lengthy process that will… end up producing the same intermediate snapshot. In addition, the overall size of snapshots (of any level) is "only" some of the overall delta size (0.17% for my mercurial clone, 20% for my clone of Mozilla try). So sending them over the wire is unlikely to have a large impact on the bandwidth used. If we decide that minimising the bandwidth is an explicit goal, we should introduce new logic to filter out snapshots as deltas. The current code has no explicit notion of snapshots so far; they just tended to fall into the wobbly filtering options. In some cases, this patch can yield a large improvement to the bundling time: ### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog # benchmark.name = perf-bundle # benchmark.variants.revs = last-100000 before: 68.787066 seconds after: 47.552677 seconds (-30.87%) That translates to a large improvement to the pull time: ### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog # benchmark.name = pull # benchmark.variants.issue6528 = disabled # benchmark.variants.revs = last-100000 before: 142.186625 seconds after: 75.897745 seconds (-46.62%) No significant negative impact has been observed.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Sat, 12 Nov 2022 00:18:41 +0100
parents 2fd8750f3722
children e1953a34c110
comparison
equal deleted inserted replaced
49671:92c65bd0c4d6 49672:e92de86cf4f8
377 process (if present, see config: debug.bundling.stats. 377 process (if present, see config: debug.bundling.stats.
378 """ 378 """
379 379
380 fnode = store.node 380 fnode = store.node
381 frev = store.rev 381 frev = store.rev
382 parents = store.parentrevs
382 383
383 if nodesorder == b'nodes': 384 if nodesorder == b'nodes':
384 revs = [frev(n) for n in nodes] 385 revs = [frev(n) for n in nodes]
385 elif nodesorder == b'linear': 386 elif nodesorder == b'linear':
386 revs = {frev(n) for n in nodes} 387 revs = {frev(n) for n in nodes}
389 revs = sorted(frev(n) for n in nodes) 390 revs = sorted(frev(n) for n in nodes)
390 391
391 prevrev = None 392 prevrev = None
392 393
393 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions: 394 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
394 prevrev = store.parentrevs(revs[0])[0] 395 prevrev = parents(revs[0])[0]
395 396
396 # Set of revs available to delta against. 397 # Sets of revs available to delta against.
398 emitted = set()
397 available = set() 399 available = set()
398 parents = [] 400 if assumehaveparentrevisions:
401 common_heads = set(p for r in revs for p in parents(r))
402 common_heads.difference_update(revs)
403 available = store.ancestors(common_heads, inclusive=True)
399 404
400 def is_usable_base(rev): 405 def is_usable_base(rev):
401 """Is a delta against this revision usable over the wire""" 406 """Is a delta against this revision usable over the wire"""
402 if rev == nullrev: 407 if rev == nullrev:
403 return False 408 return False
404 # Base revision was already emitted in this group. 409 return rev in emitted or rev in available
405 if rev in available:
406 return True
407 # Base revision is a parent that hasn't been emitted already.
408 if assumehaveparentrevisions and rev in parents:
409 return True
410 return False
411 410
412 for rev in revs: 411 for rev in revs:
413 if rev == nullrev: 412 if rev == nullrev:
414 continue 413 continue
415 414
416 debug_delta_source = None 415 debug_delta_source = None
417 if debug_info is not None: 416 if debug_info is not None:
418 debug_info['revision-total'] += 1 417 debug_info['revision-total'] += 1
419 418
420 node = fnode(rev) 419 node = fnode(rev)
421 parents[:] = p1rev, p2rev = store.parentrevs(rev) 420 p1rev, p2rev = parents(rev)
422 421
423 if debug_info is not None: 422 if debug_info is not None:
424 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev: 423 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
425 debug_info['merge-total'] += 1 424 debug_info['merge-total'] += 1
426 425
529 ): 528 ):
530 if debug_info is not None: 529 if debug_info is not None:
531 debug_info['computed-delta'] += 1 # close enough 530 debug_info['computed-delta'] += 1 # close enough
532 debug_info['delta-full'] += 1 531 debug_info['delta-full'] += 1
533 revision = store.rawdata(node) 532 revision = store.rawdata(node)
534 available.add(rev) 533 emitted.add(rev)
535 else: 534 else:
536 if revdifffn: 535 if revdifffn:
537 if debug_info is not None: 536 if debug_info is not None:
538 if debug_delta_source == "full": 537 if debug_delta_source == "full":
539 debug_info['computed-delta'] += 1 538 debug_info['computed-delta'] += 1
569 assert False, 'unreachable' 568 assert False, 'unreachable'
570 delta = mdiff.textdiff( 569 delta = mdiff.textdiff(
571 store.rawdata(baserev), store.rawdata(rev) 570 store.rawdata(baserev), store.rawdata(rev)
572 ) 571 )
573 572
574 available.add(rev) 573 emitted.add(rev)
575 574
576 serialized_sidedata = None 575 serialized_sidedata = None
577 sidedata_flags = (0, 0) 576 sidedata_flags = (0, 0)
578 if sidedata_helpers: 577 if sidedata_helpers:
579 try: 578 try: