comparison mercurial/changegroup.py @ 38982:037debbf869c

changegroup: pass sorted revisions into group() (API)

Currently, group() receives a list of nodes and calls _sortgroup() to sort them and turn them into revs.

Since the sorting behavior varies depending on the type of data being transferred, I think it makes sense to perform the sorting before group() is invoked.

This commit extracts _sortgroup() to a pair of standalone functions. It then moves the calling of these functions to the 3 call sites of group(). group() now receives an iterable of revs instead of nodes.

Differential Revision: https://phab.mercurial-scm.org/D4139
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 06 Aug 2018 11:33:05 -0700
parents 227ebd88ce5e
children fbbda9ff3deb
comparison
equal deleted inserted replaced
38981:227ebd88ce5e 38982:037debbf869c
521 # 2 bytes of flags to apply to revision data. 521 # 2 bytes of flags to apply to revision data.
522 flags = attr.ib() 522 flags = attr.ib()
523 # Iterable of chunks holding raw delta data. 523 # Iterable of chunks holding raw delta data.
524 deltachunks = attr.ib() 524 deltachunks = attr.ib()
525 525
def _sortnodesnormal(store, nodes, reorder):
    """Sort nodes for changegroup generation and turn into revnums.

    ``store`` must provide ``rev(node)`` and, when ``reorder`` is None,
    a ``_generaldelta`` attribute. ``nodes`` is an iterable of nodes
    that ``store.rev()`` can resolve.

    ``reorder`` is tri-state:

    * ``None``: let the store decide; revisions are linearized only if
      the store uses generaldelta.
    * a true value: always linearize.
    * any other false value: never linearize; use ascending rev order.

    Returns the revision numbers for ``nodes``, either in the order
    produced by ``dagutil.revlogdag(store).linearize()`` or as a list
    sorted in ascending order.
    """
    if reorder is None:
        # No explicit preference from the caller: follow the storage format.
        reorder = store._generaldelta

    torev = store.rev

    if reorder:
        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        dag = dagutil.revlogdag(store)
        return dag.linearize({torev(n) for n in nodes})

    return sorted(torev(n) for n in nodes)
535
def _sortnodesellipsis(store, nodes, clnodetorev, lookup):
    """Sort nodes for changegroup generation and turn into revnums.

    Used in ellipses serving mode for non-changelog stores.

    ``store`` must provide ``rev(node)``. ``nodes`` is an iterable of
    nodes that ``store.rev()`` can resolve. ``lookup`` maps each of those
    nodes to a changelog node, and ``clnodetorev`` maps that changelog
    node to its sort position (indexed with ``clnodetorev[...]``).

    Returns a list of ``store`` revision numbers ordered by the changelog
    position of each node. The sort is stable, so nodes that share a
    changelog position keep their relative input order.
    """
    # Ellipses serving mode.
    #
    # In a perfect world, we'd generate better ellipsis-ified graphs
    # for non-changelog revlogs. In practice, we haven't started doing
    # that yet, so the resulting DAGs for the manifestlog and filelogs
    # are actually full of bogus parentage on all the ellipsis
    # nodes. This has the side effect that, while the contents are
    # correct, the individual DAGs might be completely out of whack in
    # a case like 882681bc3166 and its ancestors (back about 10
    # revisions or so) in the main hg repo.
    #
    # The one invariant we *know* holds is that the new (potentially
    # bogus) DAG shape will be valid if we order the nodes in the
    # order that they're introduced in dramatis personae by the
    # changelog, so what we do is we sort the non-changelog histories
    # by the order in which they are used by the changelog.
    def clrev(node):
        # Position of the changelog entry that ``node`` is linked to.
        return clnodetorev[lookup(node)]

    torev = store.rev
    return [torev(n) for n in sorted(nodes, key=clrev)]
556
526 class cgpacker(object): 557 class cgpacker(object):
527 def __init__(self, repo, filematcher, version, allowreorder, 558 def __init__(self, repo, filematcher, version, allowreorder,
528 deltaparentfn, builddeltaheader, manifestsend, 559 deltaparentfn, builddeltaheader, manifestsend,
529 bundlecaps=None, ellipses=False, 560 bundlecaps=None, ellipses=False,
530 shallow=False, ellipsisroots=None, fullnodes=None): 561 shallow=False, ellipsisroots=None, fullnodes=None):
608 self._clrevtolocalrev = self._nextclrevtolocalrev 639 self._clrevtolocalrev = self._nextclrevtolocalrev
609 self._nextclrevtolocalrev = None 640 self._nextclrevtolocalrev = None
610 641
611 return closechunk() 642 return closechunk()
612 643
613 # Extracted both for clarity and for overriding in extensions. 644 def group(self, revs, store, ischangelog, lookup, units=None):
614 def _sortgroup(self, store, ischangelog, nodelist, lookup):
615 """Sort nodes for change group and turn them into revnums."""
616 # Ellipses serving mode.
617 #
618 # In a perfect world, we'd generate better ellipsis-ified graphs
619 # for non-changelog revlogs. In practice, we haven't started doing
620 # that yet, so the resulting DAGs for the manifestlog and filelogs
621 # are actually full of bogus parentage on all the ellipsis
622 # nodes. This has the side effect that, while the contents are
623 # correct, the individual DAGs might be completely out of whack in
624 # a case like 882681bc3166 and its ancestors (back about 10
625 # revisions or so) in the main hg repo.
626 #
627 # The one invariant we *know* holds is that the new (potentially
628 # bogus) DAG shape will be valid if we order the nodes in the
629 # order that they're introduced in dramatis personae by the
630 # changelog, so what we do is we sort the non-changelog histories
631 # by the order in which they are used by the changelog.
632 if self._ellipses and not ischangelog:
633 key = lambda n: self._clnodetorev[lookup(n)]
634 return [store.rev(n) for n in sorted(nodelist, key=key)]
635
636 # for generaldelta revlogs, we linearize the revs; this will both be
637 # much quicker and generate a much smaller bundle
638 if (store._generaldelta and self._reorder is None) or self._reorder:
639 dag = dagutil.revlogdag(store)
640 return dag.linearize(set(store.rev(n) for n in nodelist))
641 else:
642 return sorted([store.rev(n) for n in nodelist])
643
644 def group(self, nodelist, store, ischangelog, lookup, units=None):
645 """Calculate a delta group, yielding a sequence of changegroup chunks 645 """Calculate a delta group, yielding a sequence of changegroup chunks
646 (strings). 646 (strings).
647 647
648 Given a list of changeset revs, return a set of deltas and 648 Given a list of changeset revs, return a set of deltas and
649 metadata corresponding to nodes. The first delta is 649 metadata corresponding to nodes. The first delta is
654 654
655 If units is not None, progress detail will be generated, units specifies 655 If units is not None, progress detail will be generated, units specifies
656 the type of revlog that is touched (changelog, manifest, etc.). 656 the type of revlog that is touched (changelog, manifest, etc.).
657 """ 657 """
658 # if we don't have any revisions touched by these changesets, bail 658 # if we don't have any revisions touched by these changesets, bail
659 if len(nodelist) == 0: 659 if len(revs) == 0:
660 yield self._close() 660 yield self._close()
661 return 661 return
662
663 revs = self._sortgroup(store, ischangelog, nodelist, lookup)
664 662
665 # add the parent of the first rev 663 # add the parent of the first rev
666 p = store.parentrevs(revs[0])[0] 664 p = store.parentrevs(revs[0])[0]
667 revs.insert(0, p) 665 revs.insert(0, p)
668 666
691 return [] 689 return []
692 690
693 rr, rl = store.rev, store.linkrev 691 rr, rl = store.rev, store.linkrev
694 return [n for n in missing if rl(rr(n)) not in commonrevs] 692 return [n for n in missing if rl(rr(n)) not in commonrevs]
695 693
696 def _packmanifests(self, dir, mfnodes, lookuplinknode): 694 def _packmanifests(self, dir, dirlog, revs, lookuplinknode):
697 """Pack manifests into a changegroup stream. 695 """Pack manifests into a changegroup stream.
698 696
699 Encodes the directory name in the output so multiple manifests 697 Encodes the directory name in the output so multiple manifests
700 can be sent. Multiple manifests is not supported by cg1 and cg2. 698 can be sent. Multiple manifests is not supported by cg1 and cg2.
701 """ 699 """
702
703 if dir: 700 if dir:
704 assert self.version == b'03' 701 assert self.version == b'03'
705 yield _fileheader(dir) 702 yield _fileheader(dir)
706 703
707 # TODO violates storage abstractions by assuming revlogs. 704 for chunk in self.group(revs, dirlog, False, lookuplinknode,
708 dirlog = self._repo.manifestlog._revlog.dirlog(dir)
709 for chunk in self.group(mfnodes, dirlog, False, lookuplinknode,
710 units=_('manifests')): 705 units=_('manifests')):
711 yield chunk 706 yield chunk
712 707
713 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): 708 def generate(self, commonrevs, clnodes, fastpathlinkrev, source):
714 """Yield a sequence of changegroup byte chunks.""" 709 """Yield a sequence of changegroup byte chunks."""
848 # this manifest. 843 # this manifest.
849 changedfiles.update(c[3]) 844 changedfiles.update(c[3])
850 845
851 return x 846 return x
852 847
848 # Changelog doesn't benefit from reordering revisions. So send out
849 # revisions in store order.
850 revs = sorted(cl.rev(n) for n in nodes)
851
853 state = { 852 state = {
854 'clrevorder': clrevorder, 853 'clrevorder': clrevorder,
855 'mfs': mfs, 854 'mfs': mfs,
856 'changedfiles': changedfiles, 855 'changedfiles': changedfiles,
857 } 856 }
858 857
859 gen = self.group(nodes, cl, True, lookupcl, units=_('changesets')) 858 gen = self.group(revs, cl, True, lookupcl, units=_('changesets'))
860 859
861 return state, gen 860 return state, gen
862 861
863 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs, 862 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs,
864 fnodes, source): 863 fnodes, source):
915 return lookupmflinknode 914 return lookupmflinknode
916 915
917 size = 0 916 size = 0
918 while tmfnodes: 917 while tmfnodes:
919 dir, nodes = tmfnodes.popitem() 918 dir, nodes = tmfnodes.popitem()
920 prunednodes = self._prune(dirlog(dir), nodes, commonrevs) 919 store = dirlog(dir)
920 prunednodes = self._prune(store, nodes, commonrevs)
921 if not dir or prunednodes: 921 if not dir or prunednodes:
922 for x in self._packmanifests(dir, prunednodes, 922 lookupfn = makelookupmflinknode(dir, nodes)
923 makelookupmflinknode(dir, nodes)): 923
924 if self._ellipses:
925 revs = _sortnodesellipsis(store, prunednodes,
926 self._clnodetorev, lookupfn)
927 else:
928 revs = _sortnodesnormal(store, prunednodes,
929 self._reorder)
930
931 for x in self._packmanifests(dir, store, revs, lookupfn):
924 size += len(x) 932 size += len(x)
925 yield x 933 yield x
926 self._verbosenote(_('%8.i (manifests)\n') % size) 934 self._verbosenote(_('%8.i (manifests)\n') % size)
927 yield self._manifestsend 935 yield self._manifestsend
928 936
979 def lookupfilelog(x): 987 def lookupfilelog(x):
980 return linkrevnodes[x] 988 return linkrevnodes[x]
981 989
982 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs) 990 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs)
983 if filenodes: 991 if filenodes:
992 if self._ellipses:
993 revs = _sortnodesellipsis(filerevlog, filenodes,
994 self._clnodetorev, lookupfilelog)
995 else:
996 revs = _sortnodesnormal(filerevlog, filenodes,
997 self._reorder)
998
984 progress.update(i + 1, item=fname) 999 progress.update(i + 1, item=fname)
985 h = _fileheader(fname) 1000 h = _fileheader(fname)
986 size = len(h) 1001 size = len(h)
987 yield h 1002 yield h
988 for chunk in self.group(filenodes, filerevlog, False, 1003 for chunk in self.group(revs, filerevlog, False, lookupfilelog):
989 lookupfilelog):
990 size += len(chunk) 1004 size += len(chunk)
991 yield chunk 1005 yield chunk
992 self._verbosenote(_('%8.i %s\n') % (size, fname)) 1006 self._verbosenote(_('%8.i %s\n') % (size, fname))
993 progress.complete() 1007 progress.complete()
994 1008