Mercurial > hg
comparison mercurial/changegroup.py @ 38982:037debbf869c
changegroup: pass sorted revisions into group() (API)
Currently, group() receives a list of nodes and calls _sortgroup()
to sort them and turn them into revs. Since the sorting behavior
varies depending on the type of data being transferred, I think it
makes sense to perform the sorting before group() is invoked.
This commit extracts _sortgroup() to a pair of standalone functions.
It then moves the calling of these functions to the 3 call sites of
group(). group() now receives an iterable of revs instead of nodes.
Differential Revision: https://phab.mercurial-scm.org/D4139
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 06 Aug 2018 11:33:05 -0700 |
parents | 227ebd88ce5e |
children | fbbda9ff3deb |
Comparison legend: equal, deleted, inserted, replaced
38981:227ebd88ce5e | 38982:037debbf869c |
---|---|
521 # 2 bytes of flags to apply to revision data. | 521 # 2 bytes of flags to apply to revision data. |
522 flags = attr.ib() | 522 flags = attr.ib() |
523 # Iterable of chunks holding raw delta data. | 523 # Iterable of chunks holding raw delta data. |
524 deltachunks = attr.ib() | 524 deltachunks = attr.ib() |
525 | 525 |
526 def _sortnodesnormal(store, nodes, reorder): | |
527 """Sort nodes for changegroup generation and turn into revnums.""" | |
528 # for generaldelta revlogs, we linearize the revs; this will both be | |
529 # much quicker and generate a much smaller bundle | |
530 if (store._generaldelta and reorder is None) or reorder: | |
531 dag = dagutil.revlogdag(store) | |
532 return dag.linearize(set(store.rev(n) for n in nodes)) | |
533 else: | |
534 return sorted([store.rev(n) for n in nodes]) | |
535 | |
536 def _sortnodesellipsis(store, nodes, clnodetorev, lookup): | |
537 """Sort nodes for changegroup generation and turn into revnums.""" | |
538 # Ellipses serving mode. | |
539 # | |
540 # In a perfect world, we'd generate better ellipsis-ified graphs | |
541 # for non-changelog revlogs. In practice, we haven't started doing | |
542 # that yet, so the resulting DAGs for the manifestlog and filelogs | |
543 # are actually full of bogus parentage on all the ellipsis | |
544 # nodes. This has the side effect that, while the contents are | |
545 # correct, the individual DAGs might be completely out of whack in | |
546 # a case like 882681bc3166 and its ancestors (back about 10 | |
547 # revisions or so) in the main hg repo. | |
548 # | |
549 # The one invariant we *know* holds is that the new (potentially | |
550 # bogus) DAG shape will be valid if we order the nodes in the | |
551 # order that they're introduced in dramatis personae by the | |
552 # changelog, so what we do is we sort the non-changelog histories | |
553 # by the order in which they are used by the changelog. | |
554 key = lambda n: clnodetorev[lookup(n)] | |
555 return [store.rev(n) for n in sorted(nodes, key=key)] | |
556 | |
526 class cgpacker(object): | 557 class cgpacker(object): |
527 def __init__(self, repo, filematcher, version, allowreorder, | 558 def __init__(self, repo, filematcher, version, allowreorder, |
528 deltaparentfn, builddeltaheader, manifestsend, | 559 deltaparentfn, builddeltaheader, manifestsend, |
529 bundlecaps=None, ellipses=False, | 560 bundlecaps=None, ellipses=False, |
530 shallow=False, ellipsisroots=None, fullnodes=None): | 561 shallow=False, ellipsisroots=None, fullnodes=None): |
608 self._clrevtolocalrev = self._nextclrevtolocalrev | 639 self._clrevtolocalrev = self._nextclrevtolocalrev |
609 self._nextclrevtolocalrev = None | 640 self._nextclrevtolocalrev = None |
610 | 641 |
611 return closechunk() | 642 return closechunk() |
612 | 643 |
613 # Extracted both for clarity and for overriding in extensions. | 644 def group(self, revs, store, ischangelog, lookup, units=None): |
614 def _sortgroup(self, store, ischangelog, nodelist, lookup): | |
615 """Sort nodes for change group and turn them into revnums.""" | |
616 # Ellipses serving mode. | |
617 # | |
618 # In a perfect world, we'd generate better ellipsis-ified graphs | |
619 # for non-changelog revlogs. In practice, we haven't started doing | |
620 # that yet, so the resulting DAGs for the manifestlog and filelogs | |
621 # are actually full of bogus parentage on all the ellipsis | |
622 # nodes. This has the side effect that, while the contents are | |
623 # correct, the individual DAGs might be completely out of whack in | |
624 # a case like 882681bc3166 and its ancestors (back about 10 | |
625 # revisions or so) in the main hg repo. | |
626 # | |
627 # The one invariant we *know* holds is that the new (potentially | |
628 # bogus) DAG shape will be valid if we order the nodes in the | |
629 # order that they're introduced in dramatis personae by the | |
630 # changelog, so what we do is we sort the non-changelog histories | |
631 # by the order in which they are used by the changelog. | |
632 if self._ellipses and not ischangelog: | |
633 key = lambda n: self._clnodetorev[lookup(n)] | |
634 return [store.rev(n) for n in sorted(nodelist, key=key)] | |
635 | |
636 # for generaldelta revlogs, we linearize the revs; this will both be | |
637 # much quicker and generate a much smaller bundle | |
638 if (store._generaldelta and self._reorder is None) or self._reorder: | |
639 dag = dagutil.revlogdag(store) | |
640 return dag.linearize(set(store.rev(n) for n in nodelist)) | |
641 else: | |
642 return sorted([store.rev(n) for n in nodelist]) | |
643 | |
644 def group(self, nodelist, store, ischangelog, lookup, units=None): | |
645 """Calculate a delta group, yielding a sequence of changegroup chunks | 645 """Calculate a delta group, yielding a sequence of changegroup chunks |
646 (strings). | 646 (strings). |
647 | 647 |
648 Given a list of changeset revs, return a set of deltas and | 648 Given a list of changeset revs, return a set of deltas and |
649 metadata corresponding to nodes. The first delta is | 649 metadata corresponding to nodes. The first delta is |
654 | 654 |
655 If units is not None, progress detail will be generated, units specifies | 655 If units is not None, progress detail will be generated, units specifies |
656 the type of revlog that is touched (changelog, manifest, etc.). | 656 the type of revlog that is touched (changelog, manifest, etc.). |
657 """ | 657 """ |
658 # if we don't have any revisions touched by these changesets, bail | 658 # if we don't have any revisions touched by these changesets, bail |
659 if len(nodelist) == 0: | 659 if len(revs) == 0: |
660 yield self._close() | 660 yield self._close() |
661 return | 661 return |
662 | |
663 revs = self._sortgroup(store, ischangelog, nodelist, lookup) | |
664 | 662 |
665 # add the parent of the first rev | 663 # add the parent of the first rev |
666 p = store.parentrevs(revs[0])[0] | 664 p = store.parentrevs(revs[0])[0] |
667 revs.insert(0, p) | 665 revs.insert(0, p) |
668 | 666 |
691 return [] | 689 return [] |
692 | 690 |
693 rr, rl = store.rev, store.linkrev | 691 rr, rl = store.rev, store.linkrev |
694 return [n for n in missing if rl(rr(n)) not in commonrevs] | 692 return [n for n in missing if rl(rr(n)) not in commonrevs] |
695 | 693 |
696 def _packmanifests(self, dir, mfnodes, lookuplinknode): | 694 def _packmanifests(self, dir, dirlog, revs, lookuplinknode): |
697 """Pack manifests into a changegroup stream. | 695 """Pack manifests into a changegroup stream. |
698 | 696 |
699 Encodes the directory name in the output so multiple manifests | 697 Encodes the directory name in the output so multiple manifests |
700 can be sent. Multiple manifests is not supported by cg1 and cg2. | 698 can be sent. Multiple manifests is not supported by cg1 and cg2. |
701 """ | 699 """ |
702 | |
703 if dir: | 700 if dir: |
704 assert self.version == b'03' | 701 assert self.version == b'03' |
705 yield _fileheader(dir) | 702 yield _fileheader(dir) |
706 | 703 |
707 # TODO violates storage abstractions by assuming revlogs. | 704 for chunk in self.group(revs, dirlog, False, lookuplinknode, |
708 dirlog = self._repo.manifestlog._revlog.dirlog(dir) | |
709 for chunk in self.group(mfnodes, dirlog, False, lookuplinknode, | |
710 units=_('manifests')): | 705 units=_('manifests')): |
711 yield chunk | 706 yield chunk |
712 | 707 |
713 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): | 708 def generate(self, commonrevs, clnodes, fastpathlinkrev, source): |
714 """Yield a sequence of changegroup byte chunks.""" | 709 """Yield a sequence of changegroup byte chunks.""" |
848 # this manifest. | 843 # this manifest. |
849 changedfiles.update(c[3]) | 844 changedfiles.update(c[3]) |
850 | 845 |
851 return x | 846 return x |
852 | 847 |
848 # Changelog doesn't benefit from reordering revisions. So send out | |
849 # revisions in store order. | |
850 revs = sorted(cl.rev(n) for n in nodes) | |
851 | |
853 state = { | 852 state = { |
854 'clrevorder': clrevorder, | 853 'clrevorder': clrevorder, |
855 'mfs': mfs, | 854 'mfs': mfs, |
856 'changedfiles': changedfiles, | 855 'changedfiles': changedfiles, |
857 } | 856 } |
858 | 857 |
859 gen = self.group(nodes, cl, True, lookupcl, units=_('changesets')) | 858 gen = self.group(revs, cl, True, lookupcl, units=_('changesets')) |
860 | 859 |
861 return state, gen | 860 return state, gen |
862 | 861 |
863 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs, | 862 def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs, |
864 fnodes, source): | 863 fnodes, source): |
915 return lookupmflinknode | 914 return lookupmflinknode |
916 | 915 |
917 size = 0 | 916 size = 0 |
918 while tmfnodes: | 917 while tmfnodes: |
919 dir, nodes = tmfnodes.popitem() | 918 dir, nodes = tmfnodes.popitem() |
920 prunednodes = self._prune(dirlog(dir), nodes, commonrevs) | 919 store = dirlog(dir) |
920 prunednodes = self._prune(store, nodes, commonrevs) | |
921 if not dir or prunednodes: | 921 if not dir or prunednodes: |
922 for x in self._packmanifests(dir, prunednodes, | 922 lookupfn = makelookupmflinknode(dir, nodes) |
923 makelookupmflinknode(dir, nodes)): | 923 |
924 if self._ellipses: | |
925 revs = _sortnodesellipsis(store, prunednodes, | |
926 self._clnodetorev, lookupfn) | |
927 else: | |
928 revs = _sortnodesnormal(store, prunednodes, | |
929 self._reorder) | |
930 | |
931 for x in self._packmanifests(dir, store, revs, lookupfn): | |
924 size += len(x) | 932 size += len(x) |
925 yield x | 933 yield x |
926 self._verbosenote(_('%8.i (manifests)\n') % size) | 934 self._verbosenote(_('%8.i (manifests)\n') % size) |
927 yield self._manifestsend | 935 yield self._manifestsend |
928 | 936 |
979 def lookupfilelog(x): | 987 def lookupfilelog(x): |
980 return linkrevnodes[x] | 988 return linkrevnodes[x] |
981 | 989 |
982 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs) | 990 filenodes = self._prune(filerevlog, linkrevnodes, commonrevs) |
983 if filenodes: | 991 if filenodes: |
992 if self._ellipses: | |
993 revs = _sortnodesellipsis(filerevlog, filenodes, | |
994 self._clnodetorev, lookupfilelog) | |
995 else: | |
996 revs = _sortnodesnormal(filerevlog, filenodes, | |
997 self._reorder) | |
998 | |
984 progress.update(i + 1, item=fname) | 999 progress.update(i + 1, item=fname) |
985 h = _fileheader(fname) | 1000 h = _fileheader(fname) |
986 size = len(h) | 1001 size = len(h) |
987 yield h | 1002 yield h |
988 for chunk in self.group(filenodes, filerevlog, False, | 1003 for chunk in self.group(revs, filerevlog, False, lookupfilelog): |
989 lookupfilelog): | |
990 size += len(chunk) | 1004 size += len(chunk) |
991 yield chunk | 1005 yield chunk |
992 self._verbosenote(_('%8.i %s\n') % (size, fname)) | 1006 self._verbosenote(_('%8.i %s\n') % (size, fname)) |
993 progress.complete() | 1007 progress.complete() |
994 | 1008 |