Mercurial > hg
comparison mercurial/manifest.py @ 25220:f0fbd88b21fb
treemanifest: speed up diff by keeping track of dirty nodes
Since tree manifests have a nodeid per directory, we can avoid diffing
entire directories if they have the same nodeid. The comparison is
only valid for unmodified treemanifest instances, of course, so we
need to keep track of which have been modified. Therefore, let's add a
dirty flag to treemanifest indicating whether its nodeid can be
trusted. We set it when _files or _dirs is modified, and make diff(),
and its cousin filesnotin(), not descend into subdirectories that are
the same on both sides.
On the Mozilla repo, this speeds up 'hg diff -r .^ -r .' from 1.990s
to 1.762s. The improvement will be much larger when we start lazily
loading subdirectory manifests.
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Thu, 26 Feb 2015 08:16:13 -0800 |
parents | 2773540c3650 |
children | eafa06e9edde |
comparison
equal
deleted
inserted
replaced
25217:70e822796ac8 | 25220:f0fbd88b21fb |
---|---|
443 | 443 |
444 class treemanifest(object): | 444 class treemanifest(object): |
445 def __init__(self, dir='', text=''): | 445 def __init__(self, dir='', text=''): |
446 self._dir = dir | 446 self._dir = dir |
447 self._node = revlog.nullid | 447 self._node = revlog.nullid |
448 self._dirty = False | |
448 self._dirs = {} | 449 self._dirs = {} |
449 # Using _lazymanifest here is a little slower than plain old dicts | 450 # Using _lazymanifest here is a little slower than plain old dicts |
450 self._files = {} | 451 self._files = {} |
451 self._flags = {} | 452 self._flags = {} |
452 def readsubtree(subdir, subm): | 453 if text: |
453 raise AssertionError('treemanifest constructor only accepts ' | 454 def readsubtree(subdir, subm): |
454 'flat manifests') | 455 raise AssertionError('treemanifest constructor only accepts ' |
455 self.parse(text, readsubtree) | 456 'flat manifests') |
457 self.parse(text, readsubtree) | |
458 self._dirty = True # Mark flat manifest dirty after parsing | |
456 | 459 |
457 def _subpath(self, path): | 460 def _subpath(self, path): |
458 return self._dir + path | 461 return self._dir + path |
459 | 462 |
460 def __len__(self): | 463 def __len__(self): |
466 def _isempty(self): | 469 def _isempty(self): |
467 return (not self._files and (not self._dirs or | 470 return (not self._files and (not self._dirs or |
468 all(m._isempty() for m in self._dirs.values()))) | 471 all(m._isempty() for m in self._dirs.values()))) |
469 | 472 |
470 def __str__(self): | 473 def __str__(self): |
471 return ('<treemanifest dir=%s, node=%s>' % | 474 return ('<treemanifest dir=%s, node=%s, dirty=%s>' % |
472 (self._dir, revlog.hex(self._node))) | 475 (self._dir, revlog.hex(self._node), self._dirty)) |
473 | 476 |
474 def dir(self): | 477 def dir(self): |
475 '''The directory that this tree manifest represents, including a | 478 '''The directory that this tree manifest represents, including a |
476 trailing '/'. Empty string for the repo root directory.''' | 479 trailing '/'. Empty string for the repo root directory.''' |
477 return self._dir | 480 return self._dir |
478 | 481 |
479 def node(self): | 482 def node(self): |
480 '''This node of this instance. nullid for unsaved instances. Should | 483 '''This node of this instance. nullid for unsaved instances. Should |
481 be updated when the instance is read or written from a revlog. | 484 be updated when the instance is read or written from a revlog. |
482 ''' | 485 ''' |
486 assert not self._dirty | |
483 return self._node | 487 return self._node |
484 | 488 |
485 def setnode(self, node): | 489 def setnode(self, node): |
486 self._node = node | 490 self._node = node |
491 self._dirty = False | |
487 | 492 |
488 def iteritems(self): | 493 def iteritems(self): |
489 for p, n in sorted(self._dirs.items() + self._files.items()): | 494 for p, n in sorted(self._dirs.items() + self._files.items()): |
490 if p in self._files: | 495 if p in self._files: |
491 yield self._subpath(p), n | 496 yield self._subpath(p), n |
561 del self._dirs[dir] | 566 del self._dirs[dir] |
562 else: | 567 else: |
563 del self._files[f] | 568 del self._files[f] |
564 if f in self._flags: | 569 if f in self._flags: |
565 del self._flags[f] | 570 del self._flags[f] |
571 self._dirty = True | |
566 | 572 |
567 def __setitem__(self, f, n): | 573 def __setitem__(self, f, n): |
568 assert n is not None | 574 assert n is not None |
569 dir, subpath = _splittopdir(f) | 575 dir, subpath = _splittopdir(f) |
570 if dir: | 576 if dir: |
571 if dir not in self._dirs: | 577 if dir not in self._dirs: |
572 self._dirs[dir] = treemanifest(self._subpath(dir)) | 578 self._dirs[dir] = treemanifest(self._subpath(dir)) |
573 self._dirs[dir].__setitem__(subpath, n) | 579 self._dirs[dir].__setitem__(subpath, n) |
574 else: | 580 else: |
575 self._files[f] = n[:21] # to match manifestdict's behavior | 581 self._files[f] = n[:21] # to match manifestdict's behavior |
582 self._dirty = True | |
576 | 583 |
577 def setflag(self, f, flags): | 584 def setflag(self, f, flags): |
578 """Set the flags (symlink, executable) for path f.""" | 585 """Set the flags (symlink, executable) for path f.""" |
579 assert 'd' not in flags | 586 assert 'd' not in flags |
580 dir, subpath = _splittopdir(f) | 587 dir, subpath = _splittopdir(f) |
582 if dir not in self._dirs: | 589 if dir not in self._dirs: |
583 self._dirs[dir] = treemanifest(self._subpath(dir)) | 590 self._dirs[dir] = treemanifest(self._subpath(dir)) |
584 self._dirs[dir].setflag(subpath, flags) | 591 self._dirs[dir].setflag(subpath, flags) |
585 else: | 592 else: |
586 self._flags[f] = flags | 593 self._flags[f] = flags |
594 self._dirty = True | |
587 | 595 |
588 def copy(self): | 596 def copy(self): |
589 copy = treemanifest(self._dir) | 597 copy = treemanifest(self._dir) |
590 copy._node = self._node | 598 copy._node = self._node |
599 copy._dirty = self._dirty | |
591 for d in self._dirs: | 600 for d in self._dirs: |
592 copy._dirs[d] = self._dirs[d].copy() | 601 copy._dirs[d] = self._dirs[d].copy() |
593 copy._files = dict.copy(self._files) | 602 copy._files = dict.copy(self._files) |
594 copy._flags = dict.copy(self._flags) | 603 copy._flags = dict.copy(self._flags) |
595 return copy | 604 return copy |
596 | 605 |
597 def filesnotin(self, m2): | 606 def filesnotin(self, m2): |
598 '''Set of files in this manifest that are not in the other''' | 607 '''Set of files in this manifest that are not in the other''' |
599 files = set() | 608 files = set() |
600 def _filesnotin(t1, t2): | 609 def _filesnotin(t1, t2): |
610 if t1._node == t2._node and not t1._dirty and not t2._dirty: | |
611 return | |
601 for d, m1 in t1._dirs.iteritems(): | 612 for d, m1 in t1._dirs.iteritems(): |
602 if d in t2._dirs: | 613 if d in t2._dirs: |
603 m2 = t2._dirs[d] | 614 m2 = t2._dirs[d] |
604 _filesnotin(m1, m2) | 615 _filesnotin(m1, m2) |
605 else: | 616 else: |
697 for dir, subm in self._dirs.iteritems(): | 708 for dir, subm in self._dirs.iteritems(): |
698 m = subm._matches(match) | 709 m = subm._matches(match) |
699 if not m._isempty(): | 710 if not m._isempty(): |
700 ret._dirs[dir] = m | 711 ret._dirs[dir] = m |
701 | 712 |
713 if not ret._isempty(): | |
714 ret._dirty = True | |
702 return ret | 715 return ret |
703 | 716 |
704 def diff(self, m2, clean=False): | 717 def diff(self, m2, clean=False): |
705 '''Finds changes between the current manifest and m2. | 718 '''Finds changes between the current manifest and m2. |
706 | 719 |
717 string. | 730 string. |
718 ''' | 731 ''' |
719 result = {} | 732 result = {} |
720 emptytree = treemanifest() | 733 emptytree = treemanifest() |
721 def _diff(t1, t2): | 734 def _diff(t1, t2): |
735 if t1._node == t2._node and not t1._dirty and not t2._dirty: | |
736 return | |
722 for d, m1 in t1._dirs.iteritems(): | 737 for d, m1 in t1._dirs.iteritems(): |
723 m2 = t2._dirs.get(d, emptytree) | 738 m2 = t2._dirs.get(d, emptytree) |
724 _diff(m1, m2) | 739 _diff(m1, m2) |
725 | 740 |
726 for d, m2 in t2._dirs.iteritems(): | 741 for d, m2 in t2._dirs.iteritems(): |
747 def parse(self, text, readsubtree): | 762 def parse(self, text, readsubtree): |
748 for f, n, fl in _parse(text): | 763 for f, n, fl in _parse(text): |
749 if fl == 'd': | 764 if fl == 'd': |
750 f = f + '/' | 765 f = f + '/' |
751 self._dirs[f] = readsubtree(self._subpath(f), n) | 766 self._dirs[f] = readsubtree(self._subpath(f), n) |
752 else: | 767 elif '/' in f: |
753 # Use __setitem__ and setflag rather than assigning directly | 768 # This is a flat manifest, so use __setitem__ and setflag rather |
754 # to _files and _flags, thereby letting us parse flat manifests | 769 # than assigning directly to _files and _flags, so we can |
755 # as well as tree manifests. | 770 # assign a path in a subdirectory, and to mark dirty (compared |
771 # to nullid). | |
756 self[f] = n | 772 self[f] = n |
757 if fl: | 773 if fl: |
758 self.setflag(f, fl) | 774 self.setflag(f, fl) |
775 else: | |
776 # Assigning to _files and _flags avoids marking as dirty, | |
777 # and should be a little faster. | |
778 self._files[f] = n | |
779 if fl: | |
780 self._flags[f] = fl | |
759 | 781 |
760 def text(self, usemanifestv2=False): | 782 def text(self, usemanifestv2=False): |
761 """Get the full data of this manifest as a bytestring.""" | 783 """Get the full data of this manifest as a bytestring.""" |
762 flags = self.flags | 784 flags = self.flags |
763 return _text(((f, self[f], flags(f)) for f in self.keys()), | 785 return _text(((f, self[f], flags(f)) for f in self.keys()), |