comparison mercurial/manifest.py @ 25220:f0fbd88b21fb

treemanifest: speed up diff by keeping track of dirty nodes Since tree manifests have a nodeid per directory, we can avoid diffing entire directories if they have the same nodeid. The comparison is only valid for unmodified treemanifest instances, of course, so we need to keep track of which have been modified. Therefore, let's add a dirty flag to treemanifest indicating whether its nodeid can be trusted. We set it when _files or _dirs is modified, and make diff(), and its cousin filesnotin(), not descend into subdirectories that are the same on both sides. On the Mozilla repo, this speeds up 'hg diff -r .^ -r .' from 1.990s to 1.762s. The improvement will be much larger when we start lazily loading subdirectory manifests.
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 26 Feb 2015 08:16:13 -0800
parents 2773540c3650
children eafa06e9edde
comparison
equal deleted inserted replaced
25217:70e822796ac8 25220:f0fbd88b21fb
443 443
444 class treemanifest(object): 444 class treemanifest(object):
445 def __init__(self, dir='', text=''): 445 def __init__(self, dir='', text=''):
446 self._dir = dir 446 self._dir = dir
447 self._node = revlog.nullid 447 self._node = revlog.nullid
448 self._dirty = False
448 self._dirs = {} 449 self._dirs = {}
449 # Using _lazymanifest here is a little slower than plain old dicts 450 # Using _lazymanifest here is a little slower than plain old dicts
450 self._files = {} 451 self._files = {}
451 self._flags = {} 452 self._flags = {}
452 def readsubtree(subdir, subm): 453 if text:
453 raise AssertionError('treemanifest constructor only accepts ' 454 def readsubtree(subdir, subm):
454 'flat manifests') 455 raise AssertionError('treemanifest constructor only accepts '
455 self.parse(text, readsubtree) 456 'flat manifests')
457 self.parse(text, readsubtree)
458 self._dirty = True # Mark flat manifest dirty after parsing
456 459
457 def _subpath(self, path): 460 def _subpath(self, path):
458 return self._dir + path 461 return self._dir + path
459 462
460 def __len__(self): 463 def __len__(self):
466 def _isempty(self): 469 def _isempty(self):
467 return (not self._files and (not self._dirs or 470 return (not self._files and (not self._dirs or
468 all(m._isempty() for m in self._dirs.values()))) 471 all(m._isempty() for m in self._dirs.values())))
469 472
470 def __str__(self): 473 def __str__(self):
471 return ('<treemanifest dir=%s, node=%s>' % 474 return ('<treemanifest dir=%s, node=%s, dirty=%s>' %
472 (self._dir, revlog.hex(self._node))) 475 (self._dir, revlog.hex(self._node), self._dirty))
473 476
474 def dir(self): 477 def dir(self):
475 '''The directory that this tree manifest represents, including a 478 '''The directory that this tree manifest represents, including a
476 trailing '/'. Empty string for the repo root directory.''' 479 trailing '/'. Empty string for the repo root directory.'''
477 return self._dir 480 return self._dir
478 481
479 def node(self): 482 def node(self):
480 '''This node of this instance. nullid for unsaved instances. Should 483 '''This node of this instance. nullid for unsaved instances. Should
481 be updated when the instance is read or written from a revlog. 484 be updated when the instance is read or written from a revlog.
482 ''' 485 '''
486 assert not self._dirty
483 return self._node 487 return self._node
484 488
485 def setnode(self, node): 489 def setnode(self, node):
486 self._node = node 490 self._node = node
491 self._dirty = False
487 492
488 def iteritems(self): 493 def iteritems(self):
489 for p, n in sorted(self._dirs.items() + self._files.items()): 494 for p, n in sorted(self._dirs.items() + self._files.items()):
490 if p in self._files: 495 if p in self._files:
491 yield self._subpath(p), n 496 yield self._subpath(p), n
561 del self._dirs[dir] 566 del self._dirs[dir]
562 else: 567 else:
563 del self._files[f] 568 del self._files[f]
564 if f in self._flags: 569 if f in self._flags:
565 del self._flags[f] 570 del self._flags[f]
571 self._dirty = True
566 572
567 def __setitem__(self, f, n): 573 def __setitem__(self, f, n):
568 assert n is not None 574 assert n is not None
569 dir, subpath = _splittopdir(f) 575 dir, subpath = _splittopdir(f)
570 if dir: 576 if dir:
571 if dir not in self._dirs: 577 if dir not in self._dirs:
572 self._dirs[dir] = treemanifest(self._subpath(dir)) 578 self._dirs[dir] = treemanifest(self._subpath(dir))
573 self._dirs[dir].__setitem__(subpath, n) 579 self._dirs[dir].__setitem__(subpath, n)
574 else: 580 else:
575 self._files[f] = n[:21] # to match manifestdict's behavior 581 self._files[f] = n[:21] # to match manifestdict's behavior
582 self._dirty = True
576 583
577 def setflag(self, f, flags): 584 def setflag(self, f, flags):
578 """Set the flags (symlink, executable) for path f.""" 585 """Set the flags (symlink, executable) for path f."""
579 assert 'd' not in flags 586 assert 'd' not in flags
580 dir, subpath = _splittopdir(f) 587 dir, subpath = _splittopdir(f)
582 if dir not in self._dirs: 589 if dir not in self._dirs:
583 self._dirs[dir] = treemanifest(self._subpath(dir)) 590 self._dirs[dir] = treemanifest(self._subpath(dir))
584 self._dirs[dir].setflag(subpath, flags) 591 self._dirs[dir].setflag(subpath, flags)
585 else: 592 else:
586 self._flags[f] = flags 593 self._flags[f] = flags
594 self._dirty = True
587 595
588 def copy(self): 596 def copy(self):
589 copy = treemanifest(self._dir) 597 copy = treemanifest(self._dir)
590 copy._node = self._node 598 copy._node = self._node
599 copy._dirty = self._dirty
591 for d in self._dirs: 600 for d in self._dirs:
592 copy._dirs[d] = self._dirs[d].copy() 601 copy._dirs[d] = self._dirs[d].copy()
593 copy._files = dict.copy(self._files) 602 copy._files = dict.copy(self._files)
594 copy._flags = dict.copy(self._flags) 603 copy._flags = dict.copy(self._flags)
595 return copy 604 return copy
596 605
597 def filesnotin(self, m2): 606 def filesnotin(self, m2):
598 '''Set of files in this manifest that are not in the other''' 607 '''Set of files in this manifest that are not in the other'''
599 files = set() 608 files = set()
600 def _filesnotin(t1, t2): 609 def _filesnotin(t1, t2):
610 if t1._node == t2._node and not t1._dirty and not t2._dirty:
611 return
601 for d, m1 in t1._dirs.iteritems(): 612 for d, m1 in t1._dirs.iteritems():
602 if d in t2._dirs: 613 if d in t2._dirs:
603 m2 = t2._dirs[d] 614 m2 = t2._dirs[d]
604 _filesnotin(m1, m2) 615 _filesnotin(m1, m2)
605 else: 616 else:
697 for dir, subm in self._dirs.iteritems(): 708 for dir, subm in self._dirs.iteritems():
698 m = subm._matches(match) 709 m = subm._matches(match)
699 if not m._isempty(): 710 if not m._isempty():
700 ret._dirs[dir] = m 711 ret._dirs[dir] = m
701 712
713 if not ret._isempty():
714 ret._dirty = True
702 return ret 715 return ret
703 716
704 def diff(self, m2, clean=False): 717 def diff(self, m2, clean=False):
705 '''Finds changes between the current manifest and m2. 718 '''Finds changes between the current manifest and m2.
706 719
717 string. 730 string.
718 ''' 731 '''
719 result = {} 732 result = {}
720 emptytree = treemanifest() 733 emptytree = treemanifest()
721 def _diff(t1, t2): 734 def _diff(t1, t2):
735 if t1._node == t2._node and not t1._dirty and not t2._dirty:
736 return
722 for d, m1 in t1._dirs.iteritems(): 737 for d, m1 in t1._dirs.iteritems():
723 m2 = t2._dirs.get(d, emptytree) 738 m2 = t2._dirs.get(d, emptytree)
724 _diff(m1, m2) 739 _diff(m1, m2)
725 740
726 for d, m2 in t2._dirs.iteritems(): 741 for d, m2 in t2._dirs.iteritems():
747 def parse(self, text, readsubtree): 762 def parse(self, text, readsubtree):
748 for f, n, fl in _parse(text): 763 for f, n, fl in _parse(text):
749 if fl == 'd': 764 if fl == 'd':
750 f = f + '/' 765 f = f + '/'
751 self._dirs[f] = readsubtree(self._subpath(f), n) 766 self._dirs[f] = readsubtree(self._subpath(f), n)
752 else: 767 elif '/' in f:
753 # Use __setitem__ and setflag rather than assigning directly 768 # This is a flat manifest, so use __setitem__ and setflag rather
754 # to _files and _flags, thereby letting us parse flat manifests 769 # than assigning directly to _files and _flags, so we can
755 # as well as tree manifests. 770 # assign a path in a subdirectory, and to mark dirty (compared
771 # to nullid).
756 self[f] = n 772 self[f] = n
757 if fl: 773 if fl:
758 self.setflag(f, fl) 774 self.setflag(f, fl)
775 else:
776 # Assigning to _files and _flags avoids marking as dirty,
777 # and should be a little faster.
778 self._files[f] = n
779 if fl:
780 self._flags[f] = fl
759 781
760 def text(self, usemanifestv2=False): 782 def text(self, usemanifestv2=False):
761 """Get the full data of this manifest as a bytestring.""" 783 """Get the full data of this manifest as a bytestring."""
762 flags = self.flags 784 flags = self.flags
763 return _text(((f, self[f], flags(f)) for f in self.keys()), 785 return _text(((f, self[f], flags(f)) for f in self.keys()),