Mercurial > hg
view hgext/automv.py @ 39270:37e56607cbb9
lfs: add a progress bar when searching for blobs to upload
The search itself can take an extreme amount of time if there are a lot of
revisions involved. I've got a local repo that took 6 minutes to push 1850
commits, and 60% of that time was spent here (there are ~70K files):
\ 58.1% wrapper.py: extractpointers line 297: pointers = extractpointers(...
| 57.7% wrapper.py: pointersfromctx line 352: for p in pointersfromctx(ct...
| 57.4% wrapper.py: pointerfromctx line 397: p = pointerfromctx(ctx, f, ...
\ 38.7% context.py: __contains__ line 368: if f not in ctx:
| 38.7% util.py: __get__ line 82: return key in self._manifest
| 38.7% context.py: _manifest line 1416: result = self.func(obj)
| 38.7% manifest.py: read line 472: return self._manifestctx.re...
\ 25.6% revlog.py: revision line 1562: text = rl.revision(self._node)
\ 12.8% revlog.py: _chunks line 2217: bins = self._chunks(chain, ...
| 12.0% revlog.py: decompressline 2112: ladd(decomp(buffer(data, ch...
\ 7.8% revlog.py: checkhash line 2232: self.checkhash(text, node, ...
| 7.8% revlog.py: hash line 2315: if node != self.hash(text, ...
| 7.8% revlog.py: hash line 2242: return hash(text, p1, p2)
\ 12.0% manifest.py: __init__ line 1565: self._data = manifestdict(t...
\ 16.8% context.py: filenode line 378: if not _islfs(fctx.filelog(...
| 15.7% util.py: __get__ line 706: return self._filelog
| 14.8% context.py: _filelog line 1416: result = self.func(obj)
| 14.8% localrepo.py: file line 629: return self._repo.file(self...
| 14.8% filelog.py: __init__ line 1134: return filelog.filelog(self...
| 14.5% revlog.py: __init__ line 24: censorable=True)
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Fri, 24 Aug 2018 17:45:46 -0400 |
parents | 38637dd39cfd |
children | f89aad980025 |
line wrap: on
line source
# automv.py # # Copyright 2013-2016 Facebook, Inc. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. """check for unrecorded moves at commit time (EXPERIMENTAL) This extension checks at commit/amend time if any of the committed files comes from an unrecorded mv. The threshold at which a file is considered a move can be set with the ``automv.similarity`` config option. This option takes a percentage between 0 (disabled) and 100 (files must be identical), the default is 95. """ # Using 95 as a default similarity is based on an analysis of the mercurial # repositories of the cpython, mozilla-central & mercurial repositories, as # well as 2 very large facebook repositories. At 95 50% of all potential # missed moves would be caught, as well as correspond with 87% of all # explicitly marked moves. Together, 80% of moved files are 95% similar or # more. # # See http://markmail.org/thread/5pxnljesvufvom57 for context. from __future__ import absolute_import from mercurial.i18n import _ from mercurial import ( commands, copies, error, extensions, pycompat, registrar, scmutil, similar ) configtable = {} configitem = registrar.configitem(configtable) configitem('automv', 'similarity', default=95, ) def extsetup(ui): entry = extensions.wrapcommand( commands.table, 'commit', mvcheck) entry[1].append( ('', 'no-automv', None, _('disable automatic file move detection'))) def mvcheck(orig, ui, repo, *pats, **opts): """Hook to check for moves at commit time""" opts = pycompat.byteskwargs(opts) renames = None disabled = opts.pop('no_automv', False) if not disabled: threshold = ui.configint('automv', 'similarity') if not 0 <= threshold <= 100: raise error.Abort(_('automv.similarity must be between 0 and 100')) if threshold > 0: match = scmutil.match(repo[None], pats, opts) added, removed = _interestingfiles(repo, match) renames = _findrenames(repo, match, added, removed, threshold / 100.0) with repo.wlock(): if renames is not None: scmutil._markchanges(repo, (), (), renames) return orig(ui, repo, *pats, **pycompat.strkwargs(opts)) def _interestingfiles(repo, matcher): """Find what files were added or removed in this commit. Returns a tuple of two lists: (added, removed). Only files not *already* marked as moved are included in the added list. """ stat = repo.status(match=matcher) added = stat[1] removed = stat[2] copy = copies._forwardcopies(repo['.'], repo[None], matcher) # remove the copy files for which we already have copy info added = [f for f in added if f not in copy] return added, removed def _findrenames(repo, matcher, added, removed, similarity): """Find what files in added are really moved files. Any file named in removed that is at least similarity% similar to a file in added is seen as a rename. """ renames = {} if similarity > 0: for src, dst, score in similar.findrenames( repo, added, removed, similarity): if repo.ui.verbose: repo.ui.status( _('detected move of %s as %s (%d%% similar)\n') % ( matcher.rel(src), matcher.rel(dst), score * 100)) renames[dst] = src if renames: repo.ui.status(_('detected move of %d files\n') % len(renames)) return renames