revset: inline parents computation to reuse the input argument
Before this change, using `heads(xxx)` would compute `xxx` multiple times: once
to select the possible candidates, and once to compute the parent set.
The code used to compute parents is a direct copy-paste from the `parents`
revset. We expect to replace it quickly in a later changeset, so we did not
bother with extracting a function.
In cases where the input set is expensive to compute, this provides a
significant performance boost.
(output is from contrib/revsetbenchmarks.py)
revset: heads(matching(tip, "author"))
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 15.06746 14.92766 7.335694 15.03092 7.635580 15.04133 7.454806 15.27565 14.97796 14.87607 7.480900
1) 7.529300 49% 7.592152 50% 7.480548 7.544528 50% 7.421248 7.522279 50% 7.484876 7.613154 49% 7.599553 50% 7.561410 50% 7.508990
In other cases, with a faster input set, we still see a (smaller) performance
boost.
revset: heads(all())
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.038994 0.035981 0.033345 0.035751 0.033569 0.039833 0.033653 0.035428 0.039483 0.035750 0.033657
1) 0.036359 93% 0.032613 90% 0.031479 94% 0.032790 91% 0.030681 91% 0.036456 91% 0.031128 92% 0.032461 91% 0.036276 91% 0.032721 91% 0.031024 92%
revset: heads(-10000:-1)
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.004184 0.003576 0.003593 0.003628 0.003569 0.004277 0.003590 0.003719 0.004194 0.003659 0.003690
1) 0.003850 92% 0.003267 91% 0.003256 90% 0.003261 89% 0.003204 89% 0.003855 90% 0.003294 91% 0.003164 85% 0.003848 91% 0.003302 90% 0.003296 89%
revset: (-5000:-1000) and heads(-10000:-1)
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 0.004730 0.003429 0.003359 0.003391 0.003369 0.004787 0.003418 0.003469 0.004772 0.003445 0.003454
1) 0.004277 90% 0.003430 0.003423 0.003353 0.003340 0.004250 88% 0.003387 0.003385 0.004325 90% 0.003413 0.003373
revset: heads(matching(tip, "author")) and -10000:-1
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 8.250275 8.231453 7.508579 8.230028 7.529777 8.358590 7.531636 8.301830 8.137196 8.421402 7.540355
1) 7.474707 90% 7.587345 92% 7.486192 7.548340 91% 7.485288 7.659108 91% 7.485307 7.628890 91% 7.523479 92% 7.558384 89% 7.467524
revset: (-10000:-1) and heads(matching(tip, "author"))
plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast
0) 8.341504 8.315248 7.489414 8.320746 7.548816 8.244137 7.514663 8.281701 8.218862 8.412644 7.456793
1) 7.553704 90% 7.570679 91% 7.391438 7.724237 92% 7.527400 7.570637 91% 7.580622 7.450912 89% 7.556154 91% 7.514726 89% 7.494328
# automv.py
#
# Copyright 2013-2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""check for unrecorded moves at commit time (EXPERIMENTAL)
This extension checks at commit/amend time if any of the committed files
comes from an unrecorded mv.
The threshold at which a file is considered a move can be set with the
``automv.similarity`` config option. This option takes a percentage between 0
(disabled) and 100 (files must be identical), the default is 95.
"""
# The default similarity of 95 is based on an analysis of the Mercurial
# repositories of the cpython, mozilla-central & mercurial projects, as
# well as 2 very large Facebook repositories. At 95, 50% of all potentially
# missed moves would be caught, and the threshold corresponds with 87% of
# all explicitly marked moves. Together, 80% of moved files are 95% similar
# or more.
#
# See http://markmail.org/thread/5pxnljesvufvom57 for context.
from __future__ import absolute_import
from mercurial.i18n import _
from mercurial import (
commands,
copies,
error,
extensions,
pycompat,
registrar,
scmutil,
similar
)
# Table handed to registrar.configitem(); the returned callable is used
# below to declare this extension's config option.
configtable = {}
configitem = registrar.configitem(configtable)

# ``automv.similarity`` defaults to 95 when the user does not set it.
configitem('automv', 'similarity', default=95)
def extsetup(ui):
    """Wrap the ``commit`` command with ``mvcheck`` and add a flag to it.

    The entry returned by ``extensions.wrapcommand`` gets an extra
    ``no-automv`` option tuple appended to its element at index 1.
    """
    wrapped = extensions.wrapcommand(commands.table, 'commit', mvcheck)
    flag = ('', 'no-automv', None, _('disable automatic file move detection'))
    wrapped[1].append(flag)
def mvcheck(orig, ui, repo, *pats, **opts):
    """Wrapper around ``orig`` that marks detected moves before calling it.

    The ``no_automv`` option is removed from ``opts``; when it is not set,
    the ``automv.similarity`` config value is read and validated, and a
    positive value triggers a search for renames among the added and
    removed files selected by ``pats``/``opts``.

    Raises error.Abort when ``automv.similarity`` is outside 0..100.
    Returns whatever ``orig`` returns.
    """
    opts = pycompat.byteskwargs(opts)
    moves = None
    if not opts.pop('no_automv', False):
        percent = ui.configint('automv', 'similarity')
        if percent < 0 or percent > 100:
            raise error.Abort(_('automv.similarity must be between 0 and 100'))
        if percent > 0:
            matcher = scmutil.match(repo[None], pats, opts)
            newfiles, gonefiles = _interestingfiles(repo, matcher)
            # _findrenames takes a ratio, the config value is a percentage
            moves = _findrenames(repo, matcher, newfiles, gonefiles,
                                 percent / 100.0)

    # marking the renames and running the wrapped command share one wlock
    with repo.wlock():
        if moves is not None:
            scmutil._markchanges(repo, (), (), moves)
        return orig(ui, repo, *pats, **pycompat.strkwargs(opts))
def _interestingfiles(repo, matcher):
    """Return ``(added, removed)`` for the files selected by ``matcher``.

    Both lists come from ``repo.status(match=matcher)`` (entries 1 and 2).
    Added files that already appear in the copy information between ``.``
    and the working context are left out of the added list.
    """
    status = repo.status(match=matcher)
    known = copies._forwardcopies(repo['.'], repo[None], matcher)
    # skip added files for which copy info is already recorded
    candidates = [path for path in status[1] if path not in known]
    return candidates, status[2]
def _findrenames(repo, matcher, added, removed, similarity):
"""Find what files in added are really moved files.
Any file named in removed that is at least similarity% similar to a file
in added is seen as a rename.
"""
renames = {}
if similarity > 0:
for src, dst, score in similar.findrenames(
repo, added, removed, similarity):
if repo.ui.verbose:
repo.ui.status(
_('detected move of %s as %s (%d%% similar)\n') % (
matcher.rel(src), matcher.rel(dst), score * 100))
renames[dst] = src
if renames:
repo.ui.status(_('detected move of %d files\n') % len(renames))
return renames