Mercurial > hg
view mercurial/similar.py @ 49836:3d7bf111f01e stable
packaging: add dependencies to the PyOxidizer build on macOS
Otherwise, we get a bunch of test failures for missing things like pygments, or
tests skipped entirely. The input file is a copy/paste from the equivalent
Windows file, but with dulwich, pygit2, and pytest-vcr commented out because
the build process errors out with them, flagging them as incompatible with
loading from memory. I have no idea if that's actually true or not, because
I've noticed that if I don't `make clean` after every build, the next build
flags the watchman stuff as incompatible with loading from memory.
The remaining failures are:
Failed test-alias.t: output changed
Failed test-basic.t: output changed
Failed test-check-help.t: output changed
Failed test-commit-interactive.t: output changed
Failed test-extension.t: output changed
Failed test-help.t: output changed
Failed test-i18n.t: output changed
Failed test-log.t: output changed
Failed test-qrecord.t: output changed
Failed test-share-safe.t: output changed
Most of the issues seem related to loading help for disabled extensions from
`hgext.__index__`, namely the full extension help being unavailable, not being
able to resolve what commands are provided by what extension, and not having the
command level help available.
test-log.t, test-commit-interactive.t, and test-i18n.t look like i18n (or lack
thereof) issues.
test-basic.t is just odd:
@@ -55,7 +55,7 @@
On Python 3, stdio may be None:
$ hg debuguiprompt --config ui.interactive=true 0<&-
- abort: Bad file descriptor (no-rhg !)
+ abort: response expected
abort: response expected (rhg !)
[255]
$ hg version -q 0<&-
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Tue, 06 Dec 2022 17:12:59 -0500 |
parents | f254fc73d956 |
children | 493034cc3265 |
line wrap: on
line source
# similar.py - mechanisms for finding similar files
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from . import (
    mdiff,
)


def _findexactmatches(repo, added, removed):
    """find renamed files that have no changes

    Takes a list of new filectxs and a list of removed filectxs, and yields
    (before, after) tuples of exact matches.

    Each added file is reported at most once, paired with the first removed
    file in its hash bucket whose data compares equal. A removed file is
    never taken out of the table, so it may be reported for several added
    files. Progress is reported on repo.ui, one step per file examined.
    """
    # Build table of removed files: {hash(fctx.data()): [fctx, ...]}.
    # We use hash() to discard fctx.data() from memory.
    hashes = {}
    progress = repo.ui.makeprogress(
        _(b'searching for exact renames'),
        total=(len(added) + len(removed)),
        unit=_(b'files'),
    )
    for fctx in removed:
        progress.increment()
        hashes.setdefault(hash(fctx.data()), []).append(fctx)

    # For each added file, see if it corresponds to a removed file.
    for fctx in added:
        progress.increment()
        adata = fctx.data()
        for rfctx in hashes.get(hash(adata), []):
            # Equal hashes do not prove equal contents, so compare the
            # actual file data before declaring an exact match.
            if adata == rfctx.data():
                yield (rfctx, fctx)
                break

    # Done
    progress.complete()


def _ctxdata(fctx):
    """return (data, lines) for a filectx

    ``data`` is fctx.data() and ``lines`` is that same data passed through
    mdiff.splitnewlines(). The pair is the ``otherdata`` argument expected
    by _score(), so one file can be loaded once and scored many times.
    """
    orig = fctx.data()
    return orig, mdiff.splitnewlines(orig)


def _score(fctx, otherdata):
    """return the similarity of fctx's data to a pre-loaded file

    ``otherdata`` is an (orig, lines) pair as built by _ctxdata().

    The result is 2.0 * equal / (len(text) + len(orig)), where ``equal`` is
    the number of bytes of ``orig`` that fall in lines reported as matching
    by mdiff.blocks(). Two empty contents score 1.0 instead of dividing by
    zero.
    """
    orig, lines = otherdata
    text = fctx.data()

    lengths = len(text) + len(orig)
    if not lengths:
        # Both sides are empty: they are trivially identical, and the ratio
        # below would otherwise raise ZeroDivisionError.
        return 1.0

    # mdiff.blocks() returns blocks of matching lines
    # count the number of bytes in each
    equal = 0
    # Only the second pair of indexes is used; it is applied to ``lines``,
    # i.e. the split form of ``orig`` (the second argument to blocks()).
    for _x1, _x2, y1, y2 in mdiff.blocks(text, orig):
        for line in lines[y1:y2]:
            equal += len(line)

    return equal * 2.0 / lengths


def score(fctx1, fctx2):
    """return the similarity score of two filectxs as a float

    Convenience wrapper that loads fctx2 with _ctxdata() and scores fctx1
    against it with _score().
    """
    return _score(fctx1, _ctxdata(fctx2))


def _findsimilarmatches(repo, added, removed, threshold):
    """find potentially renamed files based on similar file content

    Takes a list of new filectxs and a list of removed filectxs, and yields
    (before, after, score) tuples of partial matches.

    For every added file only the best-scoring removed file is kept, and
    only if its score is strictly greater than ``threshold``. Results are
    yielded after all removed files have been examined.
    """
    # {added fctx: (best removed fctx so far, its score)}
    copies = {}
    progress = repo.ui.makeprogress(
        _(b'searching for similar files'), unit=_(b'files'), total=len(removed)
    )
    for r in removed:
        progress.increment()
        # Loaded on first use so that nothing is read when ``added`` is
        # empty, and reused for every added file after that.
        data = None
        for a in added:
            # Until ``a`` has a candidate, the score to beat is the
            # threshold itself.
            bestscore = copies.get(a, (None, threshold))[1]
            if data is None:
                data = _ctxdata(r)
            myscore = _score(a, data)
            if myscore > bestscore:
                copies[a] = (r, myscore)
    progress.complete()

    for dest, (source, bscore) in copies.items():
        yield source, dest, bscore


def _dropempty(fctxs):
    """return a list of the given filectxs whose size() is greater than 0"""
    return [x for x in fctxs if x.size() > 0]


def findrenames(repo, added, removed, threshold):
    """find renamed files -- yields (before, after, score) tuples

    ``added`` and ``removed`` are iterables of file names. Added names are
    looked up in repo[None] and removed names in its first parent; removed
    names that are absent from that parent are ignored.

    Exact matches are yielded first with a score of 1.0. When ``threshold``
    is below 1.0, the added files that had no exact match are then compared
    for partial similarity, and matches scoring strictly above
    ``threshold`` are yielded as well.
    """
    wctx = repo[None]
    pctx = wctx.p1()

    # Zero length files will be frequently unrelated to each other, and
    # tracking the deletion/addition of such a file will probably cause more
    # harm than good. We strip them out here to avoid matching them later on.
    addedfiles = _dropempty(wctx[fp] for fp in sorted(added))
    removedfiles = _dropempty(pctx[fp] for fp in sorted(removed) if fp in pctx)

    # Find exact matches.
    matchedfiles = set()
    for before, after in _findexactmatches(repo, addedfiles, removedfiles):
        matchedfiles.add(after)
        yield (before.path(), after.path(), 1.0)

    # If the user requested similar files to be matched, search for them also.
    if threshold < 1.0:
        addedfiles = [x for x in addedfiles if x not in matchedfiles]
        # The loop variable is deliberately not called ``score`` so that it
        # does not shadow the module-level score() function.
        for before, after, similarity in _findsimilarmatches(
            repo, addedfiles, removedfiles, threshold
        ):
            yield (before.path(), after.path(), similarity)