Mercurial > hg
annotate mercurial/similar.py @ 52164:e01e84e5e426
rust-revlog: add a Rust-only `InnerRevlog`
This mirrors the Python `InnerRevlog` and will be used in a future patch
to replace said Python implementation. This allows us to start doing more
things in pure Rust, in particular reading and writing operations.
A lot of changes have to be introduced all at once, it wouldn't be very
useful to separate this patch IMO since all of them are either interlocked
or only useful with the rest.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 10 Oct 2024 10:34:51 +0200 |
parents | f4733654f144 |
children |
rev | line source |
---|---|
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
1 # similar.py - mechanisms for finding similar files |
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
2 # |
46819
d4ba4d51f85f
contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents:
45942
diff
changeset
|
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
4 # |
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
7 |
51863
f4733654f144
typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents:
51703
diff
changeset
|
8 from __future__ import annotations |
27359
a56c47ed3885
similar: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
16683
diff
changeset
|
9 |
a56c47ed3885
similar: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents:
16683
diff
changeset
|
10 from .i18n import _ |
43106
d783f945a701
py3: finish porting iteritems() to pycompat and remove source transformer
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
11 from . import ( |
d783f945a701
py3: finish porting iteritems() to pycompat and remove source transformer
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
12 mdiff, |
d783f945a701
py3: finish porting iteritems() to pycompat and remove source transformer
Gregory Szorc <gregory.szorc@gmail.com>
parents:
43077
diff
changeset
|
13 ) |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
14 |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
15 |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
16 def _findexactmatches(repo, added, removed): |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43106
diff
changeset
|
17 """find renamed files that have no changes |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
18 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
19 Takes a list of new filectxs and a list of removed filectxs, and yields |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
20 (before, after) tuples of exact matches. |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43106
diff
changeset
|
21 """ |
31584
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
22 # Build table of removed files: {hash(fctx.data()): [fctx, ...]}. |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
23 # We use hash() to discard fctx.data() from memory. |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
24 hashes = {} |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
25 progress = repo.ui.makeprogress( |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
26 _(b'searching for exact renames'), |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
27 total=(len(added) + len(removed)), |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
28 unit=_(b'files'), |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
29 ) |
38348
cd196be26cb7
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
32202
diff
changeset
|
30 for fctx in removed: |
cd196be26cb7
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
32202
diff
changeset
|
31 progress.increment() |
31584
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
32 h = hash(fctx.data()) |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
33 if h not in hashes: |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
34 hashes[h] = [fctx] |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
35 else: |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
36 hashes[h].append(fctx) |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
37 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
38 # For each added file, see if it corresponds to a removed file. |
38348
cd196be26cb7
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
32202
diff
changeset
|
39 for fctx in added: |
cd196be26cb7
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
32202
diff
changeset
|
40 progress.increment() |
31210
e1d035905b2e
similar: compare between actual file contents for exact identity
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
30809
diff
changeset
|
41 adata = fctx.data() |
31584
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
42 h = hash(adata) |
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
43 for rfctx in hashes.get(h, []): |
31210
e1d035905b2e
similar: compare between actual file contents for exact identity
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
30809
diff
changeset
|
44 # compare between actual file contents for exact identity |
e1d035905b2e
similar: compare between actual file contents for exact identity
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
30809
diff
changeset
|
45 if adata == rfctx.data(): |
e1d035905b2e
similar: compare between actual file contents for exact identity
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
30809
diff
changeset
|
46 yield (rfctx, fctx) |
31584
985a98c6bad0
similar: use cheaper hash() function to test exact matches
Yuya Nishihara <yuya@tcha.org>
parents:
31583
diff
changeset
|
47 break |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
48 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
49 # Done |
38373
ef692614e601
progress: hide update(None) in a new complete() method
Martin von Zweigbergk <martinvonz@google.com>
parents:
38348
diff
changeset
|
50 progress.complete() |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
51 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
52 |
30805
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
53 def _ctxdata(fctx): |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
54 # lazily load text |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
55 orig = fctx.data() |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
56 return orig, mdiff.splitnewlines(orig) |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
57 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
58 |
30809
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
59 def _score(fctx, otherdata): |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
60 orig, lines = otherdata |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
61 text = fctx.data() |
32202
ded48ad55146
bdiff: proxy through mdiff module
Yuya Nishihara <yuya@tcha.org>
parents:
31584
diff
changeset
|
62 # mdiff.blocks() returns blocks of matching lines |
30805
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
63 # count the number of bytes in each |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
64 equal = 0 |
32202
ded48ad55146
bdiff: proxy through mdiff module
Yuya Nishihara <yuya@tcha.org>
parents:
31584
diff
changeset
|
65 matches = mdiff.blocks(text, orig) |
30805
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
66 for x1, x2, y1, y2 in matches: |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
67 for line in lines[y1:y2]: |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
68 equal += len(line) |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
69 |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
70 lengths = len(text) + len(orig) |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
71 return equal * 2.0 / lengths |
0ae287eb6a4f
similar: move score function to module level
Sean Farley <sean@farley.io>
parents:
30791
diff
changeset
|
72 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
73 |
30809
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
74 def score(fctx1, fctx2): |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
75 return _score(fctx1, _ctxdata(fctx2)) |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
76 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
77 |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
78 def _findsimilarmatches(repo, added, removed, threshold): |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43106
diff
changeset
|
79 """find potentially renamed files based on similar file content |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
80 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
81 Takes a list of new filectxs and a list of removed filectxs, and yields |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
82 (before, after, score) tuples of partial matches. |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43106
diff
changeset
|
83 """ |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
84 copies = {} |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
85 progress = repo.ui.makeprogress( |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
86 _(b'searching for similar files'), unit=_(b'files'), total=len(removed) |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
87 ) |
38395
59c9d3cc810f
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
38373
diff
changeset
|
88 for r in removed: |
59c9d3cc810f
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
38373
diff
changeset
|
89 progress.increment() |
30809
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
90 data = None |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
91 for a in added: |
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
92 bestscore = copies.get(a, (None, threshold))[1] |
30809
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
93 if data is None: |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
94 data = _ctxdata(r) |
8614546154cb
similar: remove caching from the module level
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
30805
diff
changeset
|
95 myscore = _score(a, data) |
31583
2efd9771323e
similar: take the first match instead of the last
Yuya Nishihara <yuya@tcha.org>
parents:
31582
diff
changeset
|
96 if myscore > bestscore: |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
97 copies[a] = (r, myscore) |
38395
59c9d3cc810f
similar: use progress helper
Martin von Zweigbergk <martinvonz@google.com>
parents:
38373
diff
changeset
|
98 progress.complete() |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
99 |
48913
f254fc73d956
global: bulk replace simple pycompat.iteritems(x) with x.items()
Gregory Szorc <gregory.szorc@gmail.com>
parents:
48875
diff
changeset
|
100 for dest, v in copies.items(): |
30791
ada160a8cfd8
similar: rename local variable to not collide with previous
Sean Farley <sean@farley.io>
parents:
29341
diff
changeset
|
101 source, bscore = v |
ada160a8cfd8
similar: rename local variable to not collide with previous
Sean Farley <sean@farley.io>
parents:
29341
diff
changeset
|
102 yield source, dest, bscore |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
103 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
104 |
31582
2e254165a37c
similar: do not look up and create filectx more than once
Yuya Nishihara <yuya@tcha.org>
parents:
31581
diff
changeset
|
105 def _dropempty(fctxs): |
2e254165a37c
similar: do not look up and create filectx more than once
Yuya Nishihara <yuya@tcha.org>
parents:
31581
diff
changeset
|
106 return [x for x in fctxs if x.size() > 0] |
2e254165a37c
similar: do not look up and create filectx more than once
Yuya Nishihara <yuya@tcha.org>
parents:
31581
diff
changeset
|
107 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
108 |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
109 def findrenames(repo, added, removed, threshold): |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
110 '''find renamed files -- yields (before, after, score) tuples''' |
31581
b1528d195a13
similar: use common names for changectx variables
Yuya Nishihara <yuya@tcha.org>
parents:
31580
diff
changeset
|
111 wctx = repo[None] |
b1528d195a13
similar: use common names for changectx variables
Yuya Nishihara <yuya@tcha.org>
parents:
31580
diff
changeset
|
112 pctx = wctx.p1() |
11059
ef4aa90b1e58
Move 'findrenames' code into its own file.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
diff
changeset
|
113 |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
114 # Zero length files will be frequently unrelated to each other, and |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
115 # tracking the deletion/addition of such a file will probably cause more |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
116 # harm than good. We strip them out here to avoid matching them later on. |
31582
2e254165a37c
similar: do not look up and create filectx more than once
Yuya Nishihara <yuya@tcha.org>
parents:
31581
diff
changeset
|
117 addedfiles = _dropempty(wctx[fp] for fp in sorted(added)) |
2e254165a37c
similar: do not look up and create filectx more than once
Yuya Nishihara <yuya@tcha.org>
parents:
31581
diff
changeset
|
118 removedfiles = _dropempty(pctx[fp] for fp in sorted(removed) if fp in pctx) |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
119 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
120 # Find exact matches. |
31580
d3e2af4e0128
similar: get rid of quadratic addedfiles.remove()
Yuya Nishihara <yuya@tcha.org>
parents:
31579
diff
changeset
|
121 matchedfiles = set() |
51703
ca7bde5dbafb
black: format the codebase with 23.3.0
Raphaël Gomès <rgomes@octobus.net>
parents:
51700
diff
changeset
|
122 for a, b in _findexactmatches(repo, addedfiles, removedfiles): |
31580
d3e2af4e0128
similar: get rid of quadratic addedfiles.remove()
Yuya Nishihara <yuya@tcha.org>
parents:
31579
diff
changeset
|
123 matchedfiles.add(b) |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
124 yield (a.path(), b.path(), 1.0) |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
125 |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
126 # If the user requested similar files to be matched, search for them also. |
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
127 if threshold < 1.0: |
31580
d3e2af4e0128
similar: get rid of quadratic addedfiles.remove()
Yuya Nishihara <yuya@tcha.org>
parents:
31579
diff
changeset
|
128 addedfiles = [x for x in addedfiles if x not in matchedfiles] |
51703
ca7bde5dbafb
black: format the codebase with 23.3.0
Raphaël Gomès <rgomes@octobus.net>
parents:
51700
diff
changeset
|
129 for a, b, score in _findsimilarmatches( |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
130 repo, addedfiles, removedfiles, threshold |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
38395
diff
changeset
|
131 ): |
11060
e6df01776e08
findrenames: Optimise "addremove -s100" by matching files by their SHA1 hashes.
David Greenaway <hg-dev@davidgreenaway.com>
parents:
11059
diff
changeset
|
132 yield (a.path(), b.path(), score) |