# HG changeset patch
# User Siddharth Agarwal
# Date 1365132149 25200
# Node ID 3cfaace0441e936826a4a0d4ae12d545f11a903d
# Parent  2e9fe9e2671fee0a564ca77e2f3656248163cb51
copies._forwardcopies: use set operations to find missing files

This is a performance win for a number of reasons:
- We don't iterate over contexts, which avoids a completely unnecessary sorted
  call + the O(number of files) abstraction cost of doing that.
- We don't check membership in a context, which avoids another O(number of
  files) abstraction cost.
- We iterate over the manifests in C instead of Python.

For a large repo with 170,000 files, this improves perfpathcopies from 0.34
seconds to 0.07. Anything that uses pathcopies, such as rebase or diff --git
between two revisions, benefits.

diff -r 2e9fe9e2671f -r 3cfaace0441e mercurial/copies.py
--- a/mercurial/copies.py	Thu Apr 04 20:36:46 2013 -0700
+++ b/mercurial/copies.py	Thu Apr 04 20:22:29 2013 -0700
@@ -133,11 +133,13 @@
     # we currently don't try to find where old files went, too expensive
     # this means we can miss a case like 'hg rm b; hg cp a b'
     cm = {}
-    for f in b:
-        if f not in a:
-            ofctx = _tracefile(b[f], a)
-            if ofctx:
-                cm[f] = ofctx.path()
+    missing = set(b.manifest().iterkeys())
+    missing.difference_update(a.manifest().iterkeys())
+
+    for f in missing:
+        ofctx = _tracefile(b[f], a)
+        if ofctx:
+            cm[f] = ofctx.path()
 
     # combine copies from dirstate if necessary
     if w is not None: