copies: do full filtering at end of _changesetforwardcopies()
As mentioned earlier, pathcopies() is very slow when copies are stored
in the changeset. Most of the cost comes from calling _chain() for
every changeset, which is slow because it needs to read manifests. It
needs to read manifests to be able to filter out copies that were
created in one commit and then deleted. (It also filters out copies
that were created from a file that didn't exist in the starting
revision, but that's a fixed revision across calls to _chain(), so
it's much cheaper.)
This patch changes from _chainandfilter() to just _chain() in the main
loop in _changesetforwardcopies(). It instead removes copies that have
subsequently been removed by using ctx.filesremoved(). We thus rely on
that to be fast.
I timed this command in mozilla-unified:
hg debugpathcopies FIREFOX_59_0b3_BUILD2 FIREFOX_BETA_59_END
It took 18s before and 1.1s after. It's still faster when copy
information is stored in filelogs: 0.70s. It also still gets slow when
there are merge commits involved, because we read manifests there
too. We'll deal with that later.
Differential Revision: https://phab.mercurial-scm.org/D6419
# coding=UTF-8
from __future__ import absolute_import
import base64
import zlib
from mercurial import (
changegroup,
exchange,
extensions,
revlog,
util,
)
# Test only: the flags below exist purely to exercise the flag processor
# machinery from the test suite. The canonical way to introduce a real flag
# is to get in touch with the community and have it made known in revlog.
REVIDX_NOOP = 1 << 3    # set for texts containing b'[NOOP]'
REVIDX_BASE64 = 1 << 2  # set for texts containing b'[BASE64]'
REVIDX_GZIP = 1 << 1    # set for texts containing b'[GZIP]'
REVIDX_FAIL = 1         # set for texts containing b'[FAIL]'; no processor
def validatehash(self, text):
    """Unconditionally return ``True``; ``text`` is not inspected.

    Registered as the third member of the REVIDX_NOOP processor tuple.
    """
    return True
def bypass(self, text):
    """Unconditionally return ``False``; ``text`` is not inspected.

    Registered as the third member of the REVIDX_BASE64 and REVIDX_GZIP
    processor tuples.
    """
    return False
def noopdonothing(self, text):
    """Hand ``text`` back untouched, paired with ``True``.

    Used for both transform slots of the REVIDX_NOOP processor.
    """
    unchanged = text
    return unchanged, True
def b64encode(self, text):
    """Return ``(base64 encoding of text, False)``.

    NOTE(review): the boolean is presumably the "validate hash" indicator
    consumed by revlog -- confirm against revlog.addflagprocessor.
    """
    encoded = base64.b64encode(text)
    return encoded, False
def b64decode(self, text):
    """Return ``(base64 decoding of text, True)``.

    NOTE(review): the boolean is presumably the "validate hash" indicator
    consumed by revlog -- confirm against revlog.addflagprocessor.
    """
    decoded = base64.b64decode(text)
    return decoded, True
def gzipcompress(self, text):
    """Return ``(zlib-compressed text, False)``.

    NOTE(review): the boolean is presumably the "validate hash" indicator
    consumed by revlog -- confirm against revlog.addflagprocessor.
    """
    compressed = zlib.compress(text)
    return compressed, False
def gzipdecompress(self, text):
    """Return ``(zlib-decompressed text, True)``.

    NOTE(review): the boolean is presumably the "validate hash" indicator
    consumed by revlog -- confirm against revlog.addflagprocessor.
    """
    inflated = zlib.decompress(text)
    return inflated, True
def supportedoutgoingversions(orig, repo):
    """Wrapper forcing the outgoing changegroup version set onto b'03'.

    Calls ``orig(repo)``, removes b'01' and b'02' from the returned
    collection if present, adds b'03', and returns that same (mutated)
    collection.
    """
    supported = orig(repo)
    for legacy in (b'01', b'02'):
        supported.discard(legacy)
    supported.add(b'03')
    return supported
def allsupportedversions(orig, ui):
    """Wrapper adding b'03' to whatever ``orig(ui)`` reports.

    The collection returned by ``orig`` is mutated in place and returned.
    """
    known = orig(ui)
    known.add(b'03')
    return known
def makewrappedfile(obj):
    """Swap ``obj``'s class for a subclass whose addrevision() sets test flags.

    The subclass derives from ``obj.__class__`` and is installed by
    assigning to ``obj.__class__``; nothing is returned.
    """
    class wrappedfile(obj.__class__):
        def addrevision(self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS):
            # Each marker found in the revision text turns on its flag.
            markerflags = (
                (b'[NOOP]', REVIDX_NOOP),
                (b'[BASE64]', REVIDX_BASE64),
                (b'[GZIP]', REVIDX_GZIP),
                # This flag deliberately has no transforms registered for
                # it, ensuring we handle that error case.
                (b'[FAIL]', REVIDX_FAIL),
            )
            for marker, flag in markerflags:
                if marker in text:
                    flags |= flag

            parent = super(wrappedfile, self)
            return parent.addrevision(text, transaction, link, p1, p2,
                                      cachedelta=cachedelta,
                                      node=node,
                                      flags=flags)

    obj.__class__ = wrappedfile
def reposetup(ui, repo):
    """Replace ``repo``'s class so every object from file() gets wrapped.

    The new class subclasses ``repo.__class__`` and overrides file() to
    pass the parent's result through makewrappedfile() before returning it.
    ``ui`` is not used.
    """
    basecls = repo.__class__

    class wrappingflagprocessorrepo(basecls):
        def file(self, f):
            filelog = super(wrappingflagprocessorrepo, self).file(f)
            # makewrappedfile() mutates filelog.__class__ in place.
            makewrappedfile(filelog)
            return filelog

    repo.__class__ = wrappingflagprocessorrepo
def extsetup(ui):
    """Install the changegroup wrappers, test flags and flag processors.

    ``ui`` is not used. All effects are module-level mutations of
    changegroup, revlog and exchange.
    """
    # Enable changegroup3 for flags to be sent over the wire
    wrappers = (
        ('supportedoutgoingversions', supportedoutgoingversions),
        ('allsupportedversions', allsupportedversions),
    )
    for funcname, wrapper in wrappers:
        extensions.wrapfunction(changegroup, funcname, wrapper)

    # Teach revlog about our test flags
    testflags = [REVIDX_NOOP, REVIDX_BASE64, REVIDX_GZIP, REVIDX_FAIL]
    revlog.REVIDX_KNOWN_FLAGS |= util.bitsfrom(testflags)
    revlog.REVIDX_FLAGS_ORDER.extend(testflags)

    # Teach exchange to use changegroup 3
    for contentopts in exchange._bundlespeccontentopts.values():
        contentopts[b"cg.version"] = b"03"

    # Register flag processors for each extension. REVIDX_FAIL is left out
    # on purpose so that it has no processor.
    # NOTE(review): tuple order is presumably (read, write, raw) -- confirm
    # against revlog.addflagprocessor.
    processors = (
        (REVIDX_NOOP, (noopdonothing, noopdonothing, validatehash)),
        (REVIDX_BASE64, (b64decode, b64encode, bypass)),
        (REVIDX_GZIP, (gzipdecompress, gzipcompress, bypass)),
    )
    for flag, processor in processors:
        revlog.addflagprocessor(flag, processor)