Mercurial > hg
view tests/autodiff.py @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 6000f5b25c9b |
children |
line wrap: on
line source
# Extension dedicated to test patch.diff() upgrade modes from mercurial import ( error, logcmdutil, patch, pycompat, registrar, scmutil, ) cmdtable = {} command = registrar.command(cmdtable) @command( b'autodiff', [(b'', b'git', b'', b'git upgrade mode (yes/no/auto/warn/abort)')], b'[OPTION]... [FILE]...', ) def autodiff(ui, repo, *pats, **opts): opts = pycompat.byteskwargs(opts) diffopts = patch.difffeatureopts(ui, opts) git = opts.get(b'git', b'no') brokenfiles = set() losedatafn = None if git in (b'yes', b'no'): diffopts.git = git == b'yes' diffopts.upgrade = False elif git == b'auto': diffopts.git = False diffopts.upgrade = True elif git == b'warn': diffopts.git = False diffopts.upgrade = True def losedatafn(fn=None, **kwargs): brokenfiles.add(fn) return True elif git == b'abort': diffopts.git = False diffopts.upgrade = True def losedatafn(fn=None, **kwargs): raise error.Abort(b'losing data for %s' % fn) else: raise error.Abort(b'--git must be yes, no or auto') ctx1, ctx2 = logcmdutil.revpair(repo, []) m = scmutil.match(ctx2, pats, opts) it = patch.diff( repo, ctx1.node(), ctx2.node(), match=m, opts=diffopts, losedatafn=losedatafn, ) for chunk in it: ui.write(chunk) for fn in sorted(brokenfiles): ui.write((b'data lost for: %s\n' % fn))