Mercurial > hg
annotate contrib/undumprevlog @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 6000f5b25c9b |
children |
rev | line source |
---|---|
45830
c102b704edb5
global: use python3 in shebangs
Gregory Szorc <gregory.szorc@gmail.com>
parents:
45055
diff
changeset
|
1 #!/usr/bin/env python3 |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
2 # Undump a dump from dumprevlog |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
3 # $ hg init |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
4 # $ undumprevlog < repo.dump |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
5 |
29167
4f76c0c490b3
py3: make contrib/undumprevlog use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
23310
diff
changeset
|
6 |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
7 import sys |
46113
59fa3890d40a
node: import symbols explicitly
Joerg Sonnenberger <joerg@bec.de>
parents:
45830
diff
changeset
|
8 from mercurial.node import bin |
29167
4f76c0c490b3
py3: make contrib/undumprevlog use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
23310
diff
changeset
|
9 from mercurial import ( |
39947
a063b84ce064
py3: byteify contrib/dumprevlog
Matt Harbison <matt_harbison@yahoo.com>
parents:
37120
diff
changeset
|
10 encoding, |
29167
4f76c0c490b3
py3: make contrib/undumprevlog use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
23310
diff
changeset
|
11 revlog, |
4f76c0c490b3
py3: make contrib/undumprevlog use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
23310
diff
changeset
|
12 transaction, |
31248
8d3e8c8c9049
vfs: use 'vfs' module directly in 'contrib/undumprevlog'
Pierre-Yves David <pierre-yves.david@ens-lyon.org>
parents:
31216
diff
changeset
|
13 vfs as vfsmod, |
29167
4f76c0c490b3
py3: make contrib/undumprevlog use absolute_import
Pulkit Goyal <7895pulkit@gmail.com>
parents:
23310
diff
changeset
|
14 ) |
43659
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
15 from mercurial.utils import procutil |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
16 |
47072
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
17 from mercurial.revlogutils import ( |
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
18 constants as revlog_constants, |
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
19 ) |
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
20 |
6466
9c426da6b03b
contrib: fix binary file issues with dumprevlog on Windows
Adrian Buehlmann <adrian@cadifra.com>
parents:
6433
diff
changeset
|
21 for fp in (sys.stdin, sys.stdout, sys.stderr): |
37120
a8a902d7176e
procutil: bulk-replace function calls to point to new module
Yuya Nishihara <yuya@tcha.org>
parents:
33872
diff
changeset
|
22 procutil.setbinary(fp) |
6466
9c426da6b03b
contrib: fix binary file issues with dumprevlog on Windows
Adrian Buehlmann <adrian@cadifra.com>
parents:
6433
diff
changeset
|
23 |
39947
a063b84ce064
py3: byteify contrib/dumprevlog
Matt Harbison <matt_harbison@yahoo.com>
parents:
37120
diff
changeset
|
24 opener = vfsmod.vfs(b'.', False) |
43659
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
25 tr = transaction.transaction( |
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
26 sys.stderr.write, opener, {b'store': opener}, b"undump.journal" |
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
27 ) |
19022
cba222f01056
tests: run check-code on Python files without .py extension
Mads Kiilerich <madski@unity3d.com>
parents:
14233
diff
changeset
|
28 while True: |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
29 l = sys.stdin.readline() |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
30 if not l: |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
31 break |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
32 if l.startswith("file:"): |
39947
a063b84ce064
py3: byteify contrib/dumprevlog
Matt Harbison <matt_harbison@yahoo.com>
parents:
37120
diff
changeset
|
33 f = encoding.strtolocal(l[6:-1]) |
47150
8d3c2f9d4af7
revlog: use a "radix" to address revlog
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
47072
diff
changeset
|
34 assert f.endswith(b'.i') |
47072
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
35 r = revlog.revlog( |
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
36 opener, |
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
37 target=(revlog_constants.KIND_OTHER, b'undump-revlog'), |
47150
8d3c2f9d4af7
revlog: use a "radix" to address revlog
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
47072
diff
changeset
|
38 radix=f[:-2], |
47072
4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Pierre-Yves David <pierre-yves.david@octobus.net>
parents:
46113
diff
changeset
|
39 ) |
45055
4c1b4805db57
pycompat: change users of pycompat.{stdin,stdout,stderr} to use procutil.std*
Manuel Jacob <me@manueljacob.de>
parents:
43659
diff
changeset
|
40 procutil.stdout.write(b'%s\n' % f) |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
41 elif l.startswith("node:"): |
46113
59fa3890d40a
node: import symbols explicitly
Joerg Sonnenberger <joerg@bec.de>
parents:
45830
diff
changeset
|
42 n = bin(l[6:-1]) |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
43 elif l.startswith("linkrev:"): |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
44 lr = int(l[9:-1]) |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
45 elif l.startswith("parents:"): |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
46 p = l[9:-1].split() |
46113
59fa3890d40a
node: import symbols explicitly
Joerg Sonnenberger <joerg@bec.de>
parents:
45830
diff
changeset
|
47 p1 = bin(p[0]) |
59fa3890d40a
node: import symbols explicitly
Joerg Sonnenberger <joerg@bec.de>
parents:
45830
diff
changeset
|
48 p2 = bin(p[1]) |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
49 elif l.startswith("length:"): |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
50 length = int(l[8:-1]) |
43659
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
51 sys.stdin.readline() # start marker |
39947
a063b84ce064
py3: byteify contrib/dumprevlog
Matt Harbison <matt_harbison@yahoo.com>
parents:
37120
diff
changeset
|
52 d = encoding.strtolocal(sys.stdin.read(length)) |
43659
99e231afc29c
black: blacken scripts
Gregory Szorc <gregory.szorc@gmail.com>
parents:
39947
diff
changeset
|
53 sys.stdin.readline() # end marker |
6433
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
54 r.addrevision(d, tr, lr, p1, p2) |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
55 |
ec5d77eb3431
add simple dump and undump scripts to contrib/
Matt Mackall <mpm@selenic.com>
parents:
diff
changeset
|
56 tr.close() |