Mercurial > hg
annotate mercurial/filelog.py @ 16955:92e1c64ba0d4
parsers: add a C function to pack the dirstate
This is about 9 times faster than the Python dirstate packing code.
The relatively small speedup is due to the poor locality and memory
access patterns caused by traversing dicts and other boxed Python
values.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Wed, 30 May 2012 12:55:33 -0700 |
parents | 7c231754a621 |
children | 3bda242bf244 |
rev | line source |
---|---|
1089 | 1 # filelog.py - file history class for mercurial |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
2 # |
4635
63b9d2deed48
Updated copyright notices and add "and others" to "hg version"
Thomas Arendsen Hein <thomas@intevation.de>
parents:
4258
diff
changeset
|
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
4 # |
8225
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7634
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
10263 | 6 # GNU General Public License version 2 or any later version. |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
7 |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
8 import revlog |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
9 import re |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
10 |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
11 _mdre = re.compile('\1\n') |
13240
e5060aa22043
filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents:
11541
diff
changeset
|
12 def _parsemeta(text): |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
13 """return (metadatadict, keylist, metadatasize)""" |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
14 # text can be buffer, so we can't use .startswith or .index |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
15 if text[:2] != '\1\n': |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
16 return None, None, None |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
17 s = _mdre.search(text, 2).start() |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
18 mtext = text[2:s] |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
19 meta = {} |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
20 keys = [] |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
21 for l in mtext.splitlines(): |
13240
e5060aa22043
filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents:
11541
diff
changeset
|
22 k, v = l.split(": ", 1) |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
23 meta[k] = v |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
24 keys.append(k) |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
25 return meta, keys, (s + 2) |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
26 |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
27 def _packmeta(meta, keys=None): |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
28 if not keys: |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
29 keys = sorted(meta.iterkeys()) |
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
30 return "".join("%s: %s\n" % (k, meta[k]) for k in keys) |
13240
e5060aa22043
filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents:
11541
diff
changeset
|
31 |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
32 class filelog(revlog.revlog): |
4258
b11a2fb59cf5
revlog: simplify revlog version handling
Matt Mackall <mpm@selenic.com>
parents:
4257
diff
changeset
|
33 def __init__(self, opener, path): |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
34 revlog.revlog.__init__(self, opener, |
8531
810387f59696
filelog encoding: move the encoding/decoding into store
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
8225
diff
changeset
|
35 "/".join(("data", path + ".i"))) |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
36 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
37 def read(self, node): |
360 | 38 t = self.revision(node) |
686
d7d68d27ebe5
Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents:
681
diff
changeset
|
39 if not t.startswith('\1\n'): |
360 | 40 return t |
2579
0875cda033fd
use __contains__, index or split instead of str.find
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
2470
diff
changeset
|
41 s = t.index('\1\n', 2) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
42 return t[s + 2:] |
360 | 43 |
44 def add(self, text, meta, transaction, link, p1=None, p2=None): | |
686
d7d68d27ebe5
Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents:
681
diff
changeset
|
45 if meta or text.startswith('\1\n'): |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
46 text = "\1\n%s\1\n%s" % (_packmeta(meta), text) |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
47 return self.addrevision(text, transaction, link, p1, p2) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
48 |
1116 | 49 def renamed(self, node): |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
50 if self.parents(node)[0] != revlog.nullid: |
1116 | 51 return False |
13240
e5060aa22043
filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents:
11541
diff
changeset
|
52 t = self.revision(node) |
14074
e8271159c8c2
filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents:
13240
diff
changeset
|
53 m = _parsemeta(t)[0] |
5915
d0576d065993
Prefer i in d over d.has_key(i)
Christian Ebert <blacktrash@gmx.net>
parents:
4635
diff
changeset
|
54 if m and "copy" in m: |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
55 return (m["copy"], revlog.bin(m["copyrev"])) |
1116 | 56 return False |
57 | |
2898
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
58 def size(self, rev): |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
59 """return the size of a given revision""" |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
60 |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
61 # for revisions with renames, we have to go the slow way |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
62 node = self.node(rev) |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
63 if self.renamed(node): |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
64 return len(self.read(node)) |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
65 |
11540
2370e270a29a
filelog: test behaviour for data starting with "\1\n"
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11539
diff
changeset
|
66 # XXX if self.read(node).startswith("\1\n"), this returns (size+4) |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
67 return revlog.revlog.size(self, rev) |
2898
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
68 |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
69 def cmp(self, node, text): |
11539
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
70 """compare text with a given file revision |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
71 |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
72 returns True if text is different than what is stored. |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
73 """ |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
74 |
11541
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
75 t = text |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
76 if text.startswith('\1\n'): |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
77 t = '\1\n\1\n' + text |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
78 |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
79 samehashes = not revlog.revlog.cmp(self, node, t) |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
80 if samehashes: |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
81 return False |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
82 |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
83 # renaming a file produces a different hash, even if the data |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
84 # remains unchanged. Check if it's the case (slow): |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
85 if self.renamed(node): |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
86 t2 = self.read(node) |
2895
21631c2c09a5
filelog.cmp: return 0 for equality
Matt Mackall <mpm@selenic.com>
parents:
2890
diff
changeset
|
87 return t2 != text |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
88 |
11541
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
89 return True |
14287
7c231754a621
filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents:
14074
diff
changeset
|
90 |
7c231754a621
filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents:
14074
diff
changeset
|
91 def _file(self, f): |
7c231754a621
filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents:
14074
diff
changeset
|
92 return filelog(self.opener, f) |