annotate mercurial/filelog.py @ 23225:bdf7b1ea1dae stable

changegroup: don't store unused value on fnodes (issue4443) The contents of fnodes are only accessed once per key. It is wasteful to cache the value since nobody will use it. Before this patch, the caching of unused data in fnodes was effectively causing a memory leak during the file streaming part of bundle creation. On mozilla-central (which has ~190,000 entries in fnodes), this patch has a significant impact on RSS at the end of generate(): before: 516,124 KB after: 364,356 KB delta: -151,768 KB The origin of this code can be traced back to 627cd7842e5d and has been with us since the 2.7 release.
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 06 Nov 2014 22:33:48 -0800
parents 58ec36686f0e
children 22a979d1ae56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1089
142b5d5ec9cc Break apart hg.py
mpm@selenic.com
parents: 1072
diff changeset
1 # filelog.py - file history class for mercurial
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
2 #
4635
63b9d2deed48 Updated copyright notices and add "and others" to "hg version"
Thomas Arendsen Hein <thomas@intevation.de>
parents: 4258
diff changeset
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
4 #
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7634
diff changeset
5 # This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8531
diff changeset
6 # GNU General Public License version 2 or any later version.
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
7
22596
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
8 import error, revlog
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
9 import re
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
10
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
11 _mdre = re.compile('\1\n')
22421
30a610424eff filelog: make parsemeta a public module function, to be used by censor module
Mike Edgar <adgar@google.com>
parents: 22420
diff changeset
12 def parsemeta(text):
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
13 """return (metadatadict, keylist, metadatasize)"""
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
14 # text can be buffer, so we can't use .startswith or .index
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
15 if text[:2] != '\1\n':
22422
75bb7c702317 filelog: parsemeta stops returning unused key list
Mike Edgar <adgar@google.com>
parents: 22421
diff changeset
16 return None, None
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
17 s = _mdre.search(text, 2).start()
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
18 mtext = text[2:s]
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
19 meta = {}
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
20 for l in mtext.splitlines():
13240
e5060aa22043 filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents: 11541
diff changeset
21 k, v = l.split(": ", 1)
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
22 meta[k] = v
22422
75bb7c702317 filelog: parsemeta stops returning unused key list
Mike Edgar <adgar@google.com>
parents: 22421
diff changeset
23 return meta, (s + 2)
14074
e8271159c8c2 filelog: extract metadata parsing and packing
Sune Foldager <cryo@cyanite.org>
parents: 13240
diff changeset
24
22420
4669e26747c3 filelog: make packmeta a public module function, to be used by censor
Mike Edgar <adgar@google.com>
parents: 19148
diff changeset
25 def packmeta(meta, text):
4669e26747c3 filelog: make packmeta a public module function, to be used by censor
Mike Edgar <adgar@google.com>
parents: 19148
diff changeset
26 keys = sorted(meta.iterkeys())
4669e26747c3 filelog: make packmeta a public module function, to be used by censor
Mike Edgar <adgar@google.com>
parents: 19148
diff changeset
27 metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
4669e26747c3 filelog: make packmeta a public module function, to be used by censor
Mike Edgar <adgar@google.com>
parents: 19148
diff changeset
28 return "\1\n%s\1\n%s" % (metatext, text)
13240
e5060aa22043 filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents: 11541
diff changeset
29
22596
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
30 def _censoredtext(text):
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
31 m, offs = parsemeta(text)
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
32 return m and "censored" in m and not text[offs:]
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
33
7634
14a4337a9b9b revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents: 7622
diff changeset
34 class filelog(revlog.revlog):
4258
b11a2fb59cf5 revlog: simplify revlog version handling
Matt Mackall <mpm@selenic.com>
parents: 4257
diff changeset
35 def __init__(self, opener, path):
19148
3bda242bf244 filelog: use super() for calling base functions
Durham Goode <durham@fb.com>
parents: 14287
diff changeset
36 super(filelog, self).__init__(opener,
8531
810387f59696 filelog encoding: move the encoding/decoding into store
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8225
diff changeset
37 "/".join(("data", path + ".i")))
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
38
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
39 def read(self, node):
360
10519e4cbd02 filelog: add metadata support
mpm@selenic.com
parents: 358
diff changeset
40 t = self.revision(node)
686
d7d68d27ebe5 Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents: 681
diff changeset
41 if not t.startswith('\1\n'):
360
10519e4cbd02 filelog: add metadata support
mpm@selenic.com
parents: 358
diff changeset
42 return t
2579
0875cda033fd use __contains__, index or split instead of str.find
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 2470
diff changeset
43 s = t.index('\1\n', 2)
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
44 return t[s + 2:]
360
10519e4cbd02 filelog: add metadata support
mpm@selenic.com
parents: 358
diff changeset
45
10519e4cbd02 filelog: add metadata support
mpm@selenic.com
parents: 358
diff changeset
46 def add(self, text, meta, transaction, link, p1=None, p2=None):
686
d7d68d27ebe5 Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents: 681
diff changeset
47 if meta or text.startswith('\1\n'):
22420
4669e26747c3 filelog: make packmeta a public module function, to be used by censor
Mike Edgar <adgar@google.com>
parents: 19148
diff changeset
48 text = packmeta(meta, text)
0
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
49 return self.addrevision(text, transaction, link, p1, p2)
9117c6561b0b Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff changeset
50
1116
0cdd73b0767c Add some rename debugging support
mpm@selenic.com
parents: 1089
diff changeset
51 def renamed(self, node):
7634
14a4337a9b9b revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents: 7622
diff changeset
52 if self.parents(node)[0] != revlog.nullid:
1116
0cdd73b0767c Add some rename debugging support
mpm@selenic.com
parents: 1089
diff changeset
53 return False
13240
e5060aa22043 filelog: move metadata parsing to a helper function
Matt Mackall <mpm@selenic.com>
parents: 11541
diff changeset
54 t = self.revision(node)
22421
30a610424eff filelog: make parsemeta a public module function, to be used by censor module
Mike Edgar <adgar@google.com>
parents: 22420
diff changeset
55 m = parsemeta(t)[0]
5915
d0576d065993 Prefer i in d over d.has_key(i)
Christian Ebert <blacktrash@gmx.net>
parents: 4635
diff changeset
56 if m and "copy" in m:
7634
14a4337a9b9b revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents: 7622
diff changeset
57 return (m["copy"], revlog.bin(m["copyrev"]))
1116
0cdd73b0767c Add some rename debugging support
mpm@selenic.com
parents: 1089
diff changeset
58 return False
0cdd73b0767c Add some rename debugging support
mpm@selenic.com
parents: 1089
diff changeset
59
2898
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
60 def size(self, rev):
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
61 """return the size of a given revision"""
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
62
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
63 # for revisions with renames, we have to go the slow way
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
64 node = self.node(rev)
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
65 if self.renamed(node):
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
66 return len(self.read(node))
22597
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
67 if self._iscensored(rev):
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
68 return 0
2898
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
69
11540
2370e270a29a filelog: test behaviour for data starting with "\1\n"
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11539
diff changeset
70 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
19148
3bda242bf244 filelog: use super() for calling base functions
Durham Goode <durham@fb.com>
parents: 14287
diff changeset
71 return super(filelog, self).size(rev)
2898
db397c38005d merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents: 2895
diff changeset
72
2887
05257fd28591 filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents: 2859
diff changeset
73 def cmp(self, node, text):
11539
a463e3c50212 cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 10706
diff changeset
74 """compare text with a given file revision
a463e3c50212 cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 10706
diff changeset
75
a463e3c50212 cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 10706
diff changeset
76 returns True if text is different than what is stored.
a463e3c50212 cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 10706
diff changeset
77 """
2887
05257fd28591 filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents: 2859
diff changeset
78
11541
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
79 t = text
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
80 if text.startswith('\1\n'):
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
81 t = '\1\n\1\n' + text
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
82
19148
3bda242bf244 filelog: use super() for calling base functions
Durham Goode <durham@fb.com>
parents: 14287
diff changeset
83 samehashes = not super(filelog, self).cmp(node, t)
11541
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
84 if samehashes:
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
85 return False
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
86
22597
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
87 # censored files compare against the empty file
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
88 if self._iscensored(node):
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
89 return text != ''
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
90
11541
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
91 # renaming a file produces a different hash, even if the data
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
92 # remains unchanged. Check if it's the case (slow):
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
93 if self.renamed(node):
2887
05257fd28591 filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents: 2859
diff changeset
94 t2 = self.read(node)
2895
21631c2c09a5 filelog.cmp: return 0 for equality
Matt Mackall <mpm@selenic.com>
parents: 2890
diff changeset
95 return t2 != text
2887
05257fd28591 filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents: 2859
diff changeset
96
11541
ab9fa7a85dd9 filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 11540
diff changeset
97 return True
14287
7c231754a621 filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents: 14074
diff changeset
98
22596
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
99 def checkhash(self, text, p1, p2, node, rev=None):
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
100 try:
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
101 super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
102 except error.RevlogError:
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
103 if _censoredtext(text):
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
104 raise error.CensoredNodeError(self.indexfile, node)
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
105 raise
27e2317efe89 filelog: raise CensoredNodeError when hash checks fail with censor metadata
Mike Edgar <adgar@google.com>
parents: 22422
diff changeset
106
14287
7c231754a621 filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents: 14074
diff changeset
107 def _file(self, f):
7c231754a621 filelog: add file function to open other filelogs
Sune Foldager <cryo@cyanite.org>
parents: 14074
diff changeset
108 return filelog(self.opener, f)
22597
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
109
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
110 def _iscensored(self, revornode):
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
111 """Check if a file revision is censored."""
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
112 try:
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
113 self.revision(revornode)
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
114 return False
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
115 except error.CensoredNodeError:
58ec36686f0e filelog: censored files compare against empty data, have 0 size
Mike Edgar <adgar@google.com>
parents: 22596
diff changeset
116 return True