Mercurial > hg-stable
annotate mercurial/filelog.py @ 11541:ab9fa7a85dd9 stable
filelog: cmp: don't read data if hashes are identical (issue2273)
filelog.renamed() is an expensive call, as it reads the full revision text from the filelog whenever p1 == nullid.
It's more efficient to first compute the hash, and to bail early if
the computed hash is the same as the stored nodeid.
The 'samehashes' variable is not strictly necessary, but it aids comprehension.
author | Nicolas Dumazet <nicdumz.commits@gmail.com> |
---|---|
date | Mon, 05 Jul 2010 19:49:54 +0900 |
parents | 2370e270a29a |
children | e5060aa22043 |
rev | line source |
---|---|
1089 | 1 # filelog.py - file history class for mercurial |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
2 # |
4635
63b9d2deed48
Updated copyright notices and add "and others" to "hg version"
Thomas Arendsen Hein <thomas@intevation.de>
parents:
4258
diff
changeset
|
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com> |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
4 # |
8225
46293a0c7e9f
updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents:
7634
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
10263 | 6 # GNU General Public License version 2 or any later version. |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
7 |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
8 import revlog |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
9 |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
10 class filelog(revlog.revlog): |
4258
b11a2fb59cf5
revlog: simplify revlog version handling
Matt Mackall <mpm@selenic.com>
parents:
4257
diff
changeset
|
11 def __init__(self, opener, path): |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
12 revlog.revlog.__init__(self, opener, |
8531
810387f59696
filelog encoding: move the encoding/decoding into store
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
8225
diff
changeset
|
13 "/".join(("data", path + ".i"))) |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
14 |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
15 def read(self, node): |
360 | 16 t = self.revision(node) |
686
d7d68d27ebe5
Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents:
681
diff
changeset
|
17 if not t.startswith('\1\n'): |
360 | 18 return t |
2579
0875cda033fd
use __contains__, index or split instead of str.find
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
2470
diff
changeset
|
19 s = t.index('\1\n', 2) |
10282
08a0f04b56bd
many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents:
10263
diff
changeset
|
20 return t[s + 2:] |
360 | 21 |
3123
4ea58eb3f0c9
filelog: make metadata method private
Matt Mackall <mpm@selenic.com>
parents:
2948
diff
changeset
|
22 def _readmeta(self, node): |
360 | 23 t = self.revision(node) |
686
d7d68d27ebe5
Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents:
681
diff
changeset
|
24 if not t.startswith('\1\n'): |
1116 | 25 return {} |
2579
0875cda033fd
use __contains__, index or split instead of str.find
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
2470
diff
changeset
|
26 s = t.index('\1\n', 2) |
360 | 27 mt = t[2:s] |
1116 | 28 m = {} |
360 | 29 for l in mt.splitlines(): |
30 k, v = l.split(": ", 1) | |
31 m[k] = v | |
32 return m | |
33 | |
34 def add(self, text, meta, transaction, link, p1=None, p2=None): | |
686
d7d68d27ebe5
Reapply startswith() changes that got lost with stale edit
Matt Mackall <mpm@selenic.com>
parents:
681
diff
changeset
|
35 if meta or text.startswith('\1\n'): |
10705
194342b34870
filelog: no need to optimize an uncommon case, assume meta = {}
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
10490
diff
changeset
|
36 mt = ["%s: %s\n" % (k, v) for k, v in sorted(meta.iteritems())] |
1540
8ca9f5b17257
minor optimization: save some string trash
twaldmann@thinkmo.de
parents:
1117
diff
changeset
|
37 text = "\1\n%s\1\n%s" % ("".join(mt), text) |
0
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
38 return self.addrevision(text, transaction, link, p1, p2) |
9117c6561b0b
Add back links from file revisions to changeset revisions
mpm@selenic.com
parents:
diff
changeset
|
39 |
1116 | 40 def renamed(self, node): |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
41 if self.parents(node)[0] != revlog.nullid: |
1116 | 42 return False |
3123
4ea58eb3f0c9
filelog: make metadata method private
Matt Mackall <mpm@selenic.com>
parents:
2948
diff
changeset
|
43 m = self._readmeta(node) |
5915
d0576d065993
Prefer i in d over d.has_key(i)
Christian Ebert <blacktrash@gmx.net>
parents:
4635
diff
changeset
|
44 if m and "copy" in m: |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
45 return (m["copy"], revlog.bin(m["copyrev"])) |
1116 | 46 return False |
47 | |
2898
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
48 def size(self, rev): |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
49 """return the size of a given revision""" |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
50 |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
51 # for revisions with renames, we have to go the slow way |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
52 node = self.node(rev) |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
53 if self.renamed(node): |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
54 return len(self.read(node)) |
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
55 |
11540
2370e270a29a
filelog: test behaviour for data starting with "\1\n"
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11539
diff
changeset
|
56 # XXX if self.read(node).startswith("\1\n"), this returns (size+4) |
7634
14a4337a9b9b
revlog: kill from-style imports
Matt Mackall <mpm@selenic.com>
parents:
7622
diff
changeset
|
57 return revlog.revlog.size(self, rev) |
2898
db397c38005d
merge: use file size stored in revlog index
Matt Mackall <mpm@selenic.com>
parents:
2895
diff
changeset
|
58 |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
59 def cmp(self, node, text): |
11539
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
60 """compare text with a given file revision |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
61 |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
62 returns True if text is different than what is stored. |
a463e3c50212
cmp: document the fact that we return True if content is different
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
10706
diff
changeset
|
63 """ |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
64 |
11541
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
65 t = text |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
66 if text.startswith('\1\n'): |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
67 t = '\1\n\1\n' + text |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
68 |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
69 samehashes = not revlog.revlog.cmp(self, node, t) |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
70 if samehashes: |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
71 return False |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
72 |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
73 # renaming a file produces a different hash, even if the data |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
74 # remains unchanged. Check if it's the case (slow): |
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
75 if self.renamed(node): |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
76 t2 = self.read(node) |
2895
21631c2c09a5
filelog.cmp: return 0 for equality
Matt Mackall <mpm@selenic.com>
parents:
2890
diff
changeset
|
77 return t2 != text |
2887
05257fd28591
filelog: add hash-based comparisons
Matt Mackall <mpm@selenic.com>
parents:
2859
diff
changeset
|
78 |
11541
ab9fa7a85dd9
filelog: cmp: don't read data if hashes are identical (issue2273)
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents:
11540
diff
changeset
|
79 return True |