Mercurial > hg
comparison mercurial/manifest.py @ 1534:80a3d6a0af71
Optimize manifest.add
Testing shows that manifest.add is spending a significant percentage of
its time running calcoffsets and doing text = "".join(addlist). This
patch removes the need for both of these by storying the manifest in a
character array, and using a modified bisect search to find lines without
the help of a separate index of line offsets.
manifest.add was also reworked to push delta construction/combination into the
main loop.
Time to apply 2751 patches (without psyco, ext3 noatime,data=writeback):
Stock hg: 4m45s real 3m32s user 55s sys
patched: 2m48s real 1m53s user 43s sys
quilt: 2m30s real 45s user 50s sys
(quilt does much more io...)
author | mason@suse.com |
---|---|
date | Fri, 11 Nov 2005 18:20:22 -0800 |
parents | 54e4b187f69c |
children | bf4e7ef08741 |
comparison
equal
deleted
inserted
replaced
1533:3d11f81c9145 | 1534:80a3d6a0af71 |
---|---|
7 | 7 |
8 import sys, struct | 8 import sys, struct |
9 from revlog import * | 9 from revlog import * |
10 from i18n import gettext as _ | 10 from i18n import gettext as _ |
11 from demandload import * | 11 from demandload import * |
12 demandload(globals(), "bisect") | 12 demandload(globals(), "bisect array") |
13 | 13 |
14 class manifest(revlog): | 14 class manifest(revlog): |
15 def __init__(self, opener): | 15 def __init__(self, opener): |
16 self.mapcache = None | 16 self.mapcache = None |
17 self.listcache = None | 17 self.listcache = None |
18 self.addlist = None | |
19 revlog.__init__(self, opener, "00manifest.i", "00manifest.d") | 18 revlog.__init__(self, opener, "00manifest.i", "00manifest.d") |
20 | 19 |
21 def read(self, node): | 20 def read(self, node): |
22 if node == nullid: return {} # don't upset local cache | 21 if node == nullid: return {} # don't upset local cache |
23 if self.mapcache and self.mapcache[0] == node: | 22 if self.mapcache and self.mapcache[0] == node: |
24 return self.mapcache[1] | 23 return self.mapcache[1] |
25 text = self.revision(node) | 24 text = self.revision(node) |
26 map = {} | 25 map = {} |
27 flag = {} | 26 flag = {} |
28 self.listcache = (text, text.splitlines(1)) | 27 self.listcache = array.array('c', text) |
29 for l in self.listcache[1]: | 28 lines = text.splitlines(1) |
29 for l in lines: | |
30 (f, n) = l.split('\0') | 30 (f, n) = l.split('\0') |
31 map[f] = bin(n[:40]) | 31 map[f] = bin(n[:40]) |
32 flag[f] = (n[40:-1] == "x") | 32 flag[f] = (n[40:-1] == "x") |
33 self.mapcache = (node, map, flag) | 33 self.mapcache = (node, map, flag) |
34 return map | 34 return map |
37 if node == nullid: return {} # don't upset local cache | 37 if node == nullid: return {} # don't upset local cache |
38 if not self.mapcache or self.mapcache[0] != node: | 38 if not self.mapcache or self.mapcache[0] != node: |
39 self.read(node) | 39 self.read(node) |
40 return self.mapcache[2] | 40 return self.mapcache[2] |
41 | 41 |
42 def diff(self, a, b): | |
43 return mdiff.textdiff(str(a), str(b)) | |
44 | |
42 def add(self, map, flags, transaction, link, p1=None, p2=None, | 45 def add(self, map, flags, transaction, link, p1=None, p2=None, |
43 changed=None): | 46 changed=None): |
44 # directly generate the mdiff delta from the data collected during | 47 |
45 # the bisect loop below | 48 # returns a tuple (start, end). If the string is found |
46 def gendelta(delta): | 49 # m[start:end] are the line containing that string. If start == end |
47 i = 0 | 50 # the string was not found and they indicate the proper sorted |
48 result = [] | 51 # insertion point. This was taken from bisect_left, and modified |
49 while i < len(delta): | 52 # to find line start/end as it goes along. |
50 start = delta[i][2] | 53 # |
51 end = delta[i][3] | 54 # m should be a buffer or a string |
52 l = delta[i][4] | 55 # s is a string |
53 if l == None: | 56 # |
54 l = "" | 57 def manifestsearch(m, s, lo=0, hi=None): |
55 while i < len(delta) - 1 and start <= delta[i+1][2] \ | 58 def advance(i, c): |
56 and end >= delta[i+1][2]: | 59 while i < lenm and m[i] != c: |
57 if delta[i+1][3] > end: | |
58 end = delta[i+1][3] | |
59 if delta[i+1][4]: | |
60 l += delta[i+1][4] | |
61 i += 1 | 60 i += 1 |
62 result.append(struct.pack(">lll", start, end, len(l)) + l) | 61 return i |
63 i += 1 | 62 lenm = len(m) |
64 return result | 63 if not hi: |
64 hi = lenm | |
65 while lo < hi: | |
66 mid = (lo + hi) // 2 | |
67 start = mid | |
68 while start > 0 and m[start-1] != '\n': | |
69 start -= 1 | |
70 end = advance(start, '\0') | |
71 if m[start:end] < s: | |
72 # we know that after the null there are 40 bytes of sha1 | |
73 # this translates to the bisect lo = mid + 1 | |
74 lo = advance(end + 40, '\n') + 1 | |
75 else: | |
76 # this translates to the bisect hi = mid | |
77 hi = start | |
78 end = advance(lo, '\0') | |
79 found = m[lo:end] | |
80 if cmp(s, found) == 0: | |
81 # we know that after the null there are 40 bytes of sha1 | |
82 end = advance(end + 40, '\n') | |
83 return (lo, end+1) | |
84 else: | |
85 return (lo, lo) | |
65 | 86 |
66 # apply the changes collected during the bisect loop to our addlist | 87 # apply the changes collected during the bisect loop to our addlist |
67 def addlistdelta(addlist, delta): | 88 # return a delta suitable for addrevision |
68 # apply the deltas to the addlist. start from the bottom up | 89 def addlistdelta(addlist, x): |
90 # start from the bottom up | |
69 # so changes to the offsets don't mess things up. | 91 # so changes to the offsets don't mess things up. |
70 i = len(delta) | 92 i = len(x) |
71 while i > 0: | 93 while i > 0: |
72 i -= 1 | 94 i -= 1 |
73 start = delta[i][0] | 95 start = x[i][0] |
74 end = delta[i][1] | 96 end = x[i][1] |
75 if delta[i][4]: | 97 if x[i][2]: |
76 addlist[start:end] = [delta[i][4]] | 98 addlist[start:end] = array.array('c', x[i][2]) |
77 else: | 99 else: |
78 del addlist[start:end] | 100 del addlist[start:end] |
79 return addlist | 101 return "".join([struct.pack(">lll", d[0], d[1], len(d[2])) + d[2] \ |
80 | 102 for d in x ]) |
81 # calculate the byte offset of the start of each line in the | |
82 # manifest | |
83 def calcoffsets(addlist): | |
84 offsets = [0] * (len(addlist) + 1) | |
85 offset = 0 | |
86 i = 0 | |
87 while i < len(addlist): | |
88 offsets[i] = offset | |
89 offset += len(addlist[i]) | |
90 i += 1 | |
91 offsets[i] = offset | |
92 return offsets | |
93 | 103 |
94 # if we're using the listcache, make sure it is valid and | 104 # if we're using the listcache, make sure it is valid and |
95 # parented by the same node we're diffing against | 105 # parented by the same node we're diffing against |
96 if not changed or not self.listcache or not p1 or \ | 106 if not changed or not self.listcache or not p1 or \ |
97 self.mapcache[0] != p1: | 107 self.mapcache[0] != p1: |
98 files = map.keys() | 108 files = map.keys() |
99 files.sort() | 109 files.sort() |
100 | 110 |
101 self.addlist = ["%s\000%s%s\n" % | 111 text = ["%s\000%s%s\n" % |
102 (f, hex(map[f]), flags[f] and "x" or '') | 112 (f, hex(map[f]), flags[f] and "x" or '') |
103 for f in files] | 113 for f in files] |
114 self.listcache = array.array('c', "".join(text)) | |
104 cachedelta = None | 115 cachedelta = None |
105 else: | 116 else: |
106 addlist = self.listcache[1] | 117 addlist = self.listcache |
107 | |
108 # find the starting offset for each line in the add list | |
109 offsets = calcoffsets(addlist) | |
110 | 118 |
111 # combine the changed lists into one list for sorting | 119 # combine the changed lists into one list for sorting |
112 work = [[x, 0] for x in changed[0]] | 120 work = [[x, 0] for x in changed[0]] |
113 work[len(work):] = [[x, 1] for x in changed[1]] | 121 work[len(work):] = [[x, 1] for x in changed[1]] |
114 work.sort() | 122 work.sort() |
115 | 123 |
116 delta = [] | 124 delta = [] |
117 bs = 0 | 125 dstart = None |
126 dend = None | |
127 dline = [""] | |
128 start = 0 | |
129 # zero copy representation of addlist as a buffer | |
130 addbuf = buffer(addlist) | |
118 | 131 |
132 # start with a readonly loop that finds the offset of | |
133 # each line and creates the deltas | |
119 for w in work: | 134 for w in work: |
120 f = w[0] | 135 f = w[0] |
121 # bs will either be the index of the item or the insert point | 136 # bs will either be the index of the item or the insert point |
122 bs = bisect.bisect(addlist, f, bs) | 137 start, end = manifestsearch(addbuf, f, start) |
123 if bs < len(addlist): | |
124 fn = addlist[bs][:addlist[bs].index('\0')] | |
125 else: | |
126 fn = None | |
127 if w[1] == 0: | 138 if w[1] == 0: |
128 l = "%s\000%s%s\n" % (f, hex(map[f]), | 139 l = "%s\000%s%s\n" % (f, hex(map[f]), |
129 flags[f] and "x" or '') | 140 flags[f] and "x" or '') |
130 else: | 141 else: |
131 l = None | 142 l = "" |
132 start = bs | 143 if start == end and w[1] == 1: |
133 if fn != f: | 144 # item we want to delete was not found, error out |
134 # item not found, insert a new one | 145 raise AssertionError( |
135 end = bs | |
136 if w[1] == 1: | |
137 raise AssertionError( | |
138 _("failed to remove %s from manifest\n") % f) | 146 _("failed to remove %s from manifest\n") % f) |
147 if dstart != None and dstart <= start and dend >= start: | |
148 if dend < end: | |
149 dend = end | |
150 if l: | |
151 dline.append(l) | |
139 else: | 152 else: |
140 # item is found, replace/delete the existing line | 153 if dstart != None: |
141 end = bs + 1 | 154 delta.append([dstart, dend, "".join(dline)]) |
142 delta.append([start, end, offsets[start], offsets[end], l]) | 155 dstart = start |
156 dend = end | |
157 dline = [l] | |
143 | 158 |
144 self.addlist = addlistdelta(addlist, delta) | 159 if dstart != None: |
145 if self.mapcache[0] == self.tip(): | 160 delta.append([dstart, dend, "".join(dline)]) |
146 cachedelta = "".join(gendelta(delta)) | 161 # apply the delta to the addlist, and get a delta for addrevision |
147 else: | 162 cachedelta = addlistdelta(addlist, delta) |
163 | |
164 # the delta is only valid if we've been processing the tip revision | |
165 if self.mapcache[0] != self.tip(): | |
148 cachedelta = None | 166 cachedelta = None |
167 self.listcache = addlist | |
149 | 168 |
150 text = "".join(self.addlist) | 169 n = self.addrevision(buffer(self.listcache), transaction, link, p1, \ |
151 if cachedelta and mdiff.patch(self.listcache[0], cachedelta) != text: | 170 p2, cachedelta) |
152 raise AssertionError(_("manifest delta failure\n")) | |
153 n = self.addrevision(text, transaction, link, p1, p2, cachedelta) | |
154 self.mapcache = (n, map, flags) | 171 self.mapcache = (n, map, flags) |
155 self.listcache = (text, self.addlist) | |
156 self.addlist = None | |
157 | 172 |
158 return n | 173 return n |