commit: increase perf by building a new addlist instead of editing the old one
When committing to a repo with lots of files (>170000),
manifest.py:addlistdelta takes some time because it's editing a large
array many times. Changing it to build a new array instead of editing
the old one saves around 0.04 seconds on a 1.64 second commit, a gain of
about 2.5%.
The gain here is pretty minor, but it was blatantly at the top of the
profiler report and the fix is straightforward.
I tested it by comparing the arrays produced by the new and old logic
while running all of the tests.
--- a/mercurial/manifest.py Wed Nov 28 14:55:42 2012 -0800
+++ b/mercurial/manifest.py Mon Nov 19 16:05:40 2012 -0800
@@ -117,15 +117,23 @@
# apply the changes collected during the bisect loop to our addlist
# return a delta suitable for addrevision
def addlistdelta(addlist, x):
- # start from the bottom up
- # so changes to the offsets don't mess things up.
- for start, end, content in reversed(x):
+ # for large addlist arrays, building a new array is cheaper
+ # than repeatedly modifying the existing one
+ currentposition = 0
+ newaddlist = array.array('c')
+
+ for start, end, content in x:
+ newaddlist += addlist[currentposition:start]
if content:
- addlist[start:end] = array.array('c', content)
- else:
- del addlist[start:end]
- return "".join(struct.pack(">lll", start, end, len(content))
+ newaddlist += array.array('c', content)
+
+ currentposition = end
+
+ newaddlist += addlist[currentposition:]
+
+ deltatext = "".join(struct.pack(">lll", start, end, len(content))
+ content for start, end, content in x)
+ return deltatext, newaddlist
def checkforbidden(l):
for f in l:
@@ -194,7 +202,8 @@
if dstart is not None:
delta.append([dstart, dend, "".join(dline)])
# apply the delta to the addlist, and get a delta for addrevision
- cachedelta = (self.rev(p1), addlistdelta(addlist, delta))
+ deltatext, addlist = addlistdelta(addlist, delta)
+ cachedelta = (self.rev(p1), deltatext)
arraytext = addlist
text = util.buffer(arraytext)