treemanifests: remove _loadalllazy in _diff()
The benchmarks below use a setup similar to that of ee7ee0c516ca and my other recent
commits. Yes, in some cases this runs in literally 5% of the time it
previously took.
before = a6f8ab53
diff --git:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 1.301 s +- 0.004 s | 1.309 s +- 0.012 s | 100.6%
m-u | | x | 1.303 s +- 0.009 s | 1.302 s +- 0.006 s | 99.9%
m-u | x | | 1.308 s +- 0.006 s | 1.309 s +- 0.007 s | 100.1%
m-u | x | x | 85.7 ms +- 0.6 ms | 86.0 ms +- 0.3 ms | 100.4%
l-d-r | | | 197.5 ms +- 0.7 ms | 197.8 ms +- 2.2 ms | 100.2%
l-d-r | | x | 199.4 ms +- 0.6 ms | 199.3 ms +- 0.9 ms | 99.9%
l-d-r | x | | 86.1 ms +- 0.5 ms | 85.8 ms +- 0.9 ms | 99.7%
l-d-r | x | x | 64.4 ms +- 0.4 ms | 64.4 ms +- 0.3 ms | 100.0%
diff -c . --git:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 236.7 ms +- 1.1 ms | 236.5 ms +- 1.3 ms | 99.9%
m-u | | x | 158.7 ms +- 1.0 ms | 128.0 ms +- 1.0 ms | 80.7% <--
m-u | x | | 239.7 ms +- 1.8 ms | 238.1 ms +- 1.5 ms | 99.3%
m-u | x | x | 132.4 ms +- 0.9 ms | 132.3 ms +- 0.6 ms | 99.9%
l-d-r | | | 81.8 ms +- 0.4 ms | 81.8 ms +- 0.3 ms | 100.0%
l-d-r | | x | 3.894 s +- 0.017 s | 193.6 ms +- 0.4 ms | 5.0% <--
l-d-r | x | | 106.9 ms +- 0.4 ms | 106.6 ms +- 0.3 ms | 99.7%
l-d-r | x | x | 182.7 ms +- 0.8 ms | 183.3 ms +- 0.9 ms | 100.3%
rebase -r . --keep -d .^^:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 5.615 s +- 0.109 s | 5.562 s +- 0.015 s | 99.1%
m-u | | x | 5.701 s +- 0.027 s | 5.715 s +- 0.023 s | 100.2%
m-u | x | | 5.572 s +- 0.128 s | 5.613 s +- 0.182 s | 100.7%
m-u | x | x | 633.3 ms +- 28.7 ms | 636.2 ms +- 13.8 ms | 100.5%
l-d-r | | | 666.4 ms +- 17.0 ms | 658.5 ms +- 9.3 ms | 98.8%
l-d-r | | x | 6.520 s +- 0.070 s | 6.505 s +- 0.026 s | 99.8%
l-d-r | x | | 279.0 ms +- 13.0 ms | 276.5 ms +- 4.7 ms | 99.1%
l-d-r | x | x | 1.636 s +- 0.058 s | 1.657 s +- 0.014 s | 101.3%
status --change . --copies:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 218.6 ms +- 1.4 ms | 217.9 ms +- 1.5 ms | 99.7%
m-u | | x | 138.5 ms +- 0.4 ms | 108.4 ms +- 0.2 ms | 78.3% <--
m-u | x | | 220.1 ms +- 1.3 ms | 219.7 ms +- 1.5 ms | 99.8%
m-u | x | x | 113.2 ms +- 0.4 ms | 112.4 ms +- 0.8 ms | 99.3%
l-d-r | | | 80.2 ms +- 0.3 ms | 80.6 ms +- 0.6 ms | 100.5%
l-d-r | | x | 3.899 s +- 0.020 s | 194.8 ms +- 4.0 ms | 5.0% <--
l-d-r | x | | 83.4 ms +- 0.8 ms | 83.2 ms +- 0.2 ms | 99.8%
l-d-r | x | x | 732.2 ms +- 4.3 ms | 194.9 ms +- 1.0 ms | 26.6% <--
status --copies:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 1.917 s +- 0.005 s | 1.914 s +- 0.004 s | 99.8%
m-u | | x | 1.909 s +- 0.012 s | 1.934 s +- 0.004 s | 101.3%
m-u | x | | 1.915 s +- 0.005 s | 1.904 s +- 0.004 s | 99.4%
m-u | x | x | 94.8 ms +- 0.3 ms | 94.7 ms +- 0.2 ms | 99.9%
l-d-r | | | 593.9 ms +- 1.2 ms | 594.6 ms +- 9.4 ms | 100.1%
l-d-r | | x | 595.2 ms +- 3.8 ms | 597.2 ms +- 2.6 ms | 100.3%
l-d-r | x | | 182.5 ms +- 1.6 ms | 182.1 ms +- 0.6 ms | 99.8%
l-d-r | x | x | 149.6 ms +- 0.9 ms | 149.1 ms +- 0.8 ms | 99.7%
update $rev^; ~/src/hg/hg{hg}/hg update $rev:
repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before
------+---+---+------------------------+-----------------------+------------
m-u | | | 3.121 s +- 0.007 s | 3.129 s +- 0.012 s | 100.3%
m-u | | x | 2.972 s +- 0.011 s | 2.981 s +- 0.012 s | 100.3%
m-u | x | | 3.144 s +- 0.014 s | 3.141 s +- 0.011 s | 99.9%
m-u | x | x | 312.2 ms +- 2.4 ms | 312.3 ms +- 2.1 ms | 100.0%
l-d-r | | | 444.4 ms +- 4.3 ms | 446.9 ms +- 5.3 ms | 100.6%
l-d-r | | x | 9.159 s +- 0.069 s | 9.182 s +- 0.040 s | 100.3%
l-d-r | x | | 254.6 ms +- 1.6 ms | 255.2 ms +- 1.6 ms | 100.2%
l-d-r | x | x | 1.525 s +- 0.007 s | 1.577 s +- 0.007 s | 103.4% <--?
Differential Revision: https://phab.mercurial-scm.org/D4845
from __future__ import absolute_import, print_function
import difflib
import random
import unittest
from mercurial import linelog
# Tuning knobs for the random edit generator, _genedits().
#
# A vectorised edit (replacelines_vec) is picked when a random integer drawn
# from [0, vecratio] comes out as 0, i.e. roughly once for every `vecratio`
# plain replacelines edits.
vecratio = 3 # number of replacelines / number of replacelines_vec
# Upper bound (inclusive) for the random line numbers used in vectorised edits.
maxlinenum = 0xffffff
# Upper bound (inclusive) for the random start of the inserted range, b1.
maxb1 = 0xffffff
# Maximum number of existing lines replaced by one edit (a2 - a1).
maxdeltaa = 10
# Maximum number of lines inserted by one edit (b2 - b1).
maxdeltab = 10
def _genedits(seed, endrev):
    """Generate a reproducible sequence of random edits, one per revision.

    Calling this twice with the same ``seed`` and ``endrev`` produces the
    same sequence, which lets a caller build a linelog from one pass and
    check it against a second, identical pass.

    The generator draws from its own ``random.Random`` instance, so the
    sequence does not depend on (or disturb) the module-level ``random``
    state while the consumer runs between yields.

    Args:
        seed: seed for the private random number generator.
        endrev: number of revisions to generate; revisions are numbered
            ``0 .. endrev - 1``.

    Yields:
        ``(lines, rev, a1, a2, b1, b2, blines, usevec)`` where

        * ``lines`` is the expected content after the edit, as a list of
          ``(rev, linenum)`` tuples.  The *same* list object is yielded
          every time and is mutated in place, so copy it if it must
          outlive the current iteration;
        * ``rev`` is the revision performing the edit;
        * ``[a1, a2)`` is the range of existing lines being replaced;
        * ``[b1, b2)`` is the range of line numbers being inserted (only
          its length matters for a vectorised edit);
        * ``blines`` is the list of ``(rev, linenum)`` tuples inserted;
        * ``usevec`` says whether the edit is a vectorised one, whose
          inserted lines carry random revisions and line numbers.
    """
    rng = random.Random(seed)
    lines = []
    for rev in range(endrev):
        n = len(lines)
        # NOTE: the order of the draws below determines the generated
        # sequence; keep it stable.
        a1 = rng.randint(0, n)
        a2 = rng.randint(a1, min(n, a1 + maxdeltaa))
        b1 = rng.randint(0, maxb1)
        b2 = rng.randint(b1, b1 + maxdeltab)
        usevec = rng.randint(0, vecratio) == 0
        if usevec:
            blines = [(rng.randint(0, rev), rng.randint(0, maxlinenum))
                      for _ in range(b1, b2)]
        else:
            blines = [(rev, bidx) for bidx in range(b1, b2)]
        lines[a1:a2] = blines
        yield lines, rev, a1, a2, b1, b2, blines, usevec
class linelogtests(unittest.TestCase):
    """Tests for the ``linelog`` module.

    Covers encode/decode round-trips, a few hand-written edit sequences,
    compatibility with data produced by the original C implementation,
    randomised edits, and a malformed program that must not loop forever.
    """

    # Append custom assertion messages to the default one instead of
    # replacing it (this is not the default on older unittest versions).
    longMessage = True

    @staticmethod
    def _annotated(ll, rev):
        """Annotate ``ll`` at ``rev`` and return ``(rev, linenum)`` pairs."""
        return [(line.rev, line.linenum) for line in ll.annotate(rev)]

    def testlinelogencodedecode(self):
        """An encoded program decodes back to an equal linelog."""
        program = [linelog._eof(0, 0),
                   linelog._jge(41, 42),
                   linelog._jump(0, 43),
                   linelog._eof(0, 0),
                   linelog._jl(44, 45),
                   linelog._line(46, 47),
                   ]
        ll = linelog.linelog(program, maxrev=100)
        enc = ll.encode()
        # round-trips okay
        self.assertEqual(linelog.linelog.fromdata(enc)._program, ll._program)
        self.assertEqual(linelog.linelog.fromdata(enc), ll)
        # This encoding matches (or is at least intended to match) the
        # encoding used by hg-experimental's linelog file.
        self.assertEqual(enc, (b'\x00\x00\x01\x90\x00\x00\x00\x06'
                               b'\x00\x00\x00\xa4\x00\x00\x00*'
                               b'\x00\x00\x00\x00\x00\x00\x00+'
                               b'\x00\x00\x00\x00\x00\x00\x00\x00'
                               b'\x00\x00\x00\xb1\x00\x00\x00-'
                               b'\x00\x00\x00\xba\x00\x00\x00/'))

    def testsimpleedits(self):
        """Add, replace and delete lines, annotating after each step."""
        ll = linelog.linelog()
        # Initial revision: add lines 0, 1, and 2
        ll.replacelines(1, 0, 0, 0, 3)
        self.assertEqual(self._annotated(ll, 1),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        # Replace line 1 with a new line
        ll.replacelines(2, 1, 2, 1, 2)
        self.assertEqual(self._annotated(ll, 2),
                         [(1, 0),
                          (2, 1),
                          (1, 2),
                          ])
        # delete a line out of 2
        ll.replacelines(3, 1, 2, 0, 0)
        self.assertEqual(self._annotated(ll, 3),
                         [(1, 0),
                          (1, 2),
                          ])
        # annotation of 1 is unchanged
        self.assertEqual(self._annotated(ll, 1),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        ll.annotate(3)  # set internal state to revision 3
        start = ll.getoffset(0)
        end = ll.getoffset(1)
        # getalllines() also reports lines that are no longer part of
        # revision 3, such as (2, 1) and (1, 1).
        self.assertEqual(ll.getalllines(start, end), [
            (1, 0),
            (2, 1),
            (1, 1),
        ])
        self.assertEqual(ll.getalllines(), [
            (1, 0),
            (2, 1),
            (1, 1),
            (1, 2),
        ])

    def testparseclinelogfile(self):
        """Data written by the C implementation parses and re-encodes."""
        # This data is what the replacements in testsimpleedits
        # produce when fed to the original linelog.c implementation.
        data = (b'\x00\x00\x00\x0c\x00\x00\x00\x0f'
                b'\x00\x00\x00\x00\x00\x00\x00\x02'
                b'\x00\x00\x00\x05\x00\x00\x00\x06'
                b'\x00\x00\x00\x06\x00\x00\x00\x00'
                b'\x00\x00\x00\x00\x00\x00\x00\x07'
                b'\x00\x00\x00\x06\x00\x00\x00\x02'
                b'\x00\x00\x00\x00\x00\x00\x00\x00'
                b'\x00\x00\x00\t\x00\x00\x00\t'
                b'\x00\x00\x00\x00\x00\x00\x00\x0c'
                b'\x00\x00\x00\x08\x00\x00\x00\x05'
                b'\x00\x00\x00\x06\x00\x00\x00\x01'
                b'\x00\x00\x00\x00\x00\x00\x00\x05'
                b'\x00\x00\x00\x0c\x00\x00\x00\x05'
                b'\x00\x00\x00\n\x00\x00\x00\x01'
                b'\x00\x00\x00\x00\x00\x00\x00\t')
        llc = linelog.linelog.fromdata(data)
        self.assertEqual(self._annotated(llc, 1),
                         [(1, 0),
                          (1, 1),
                          (1, 2),
                          ])
        self.assertEqual(self._annotated(llc, 2),
                         [(1, 0),
                          (2, 1),
                          (1, 2),
                          ])
        self.assertEqual(self._annotated(llc, 3),
                         [(1, 0),
                          (1, 2),
                          ])
        # Check we emit the same bytecode.
        ll = linelog.linelog()
        # Initial revision: add lines 0, 1, and 2
        ll.replacelines(1, 0, 0, 0, 3)
        # Replace line 1 with a new line
        ll.replacelines(2, 1, 2, 1, 2)
        # delete a line out of 2
        ll.replacelines(3, 1, 2, 0, 0)
        diff = '\n ' + '\n '.join(difflib.unified_diff(
            ll.debugstr().splitlines(), llc.debugstr().splitlines(),
            'python', 'c', lineterm=''))
        self.assertEqual(ll._program, llc._program,
                         'Program mismatch: ' + diff)
        # Done as a secondary step so we get a better result if the
        # program is where the mismatch is.
        self.assertEqual(ll, llc)
        self.assertEqual(ll.encode(), data)

    def testanothersimplecase(self):
        """Content added in rev 3 and removed in rev 4."""
        ll = linelog.linelog()
        ll.replacelines(3, 0, 0, 0, 2)
        ll.replacelines(4, 0, 2, 0, 0)
        self.assertEqual(self._annotated(ll, 4), [])
        self.assertEqual(self._annotated(ll, 3), [(3, 0), (3, 1)])
        # rev 2 is empty because contents were only ever introduced in rev 3
        self.assertEqual(self._annotated(ll, 2), [])

    def testrandomedits(self):
        """Apply many random edits, then re-check every revision."""
        # Inspired by original linelog tests.
        seed = random.random()
        numrevs = 2000
        ll = linelog.linelog()
        # The seed is random, so report it on failure; otherwise a failing
        # run could not be reproduced.
        seedmsg = 'random seed: %r' % seed
        # Populate linelog
        for lines, rev, a1, a2, b1, b2, blines, usevec in _genedits(
                seed, numrevs):
            if usevec:
                ll.replacelines_vec(rev, a1, a2, blines)
            else:
                ll.replacelines(rev, a1, a2, b1, b2)
            # Called for its effect on the linelog's internal state, which
            # the annotateresult attribute checked below reflects.
            ll.annotate(rev)
            self.assertEqual(ll.annotateresult, lines,
                             '%s (while populating rev %d)' % (seedmsg, rev))
        # Verify we can get back these states by annotating each rev
        for edit in _genedits(seed, numrevs):
            lines, rev = edit[0], edit[1]
            self.assertEqual(self._annotated(ll, rev), lines,
                             '%s (while re-annotating rev %d)'
                             % (seedmsg, rev))

    def testinfinitebadprogram(self):
        """A program that jumps to itself raises instead of hanging."""
        ll = linelog.linelog.fromdata(
            b'\x00\x00\x00\x00\x00\x00\x00\x02'  # header
            b'\x00\x00\x00\x00\x00\x00\x00\x01'  # JUMP to self
        )
        with self.assertRaises(linelog.LineLogError):
            # should not be an infinite loop and raise
            ll.annotate(1)
if __name__ == '__main__':
    # Imported here so the test runner is only needed when this file is
    # executed directly as a script.
    import silenttestrunner

    silenttestrunner.main(__name__)