# HG changeset patch # User Jun Wu # Date 1490932137 25200 # Node ID 985de02b5b9deaec6bcaaf14f3eadd2be9250904 # Parent aff7b32b3c0569469eec485cd49f1a443c3511c1 revlog: add a stronger test for raw processing There are some issues about revlog raw processing (flag processor). The test is relatively strong covering many cases. It will verify fixes. diff -r aff7b32b3c05 -r 985de02b5b9d tests/test-revlog-raw.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-revlog-raw.py Thu Mar 30 20:48:57 2017 -0700 @@ -0,0 +1,290 @@ +# test revlog interaction about raw data (flagprocessor) + +from __future__ import absolute_import, print_function + +import sys + +from mercurial import ( + encoding, + node, + revlog, + transaction, + vfs, +) + +# TESTTMP is optional. This makes it convenient to run without run-tests.py +tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp')) + +# Enable generaldelta otherwise revlog won't use delta as expected by the test +tvfs.options = {'generaldelta': True, 'revlogv1': True, 'revlogv1': True} + +# The test wants to control whether to use delta explicitly, based on +# "storedeltachains". +revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains + +def abort(msg): + print('abort: %s' % msg) + # Return 0 so run-tests.py could compare the output. + sys.exit() + +# Register a revlog processor for flag EXTSTORED. +# +# It simply prepends a fixed header, and replaces '1' to 'i'. So it has +# insertion and replacement, and may be interesting to test revlog's line-based +# deltas. +_extheader = b'E\n' + +def readprocessor(self, rawtext): + # True: the returned text could be used to verify hash + text = rawtext[len(_extheader):].replace(b'i', b'1') + return text, True + +def writeprocessor(self, text): + # False: the returned rawtext shouldn't be used to verify hash + rawtext = _extheader + text.replace(b'1', b'i') + return rawtext, False + +def rawprocessor(self, rawtext): + # False: do not verify hash. Only the content returned by "readprocessor" + # can be used to verify hash. + return False + +revlog.addflagprocessor(revlog.REVIDX_EXTSTORED, + (readprocessor, writeprocessor, rawprocessor)) + +# Utilities about reading and appending revlog + +def newtransaction(): + # A transaction is required to write revlogs + report = lambda msg: None + return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal') + +def newrevlog(name=b'_testrevlog.i', recreate=False): + if recreate: + tvfs.tryunlink(name) + rlog = revlog.revlog(tvfs, name) + return rlog + +def appendrev(rlog, text, tr, isext=False, isdelta=True): + '''Append a revision. If isext is True, set the EXTSTORED flag so flag + processor will be used (and rawtext is different from text). If isdelta is + True, force the revision to be a delta, otherwise it's full text. + ''' + nextrev = len(rlog) + p1 = rlog.node(nextrev - 1) + p2 = node.nullid + if isext: + flags = revlog.REVIDX_EXTSTORED + else: + flags = revlog.REVIDX_DEFAULT_FLAGS + # Change storedeltachains temporarily, to override revlog's delta decision + rlog.storedeltachains = isdelta + try: + rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags) + return nextrev + except Exception as ex: + abort('rev %d: failed to append: %s' % (nextrev, ex)) + finally: + # Restore storedeltachains. It is always True, see revlog.__init__ + rlog.storedeltachains = True + +def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True): + '''Copy revlog to destname using revlog.addgroup. Return the copied revlog. + + This emulates push or pull. They use changegroup. Changegroup requires + repo to work. We don't have a repo, so a dummy changegroup is used. + + If optimaldelta is True, use optimized delta parent, so the destination + revlog could probably reuse it. Otherwise it builds sub-optimal delta, and + the destination revlog needs more work to use it. + + This exercises some revlog.addgroup (and revlog._addrevision(text=None)) + code path, which is not covered by "appendrev" alone. + ''' + class dummychangegroup(object): + @staticmethod + def deltachunk(pnode): + pnode = pnode or node.nullid + parentrev = rlog.rev(pnode) + r = parentrev + 1 + if r >= len(rlog): + return {} + if optimaldelta: + deltaparent = parentrev + else: + # suboptimal deltaparent + deltaparent = min(0, parentrev) + return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid, + 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r), + 'deltabase': rlog.node(deltaparent), + 'delta': rlog.revdiff(deltaparent, r)} + + def linkmap(lnode): + return rlog.rev(lnode) + + dlog = newrevlog(destname, recreate=True) + dlog.addgroup(dummychangegroup(), linkmap, tr) + return dlog + +def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'): + '''Like addgroupcopy, but use the low level revlog._addrevision directly. + + It exercises some code paths that are hard to reach easily otherwise. + ''' + dlog = newrevlog(destname, recreate=True) + for r in rlog: + p1 = rlog.node(r - 1) + p2 = node.nullid + if r == 0: + text = rlog.revision(r, raw=True) + cachedelta = None + else: + # deltaparent is more interesting if it has the EXTSTORED flag. + deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)]) + text = None + cachedelta = (deltaparent, rlog.revdiff(deltaparent, r)) + flags = rlog.flags(r) + ifh = dlog.opener(dlog.indexfile, 'a+') + dfh = None + if not dlog._inline: + dfh = dlog.opener(dlog.datafile, 'a+') + dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags, cachedelta, + ifh, dfh) + return dlog + +# Utilities to generate revisions for testing + +def genbits(n): + '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n). + i.e. the generated numbers have a width of n bits. + + The combination of two adjacent numbers will cover all possible cases. + That is to say, given any x, y where both x, and y are in range(2 ** n), + there is an x followed immediately by y in the generated sequence. + ''' + m = 2 ** n + + # Gray Code. See https://en.wikipedia.org/wiki/Gray_code + gray = lambda x: x ^ (x >> 1) + + # Generate (n * 2) bit gray code, yield lower n bits as X, and look for + # the next unused gray code where higher n bits equal to X. + + # For gray codes whose higher bits are X, a[X] of them have been used. + a = [0] * m + + # Iterate from 0. + x = 0 + yield x + for i in range(m * m): + y = gray(a[x] + x * m) & (m - 1) + a[x] += 1 + x = y + yield x + +def gentext(rev): + '''Given a revision number, generate dummy text''' + return b''.join(b'%d\n' % j for j in range(-1, rev % 5)) + +def writecases(rlog, tr): + '''Write some revisions interested to the test. + + The test is interested in 3 properties of a revision: + + - Is it a delta or a full text? (isdelta) + This is to catch some delta application issues. + - Does it have a flag of EXTSTORED? (isext) + This is to catch some flag processor issues. Especially when + interacted with revlog deltas. + - Is its text empty? (isempty) + This is less important. It is intended to try to catch some careless + checks like "if text" instead of "if text is None". Note: if flag + processor is involved, raw text may be not empty. + + Write 65 revisions. So that all combinations of the above flags for + adjacent revisions are covered. That is to say, + + len(set( + (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty) + for r in range(len(rlog) - 1) + )) is 64. + + Where "r.delta", "r.ext", and "r.empty" are booleans matching properties + mentioned above. + + Return expected [(text, rawtext)]. + ''' + result = [] + for i, x in enumerate(genbits(3)): + isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4) + if isempty: + text = b'' + else: + text = gentext(i) + rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta) + + # Verify text, rawtext, and rawsize + if isext: + rawtext = writeprocessor(None, text)[0] + else: + rawtext = text + if rlog.rawsize(rev) != len(rawtext): + abort('rev %d: wrong rawsize' % rev) + if rlog.revision(rev, raw=False) != text: + abort('rev %d: wrong text' % rev) + if rlog.revision(rev, raw=True) != rawtext: + abort('rev %d: wrong rawtext' % rev) + result.append((text, rawtext)) + + # Verify flags like isdelta, isext work as expected + if bool(rlog.deltaparent(rev) > -1) != isdelta: + abort('rev %d: isdelta is ineffective' % rev) + if bool(rlog.flags(rev)) != isext: + abort('rev %d: isext is ineffective' % rev) + return result + +# Main test and checking + +def checkrevlog(rlog, expected): + '''Check if revlog has expected contents. expected is [(text, rawtext)]''' + # Test using different access orders. This could expose some issues + # depending on revlog caching (see revlog._cache). + for r0 in range(len(rlog) - 1): + r1 = r0 + 1 + for revorder in [[r0, r1], [r1, r0]]: + for raworder in [[True], [False], [True, False], [False, True]]: + nlog = newrevlog() + for rev in revorder: + for raw in raworder: + t = nlog.revision(rev, raw=raw) + if t != expected[rev][int(raw)]: + abort('rev %d: corrupted %stext' + % (rev, raw and 'raw' or '')) + +def maintest(): + expected = rl = None + with newtransaction() as tr: + rl = newrevlog(recreate=True) + expected = writecases(rl, tr) + checkrevlog(rl, expected) + print('local test passed') + # Copy via revlog.addgroup + rl1 = addgroupcopy(rl, tr) + checkrevlog(rl1, expected) + rl2 = addgroupcopy(rl, tr, optimaldelta=False) + checkrevlog(rl2, expected) + print('addgroupcopy test passed') + # Copy via revlog.clone + rl3 = newrevlog(name='_destrevlog3.i', recreate=True) + rl.clone(tr, rl3) + checkrevlog(rl3, expected) + print('clone test passed') + # Copy via low-level revlog._addrevision + rl4 = lowlevelcopy(rl, tr) + checkrevlog(rl4, expected) + print('lowlevelcopy test passed') + +try: + maintest() +except Exception as ex: + abort('crashed: %s' % ex) diff -r aff7b32b3c05 -r 985de02b5b9d tests/test-revlog-raw.py.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test-revlog-raw.py.out Thu Mar 30 20:48:57 2017 -0700 @@ -0,0 +1,1 @@ +abort: rev 5: wrong text