Mercurial > hg
view tests/test-wireproto-clientreactor.py @ 44363:f7459da77f23
nodemap: introduce an option to use mmap to read the nodemap mapping
The performance and memory benefit is much greater if we don't have to copy all
the data in memory for each information. So we introduce an option (on by
default) to read the data using mmap.
This changeset is the last one defining the API that indexes must provide to
support nodemap data (they have to be able to use the mmapping).
Below are some benchmarks comparing the best we currently have in 5.3 with the
final step of this series (using the persistent nodemap implementation in
Rust). The benchmarks run `hg perfindex` with various revsets and the following
variants:
Before:
* do not use the persistent nodemap
* use the CPython implementation of the index for nodemap
* use mmapping of the changelog index
After:
* use the MixedIndex Rust code, with the NodeTree object for nodemap access
(still in review)
* use the persistent nodemap data from disk
* access the persistent nodemap data through mmap
* use mmapping of the changelog index
The persistent nodemap greatly speeds up most operations on very large
repositories. Some of the previously very fast lookups end up a bit slower because
the persistent nodemap has to be set up. However, the absolute slowdown is very
small and won't matter in the big picture.
Here are some numbers (in seconds) for the reference copy of mozilla-try:
Revset Before After abs-change speedup
-10000: 0.004622 0.005532 0.000910 × 0.83
-10: 0.000050 0.000132 0.000082 × 0.37
tip 0.000052 0.000085 0.000033 × 0.61
0 + (-10000:) 0.028222 0.005337 -0.022885 × 5.29
0 0.023521 0.000084 -0.023437 × 280.01
(-10000:) + 0 0.235539 0.005308 -0.230231 × 44.37
(-10:) + :9 0.232883 0.000180 -0.232703 ×1293.79
(-10000:) + (:99) 0.238735 0.005358 -0.233377 × 44.55
:99 + (-10000:) 0.317942 0.005593 -0.312349 × 56.84
:9 + (-10:) 0.313372 0.000179 -0.313193 ×1750.68
:9 0.316450 0.000143 -0.316307 ×2212.93
On smaller repositories, the cost of nodemap-related operations is not as big, so
the win is much more modest. Yet it helps shave a handful of milliseconds here
and there.
Here are some numbers (in seconds) for the reference copy of mercurial:
Revset Before After abs-change speedup
-10: 0.000065 0.000097 0.000032 × 0.67
tip 0.000063 0.000078 0.000015 × 0.80
0 0.000561 0.000079 -0.000482 × 7.10
-10000: 0.004609 0.003648 -0.000961 × 1.26
0 + (-10000:) 0.005023 0.003715 -0.001307 × 1.35
(-10:) + :9 0.002187 0.000108 -0.002079 ×20.25
(-10000:) + 0 0.006252 0.003716 -0.002536 × 1.68
(-10000:) + (:99) 0.006367 0.003707 -0.002660 × 1.71
:9 + (-10:) 0.003846 0.000110 -0.003736 ×34.96
:9 0.003854 0.000099 -0.003755 ×38.92
:99 + (-10000:) 0.007644 0.003778 -0.003866 × 2.02
Differential Revision: https://phab.mercurial-scm.org/D7894
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 11 Feb 2020 11:18:52 +0100 |
parents | 2372284d9457 |
children | 89a2afe31e82 |
line wrap: on
line source
from __future__ import absolute_import import sys import unittest import zlib from mercurial import ( error, ui as uimod, wireprotoframing as framing, ) from mercurial.utils import cborutil try: from mercurial import zstd zstd.__version__ except ImportError: zstd = None ffs = framing.makeframefromhumanstring globalui = uimod.ui() def sendframe(reactor, frame): """Send a frame bytearray to a reactor.""" header = framing.parseheader(frame) payload = frame[framing.FRAME_HEADER_SIZE :] assert len(payload) == header.length return reactor.onframerecv( framing.frame( header.requestid, header.streamid, header.streamflags, header.typeid, header.flags, payload, ) ) class SingleSendTests(unittest.TestCase): """A reactor that can only send once rejects subsequent sends.""" if not getattr(unittest.TestCase, 'assertRaisesRegex', False): # Python 3.7 deprecates the regex*p* version, but 2.7 lacks # the regex version. assertRaisesRegex = ( # camelcase-required unittest.TestCase.assertRaisesRegexp ) def testbasic(self): reactor = framing.clientreactor( globalui, hasmultiplesend=False, buffersends=True ) request, action, meta = reactor.callcommand(b'foo', {}) self.assertEqual(request.state, b'pending') self.assertEqual(action, b'noop') action, meta = reactor.flushcommands() self.assertEqual(action, b'sendframes') for frame in meta[b'framegen']: self.assertEqual(request.state, b'sending') self.assertEqual(request.state, b'sent') with self.assertRaisesRegex( error.ProgrammingError, 'cannot issue new commands' ): reactor.callcommand(b'foo', {}) with self.assertRaisesRegex( error.ProgrammingError, 'cannot issue new commands' ): reactor.callcommand(b'foo', {}) class NoBufferTests(unittest.TestCase): """A reactor without send buffering sends requests immediately.""" def testbasic(self): reactor = framing.clientreactor( globalui, hasmultiplesend=True, buffersends=False ) request, action, meta = reactor.callcommand(b'command1', {}) self.assertEqual(request.requestid, 1) 
self.assertEqual(action, b'sendframes') self.assertEqual(request.state, b'pending') for frame in meta[b'framegen']: self.assertEqual(request.state, b'sending') self.assertEqual(request.state, b'sent') action, meta = reactor.flushcommands() self.assertEqual(action, b'noop') # And we can send another command. request, action, meta = reactor.callcommand(b'command2', {}) self.assertEqual(request.requestid, 3) self.assertEqual(action, b'sendframes') for frame in meta[b'framegen']: self.assertEqual(request.state, b'sending') self.assertEqual(request.state, b'sent') class BadFrameRecvTests(unittest.TestCase): if not getattr(unittest.TestCase, 'assertRaisesRegex', False): # Python 3.7 deprecates the regex*p* version, but 2.7 lacks # the regex version. assertRaisesRegex = ( # camelcase-required unittest.TestCase.assertRaisesRegexp ) def testoddstream(self): reactor = framing.clientreactor(globalui) action, meta = sendframe(reactor, ffs(b'1 1 0 1 0 foo')) self.assertEqual(action, b'error') self.assertEqual( meta[b'message'], b'received frame with odd numbered stream ID: 1' ) def testunknownstream(self): reactor = framing.clientreactor(globalui) action, meta = sendframe(reactor, ffs(b'1 0 0 1 0 foo')) self.assertEqual(action, b'error') self.assertEqual( meta[b'message'], b'received frame on unknown stream without beginning ' b'of stream flag set', ) def testunhandledframetype(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for frame in meta[b'framegen']: pass with self.assertRaisesRegex( error.ProgrammingError, 'unhandled frame type' ): sendframe(reactor, ffs(b'1 0 stream-begin text-output 0 foo')) class StreamTests(unittest.TestCase): def testmultipleresponseframes(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) self.assertEqual(action, b'sendframes') for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs( 
b'%d 0 stream-begin command-response 0 foo' % request.requestid ), ) self.assertEqual(action, b'responsedata') action, meta = sendframe( reactor, ffs(b'%d 0 0 command-response eos bar' % request.requestid) ) self.assertEqual(action, b'responsedata') class RedirectTests(unittest.TestCase): def testredirect(self): reactor = framing.clientreactor(globalui, buffersends=False) redirect = { b'targets': [b'a', b'b'], b'hashes': [b'sha256'], } request, action, meta = reactor.callcommand( b'foo', {}, redirect=redirect ) self.assertEqual(action, b'sendframes') frames = list(meta[b'framegen']) self.assertEqual(len(frames), 1) self.assertEqual( frames[0], ffs( b'1 1 stream-begin command-request new ' b"cbor:{b'name': b'foo', " b"b'redirect': {b'targets': [b'a', b'b'], " b"b'hashes': [b'sha256']}}" ), ) class StreamSettingsTests(unittest.TestCase): def testnoflags(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings 0 ') ) self.assertEqual(action, b'error') self.assertEqual( meta, { b'message': b'stream encoding settings frame must have ' b'continuation or end of stream flag set', }, ) def testconflictflags(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings continuation|eos ') ) self.assertEqual(action, b'error') self.assertEqual( meta, { b'message': b'stream encoding settings frame cannot have both ' b'continuation and end of stream flags set', }, ) def testemptypayload(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings eos ') ) 
self.assertEqual(action, b'error') self.assertEqual( meta, { b'message': b'stream encoding settings frame did not contain ' b'CBOR data' }, ) def testbadcbor(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings eos badvalue') ) self.assertEqual(action, b'error') def testsingleobject(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings eos cbor:b"identity"'), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) def testmultipleobjects(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass data = b''.join( [ b''.join(cborutil.streamencode(b'identity')), b''.join(cborutil.streamencode({b'foo', b'bar'})), ] ) action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings eos %s' % data) ) self.assertEqual(action, b'error') self.assertEqual( meta, { b'message': b'error setting stream decoder: identity decoder ' b'received unexpected additional values', }, ) def testmultipleframes(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass data = b''.join(cborutil.streamencode(b'identity')) action, meta = sendframe( reactor, ffs( b'1 2 stream-begin stream-settings continuation %s' % data[0:3] ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) action, meta = sendframe( reactor, ffs(b'1 2 0 stream-settings eos %s' % data[3:]) ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) def testinvalidencoder(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, 
meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs(b'1 2 stream-begin stream-settings eos cbor:b"badvalue"'), ) self.assertEqual(action, b'error') self.assertEqual( meta, { b'message': b'error setting stream decoder: unknown stream ' b'decoder: badvalue', }, ) def testzlibencoding(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zlib"' % request.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) result = { b'status': b'ok', } encoded = b''.join(cborutil.streamencode(result)) compressed = zlib.compress(encoded) self.assertEqual(zlib.decompress(compressed), encoded) action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request.requestid, compressed) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], encoded) def testzlibencodingsinglebyteframes(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zlib"' % request.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) result = { b'status': b'ok', } encoded = b''.join(cborutil.streamencode(result)) compressed = zlib.compress(encoded) self.assertEqual(zlib.decompress(compressed), encoded) chunks = [] for i in range(len(compressed)): char = compressed[i : i + 1] if char == b'\\': char = b'\\\\' action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request.requestid, char) ), ) self.assertEqual(action, b'responsedata') chunks.append(meta[b'data']) self.assertTrue(meta[b'expectmore']) self.assertFalse(meta[b'eos']) # zlib will have the full data 
decoded at this point, even though # we haven't flushed. self.assertEqual(b''.join(chunks), encoded) # End the stream for good measure. action, meta = sendframe( reactor, ffs(b'%d 2 stream-end command-response eos ' % request.requestid), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') self.assertFalse(meta[b'expectmore']) self.assertTrue(meta[b'eos']) def testzlibmultipleresponses(self): # We feed in zlib compressed data on the same stream but belonging to # 2 different requests. This tests our flushing behavior. reactor = framing.clientreactor( globalui, buffersends=False, hasmultiplesend=True ) request1, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass request2, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass outstream = framing.outputstream(2) outstream.setencoder(globalui, b'zlib') response1 = b''.join( cborutil.streamencode( {b'status': b'ok', b'extra': b'response1' * 10,} ) ) response2 = b''.join( cborutil.streamencode( {b'status': b'error', b'extra': b'response2' * 10,} ) ) action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zlib"' % request1.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) # Feeding partial data in won't get anything useful out. action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request1.requestid, outstream.encode(response1)) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') # But flushing data at both ends will get our original data. action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request1.requestid, outstream.flush()) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], response1) # We should be able to reuse the compressor/decompressor for the # 2nd response. 
action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request2.requestid, outstream.encode(response2)) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request2.requestid, outstream.flush()) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], response2) @unittest.skipUnless(zstd, 'zstd not available') def testzstd8mbencoding(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zstd-8mb"' % request.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) result = { b'status': b'ok', } encoded = b''.join(cborutil.streamencode(result)) encoder = framing.zstd8mbencoder(globalui) compressed = encoder.encode(encoded) + encoder.finish() self.assertEqual( zstd.ZstdDecompressor().decompress( compressed, max_output_size=len(encoded) ), encoded, ) action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request.requestid, compressed) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], encoded) @unittest.skipUnless(zstd, 'zstd not available') def testzstd8mbencodingsinglebyteframes(self): reactor = framing.clientreactor(globalui, buffersends=False) request, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zstd-8mb"' % request.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) result = { b'status': b'ok', } encoded = b''.join(cborutil.streamencode(result)) compressed = zstd.ZstdCompressor().compress(encoded) self.assertEqual( zstd.ZstdDecompressor().decompress(compressed), encoded ) chunks = [] for i 
in range(len(compressed)): char = compressed[i : i + 1] if char == b'\\': char = b'\\\\' action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request.requestid, char) ), ) self.assertEqual(action, b'responsedata') chunks.append(meta[b'data']) self.assertTrue(meta[b'expectmore']) self.assertFalse(meta[b'eos']) # zstd decompressor will flush at frame boundaries. self.assertEqual(b''.join(chunks), encoded) # End the stream for good measure. action, meta = sendframe( reactor, ffs(b'%d 2 stream-end command-response eos ' % request.requestid), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') self.assertFalse(meta[b'expectmore']) self.assertTrue(meta[b'eos']) @unittest.skipUnless(zstd, 'zstd not available') def testzstd8mbmultipleresponses(self): # We feed in zstd compressed data on the same stream but belonging to # 2 different requests. This tests our flushing behavior. reactor = framing.clientreactor( globalui, buffersends=False, hasmultiplesend=True ) request1, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass request2, action, meta = reactor.callcommand(b'foo', {}) for f in meta[b'framegen']: pass outstream = framing.outputstream(2) outstream.setencoder(globalui, b'zstd-8mb') response1 = b''.join( cborutil.streamencode( {b'status': b'ok', b'extra': b'response1' * 10,} ) ) response2 = b''.join( cborutil.streamencode( {b'status': b'error', b'extra': b'response2' * 10,} ) ) action, meta = sendframe( reactor, ffs( b'%d 2 stream-begin stream-settings eos cbor:b"zstd-8mb"' % request1.requestid ), ) self.assertEqual(action, b'noop') self.assertEqual(meta, {}) # Feeding partial data in won't get anything useful out. 
action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request1.requestid, outstream.encode(response1)) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') # But flushing data at both ends will get our original data. action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request1.requestid, outstream.flush()) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], response1) # We should be able to reuse the compressor/decompressor for the # 2nd response. action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response continuation %s' % (request2.requestid, outstream.encode(response2)) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], b'') action, meta = sendframe( reactor, ffs( b'%d 2 encoded command-response eos %s' % (request2.requestid, outstream.flush()) ), ) self.assertEqual(action, b'responsedata') self.assertEqual(meta[b'data'], response2) if __name__ == '__main__': if (3, 6, 0) <= sys.version_info < (3, 6, 4): # Python 3.6.0 through 3.6.3 inclusive shipped with # https://bugs.python.org/issue31825 and we can't run these # tests on those specific versions of Python. Sigh. sys.exit(80) import silenttestrunner silenttestrunner.main(__name__)