# HG changeset patch # User Gregory Szorc # Date 1516651949 28800 # Node ID a84dbc87dae970396afcbe816f93a05ce28b8759 # Parent 8144f1b07e2184d68cd937e39ab008e492b6d8af exchange: send bundle2 stream clones uncompressed Stream clones don't compress well. And compression undermines a point of stream clones which is to trade significant CPU reductions by increasing size. Building upon our introduction of metadata to communicate bundle information back to callers of exchange.getbundlechunks(), we add an attribute to the bundler that communicates whether the bundle is best left uncompressed. We return this attribute as part of the bundle metadata. And the wire protocol honors it when determining whether to compress the wire protocol response. The added test demonstrates that the raw result from the wire protocol is not compressed. It also demonstrates that the server will serve stream responses when the feature isn't enabled. We'll address that in another commit. The effect of this change is that server-side CPU usage for bundle2 stream clones is significantly reduced by removing zstd compression. For the mozilla-unified repository: before: 37.69 user 8.01 system after: 27.38 user 7.34 system Assuming things are CPU bound, that ~10s reduction would translate to faster clones on the client. zstd can decompress at >1 GB/s. So the overhead from decompression on the client is small in the grand scheme of things. But if zlib compression were being used, the overhead would be much greater. Differential Revision: https://phab.mercurial-scm.org/D1926 diff -r 8144f1b07e21 -r a84dbc87dae9 mercurial/bundle2.py --- a/mercurial/bundle2.py Mon Jan 22 12:38:04 2018 -0800 +++ b/mercurial/bundle2.py Mon Jan 22 12:12:29 2018 -0800 @@ -599,6 +599,10 @@ self.capabilities = dict(capabilities) self._compengine = util.compengines.forbundletype('UN') self._compopts = None + # If compression is being handled by a consumer of the raw + # data (e.g. the wire protocol), unsetting this flag tells + # consumers that the bundle is best left uncompressed. + self.prefercompressed = True def setcompression(self, alg, compopts=None): """setup core part compression to """ diff -r 8144f1b07e21 -r a84dbc87dae9 mercurial/exchange.py --- a/mercurial/exchange.py Mon Jan 22 12:38:04 2018 -0800 +++ b/mercurial/exchange.py Mon Jan 22 12:12:29 2018 -0800 @@ -1763,6 +1763,8 @@ func(bundler, repo, source, bundlecaps=bundlecaps, b2caps=b2caps, **pycompat.strkwargs(kwargs)) + info['prefercompressed'] = bundler.prefercompressed + return info, bundler.getchunks() @getbundle2partsgenerator('stream') @@ -1770,6 +1772,12 @@ b2caps=None, heads=None, common=None, **kwargs): if not kwargs.get('stream', False): return + + # Stream clones don't compress well. And compression undermines a + # goal of stream clones, which is to be fast. Communicate the desire + # to avoid compression to consumers of the bundle. + bundler.prefercompressed = False + filecount, bytecount, it = streamclone.generatev2(repo) requirements = ' '.join(sorted(repo.requirements)) part = bundler.newpart('stream', data=it) diff -r 8144f1b07e21 -r a84dbc87dae9 mercurial/wireproto.py --- a/mercurial/wireproto.py Mon Jan 22 12:38:04 2018 -0800 +++ b/mercurial/wireproto.py Mon Jan 22 12:12:29 2018 -0800 @@ -862,7 +862,7 @@ raise error.Abort(bundle2requiredmain, hint=bundle2requiredhint) - preferuncompressed = False + prefercompressed = True try: if repo.ui.configbool('server', 'disablefullbundle'): @@ -879,6 +879,7 @@ info, chunks = exchange.getbundlechunks(repo, 'serve', **pycompat.strkwargs(opts)) + prefercompressed = info.get('prefercompressed', True) except error.Abort as exc: # cleanly forward Abort error to the client if not exchange.bundle2requested(opts.get('bundlecaps')): @@ -894,9 +895,9 @@ bundler.addpart(bundle2.bundlepart('error:abort', manargs, advargs)) chunks = bundler.getchunks() - preferuncompressed = True + prefercompressed = False - return streamres(gen=chunks, prefer_uncompressed=preferuncompressed) + return streamres(gen=chunks, prefer_uncompressed=not prefercompressed) @wireprotocommand('heads') def heads(repo, proto): diff -r 8144f1b07e21 -r a84dbc87dae9 tests/test-clone-uncompressed.t --- a/tests/test-clone-uncompressed.t Mon Jan 22 12:38:04 2018 -0800 +++ b/tests/test-clone-uncompressed.t Mon Jan 22 12:12:29 2018 -0800 @@ -47,6 +47,32 @@ rbc-revs-v1 #endif +getbundle requests with stream=1 are uncompressed + + $ get-with-headers.py $LOCALIP:$HGPORT '?cmd=getbundle' content-type --bodyfile body --hgproto '0.1 0.2 comp=zlib,none' --requestheader "x-hgarg-1=bundlecaps=HG20%2Cbundle2%3DHG20%250Abookmarks%250Achangegroup%253D01%252C02%250Adigests%253Dmd5%252Csha1%252Csha512%250Aerror%253Dabort%252Cunsupportedcontent%252Cpushraced%252Cpushkey%250Ahgtagsfnodes%250Alistkeys%250Aphases%253Dheads%250Apushkey%250Aremote-changegroup%253Dhttp%252Chttps&cg=0&common=0000000000000000000000000000000000000000&heads=c17445101a72edac06facd130d14808dfbd5c7c2&stream=1" + 200 Script output follows + content-type: application/mercurial-0.2 + + + $ f --size --hex --bytes 256 body + body: size=112328 + 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......| + 0010: 72 06 53 54 52 45 41 4d 00 00 00 00 04 00 09 05 |r.STREAM........| + 0020: 09 04 0c 2d 07 02 62 79 74 65 63 6f 75 6e 74 39 |...-..bytecount9| + 0030: 38 37 35 38 66 69 6c 65 63 6f 75 6e 74 31 30 33 |8758filecount103| + 0040: 30 72 65 71 75 69 72 65 6d 65 6e 74 73 64 6f 74 |0requirementsdot| + 0050: 65 6e 63 6f 64 65 20 66 6e 63 61 63 68 65 20 67 |encode fncache g| + 0060: 65 6e 65 72 61 6c 64 65 6c 74 61 20 72 65 76 6c |eneraldelta revl| + 0070: 6f 67 76 31 20 73 74 6f 72 65 76 65 72 73 69 6f |ogv1 storeversio| + 0080: 6e 76 32 00 00 10 00 73 08 42 64 61 74 61 2f 30 |nv2....s.Bdata/0| + 0090: 2e 69 00 03 00 01 00 00 00 00 00 00 00 02 00 00 |.i..............| + 00a0: 00 01 00 00 00 00 00 00 00 01 ff ff ff ff ff ff |................| + 00b0: ff ff 80 29 63 a0 49 d3 23 87 bf ce fe 56 67 92 |...)c.I.#....Vg.| + 00c0: 67 2c 69 d1 ec 39 00 00 00 00 00 00 00 00 00 00 |g,i..9..........| + 00d0: 00 00 75 30 73 08 42 64 61 74 61 2f 31 2e 69 00 |..u0s.Bdata/1.i.| + 00e0: 03 00 01 00 00 00 00 00 00 00 02 00 00 00 01 00 |................| + 00f0: 00 00 00 00 00 00 01 ff ff ff ff ff ff ff ff f9 |................| + --uncompressed is an alias to --stream #if stream-legacy