# HG changeset patch # User Pierre-Yves David # Date 1685378518 -7200 # Node ID 5460424092e2436c25b733def777404bdc3d2b25 # Parent 9caa860dcbeca5f5263ba259faa02586c11fe58e stream-clone: smoothly detect and handle a case where a revlog is split This detects and handles the most common case of a race condition around streaming and revlog splitting: the one where the revlog is split between the initial collection of data and the time when we start streaming that data. In such a case, we patch an inlined version of that revlog back together when this happens. This is necessary as stream-v2 promised a specific number of bytes and a specific number of files to the client. In stream-v3, we will have the opportunity to just send a split revlog instead. Getting a better version of the protocol for stream-v3 is still useful, but it is no longer a blocker to fix that race condition. Note that another, rarer race condition exists, where the revlog is split while we are creating the revlog and extracting content from it. This can be dealt with later. 
diff -r 9caa860dcbec -r 5460424092e2 mercurial/revlog.py --- a/mercurial/revlog.py Mon May 29 14:07:58 2023 +0200 +++ b/mercurial/revlog.py Mon May 29 18:41:58 2023 +0200 @@ -506,7 +506,7 @@ except FileNotFoundError: return b'' - def get_streams(self, max_linkrev): + def get_streams(self, max_linkrev, force_inline=False): n = len(self) index = self.index while n > 0: @@ -547,6 +547,29 @@ return [ (self._indexfile, inline_stream, index_size + data_size), ] + elif force_inline: + + def get_stream(): + with self._datafp() as fp_d: + yield None + + for rev in range(n): + idx = self.index.entry_binary(rev) + if rev == 0 and self._docket is None: + # re-inject the inline flag + header = self._format_flags + header |= self._format_version + header |= FLAG_INLINE_DATA + header = self.index.pack_header(header) + idx = header + idx + yield idx + yield self._getsegmentforrevs(rev, rev, df=fp_d)[1] + + inline_stream = get_stream() + next(inline_stream) + return [ + (self._indexfile, inline_stream, index_size + data_size), + ] else: def get_index_stream(): diff -r 9caa860dcbec -r 5460424092e2 mercurial/store.py --- a/mercurial/store.py Mon May 29 14:07:58 2023 +0200 +++ b/mercurial/store.py Mon May 29 18:41:58 2023 +0200 @@ -645,8 +645,11 @@ if name_to_ext[f.unencoded_path] not in (b'.d', b'.i') ] + is_inline = b'.d' not in self._details + rl = self.get_revlog_instance(repo).get_revlog() - rl_stream = rl.get_streams(max_changeset) + rl_stream = rl.get_streams(max_changeset, force_inline=is_inline) + for name, s, size in rl_stream: if name_to_size.get(name, 0) != size: msg = _(b"expected %d bytes but %d provided for %s") diff -r 9caa860dcbec -r 5460424092e2 tests/test-clone-stream-revlog-split.t --- a/tests/test-clone-stream-revlog-split.t Mon May 29 14:07:58 2023 +0200 +++ b/tests/test-clone-stream-revlog-split.t Mon May 29 18:41:58 2023 +0200 @@ -17,6 +17,8 @@ > [format] > # skip compression to make it easy to trigger a split > revlog-compression=none + > [phases] + > 
publish=no > EOF $ hg init server @@ -64,7 +66,11 @@ Start a client doing a streaming clone - $ (hg clone -q --stream -U http://localhost:$HGPORT1 clone-while-split > client.log 2>&1; touch "$HG_TEST_STREAM_WALKED_FILE_3") & + $ ( \ + > hg clone --debug --stream -U http://localhost:$HGPORT1 \ + > clone-while-split > client.log 2>&1; \ + > touch "$HG_TEST_STREAM_WALKED_FILE_3" \ + > ) & Wait for the server to be done collecting data @@ -86,65 +92,84 @@ Check everything is fine $ cat client.log - remote: abort: unexpected error: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !) - abort: pull failed on remote (known-bad-output !) + using http://localhost:$HGPORT1/ + sending capabilities command + query 1; heads + sending batch command + streaming all changes + sending getbundle command + bundle2-input-bundle: with-transaction + bundle2-input-part: "stream2" (params: 3 mandatory) supported (stream-bundle2-v2 !) + bundle2-input-part: "stream3-exp" (params: 3 mandatory) supported (stream-bundle2-v3 !) + applying stream bundle + 7 files to transfer, 2.11 KB of data + adding [s] data/some-file.i (1.23 KB) + adding [s] phaseroots (43 bytes) + adding [s] 00manifest.i (348 bytes) + adding [s] 00changelog.i (381 bytes) + adding [c] branch2-served (94 bytes) + adding [c] rbc-names-v1 (7 bytes) + adding [c] rbc-revs-v1 (24 bytes) + updating the branch cache + transferred 2.11 KB in * seconds (* */sec) (glob) + bundle2-input-part: total payload size 2268 + bundle2-input-part: "listkeys" (params: 1 mandatory) supported + bundle2-input-bundle: 2 parts total + checking for updated bookmarks + updating the branch cache + (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob) $ tail -2 errors.log - mercurial.error.Abort: expected 0 bytes but 1067 provided for data/some-file.d (known-bad-output !) - (known-bad-output !) $ hg -R clone-while-split verify - checking changesets (missing-correct-output !) 
- checking manifests (missing-correct-output !) - crosschecking files in changesets and manifests (missing-correct-output !) - checking files (missing-correct-output !) - checking dirstate (missing-correct-output !) - checked 3 changesets with 3 changes to 1 files (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) - [255] + checking changesets + checking manifests + crosschecking files in changesets and manifests + checking files + checking dirstate + checked 3 changesets with 3 changes to 1 files $ hg -R clone-while-split tip - changeset: 2:dbd9854c38a6 (missing-correct-output !) - tag: tip (missing-correct-output !) - user: test (missing-correct-output !) - date: Thu Jan 01 00:00:00 1970 +0000 (missing-correct-output !) - summary: c (missing-correct-output !) - (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) - [255] + changeset: 2:dbd9854c38a6 + tag: tip + user: test + date: Thu Jan 01 00:00:00 1970 +0000 + summary: c + $ hg -R clone-while-split debug-revlog-index some-file - rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !) - 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !) - 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !) - 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) - [255] + rev linkrev nodeid p1-nodeid p2-nodeid + 0 0 ed70cecbc103 000000000000 000000000000 + 1 1 7241018db64c ed70cecbc103 000000000000 + 2 2 fa1120531cc1 7241018db64c 000000000000 + $ hg -R server phase --rev 'all()' + 0: draft + 1: draft + 2: draft + 3: draft + $ hg -R clone-while-split phase --rev 'all()' + 0: draft + 1: draft + 2: draft subsequent pull work $ hg -R clone-while-split pull - pulling from http://localhost:$HGPORT1/ (missing-correct-output !) - searching for changes (missing-correct-output !) 
- adding changesets (missing-correct-output !) - adding manifests (missing-correct-output !) - adding file changes (missing-correct-output !) - added 1 changesets with 1 changes to 1 files (missing-correct-output !) - new changesets df05c6cb1406 (missing-correct-output !) - (run 'hg update' to get a working copy) (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) - [255] + pulling from http://localhost:$HGPORT1/ + searching for changes + adding changesets + adding manifests + adding file changes + added 1 changesets with 1 changes to 1 files + new changesets df05c6cb1406 (1 drafts) + (run 'hg update' to get a working copy) $ hg -R clone-while-split debug-revlog-index some-file - rev linkrev nodeid p1-nodeid p2-nodeid (missing-correct-output !) - 0 0 ed70cecbc103 000000000000 000000000000 (missing-correct-output !) - 1 1 7241018db64c ed70cecbc103 000000000000 (missing-correct-output !) - 2 2 fa1120531cc1 7241018db64c 000000000000 (missing-correct-output !) - 3 3 a631378adaa3 fa1120531cc1 000000000000 (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) - [255] + rev linkrev nodeid p1-nodeid p2-nodeid + 0 0 ed70cecbc103 000000000000 000000000000 + 1 1 7241018db64c ed70cecbc103 000000000000 + 2 2 fa1120531cc1 7241018db64c 000000000000 + 3 3 a631378adaa3 fa1120531cc1 000000000000 $ hg -R clone-while-split verify - checking changesets (missing-correct-output !) - checking manifests (missing-correct-output !) - crosschecking files in changesets and manifests (missing-correct-output !) - checking files (missing-correct-output !) - checking dirstate (missing-correct-output !) - checked 4 changesets with 4 changes to 1 files (missing-correct-output !) - abort: repository clone-while-split not found (known-bad-output !) 
- [255] + checking changesets + checking manifests + crosschecking files in changesets and manifests + checking files + checking dirstate + checked 4 changesets with 4 changes to 1 files