changeset 49217:13dfad0f9f7a

branching: merge stable into default
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 25 May 2022 13:28:24 +0200
parents d3d3495a5749 (current diff) 9ea9445c3223 (diff)
children 5b1495c37b0c
files contrib/check-py3-compat.py doc/check-seclevel.py mercurial/worker.py rust/hg-core/src/repo.rs rust/hg-core/src/revlog/revlog.rs tests/run-tests.py tests/test-dirstate.t
diffstat 11 files changed, 189 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitlab/merge_request_templates/Default.md	Wed May 25 13:28:24 2022 +0200
@@ -0,0 +1,18 @@
+---
+name: Official Review
+about: Submit a series for review
+---
+
+/assign_reviewer @mercurial.review
+
+Welcome to the Mercurial Merge Request creation process:
+
+* Set a simple title for your MR,
+* All important information should be contained in your changesets' content or description,
+* You can add some workflow-relevant information here (e.g. when this depends on another MR),
+* If your changes are not ready for review yet, click `Start the title with Draft:` under the title.
+
+More details here:
+
+* https://www.mercurial-scm.org/wiki/ContributingChanges
+* https://www.mercurial-scm.org/wiki/Heptapod
--- a/contrib/check-py3-compat.py	Tue May 24 19:09:24 2022 +0400
+++ b/contrib/check-py3-compat.py	Wed May 25 13:28:24 2022 +0200
@@ -76,7 +76,7 @@
     # specified. When running as test-check-py3-compat.t, we technically
     # would import the correct paths, but it's cleaner to have both cases
     # use the same import logic.
-    sys.path.insert(0, '.')
+    sys.path.insert(0, os.getcwd())
 
     for f in sys.argv[1:]:
         with warnings.catch_warnings(record=True) as warns:
--- a/doc/check-seclevel.py	Tue May 24 19:09:24 2022 +0400
+++ b/doc/check-seclevel.py	Wed May 25 13:28:24 2022 +0200
@@ -9,7 +9,7 @@
 
 # import from the live mercurial repo
 os.environ['HGMODULEPOLICY'] = 'py'
-sys.path.insert(0, "..")
+sys.path.insert(0, os.path.abspath(".."))
 from mercurial import demandimport
 
 demandimport.enable()
--- a/mercurial/worker.py	Tue May 24 19:09:24 2022 +0400
+++ b/mercurial/worker.py	Wed May 25 13:28:24 2022 +0200
@@ -68,6 +68,52 @@
     return threading.current_thread() == threading.main_thread()
 
 
+class _blockingreader(object):
+    def __init__(self, wrapped):
+        self._wrapped = wrapped
+
+    # Do NOT implement readinto() by making it delegate to
+    # _wrapped.readinto(), since that is unbuffered. The unpickler is fine
+    # with just read() and readline(), so we don't need to implement it.
+
+    if (3, 8, 0) <= sys.version_info[:3] < (3, 8, 2):
+
+        # This is required for python 3.8, prior to 3.8.2.  See issue6444.
+        def readinto(self, b):
+            pos = 0
+            size = len(b)
+
+            while pos < size:
+                ret = self._wrapped.readinto(b[pos:])
+                if not ret:
+                    break
+                pos += ret
+
+            return pos
+
+    def readline(self):
+        return self._wrapped.readline()
+
+    # issue multiple reads until size is fulfilled
+    def read(self, size=-1):
+        if size < 0:
+            return self._wrapped.readall()
+
+        buf = bytearray(size)
+        view = memoryview(buf)
+        pos = 0
+
+        while pos < size:
+            ret = self._wrapped.readinto(view[pos:])
+            if not ret:
+                break
+            pos += ret
+
+        del view
+        del buf[pos:]
+        return bytes(buf)
+
+
 class _blockingreader:
     def __init__(self, wrapped):
         self._wrapped = wrapped
--- a/rust/hg-core/src/repo.rs	Tue May 24 19:09:24 2022 +0400
+++ b/rust/hg-core/src/repo.rs	Wed May 25 13:28:24 2022 +0200
@@ -456,29 +456,38 @@
             let data_filename = format!("dirstate.{}", uuid);
             let data_filename = self.hg_vfs().join(data_filename);
             let mut options = std::fs::OpenOptions::new();
-            if append {
-                options.append(true);
-            } else {
-                options.write(true).create_new(true);
+            options.write(true);
+
+            // Why are we not using the O_APPEND flag when appending?
+            //
+            // - O_APPEND makes it trickier to deal with garbage at the end of
+            //   the file, left by a previous uncommitted transaction. By
+            //   starting the write at [old_data_size] we make sure we erase
+            //   all such garbage.
+            //
+            // - O_APPEND requires special-casing 0-byte writes, whereas we
+            //   don't need that.
+            //
+            // - Some OSes have bugs in their implementation of O_APPEND:
+            //   revlog.py talks about a Solaris bug, but we also saw some ZFS
+            //   bug: https://github.com/openzfs/zfs/pull/3124,
+            //   https://github.com/openzfs/zfs/issues/13370
+            //
+            if !append {
+                options.create_new(true);
             }
+
             let data_size = (|| {
                 // TODO: loop and try another random ID if !append and this
                 // returns `ErrorKind::AlreadyExists`? Collision chance of two
                 // random IDs is one in 2**32
                 let mut file = options.open(&data_filename)?;
-                if data.is_empty() {
-                    // If we're not appending anything, the data size is the
-                    // same as in the previous docket. It is *not* the file
-                    // length, since it could have garbage at the end.
-                    // We don't have to worry about it when we do have data
-                    // to append since we rewrite the root node in this case.
-                    Ok(old_data_size as u64)
-                } else {
-                    file.write_all(&data)?;
-                    file.flush()?;
-                    // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
-                    file.seek(SeekFrom::Current(0))
+                if append {
+                    file.seek(SeekFrom::Start(old_data_size as u64))?;
                 }
+                file.write_all(&data)?;
+                file.flush()?;
+                file.seek(SeekFrom::Current(0))
             })()
             .when_writing_file(&data_filename)?;
 
--- a/rust/hg-core/src/revlog/revlog.rs	Tue May 24 19:09:24 2022 +0400
+++ b/rust/hg-core/src/revlog/revlog.rs	Wed May 25 13:28:24 2022 +0200
@@ -32,6 +32,8 @@
     | REVISION_FLAG_EXTSTORED
     | REVISION_FLAG_HASCOPIESINFO;
 
+const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
+
 #[derive(Debug, derive_more::From)]
 pub enum RevlogError {
     InvalidRevision,
@@ -265,11 +267,29 @@
         }
     }
 
+    pub fn make_null_entry(&self) -> RevlogEntry {
+        RevlogEntry {
+            revlog: self,
+            rev: NULL_REVISION,
+            bytes: b"",
+            compressed_len: 0,
+            uncompressed_len: 0,
+            base_rev_or_base_of_delta_chain: None,
+            p1: NULL_REVISION,
+            p2: NULL_REVISION,
+            flags: NULL_REVLOG_ENTRY_FLAGS,
+            hash: NULL_NODE,
+        }
+    }
+
     /// Get an entry of the revlog.
     pub fn get_entry(
         &self,
         rev: Revision,
     ) -> Result<RevlogEntry, RevlogError> {
+        if rev == NULL_REVISION {
+            return Ok(self.make_null_entry());
+        }
         let index_entry = self
             .index
             .get_entry(rev)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/bundles/test-revlog-diff-relative-to-nullrev.sh	Wed May 25 13:28:24 2022 +0200
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Make sure to patch mercurial to create the delta against nullrev
+#          if deltainfo is None:
+#-             deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+#+             deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
+
+cd "`dirname \"$0\"`"
+export HGRCPATH=
+export HGMODULEPOLICY=py
+
+rm -rf nullrev-diff
+../../hg init nullrev-diff  --config format.revlog-compression=zlib
+cd nullrev-diff
+echo hi > a
+../../../hg commit -Am root-B
+../../../hg debugdeltachain a
+rm -rf .hg/cache/ .hg/wcache/
+cd ..
+
+tar cf test-revlog-diff-relative-to-nullrev.tar nullrev-diff
+
+rm -rf nullrev-diff
Binary file tests/bundles/test-revlog-diff-relative-to-nullrev.tar has changed
--- a/tests/run-tests.py	Tue May 24 19:09:24 2022 +0400
+++ b/tests/run-tests.py	Wed May 25 13:28:24 2022 +0200
@@ -2505,29 +2505,33 @@
         done = queue.Queue()
         running = 0
 
+        channels_lock = threading.Lock()
         channels = [""] * self._jobs
 
         def job(test, result):
-            for n, v in enumerate(channels):
-                if not v:
-                    channel = n
-                    break
-            else:
-                raise ValueError('Could not find output channel')
-            channels[channel] = "=" + test.name[5:].split(".")[0]
+            with channels_lock:
+                for n, v in enumerate(channels):
+                    if not v:
+                        channel = n
+                        break
+                else:
+                    raise ValueError('Could not find output channel')
+                channels[channel] = "=" + test.name[5:].split(".")[0]
+
+            r = None
             try:
                 test(result)
-                done.put(None)
             except KeyboardInterrupt:
                 pass
             except:  # re-raises
-                done.put(('!', test, 'run-test raised an error, see traceback'))
+                r = ('!', test, 'run-test raised an error, see traceback')
                 raise
             finally:
                 try:
                     channels[channel] = ''
                 except IndexError:
                     pass
+                done.put(r)
 
         def stat():
             count = 0
--- a/tests/test-dirstate.t	Tue May 24 19:09:24 2022 +0400
+++ b/tests/test-dirstate.t	Wed May 25 13:28:24 2022 +0200
@@ -131,6 +131,10 @@
   >   hg debugstate --docket | grep uuid | sed 's/.*uuid: \(.*\)/\1/'
   > }
 
+  $ find_dirstate_data_size () {
+  >   hg debugstate --docket | grep 'size of dirstate data' | sed 's/.*size of dirstate data: \(.*\)/\1/'
+  > }
+
   $ dirstate_uuid_has_not_changed () {
   >   # Non-Rust always rewrites the whole dirstate
   >   if [ $# -eq 1 ] || ([ -n "$HGMODULEPOLICY" ] && [ -z "${HGMODULEPOLICY##*rust*}" ]) || [ -n "$RHG_INSTALLED_AS_HG" ]; then
@@ -161,13 +165,19 @@
 
 Trigger an append with a small change
 
-  $ echo "modified" > dir2/f
+  $ current_data_size=$(find_dirstate_data_size)
+  $ rm dir2/f
   $ hg st
-  M dir2/f
+  ! dir2/f
   $ dirstate_data_files | wc -l
    *1 (re)
   $ dirstate_uuid_has_not_changed
   not testing because using Python implementation (no-rust no-rhg !)
+  $ new_data_size=$(find_dirstate_data_size)
+  $ [ "$current_data_size" -eq "$new_data_size" ]; echo $?
+  0 (no-rust no-rhg !)
+  1 (rust !)
+  1 (no-rust rhg !)
 
 Unused bytes counter is non-0 when appending
   $ touch file
@@ -176,8 +186,8 @@
 
 Trigger a rust/rhg run which updates the unused bytes value
   $ hg st
-  M dir2/f
   A file
+  ! dir2/f
   $ dirstate_data_files | wc -l
    *1 (re)
   $ dirstate_uuid_has_not_changed
--- a/tests/test-revlog.t	Tue May 24 19:09:24 2022 +0400
+++ b/tests/test-revlog.t	Wed May 25 13:28:24 2022 +0200
@@ -32,6 +32,7 @@
 
 Test for CVE-2016-3630
 
+  $ mkdir test2; cd test2
   $ hg init
 
   >>> import codecs
@@ -52,3 +53,31 @@
   >>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
   >>> rl.revision(1)
   mpatchError(*'patch cannot be decoded'*) (glob)
+
+  $ cd ..
+
+
+Regression test for reading old repos with a strange diff encoding.
+Apparently it used to be possible (maybe it's still possible, but we don't know how)
+to create commits whose diffs are encoded relative to nullrev.
+This test checks that a repo with that encoding can still be read.
+
+This is what we did to produce the repo in test-revlog-diff-relative-to-nullrev.tar:
+
+- tweak the code in mercurial/revlogutils/deltas.py to produce such "trivial" deltas:
+>          if deltainfo is None:
+> -            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+> +            deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
+- hg init
+- echo hi > a
+- hg commit -Am_
+- remove some cache files
+
+  $ tar --force-local -xf "$TESTDIR"/bundles/test-revlog-diff-relative-to-nullrev.tar
+  $ cd nullrev-diff
+  $ hg debugdeltachain a
+      rev  chain# chainlen     prev   delta       size    rawsize  chainsize     ratio   lindist extradist extraratio   readsize largestblk rddensity srchunks
+        0       1        2       -1      p1         15          3         15   5.00000        15         0    0.00000         15         15   1.00000        1
+  $ hg cat --config rhg.cat=true -r 0 a
+  hi
+  $ cd ..