--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitlab/merge_request_templates/Default.md Wed May 25 13:28:24 2022 +0200
@@ -0,0 +1,18 @@
+---
+name: Official Review
+about: Submit a series for review
+---
+
+/assign_reviewer @mercurial.review
+
+Welcome to the Mercurial Merge Request creation process:
+
+* Set a simple title for your MR,
+* All important information should be contained in your changesets' content or description,
+* You can add some workflow-relevant information here (e.g. when this depends on another MR),
+* If your changes are not ready for review yet, click `Start the title with Draft:` under the title.
+
+More details here:
+
+* https://www.mercurial-scm.org/wiki/ContributingChanges
+* https://www.mercurial-scm.org/wiki/Heptapod
--- a/contrib/check-py3-compat.py Tue May 24 19:09:24 2022 +0400
+++ b/contrib/check-py3-compat.py Wed May 25 13:28:24 2022 +0200
@@ -76,7 +76,7 @@
# specified. When running as test-check-py3-compat.t, we technically
# would import the correct paths, but it's cleaner to have both cases
# use the same import logic.
- sys.path.insert(0, '.')
+ sys.path.insert(0, os.getcwd())
for f in sys.argv[1:]:
with warnings.catch_warnings(record=True) as warns:
--- a/doc/check-seclevel.py Tue May 24 19:09:24 2022 +0400
+++ b/doc/check-seclevel.py Wed May 25 13:28:24 2022 +0200
@@ -9,7 +9,7 @@
# import from the live mercurial repo
os.environ['HGMODULEPOLICY'] = 'py'
-sys.path.insert(0, "..")
+sys.path.insert(0, os.path.abspath(".."))
from mercurial import demandimport
demandimport.enable()
--- a/mercurial/worker.py Tue May 24 19:09:24 2022 +0400
+++ b/mercurial/worker.py Wed May 25 13:28:24 2022 +0200
@@ -68,6 +68,52 @@
return threading.current_thread() == threading.main_thread()
+class _blockingreader(object):
+ def __init__(self, wrapped):
+ self._wrapped = wrapped
+
+ # Do NOT implement readinto() by making it delegate to
+ # _wrapped.readinto(), since that is unbuffered. The unpickler is fine
+ # with just read() and readline(), so we don't need to implement it.
+
+ if (3, 8, 0) <= sys.version_info[:3] < (3, 8, 2):
+
+ # This is required for Python 3.8 releases prior to 3.8.2. See issue6444.
+ def readinto(self, b):
+ pos = 0
+ size = len(b)
+
+ while pos < size:
+ ret = self._wrapped.readinto(b[pos:])
+ if not ret:
+ break
+ pos += ret
+
+ return pos
+
+ def readline(self):
+ return self._wrapped.readline()
+
+ # Issue multiple reads until `size` bytes are read or a read returns nothing.
+ def read(self, size=-1):
+ if size < 0:
+ return self._wrapped.readall()
+
+ buf = bytearray(size)
+ view = memoryview(buf)
+ pos = 0
+
+ while pos < size:
+ ret = self._wrapped.readinto(view[pos:])
+ if not ret:
+ break
+ pos += ret
+
+ del view
+ del buf[pos:]
+ return bytes(buf)
+
+
class _blockingreader:
def __init__(self, wrapped):
self._wrapped = wrapped
--- a/rust/hg-core/src/repo.rs Tue May 24 19:09:24 2022 +0400
+++ b/rust/hg-core/src/repo.rs Wed May 25 13:28:24 2022 +0200
@@ -456,29 +456,38 @@
let data_filename = format!("dirstate.{}", uuid);
let data_filename = self.hg_vfs().join(data_filename);
let mut options = std::fs::OpenOptions::new();
- if append {
- options.append(true);
- } else {
- options.write(true).create_new(true);
+ options.write(true);
+
+ // Why are we not using the O_APPEND flag when appending?
+ //
+ // - O_APPEND makes it trickier to deal with garbage at the end of
+ // the file, left by a previous uncommitted transaction. By
+ // starting the write at [old_data_size] we make sure we erase
+ // all such garbage.
+ //
+ // - O_APPEND requires special-casing 0-byte writes, whereas we
+ // don't need that.
+ //
+ // - Some OSes have bugs in their implementation of O_APPEND:
+ // revlog.py talks about a Solaris bug, but we also saw a ZFS
+ // bug: https://github.com/openzfs/zfs/pull/3124,
+ // https://github.com/openzfs/zfs/issues/13370
+ //
+ if !append {
+ options.create_new(true);
}
+
let data_size = (|| {
// TODO: loop and try another random ID if !append and this
// returns `ErrorKind::AlreadyExists`? Collision chance of two
// random IDs is one in 2**32
let mut file = options.open(&data_filename)?;
- if data.is_empty() {
- // If we're not appending anything, the data size is the
- // same as in the previous docket. It is *not* the file
- // length, since it could have garbage at the end.
- // We don't have to worry about it when we do have data
- // to append since we rewrite the root node in this case.
- Ok(old_data_size as u64)
- } else {
- file.write_all(&data)?;
- file.flush()?;
- // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
- file.seek(SeekFrom::Current(0))
+ if append {
+ file.seek(SeekFrom::Start(old_data_size as u64))?;
}
+ file.write_all(&data)?;
+ file.flush()?;
+ file.seek(SeekFrom::Current(0))
})()
.when_writing_file(&data_filename)?;
--- a/rust/hg-core/src/revlog/revlog.rs Tue May 24 19:09:24 2022 +0400
+++ b/rust/hg-core/src/revlog/revlog.rs Wed May 25 13:28:24 2022 +0200
@@ -32,6 +32,8 @@
| REVISION_FLAG_EXTSTORED
| REVISION_FLAG_HASCOPIESINFO;
+const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;
+
#[derive(Debug, derive_more::From)]
pub enum RevlogError {
InvalidRevision,
@@ -265,11 +267,29 @@
}
}
+ pub fn make_null_entry(&self) -> RevlogEntry {
+ RevlogEntry {
+ revlog: self,
+ rev: NULL_REVISION,
+ bytes: b"",
+ compressed_len: 0,
+ uncompressed_len: 0,
+ base_rev_or_base_of_delta_chain: None,
+ p1: NULL_REVISION,
+ p2: NULL_REVISION,
+ flags: NULL_REVLOG_ENTRY_FLAGS,
+ hash: NULL_NODE,
+ }
+ }
+
/// Get an entry of the revlog.
pub fn get_entry(
&self,
rev: Revision,
) -> Result<RevlogEntry, RevlogError> {
+ if rev == NULL_REVISION {
+ return Ok(self.make_null_entry());
+ }
let index_entry = self
.index
.get_entry(rev)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/bundles/test-revlog-diff-relative-to-nullrev.sh Wed May 25 13:28:24 2022 +0200
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Before running this script, patch Mercurial so that it creates the delta
+# against nullrev when deltainfo is None:
+#- deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+#+ deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
+
+cd "`dirname \"$0\"`"
+export HGRCPATH=
+export HGMODULEPOLICY=py
+
+rm -rf nullrev-diff
+../../hg init nullrev-diff --config format.revlog-compression=zlib
+cd nullrev-diff
+echo hi > a
+../../../hg commit -Am root-B
+../../../hg debugdeltachain a
+rm -rf .hg/cache/ .hg/wcache/
+cd ..
+
+tar cf test-revlog-diff-relative-to-nullrev.tar nullrev-diff
+
+rm -rf nullrev-diff
Binary file tests/bundles/test-revlog-diff-relative-to-nullrev.tar has changed
--- a/tests/run-tests.py Tue May 24 19:09:24 2022 +0400
+++ b/tests/run-tests.py Wed May 25 13:28:24 2022 +0200
@@ -2505,29 +2505,33 @@
done = queue.Queue()
running = 0
+ channels_lock = threading.Lock()
channels = [""] * self._jobs
def job(test, result):
- for n, v in enumerate(channels):
- if not v:
- channel = n
- break
- else:
- raise ValueError('Could not find output channel')
- channels[channel] = "=" + test.name[5:].split(".")[0]
+ with channels_lock:
+ for n, v in enumerate(channels):
+ if not v:
+ channel = n
+ break
+ else:
+ raise ValueError('Could not find output channel')
+ channels[channel] = "=" + test.name[5:].split(".")[0]
+
+ r = None
try:
test(result)
- done.put(None)
except KeyboardInterrupt:
pass
except: # re-raises
- done.put(('!', test, 'run-test raised an error, see traceback'))
+ r = ('!', test, 'run-test raised an error, see traceback')
raise
finally:
try:
channels[channel] = ''
except IndexError:
pass
+ done.put(r)
def stat():
count = 0
--- a/tests/test-dirstate.t Tue May 24 19:09:24 2022 +0400
+++ b/tests/test-dirstate.t Wed May 25 13:28:24 2022 +0200
@@ -131,6 +131,10 @@
> hg debugstate --docket | grep uuid | sed 's/.*uuid: \(.*\)/\1/'
> }
+ $ find_dirstate_data_size () {
+ > hg debugstate --docket | grep 'size of dirstate data' | sed 's/.*size of dirstate data: \(.*\)/\1/'
+ > }
+
$ dirstate_uuid_has_not_changed () {
> # Non-Rust always rewrites the whole dirstate
> if [ $# -eq 1 ] || ([ -n "$HGMODULEPOLICY" ] && [ -z "${HGMODULEPOLICY##*rust*}" ]) || [ -n "$RHG_INSTALLED_AS_HG" ]; then
@@ -161,13 +165,19 @@
Trigger an append with a small change
- $ echo "modified" > dir2/f
+ $ current_data_size=$(find_dirstate_data_size)
+ $ rm dir2/f
$ hg st
- M dir2/f
+ ! dir2/f
$ dirstate_data_files | wc -l
*1 (re)
$ dirstate_uuid_has_not_changed
not testing because using Python implementation (no-rust no-rhg !)
+ $ new_data_size=$(find_dirstate_data_size)
+ $ [ "$current_data_size" -eq "$new_data_size" ]; echo $?
+ 0 (no-rust no-rhg !)
+ 1 (rust !)
+ 1 (no-rust rhg !)
Unused bytes counter is non-0 when appending
$ touch file
@@ -176,8 +186,8 @@
Trigger a rust/rhg run which updates the unused bytes value
$ hg st
- M dir2/f
A file
+ ! dir2/f
$ dirstate_data_files | wc -l
*1 (re)
$ dirstate_uuid_has_not_changed
--- a/tests/test-revlog.t Tue May 24 19:09:24 2022 +0400
+++ b/tests/test-revlog.t Wed May 25 13:28:24 2022 +0200
@@ -32,6 +32,7 @@
Test for CVE-2016-3630
+ $ mkdir test2; cd test2
$ hg init
>>> import codecs
@@ -52,3 +53,31 @@
>>> rl = revlog.revlog(tvfs, target=(KIND_OTHER, b'test'), radix=b'a')
>>> rl.revision(1)
mpatchError(*'patch cannot be decoded'*) (glob)
+
+ $ cd ..
+
+
+Regression test for reading old repos that use a strange diff encoding.
+Apparently it used to be possible (maybe it still is, but we don't know how)
+to create commits whose diffs are encoded relative to nullrev.
+This test checks that a repo with that encoding can still be read.
+
+This is what we did to produce the repo in test-revlog-diff-relative-to-nullrev.tar:
+
+- tweak the code in mercurial/revlogutils/deltas.py to produce such "trivial" deltas:
+> if deltainfo is None:
+> - deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
+> + deltainfo = self._builddeltainfo(revinfo, nullrev, fh)
+- hg init
+- echo hi > a
+- hg commit -Am_
+- remove some cache files
+
+ $ tar --force-local -xf "$TESTDIR"/bundles/test-revlog-diff-relative-to-nullrev.tar
+ $ cd nullrev-diff
+ $ hg debugdeltachain a
+ rev chain# chainlen prev delta size rawsize chainsize ratio lindist extradist extraratio readsize largestblk rddensity srchunks
+ 0 1 2 -1 p1 15 3 15 5.00000 15 0 0.00000 15 15 1.00000 1
+ $ hg cat --config rhg.cat=true -r 0 a
+ hi
+ $ cd ..