contrib: add a partial-merge tool for sorted lists (such as Python imports)
This is a pretty naive tool that uses a regular expression for
matching lines. It is based on a Google-internal tool that worked in a
similar way.
For now, the regular expression is hard-coded to attempt to match
single-line Python imports. The only commit I've found in the hg core
repo where the tool helped was commit
9cd6292abfdf. I think that's
because we often use multiple imports per import statement. I think
this tool is still a decent first step (especially once the regex is
made configurable in the next patch). The merging should ideally use a
proper Python parser and do the merge at the AST (or CST?) level, but
that's significantly harder, especially if you want to preserve
comments and whitespace. It's also less generic.
Differential Revision: https://phab.mercurial-scm.org/D12380
--- a/.hgignore Tue Apr 05 17:31:18 2022 +0200
+++ b/.hgignore Fri Mar 04 16:12:56 2022 -0800
@@ -35,6 +35,7 @@
contrib/chg/chg
contrib/hgsh/hgsh
contrib/vagrant/.vagrant
+contrib/merge-lists/target/
dist
packages
doc/common.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/merge-lists/Cargo.lock Fri Mar 04 16:12:56 2022 -0800
@@ -0,0 +1,560 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "assert_cmd"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93ae1ddd39efd67689deb1979d80bad3bf7f2b09c6e6117c8d1f2443b5e2f83e"
+dependencies = [
+ "bstr",
+ "doc-comment",
+ "predicates",
+ "predicates-core",
+ "predicates-tree",
+ "wait-timeout",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bstr"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "regex-automata",
+]
+
+[[package]]
+name = "clap"
+version = "3.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8c93436c21e4698bacadf42917db28b23017027a4deccb35dbe47a7e7840123"
+dependencies = [
+ "atty",
+ "bitflags",
+ "clap_derive",
+ "indexmap",
+ "lazy_static",
+ "os_str_bytes",
+ "strsim",
+ "termcolor",
+ "textwrap",
+]
+
+[[package]]
+name = "clap_derive"
+version = "3.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da95d038ede1a964ce99f49cbe27a7fb538d1da595e4b4f70b8c8f338d17bf16"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "console"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "terminal_size",
+ "winapi",
+]
+
+[[package]]
+name = "difflib"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
+
+[[package]]
+name = "doc-comment"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
+
+[[package]]
+name = "either"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
+
+[[package]]
+name = "encode_unicode"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
+
+[[package]]
+name = "fuchsia-cprng"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
+
+[[package]]
+name = "hashbrown"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
+
+[[package]]
+name = "heck"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "insta"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30a7e1911532a662f6b08b68f884080850f2fd9544963c3ab23a5af42bda1eac"
+dependencies = [
+ "console",
+ "once_cell",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "similar",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.119"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4"
+
+[[package]]
+name = "linked-hash-map"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"
+
+[[package]]
+name = "memchr"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+
+[[package]]
+name = "merge-lists"
+version = "0.1.0"
+dependencies = [
+ "assert_cmd",
+ "clap",
+ "insta",
+ "itertools",
+ "regex",
+ "similar",
+ "tempdir",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
+
+[[package]]
+name = "os_str_bytes"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "predicates"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c"
+dependencies = [
+ "difflib",
+ "itertools",
+ "predicates-core",
+]
+
+[[package]]
+name = "predicates-core"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da1c2388b1513e1b605fcec39a95e0a9e8ef088f71443ef37099fa9ae6673fcb"
+
+[[package]]
+name = "predicates-tree"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032"
+dependencies = [
+ "predicates-core",
+ "termtree",
+]
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
+dependencies = [
+ "fuchsia-cprng",
+ "libc",
+ "rand_core 0.3.1",
+ "rdrand",
+ "winapi",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
+dependencies = [
+ "rand_core 0.4.2",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
+
+[[package]]
+name = "rdrand"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
+dependencies = [
+ "rand_core 0.3.1",
+]
+
+[[package]]
+name = "regex"
+version = "1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
+[[package]]
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
+
+[[package]]
+name = "serde"
+version = "1.0.136"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.136"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.8.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
+dependencies = [
+ "indexmap",
+ "ryu",
+ "serde",
+ "yaml-rust",
+]
+
+[[package]]
+name = "similar"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e24979f63a11545f5f2c60141afe249d4f19f84581ea2138065e400941d83d3"
+dependencies = [
+ "bstr",
+]
+
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+
+[[package]]
+name = "syn"
+version = "1.0.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e59d925cf59d8151f25a3bedf97c9c157597c9df7324d32d68991cc399ed08b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
+[[package]]
+name = "tempdir"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
+dependencies = [
+ "rand",
+ "remove_dir_all",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "terminal_size"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "termtree"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b"
+
+[[package]]
+name = "textwrap"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "wait-timeout"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "yaml-rust"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
+dependencies = [
+ "linked-hash-map",
+]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/merge-lists/Cargo.toml Fri Mar 04 16:12:56 2022 -0800
@@ -0,0 +1,21 @@
+# A tool that performs a 3-way merge, resolving conflicts in sorted lists and
+# leaving other conflicts unchanged. This is useful with Mercurial's support
+# for partial merge tools (configured in `[partial-merge-tools]`).
+
+[package]
+name = "merge-lists"
+version = "0.1.0"
+edition = "2021"
+# We need https://github.com/rust-lang/rust/pull/89825
+rust-version = "1.59"
+
+[dependencies]
+clap = { version = "3.1.6", features = ["derive"] }
+itertools = "0.10.3"
+regex = "1.5.5"
+similar = { version="2.1.0", features = ["bytes"] }
+
+[dev-dependencies]
+assert_cmd = "2.0.4"
+insta = "1.13.0"
+tempdir = "0.3.7"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/merge-lists/src/main.rs Fri Mar 04 16:12:56 2022 -0800
@@ -0,0 +1,280 @@
+use clap::Parser;
+use itertools::Itertools;
+use regex::bytes::Regex;
+use similar::ChangeTag;
+use std::cmp::{max, min, Ordering};
+use std::collections::HashSet;
+use std::ffi::OsString;
+use std::ops::Range;
+use std::path::PathBuf;
+
+fn find_unchanged_ranges(
+ old_bytes: &[u8],
+ new_bytes: &[u8],
+) -> Vec<(Range<usize>, Range<usize>)> {
+ let diff = similar::TextDiff::configure()
+ .algorithm(similar::Algorithm::Patience)
+ .diff_lines(old_bytes, new_bytes);
+ let mut new_unchanged_ranges = vec![];
+ let mut old_index = 0;
+ let mut new_index = 0;
+ for diff in diff.iter_all_changes() {
+ match diff.tag() {
+ ChangeTag::Equal => {
+ new_unchanged_ranges.push((
+ old_index..old_index + diff.value().len(),
+ new_index..new_index + diff.value().len(),
+ ));
+ old_index += diff.value().len();
+ new_index += diff.value().len();
+ }
+ ChangeTag::Delete => {
+ old_index += diff.value().len();
+ }
+ ChangeTag::Insert => {
+ new_index += diff.value().len();
+ }
+ }
+ }
+ new_unchanged_ranges
+}
+
+/// Returns a list of all the lines in the input (including trailing newlines),
+/// but only if they all match the regex and they are sorted.
+fn get_lines<'input>(
+ input: &'input [u8],
+ regex: &Regex,
+) -> Option<Vec<&'input [u8]>> {
+ let lines = input.split_inclusive(|x| *x == b'\n').collect_vec();
+ let mut previous_line = "".as_bytes();
+ for line in &lines {
+ if *line < previous_line {
+ return None;
+ }
+ if !regex.is_match(line) {
+ return None;
+ }
+ previous_line = line;
+ }
+ Some(lines)
+}
+
+fn resolve_conflict(
+ base_slice: &[u8],
+ local_slice: &[u8],
+ other_slice: &[u8],
+ regex: &Regex,
+) -> Option<Vec<u8>> {
+ let base_lines = get_lines(base_slice, regex)?;
+ let local_lines = get_lines(local_slice, regex)?;
+ let other_lines = get_lines(other_slice, regex)?;
+ let base_lines_set: HashSet<_> = base_lines.iter().copied().collect();
+ let local_lines_set: HashSet<_> = local_lines.iter().copied().collect();
+ let other_lines_set: HashSet<_> = other_lines.iter().copied().collect();
+ let mut result = local_lines_set;
+ for to_add in other_lines_set.difference(&base_lines_set) {
+ result.insert(to_add);
+ }
+ for to_remove in base_lines_set.difference(&other_lines_set) {
+ result.remove(to_remove);
+ }
+ Some(result.into_iter().sorted().collect_vec().concat())
+}
+
+fn resolve(
+ base_bytes: &[u8],
+ local_bytes: &[u8],
+ other_bytes: &[u8],
+ regex: &Regex,
+) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
+ // Find unchanged ranges between the base and the two sides. We do that by
+ // initially considering the whole base unchanged. Then we compare each
+ // side with the base and intersect the unchanged ranges we find with
+ // what we had before.
+ let unchanged_ranges = vec![UnchangedRange {
+ base_range: 0..base_bytes.len(),
+ offsets: vec![],
+ }];
+ let unchanged_ranges = intersect_regions(
+ unchanged_ranges,
+ &find_unchanged_ranges(base_bytes, local_bytes),
+ );
+ let mut unchanged_ranges = intersect_regions(
+ unchanged_ranges,
+ &find_unchanged_ranges(base_bytes, other_bytes),
+ );
+ // Add an empty UnchangedRange at the end to make it easier to find change
+ // ranges. That way there's a changed range before each UnchangedRange.
+ unchanged_ranges.push(UnchangedRange {
+ base_range: base_bytes.len()..base_bytes.len(),
+ offsets: vec![
+ local_bytes.len().wrapping_sub(base_bytes.len()) as isize,
+ other_bytes.len().wrapping_sub(base_bytes.len()) as isize,
+ ],
+ });
+
+ let mut new_base_bytes: Vec<u8> = vec![];
+ let mut new_local_bytes: Vec<u8> = vec![];
+ let mut new_other_bytes: Vec<u8> = vec![];
+ let mut previous = UnchangedRange {
+ base_range: 0..0,
+ offsets: vec![0, 0],
+ };
+ for current in unchanged_ranges {
+ let base_slice =
+ &base_bytes[previous.base_range.end..current.base_range.start];
+ let local_slice = &local_bytes[previous.end(0)..current.start(0)];
+ let other_slice = &other_bytes[previous.end(1)..current.start(1)];
+ if let Some(resolution) =
+ resolve_conflict(base_slice, local_slice, other_slice, regex)
+ {
+ new_base_bytes.extend(&resolution);
+ new_local_bytes.extend(&resolution);
+ new_other_bytes.extend(&resolution);
+ } else {
+ new_base_bytes.extend(base_slice);
+ new_local_bytes.extend(local_slice);
+ new_other_bytes.extend(other_slice);
+ }
+ new_base_bytes.extend(&base_bytes[current.base_range.clone()]);
+ new_local_bytes.extend(&local_bytes[current.start(0)..current.end(0)]);
+ new_other_bytes.extend(&other_bytes[current.start(1)..current.end(1)]);
+ previous = current;
+ }
+
+ (new_base_bytes, new_local_bytes, new_other_bytes)
+}
+
+/// A tool that performs a 3-way merge, resolving conflicts in sorted lists and
+/// leaving other conflicts unchanged. This is useful with Mercurial's support
+/// for partial merge tools (configured in `[partial-merge-tools]`).
+#[derive(Parser, Debug)]
+#[clap(version, about, long_about = None)]
+struct Args {
+ /// Path to the file's content in the "local" side
+ local: OsString,
+
+ /// Path to the file's content in the base
+ base: OsString,
+
+ /// Path to the file's content in the "other" side
+ other: OsString,
+}
+
+fn main() {
+ let args: Args = Args::parse();
+
+ let base_path = PathBuf::from(&args.base);
+ let local_path = PathBuf::from(&args.local);
+ let other_path = PathBuf::from(&args.other);
+
+ let base_bytes = std::fs::read(&base_path).unwrap();
+ let local_bytes = std::fs::read(&local_path).unwrap();
+ let other_bytes = std::fs::read(&other_path).unwrap();
+
+ let regex =
+ regex::bytes::Regex::new(r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?").unwrap();
+ let (new_base_bytes, new_local_bytes, new_other_bytes) =
+ resolve(&base_bytes, &local_bytes, &other_bytes, ®ex);
+
+ // Write out the result if anything changed
+ if new_base_bytes != base_bytes {
+ std::fs::write(&base_path, new_base_bytes).unwrap();
+ }
+ if new_local_bytes != local_bytes {
+ std::fs::write(&local_path, new_local_bytes).unwrap();
+ }
+ if new_other_bytes != other_bytes {
+ std::fs::write(&other_path, new_other_bytes).unwrap();
+ }
+}
+
+fn checked_add(base: usize, offset: isize) -> usize {
+ if offset < 0 {
+ base.checked_sub(offset.checked_abs().unwrap() as usize)
+ .unwrap()
+ } else {
+ base.checked_add(offset as usize).unwrap()
+ }
+}
+
+// The remainder of the file is copied from
+// https://github.com/martinvonz/jj/blob/main/lib/src/diff.rs
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+struct UnchangedRange {
+ base_range: Range<usize>,
+ offsets: Vec<isize>,
+}
+
+impl UnchangedRange {
+ fn start(&self, side: usize) -> usize {
+ checked_add(self.base_range.start, self.offsets[side])
+ }
+
+ fn end(&self, side: usize) -> usize {
+ checked_add(self.base_range.end, self.offsets[side])
+ }
+}
+
+impl PartialOrd for UnchangedRange {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for UnchangedRange {
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.base_range
+ .start
+ .cmp(&other.base_range.start)
+ .then_with(|| self.base_range.end.cmp(&other.base_range.end))
+ }
+}
+
+/// Takes the current regions and intersects it with the new unchanged ranges
+/// from a 2-way diff. The result is a map of unchanged regions with one more
+/// offset in the map's values.
+fn intersect_regions(
+ current_ranges: Vec<UnchangedRange>,
+ new_unchanged_ranges: &[(Range<usize>, Range<usize>)],
+) -> Vec<UnchangedRange> {
+ let mut result = vec![];
+ let mut current_ranges_iter = current_ranges.into_iter().peekable();
+ for (new_base_range, other_range) in new_unchanged_ranges.iter() {
+ assert_eq!(new_base_range.len(), other_range.len());
+ while let Some(UnchangedRange {
+ base_range,
+ offsets,
+ }) = current_ranges_iter.peek()
+ {
+ // No need to look further if we're past the new range.
+ if base_range.start >= new_base_range.end {
+ break;
+ }
+ // Discard any current unchanged regions that don't match between
+ // the base and the new input.
+ if base_range.end <= new_base_range.start {
+ current_ranges_iter.next();
+ continue;
+ }
+ let new_start = max(base_range.start, new_base_range.start);
+ let new_end = min(base_range.end, new_base_range.end);
+ let mut new_offsets = offsets.clone();
+ new_offsets
+ .push(other_range.start.wrapping_sub(new_base_range.start)
+ as isize);
+ result.push(UnchangedRange {
+ base_range: new_start..new_end,
+ offsets: new_offsets,
+ });
+ if base_range.end >= new_base_range.end {
+ // Break without consuming the item; there may be other new
+ // ranges that overlap with it.
+ break;
+ }
+ current_ranges_iter.next();
+ }
+ }
+ result
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/merge-lists/tests/test-merge-lists.rs Fri Mar 04 16:12:56 2022 -0800
@@ -0,0 +1,156 @@
+use similar::DiffableStr;
+use tempdir::TempDir;
+
+fn run_test(input: &str) -> String {
+ let mut cmd = assert_cmd::Command::cargo_bin("merge-lists").unwrap();
+ let temp_dir = TempDir::new("test").unwrap();
+ let base_path = temp_dir.path().join("base");
+ let local_path = temp_dir.path().join("local");
+ let other_path = temp_dir.path().join("other");
+
+ let rest = input.strip_prefix("\nbase:\n").unwrap();
+ let mut split = rest.split("\nlocal:\n");
+ std::fs::write(&base_path, split.next().unwrap()).unwrap();
+ let rest = split.next().unwrap();
+ let mut split = rest.split("\nother:\n");
+ std::fs::write(&local_path, split.next().unwrap()).unwrap();
+ std::fs::write(&other_path, split.next().unwrap()).unwrap();
+ cmd.args(&[
+ local_path.as_os_str(),
+ base_path.as_os_str(),
+ other_path.as_os_str(),
+ ])
+ .assert()
+ .success();
+
+ let new_base_bytes = std::fs::read(&base_path).unwrap();
+ let new_local_bytes = std::fs::read(&local_path).unwrap();
+ let new_other_bytes = std::fs::read(&other_path).unwrap();
+ // No newline before "base:" because of https://github.com/mitsuhiko/insta/issues/117
+ format!(
+ "base:\n{}\nlocal:\n{}\nother:\n{}",
+ new_base_bytes.as_str().unwrap(),
+ new_local_bytes.as_str().unwrap(),
+ new_other_bytes.as_str().unwrap()
+ )
+}
+
+#[test]
+fn test_merge_lists_basic() {
+ let output = run_test(
+ r"
+base:
+import lib1
+import lib2
+
+local:
+import lib2
+import lib3
+
+other:
+import lib3
+import lib4
+",
+ );
+ insta::assert_snapshot!(output, @r###"
+ base:
+ import lib3
+ import lib4
+
+ local:
+ import lib3
+ import lib4
+
+ other:
+ import lib3
+ import lib4
+ "###);
+}
+
+#[test]
+fn test_merge_lists_from() {
+ // Test some "from x import y" statements and some non-import conflicts
+ // (unresolvable)
+ let output = run_test(
+ r"
+base:
+from . import x
+
+1+1
+
+local:
+from . import x
+from a import b
+
+2+2
+
+other:
+from a import c
+
+3+3
+",
+ );
+ insta::assert_snapshot!(output, @r###"
+ base:
+ from a import b
+ from a import c
+
+ 1+1
+
+ local:
+ from a import b
+ from a import c
+
+ 2+2
+
+ other:
+ from a import b
+ from a import c
+
+ 3+3
+ "###);
+}
+
+#[test]
+fn test_merge_lists_not_sorted() {
+ // Test that nothing is done if the elements in the conflicting hunks are
+ // not sorted
+ let output = run_test(
+ r"
+base:
+import x
+
+1+1
+
+local:
+import a
+import x
+
+2+2
+
+other:
+import z
+import y
+
+3+3
+",
+ );
+ insta::assert_snapshot!(output, @r###"
+ base:
+ import x
+
+ 1+1
+
+ local:
+ import a
+ import x
+
+ 2+2
+
+ other:
+ import z
+ import y
+
+ 3+3
+ "###);
+}