contrib/merge-lists/src/main.rs
changeset 49010 681b25ea579e
child 49011 b999edb15f8c
equal deleted inserted replaced
49009:82dbe3a2920d 49010:681b25ea579e
       
     1 use clap::Parser;
       
     2 use itertools::Itertools;
       
     3 use regex::bytes::Regex;
       
     4 use similar::ChangeTag;
       
     5 use std::cmp::{max, min, Ordering};
       
     6 use std::collections::HashSet;
       
     7 use std::ffi::OsString;
       
     8 use std::ops::Range;
       
     9 use std::path::PathBuf;
       
    10 
       
    11 fn find_unchanged_ranges(
       
    12     old_bytes: &[u8],
       
    13     new_bytes: &[u8],
       
    14 ) -> Vec<(Range<usize>, Range<usize>)> {
       
    15     let diff = similar::TextDiff::configure()
       
    16         .algorithm(similar::Algorithm::Patience)
       
    17         .diff_lines(old_bytes, new_bytes);
       
    18     let mut new_unchanged_ranges = vec![];
       
    19     let mut old_index = 0;
       
    20     let mut new_index = 0;
       
    21     for diff in diff.iter_all_changes() {
       
    22         match diff.tag() {
       
    23             ChangeTag::Equal => {
       
    24                 new_unchanged_ranges.push((
       
    25                     old_index..old_index + diff.value().len(),
       
    26                     new_index..new_index + diff.value().len(),
       
    27                 ));
       
    28                 old_index += diff.value().len();
       
    29                 new_index += diff.value().len();
       
    30             }
       
    31             ChangeTag::Delete => {
       
    32                 old_index += diff.value().len();
       
    33             }
       
    34             ChangeTag::Insert => {
       
    35                 new_index += diff.value().len();
       
    36             }
       
    37         }
       
    38     }
       
    39     new_unchanged_ranges
       
    40 }
       
    41 
       
    42 /// Returns a list of all the lines in the input (including trailing newlines),
       
    43 /// but only if they all match the regex and they are sorted.
       
    44 fn get_lines<'input>(
       
    45     input: &'input [u8],
       
    46     regex: &Regex,
       
    47 ) -> Option<Vec<&'input [u8]>> {
       
    48     let lines = input.split_inclusive(|x| *x == b'\n').collect_vec();
       
    49     let mut previous_line = "".as_bytes();
       
    50     for line in &lines {
       
    51         if *line < previous_line {
       
    52             return None;
       
    53         }
       
    54         if !regex.is_match(line) {
       
    55             return None;
       
    56         }
       
    57         previous_line = line;
       
    58     }
       
    59     Some(lines)
       
    60 }
       
    61 
       
    62 fn resolve_conflict(
       
    63     base_slice: &[u8],
       
    64     local_slice: &[u8],
       
    65     other_slice: &[u8],
       
    66     regex: &Regex,
       
    67 ) -> Option<Vec<u8>> {
       
    68     let base_lines = get_lines(base_slice, regex)?;
       
    69     let local_lines = get_lines(local_slice, regex)?;
       
    70     let other_lines = get_lines(other_slice, regex)?;
       
    71     let base_lines_set: HashSet<_> = base_lines.iter().copied().collect();
       
    72     let local_lines_set: HashSet<_> = local_lines.iter().copied().collect();
       
    73     let other_lines_set: HashSet<_> = other_lines.iter().copied().collect();
       
    74     let mut result = local_lines_set;
       
    75     for to_add in other_lines_set.difference(&base_lines_set) {
       
    76         result.insert(to_add);
       
    77     }
       
    78     for to_remove in base_lines_set.difference(&other_lines_set) {
       
    79         result.remove(to_remove);
       
    80     }
       
    81     Some(result.into_iter().sorted().collect_vec().concat())
       
    82 }
       
    83 
       
    84 fn resolve(
       
    85     base_bytes: &[u8],
       
    86     local_bytes: &[u8],
       
    87     other_bytes: &[u8],
       
    88     regex: &Regex,
       
    89 ) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
       
    90     // Find unchanged ranges between the base and the two sides. We do that by
       
    91     // initially considering the whole base unchanged. Then we compare each
       
    92     // side with the base and intersect the unchanged ranges we find with
       
    93     // what we had before.
       
    94     let unchanged_ranges = vec![UnchangedRange {
       
    95         base_range: 0..base_bytes.len(),
       
    96         offsets: vec![],
       
    97     }];
       
    98     let unchanged_ranges = intersect_regions(
       
    99         unchanged_ranges,
       
   100         &find_unchanged_ranges(base_bytes, local_bytes),
       
   101     );
       
   102     let mut unchanged_ranges = intersect_regions(
       
   103         unchanged_ranges,
       
   104         &find_unchanged_ranges(base_bytes, other_bytes),
       
   105     );
       
   106     // Add an empty UnchangedRange at the end to make it easier to find change
       
   107     // ranges. That way there's a changed range before each UnchangedRange.
       
   108     unchanged_ranges.push(UnchangedRange {
       
   109         base_range: base_bytes.len()..base_bytes.len(),
       
   110         offsets: vec![
       
   111             local_bytes.len().wrapping_sub(base_bytes.len()) as isize,
       
   112             other_bytes.len().wrapping_sub(base_bytes.len()) as isize,
       
   113         ],
       
   114     });
       
   115 
       
   116     let mut new_base_bytes: Vec<u8> = vec![];
       
   117     let mut new_local_bytes: Vec<u8> = vec![];
       
   118     let mut new_other_bytes: Vec<u8> = vec![];
       
   119     let mut previous = UnchangedRange {
       
   120         base_range: 0..0,
       
   121         offsets: vec![0, 0],
       
   122     };
       
   123     for current in unchanged_ranges {
       
   124         let base_slice =
       
   125             &base_bytes[previous.base_range.end..current.base_range.start];
       
   126         let local_slice = &local_bytes[previous.end(0)..current.start(0)];
       
   127         let other_slice = &other_bytes[previous.end(1)..current.start(1)];
       
   128         if let Some(resolution) =
       
   129             resolve_conflict(base_slice, local_slice, other_slice, regex)
       
   130         {
       
   131             new_base_bytes.extend(&resolution);
       
   132             new_local_bytes.extend(&resolution);
       
   133             new_other_bytes.extend(&resolution);
       
   134         } else {
       
   135             new_base_bytes.extend(base_slice);
       
   136             new_local_bytes.extend(local_slice);
       
   137             new_other_bytes.extend(other_slice);
       
   138         }
       
   139         new_base_bytes.extend(&base_bytes[current.base_range.clone()]);
       
   140         new_local_bytes.extend(&local_bytes[current.start(0)..current.end(0)]);
       
   141         new_other_bytes.extend(&other_bytes[current.start(1)..current.end(1)]);
       
   142         previous = current;
       
   143     }
       
   144 
       
   145     (new_base_bytes, new_local_bytes, new_other_bytes)
       
   146 }
       
   147 
       
   148 /// A tool that performs a 3-way merge, resolving conflicts in sorted lists and
       
   149 /// leaving other conflicts unchanged. This is useful with Mercurial's support
       
   150 /// for partial merge tools (configured in `[partial-merge-tools]`).
       
   151 #[derive(Parser, Debug)]
       
   152 #[clap(version, about, long_about = None)]
       
   153 struct Args {
       
   154     /// Path to the file's content in the "local" side
       
   155     local: OsString,
       
   156 
       
   157     /// Path to the file's content in the base
       
   158     base: OsString,
       
   159 
       
   160     /// Path to the file's content in the "other" side
       
   161     other: OsString,
       
   162 }
       
   163 
       
   164 fn main() {
       
   165     let args: Args = Args::parse();
       
   166 
       
   167     let base_path = PathBuf::from(&args.base);
       
   168     let local_path = PathBuf::from(&args.local);
       
   169     let other_path = PathBuf::from(&args.other);
       
   170 
       
   171     let base_bytes = std::fs::read(&base_path).unwrap();
       
   172     let local_bytes = std::fs::read(&local_path).unwrap();
       
   173     let other_bytes = std::fs::read(&other_path).unwrap();
       
   174 
       
   175     let regex =
       
   176         regex::bytes::Regex::new(r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?").unwrap();
       
   177     let (new_base_bytes, new_local_bytes, new_other_bytes) =
       
   178         resolve(&base_bytes, &local_bytes, &other_bytes, &regex);
       
   179 
       
   180     // Write out the result if anything changed
       
   181     if new_base_bytes != base_bytes {
       
   182         std::fs::write(&base_path, new_base_bytes).unwrap();
       
   183     }
       
   184     if new_local_bytes != local_bytes {
       
   185         std::fs::write(&local_path, new_local_bytes).unwrap();
       
   186     }
       
   187     if new_other_bytes != other_bytes {
       
   188         std::fs::write(&other_path, new_other_bytes).unwrap();
       
   189     }
       
   190 }
       
   191 
       
   192 fn checked_add(base: usize, offset: isize) -> usize {
       
   193     if offset < 0 {
       
   194         base.checked_sub(offset.checked_abs().unwrap() as usize)
       
   195             .unwrap()
       
   196     } else {
       
   197         base.checked_add(offset as usize).unwrap()
       
   198     }
       
   199 }
       
   200 
       
   201 // The remainder of the file is copied from
       
   202 // https://github.com/martinvonz/jj/blob/main/lib/src/diff.rs
       
   203 
       
   204 #[derive(Clone, PartialEq, Eq, Debug)]
       
   205 struct UnchangedRange {
       
   206     base_range: Range<usize>,
       
   207     offsets: Vec<isize>,
       
   208 }
       
   209 
       
   210 impl UnchangedRange {
       
   211     fn start(&self, side: usize) -> usize {
       
   212         checked_add(self.base_range.start, self.offsets[side])
       
   213     }
       
   214 
       
   215     fn end(&self, side: usize) -> usize {
       
   216         checked_add(self.base_range.end, self.offsets[side])
       
   217     }
       
   218 }
       
   219 
       
   220 impl PartialOrd for UnchangedRange {
       
   221     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
       
   222         Some(self.cmp(other))
       
   223     }
       
   224 }
       
   225 
       
   226 impl Ord for UnchangedRange {
       
   227     fn cmp(&self, other: &Self) -> Ordering {
       
   228         self.base_range
       
   229             .start
       
   230             .cmp(&other.base_range.start)
       
   231             .then_with(|| self.base_range.end.cmp(&other.base_range.end))
       
   232     }
       
   233 }
       
   234 
       
   235 /// Takes the current regions and intersects it with the new unchanged ranges
       
   236 /// from a 2-way diff. The result is a map of unchanged regions with one more
       
   237 /// offset in the map's values.
       
   238 fn intersect_regions(
       
   239     current_ranges: Vec<UnchangedRange>,
       
   240     new_unchanged_ranges: &[(Range<usize>, Range<usize>)],
       
   241 ) -> Vec<UnchangedRange> {
       
   242     let mut result = vec![];
       
   243     let mut current_ranges_iter = current_ranges.into_iter().peekable();
       
   244     for (new_base_range, other_range) in new_unchanged_ranges.iter() {
       
   245         assert_eq!(new_base_range.len(), other_range.len());
       
   246         while let Some(UnchangedRange {
       
   247             base_range,
       
   248             offsets,
       
   249         }) = current_ranges_iter.peek()
       
   250         {
       
   251             // No need to look further if we're past the new range.
       
   252             if base_range.start >= new_base_range.end {
       
   253                 break;
       
   254             }
       
   255             // Discard any current unchanged regions that don't match between
       
   256             // the base and the new input.
       
   257             if base_range.end <= new_base_range.start {
       
   258                 current_ranges_iter.next();
       
   259                 continue;
       
   260             }
       
   261             let new_start = max(base_range.start, new_base_range.start);
       
   262             let new_end = min(base_range.end, new_base_range.end);
       
   263             let mut new_offsets = offsets.clone();
       
   264             new_offsets
       
   265                 .push(other_range.start.wrapping_sub(new_base_range.start)
       
   266                     as isize);
       
   267             result.push(UnchangedRange {
       
   268                 base_range: new_start..new_end,
       
   269                 offsets: new_offsets,
       
   270             });
       
   271             if base_range.end >= new_base_range.end {
       
   272                 // Break without consuming the item; there may be other new
       
   273                 // ranges that overlap with it.
       
   274                 break;
       
   275             }
       
   276             current_ranges_iter.next();
       
   277         }
       
   278     }
       
   279     result
       
   280 }