|
1 use clap::Parser; |
|
2 use itertools::Itertools; |
|
3 use regex::bytes::Regex; |
|
4 use similar::ChangeTag; |
|
5 use std::cmp::{max, min, Ordering}; |
|
6 use std::collections::HashSet; |
|
7 use std::ffi::OsString; |
|
8 use std::ops::Range; |
|
9 use std::path::PathBuf; |
|
10 |
|
11 fn find_unchanged_ranges( |
|
12 old_bytes: &[u8], |
|
13 new_bytes: &[u8], |
|
14 ) -> Vec<(Range<usize>, Range<usize>)> { |
|
15 let diff = similar::TextDiff::configure() |
|
16 .algorithm(similar::Algorithm::Patience) |
|
17 .diff_lines(old_bytes, new_bytes); |
|
18 let mut new_unchanged_ranges = vec![]; |
|
19 let mut old_index = 0; |
|
20 let mut new_index = 0; |
|
21 for diff in diff.iter_all_changes() { |
|
22 match diff.tag() { |
|
23 ChangeTag::Equal => { |
|
24 new_unchanged_ranges.push(( |
|
25 old_index..old_index + diff.value().len(), |
|
26 new_index..new_index + diff.value().len(), |
|
27 )); |
|
28 old_index += diff.value().len(); |
|
29 new_index += diff.value().len(); |
|
30 } |
|
31 ChangeTag::Delete => { |
|
32 old_index += diff.value().len(); |
|
33 } |
|
34 ChangeTag::Insert => { |
|
35 new_index += diff.value().len(); |
|
36 } |
|
37 } |
|
38 } |
|
39 new_unchanged_ranges |
|
40 } |
|
41 |
|
42 /// Returns a list of all the lines in the input (including trailing newlines), |
|
43 /// but only if they all match the regex and they are sorted. |
|
44 fn get_lines<'input>( |
|
45 input: &'input [u8], |
|
46 regex: &Regex, |
|
47 ) -> Option<Vec<&'input [u8]>> { |
|
48 let lines = input.split_inclusive(|x| *x == b'\n').collect_vec(); |
|
49 let mut previous_line = "".as_bytes(); |
|
50 for line in &lines { |
|
51 if *line < previous_line { |
|
52 return None; |
|
53 } |
|
54 if !regex.is_match(line) { |
|
55 return None; |
|
56 } |
|
57 previous_line = line; |
|
58 } |
|
59 Some(lines) |
|
60 } |
|
61 |
|
62 fn resolve_conflict( |
|
63 base_slice: &[u8], |
|
64 local_slice: &[u8], |
|
65 other_slice: &[u8], |
|
66 regex: &Regex, |
|
67 ) -> Option<Vec<u8>> { |
|
68 let base_lines = get_lines(base_slice, regex)?; |
|
69 let local_lines = get_lines(local_slice, regex)?; |
|
70 let other_lines = get_lines(other_slice, regex)?; |
|
71 let base_lines_set: HashSet<_> = base_lines.iter().copied().collect(); |
|
72 let local_lines_set: HashSet<_> = local_lines.iter().copied().collect(); |
|
73 let other_lines_set: HashSet<_> = other_lines.iter().copied().collect(); |
|
74 let mut result = local_lines_set; |
|
75 for to_add in other_lines_set.difference(&base_lines_set) { |
|
76 result.insert(to_add); |
|
77 } |
|
78 for to_remove in base_lines_set.difference(&other_lines_set) { |
|
79 result.remove(to_remove); |
|
80 } |
|
81 Some(result.into_iter().sorted().collect_vec().concat()) |
|
82 } |
|
83 |
|
84 fn resolve( |
|
85 base_bytes: &[u8], |
|
86 local_bytes: &[u8], |
|
87 other_bytes: &[u8], |
|
88 regex: &Regex, |
|
89 ) -> (Vec<u8>, Vec<u8>, Vec<u8>) { |
|
90 // Find unchanged ranges between the base and the two sides. We do that by |
|
91 // initially considering the whole base unchanged. Then we compare each |
|
92 // side with the base and intersect the unchanged ranges we find with |
|
93 // what we had before. |
|
94 let unchanged_ranges = vec![UnchangedRange { |
|
95 base_range: 0..base_bytes.len(), |
|
96 offsets: vec![], |
|
97 }]; |
|
98 let unchanged_ranges = intersect_regions( |
|
99 unchanged_ranges, |
|
100 &find_unchanged_ranges(base_bytes, local_bytes), |
|
101 ); |
|
102 let mut unchanged_ranges = intersect_regions( |
|
103 unchanged_ranges, |
|
104 &find_unchanged_ranges(base_bytes, other_bytes), |
|
105 ); |
|
106 // Add an empty UnchangedRange at the end to make it easier to find change |
|
107 // ranges. That way there's a changed range before each UnchangedRange. |
|
108 unchanged_ranges.push(UnchangedRange { |
|
109 base_range: base_bytes.len()..base_bytes.len(), |
|
110 offsets: vec![ |
|
111 local_bytes.len().wrapping_sub(base_bytes.len()) as isize, |
|
112 other_bytes.len().wrapping_sub(base_bytes.len()) as isize, |
|
113 ], |
|
114 }); |
|
115 |
|
116 let mut new_base_bytes: Vec<u8> = vec![]; |
|
117 let mut new_local_bytes: Vec<u8> = vec![]; |
|
118 let mut new_other_bytes: Vec<u8> = vec![]; |
|
119 let mut previous = UnchangedRange { |
|
120 base_range: 0..0, |
|
121 offsets: vec![0, 0], |
|
122 }; |
|
123 for current in unchanged_ranges { |
|
124 let base_slice = |
|
125 &base_bytes[previous.base_range.end..current.base_range.start]; |
|
126 let local_slice = &local_bytes[previous.end(0)..current.start(0)]; |
|
127 let other_slice = &other_bytes[previous.end(1)..current.start(1)]; |
|
128 if let Some(resolution) = |
|
129 resolve_conflict(base_slice, local_slice, other_slice, regex) |
|
130 { |
|
131 new_base_bytes.extend(&resolution); |
|
132 new_local_bytes.extend(&resolution); |
|
133 new_other_bytes.extend(&resolution); |
|
134 } else { |
|
135 new_base_bytes.extend(base_slice); |
|
136 new_local_bytes.extend(local_slice); |
|
137 new_other_bytes.extend(other_slice); |
|
138 } |
|
139 new_base_bytes.extend(&base_bytes[current.base_range.clone()]); |
|
140 new_local_bytes.extend(&local_bytes[current.start(0)..current.end(0)]); |
|
141 new_other_bytes.extend(&other_bytes[current.start(1)..current.end(1)]); |
|
142 previous = current; |
|
143 } |
|
144 |
|
145 (new_base_bytes, new_local_bytes, new_other_bytes) |
|
146 } |
|
147 |
|
148 /// A tool that performs a 3-way merge, resolving conflicts in sorted lists and |
|
149 /// leaving other conflicts unchanged. This is useful with Mercurial's support |
|
150 /// for partial merge tools (configured in `[partial-merge-tools]`). |
|
151 #[derive(Parser, Debug)] |
|
152 #[clap(version, about, long_about = None)] |
|
153 struct Args { |
|
154 /// Path to the file's content in the "local" side |
|
155 local: OsString, |
|
156 |
|
157 /// Path to the file's content in the base |
|
158 base: OsString, |
|
159 |
|
160 /// Path to the file's content in the "other" side |
|
161 other: OsString, |
|
162 } |
|
163 |
|
164 fn main() { |
|
165 let args: Args = Args::parse(); |
|
166 |
|
167 let base_path = PathBuf::from(&args.base); |
|
168 let local_path = PathBuf::from(&args.local); |
|
169 let other_path = PathBuf::from(&args.other); |
|
170 |
|
171 let base_bytes = std::fs::read(&base_path).unwrap(); |
|
172 let local_bytes = std::fs::read(&local_path).unwrap(); |
|
173 let other_bytes = std::fs::read(&other_path).unwrap(); |
|
174 |
|
175 let regex = |
|
176 regex::bytes::Regex::new(r"import \w+(\.\w+)*( +#.*)?\n|from (\w+(\.\w+)* import \w+( as \w+)?(, \w+( as \w+)?)*( +#.*)?)\r?\n?").unwrap(); |
|
177 let (new_base_bytes, new_local_bytes, new_other_bytes) = |
|
178 resolve(&base_bytes, &local_bytes, &other_bytes, ®ex); |
|
179 |
|
180 // Write out the result if anything changed |
|
181 if new_base_bytes != base_bytes { |
|
182 std::fs::write(&base_path, new_base_bytes).unwrap(); |
|
183 } |
|
184 if new_local_bytes != local_bytes { |
|
185 std::fs::write(&local_path, new_local_bytes).unwrap(); |
|
186 } |
|
187 if new_other_bytes != other_bytes { |
|
188 std::fs::write(&other_path, new_other_bytes).unwrap(); |
|
189 } |
|
190 } |
|
191 |
|
192 fn checked_add(base: usize, offset: isize) -> usize { |
|
193 if offset < 0 { |
|
194 base.checked_sub(offset.checked_abs().unwrap() as usize) |
|
195 .unwrap() |
|
196 } else { |
|
197 base.checked_add(offset as usize).unwrap() |
|
198 } |
|
199 } |
|
200 |
|
201 // The remainder of the file is copied from |
|
202 // https://github.com/martinvonz/jj/blob/main/lib/src/diff.rs |
|
203 |
|
204 #[derive(Clone, PartialEq, Eq, Debug)] |
|
205 struct UnchangedRange { |
|
206 base_range: Range<usize>, |
|
207 offsets: Vec<isize>, |
|
208 } |
|
209 |
|
210 impl UnchangedRange { |
|
211 fn start(&self, side: usize) -> usize { |
|
212 checked_add(self.base_range.start, self.offsets[side]) |
|
213 } |
|
214 |
|
215 fn end(&self, side: usize) -> usize { |
|
216 checked_add(self.base_range.end, self.offsets[side]) |
|
217 } |
|
218 } |
|
219 |
|
220 impl PartialOrd for UnchangedRange { |
|
221 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
|
222 Some(self.cmp(other)) |
|
223 } |
|
224 } |
|
225 |
|
226 impl Ord for UnchangedRange { |
|
227 fn cmp(&self, other: &Self) -> Ordering { |
|
228 self.base_range |
|
229 .start |
|
230 .cmp(&other.base_range.start) |
|
231 .then_with(|| self.base_range.end.cmp(&other.base_range.end)) |
|
232 } |
|
233 } |
|
234 |
|
235 /// Takes the current regions and intersects it with the new unchanged ranges |
|
236 /// from a 2-way diff. The result is a map of unchanged regions with one more |
|
237 /// offset in the map's values. |
|
238 fn intersect_regions( |
|
239 current_ranges: Vec<UnchangedRange>, |
|
240 new_unchanged_ranges: &[(Range<usize>, Range<usize>)], |
|
241 ) -> Vec<UnchangedRange> { |
|
242 let mut result = vec![]; |
|
243 let mut current_ranges_iter = current_ranges.into_iter().peekable(); |
|
244 for (new_base_range, other_range) in new_unchanged_ranges.iter() { |
|
245 assert_eq!(new_base_range.len(), other_range.len()); |
|
246 while let Some(UnchangedRange { |
|
247 base_range, |
|
248 offsets, |
|
249 }) = current_ranges_iter.peek() |
|
250 { |
|
251 // No need to look further if we're past the new range. |
|
252 if base_range.start >= new_base_range.end { |
|
253 break; |
|
254 } |
|
255 // Discard any current unchanged regions that don't match between |
|
256 // the base and the new input. |
|
257 if base_range.end <= new_base_range.start { |
|
258 current_ranges_iter.next(); |
|
259 continue; |
|
260 } |
|
261 let new_start = max(base_range.start, new_base_range.start); |
|
262 let new_end = min(base_range.end, new_base_range.end); |
|
263 let mut new_offsets = offsets.clone(); |
|
264 new_offsets |
|
265 .push(other_range.start.wrapping_sub(new_base_range.start) |
|
266 as isize); |
|
267 result.push(UnchangedRange { |
|
268 base_range: new_start..new_end, |
|
269 offsets: new_offsets, |
|
270 }); |
|
271 if base_range.end >= new_base_range.end { |
|
272 // Break without consuming the item; there may be other new |
|
273 // ranges that overlap with it. |
|
274 break; |
|
275 } |
|
276 current_ranges_iter.next(); |
|
277 } |
|
278 } |
|
279 result |
|
280 } |