Mercurial > hg
comparison mercurial/thirdparty/xdiff/xutils.c @ 36761:09f320067591
xdiff: remove whitespace-related features
In Mercurial, whitespace-related handling (such as "ignore spaces") is done at
a higher level than the low-level diff algorithm, so xdiff's own whitespace features are not used by mdiff.
Some of the upcoming optimizations would be more difficult to implement if the
whitespace-related features were kept, so let's remove them.
Differential Revision: https://phab.mercurial-scm.org/D2683
author | Jun Wu <quark@fb.com> |
---|---|
date | Sun, 04 Mar 2018 00:07:04 -0800 |
parents | 34e2ff1f9cd8 |
children | b5bb0f99064d |
comparison
equal
deleted
inserted
replaced
36760:7bf80d9d9543 | 36761:09f320067591 |
---|---|
141 nl = xdl_mmfile_size(mf) / (tsize / nl); | 141 nl = xdl_mmfile_size(mf) / (tsize / nl); |
142 | 142 |
143 return nl + 1; | 143 return nl + 1; |
144 } | 144 } |
145 | 145 |
146 int xdl_blankline(const char *line, long size, long flags) | |
147 { | |
148 long i; | |
149 | |
150 if (!(flags & XDF_WHITESPACE_FLAGS)) | |
151 return (size <= 1); | |
152 | |
153 for (i = 0; i < size && XDL_ISSPACE(line[i]); i++) | |
154 ; | |
155 | |
156 return (i == size); | |
157 } | |
158 | |
159 /* | |
160 * Have we eaten everything on the line, except for an optional | |
161 * CR at the very end? | |
162 */ | |
163 static int ends_with_optional_cr(const char *l, long s, long i) | |
164 { | |
165 int complete = s && l[s-1] == '\n'; | |
166 | |
167 if (complete) | |
168 s--; | |
169 if (s == i) | |
170 return 1; | |
171 /* do not ignore CR at the end of an incomplete line */ | |
172 if (complete && s == i + 1 && l[i] == '\r') | |
173 return 1; | |
174 return 0; | |
175 } | |
176 | |
177 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) | 146 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) |
178 { | 147 { |
179 int i1, i2; | |
180 | |
181 if (s1 == s2 && !memcmp(l1, l2, s1)) | 148 if (s1 == s2 && !memcmp(l1, l2, s1)) |
182 return 1; | 149 return 1; |
183 if (!(flags & XDF_WHITESPACE_FLAGS)) | 150 return 0; |
184 return 0; | |
185 | |
186 i1 = 0; | |
187 i2 = 0; | |
188 | |
189 /* | |
190 * -w matches everything that matches with -b, and -b in turn | |
191 * matches everything that matches with --ignore-space-at-eol, | |
192 * which in turn matches everything that matches with --ignore-cr-at-eol. | |
193 * | |
194 * Each flavor of ignoring needs different logic to skip whitespaces | |
195 * while we have both sides to compare. | |
196 */ | |
197 if (flags & XDF_IGNORE_WHITESPACE) { | |
198 goto skip_ws; | |
199 while (i1 < s1 && i2 < s2) { | |
200 if (l1[i1++] != l2[i2++]) | |
201 return 0; | |
202 skip_ws: | |
203 while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |
204 i1++; | |
205 while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |
206 i2++; | |
207 } | |
208 } else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) { | |
209 while (i1 < s1 && i2 < s2) { | |
210 if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) { | |
211 /* Skip matching spaces and try again */ | |
212 while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |
213 i1++; | |
214 while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |
215 i2++; | |
216 continue; | |
217 } | |
218 if (l1[i1++] != l2[i2++]) | |
219 return 0; | |
220 } | |
221 } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) { | |
222 while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { | |
223 i1++; | |
224 i2++; | |
225 } | |
226 } else if (flags & XDF_IGNORE_CR_AT_EOL) { | |
227 /* Find the first difference and see how the line ends */ | |
228 while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) { | |
229 i1++; | |
230 i2++; | |
231 } | |
232 return (ends_with_optional_cr(l1, s1, i1) && | |
233 ends_with_optional_cr(l2, s2, i2)); | |
234 } | |
235 | |
236 /* | |
237 * After running out of one side, the remaining side must have | |
238 * nothing but whitespace for the lines to match. Note that | |
239 * ignore-whitespace-at-eol case may break out of the loop | |
240 * while there still are characters remaining on both lines. | |
241 */ | |
242 if (i1 < s1) { | |
243 while (i1 < s1 && XDL_ISSPACE(l1[i1])) | |
244 i1++; | |
245 if (s1 != i1) | |
246 return 0; | |
247 } | |
248 if (i2 < s2) { | |
249 while (i2 < s2 && XDL_ISSPACE(l2[i2])) | |
250 i2++; | |
251 return (s2 == i2); | |
252 } | |
253 return 1; | |
254 } | |
255 | |
256 static unsigned long xdl_hash_record_with_whitespace(char const **data, | |
257 char const *top, long flags) { | |
258 unsigned long ha = 5381; | |
259 char const *ptr = *data; | |
260 int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; | |
261 | |
262 for (; ptr < top && *ptr != '\n'; ptr++) { | |
263 if (cr_at_eol_only) { | |
264 /* do not ignore CR at the end of an incomplete line */ | |
265 if (*ptr == '\r' && | |
266 (ptr + 1 < top && ptr[1] == '\n')) | |
267 continue; | |
268 } | |
269 else if (XDL_ISSPACE(*ptr)) { | |
270 const char *ptr2 = ptr; | |
271 int at_eol; | |
272 while (ptr + 1 < top && XDL_ISSPACE(ptr[1]) | |
273 && ptr[1] != '\n') | |
274 ptr++; | |
275 at_eol = (top <= ptr + 1 || ptr[1] == '\n'); | |
276 if (flags & XDF_IGNORE_WHITESPACE) | |
277 ; /* already handled */ | |
278 else if (flags & XDF_IGNORE_WHITESPACE_CHANGE | |
279 && !at_eol) { | |
280 ha += (ha << 5); | |
281 ha ^= (unsigned long) ' '; | |
282 } | |
283 else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL | |
284 && !at_eol) { | |
285 while (ptr2 != ptr + 1) { | |
286 ha += (ha << 5); | |
287 ha ^= (unsigned long) *ptr2; | |
288 ptr2++; | |
289 } | |
290 } | |
291 continue; | |
292 } | |
293 ha += (ha << 5); | |
294 ha ^= (unsigned long) *ptr; | |
295 } | |
296 *data = ptr < top ? ptr + 1: ptr; | |
297 | |
298 return ha; | |
299 } | 151 } |
300 | 152 |
301 unsigned long xdl_hash_record(char const **data, char const *top, long flags) { | 153 unsigned long xdl_hash_record(char const **data, char const *top, long flags) { |
302 unsigned long ha = 5381; | 154 unsigned long ha = 5381; |
303 char const *ptr = *data; | 155 char const *ptr = *data; |
304 | |
305 if (flags & XDF_WHITESPACE_FLAGS) | |
306 return xdl_hash_record_with_whitespace(data, top, flags); | |
307 | 156 |
308 for (; ptr < top && *ptr != '\n'; ptr++) { | 157 for (; ptr < top && *ptr != '\n'; ptr++) { |
309 ha += (ha << 5); | 158 ha += (ha << 5); |
310 ha ^= (unsigned long) *ptr; | 159 ha ^= (unsigned long) *ptr; |
311 } | 160 } |