comparison mercurial/thirdparty/xdiff/xutils.c @ 36761:09f320067591

xdiff: remove whitespace related feature In Mercurial, whitespace-related handling (such as "ignore spaces") is done at a higher level than the low-level diff algorithm, so xdiff's whitespace-related features are not used by mdiff. Some of the upcoming optimizations would be more difficult with the whitespace-related features kept, so let's remove them. Differential Revision: https://phab.mercurial-scm.org/D2683
author Jun Wu <quark@fb.com>
date Sun, 04 Mar 2018 00:07:04 -0800
parents 34e2ff1f9cd8
children b5bb0f99064d
comparison
equal deleted inserted replaced
36760:7bf80d9d9543 36761:09f320067591
141 nl = xdl_mmfile_size(mf) / (tsize / nl); 141 nl = xdl_mmfile_size(mf) / (tsize / nl);
142 142
143 return nl + 1; 143 return nl + 1;
144 } 144 }
145 145
146 int xdl_blankline(const char *line, long size, long flags)
147 {
148 long i;
149
150 if (!(flags & XDF_WHITESPACE_FLAGS))
151 return (size <= 1);
152
153 for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
154 ;
155
156 return (i == size);
157 }
158
159 /*
160 * Have we eaten everything on the line, except for an optional
161 * CR at the very end?
162 */
163 static int ends_with_optional_cr(const char *l, long s, long i)
164 {
165 int complete = s && l[s-1] == '\n';
166
167 if (complete)
168 s--;
169 if (s == i)
170 return 1;
171 /* do not ignore CR at the end of an incomplete line */
172 if (complete && s == i + 1 && l[i] == '\r')
173 return 1;
174 return 0;
175 }
176
177 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) 146 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
178 { 147 {
179 int i1, i2;
180
181 if (s1 == s2 && !memcmp(l1, l2, s1)) 148 if (s1 == s2 && !memcmp(l1, l2, s1))
182 return 1; 149 return 1;
183 if (!(flags & XDF_WHITESPACE_FLAGS)) 150 return 0;
184 return 0;
185
186 i1 = 0;
187 i2 = 0;
188
189 /*
190 * -w matches everything that matches with -b, and -b in turn
191 * matches everything that matches with --ignore-space-at-eol,
192 * which in turn matches everything that matches with --ignore-cr-at-eol.
193 *
194 * Each flavor of ignoring needs different logic to skip whitespaces
195 * while we have both sides to compare.
196 */
197 if (flags & XDF_IGNORE_WHITESPACE) {
198 goto skip_ws;
199 while (i1 < s1 && i2 < s2) {
200 if (l1[i1++] != l2[i2++])
201 return 0;
202 skip_ws:
203 while (i1 < s1 && XDL_ISSPACE(l1[i1]))
204 i1++;
205 while (i2 < s2 && XDL_ISSPACE(l2[i2]))
206 i2++;
207 }
208 } else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) {
209 while (i1 < s1 && i2 < s2) {
210 if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) {
211 /* Skip matching spaces and try again */
212 while (i1 < s1 && XDL_ISSPACE(l1[i1]))
213 i1++;
214 while (i2 < s2 && XDL_ISSPACE(l2[i2]))
215 i2++;
216 continue;
217 }
218 if (l1[i1++] != l2[i2++])
219 return 0;
220 }
221 } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
222 while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
223 i1++;
224 i2++;
225 }
226 } else if (flags & XDF_IGNORE_CR_AT_EOL) {
227 /* Find the first difference and see how the line ends */
228 while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
229 i1++;
230 i2++;
231 }
232 return (ends_with_optional_cr(l1, s1, i1) &&
233 ends_with_optional_cr(l2, s2, i2));
234 }
235
236 /*
237 * After running out of one side, the remaining side must have
238 * nothing but whitespace for the lines to match. Note that
239 * ignore-whitespace-at-eol case may break out of the loop
240 * while there still are characters remaining on both lines.
241 */
242 if (i1 < s1) {
243 while (i1 < s1 && XDL_ISSPACE(l1[i1]))
244 i1++;
245 if (s1 != i1)
246 return 0;
247 }
248 if (i2 < s2) {
249 while (i2 < s2 && XDL_ISSPACE(l2[i2]))
250 i2++;
251 return (s2 == i2);
252 }
253 return 1;
254 }
255
256 static unsigned long xdl_hash_record_with_whitespace(char const **data,
257 char const *top, long flags) {
258 unsigned long ha = 5381;
259 char const *ptr = *data;
260 int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
261
262 for (; ptr < top && *ptr != '\n'; ptr++) {
263 if (cr_at_eol_only) {
264 /* do not ignore CR at the end of an incomplete line */
265 if (*ptr == '\r' &&
266 (ptr + 1 < top && ptr[1] == '\n'))
267 continue;
268 }
269 else if (XDL_ISSPACE(*ptr)) {
270 const char *ptr2 = ptr;
271 int at_eol;
272 while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
273 && ptr[1] != '\n')
274 ptr++;
275 at_eol = (top <= ptr + 1 || ptr[1] == '\n');
276 if (flags & XDF_IGNORE_WHITESPACE)
277 ; /* already handled */
278 else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
279 && !at_eol) {
280 ha += (ha << 5);
281 ha ^= (unsigned long) ' ';
282 }
283 else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
284 && !at_eol) {
285 while (ptr2 != ptr + 1) {
286 ha += (ha << 5);
287 ha ^= (unsigned long) *ptr2;
288 ptr2++;
289 }
290 }
291 continue;
292 }
293 ha += (ha << 5);
294 ha ^= (unsigned long) *ptr;
295 }
296 *data = ptr < top ? ptr + 1: ptr;
297
298 return ha;
299 } 151 }
300 152
301 unsigned long xdl_hash_record(char const **data, char const *top, long flags) { 153 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
302 unsigned long ha = 5381; 154 unsigned long ha = 5381;
303 char const *ptr = *data; 155 char const *ptr = *data;
304
305 if (flags & XDF_WHITESPACE_FLAGS)
306 return xdl_hash_record_with_whitespace(data, top, flags);
307 156
308 for (; ptr < top && *ptr != '\n'; ptr++) { 157 for (; ptr < top && *ptr != '\n'; ptr++) {
309 ha += (ha << 5); 158 ha += (ha << 5);
310 ha ^= (unsigned long) *ptr; 159 ha ^= (unsigned long) *ptr;
311 } 160 }