Mercurial > hg
comparison rust/hg-core/src/filepatterns.rs @ 50857:796b5d6693a4
rust: simplify pattern file parsing
Previously we kept the pattern syntax as a &[u8] until the last
possible moment, which meant we had to handle potential errors when
parsing that &[u8]. But such errors could never actually occur, given
the structure of the code.
Now we parse it eagerly (in two places) and pass it around as a
PatternSyntax, so we can delete some error handling code.
parse_one_pattern is particularly useful for parsing patterns passed
on the command line, which we'll support later in this series.
author | Spencer Baugh <sbaugh@janestreet.com> |
---|---|
date | Wed, 09 Aug 2023 18:08:28 -0400 |
parents | 1c31b343e514 |
children | df6dfad5009a |
comparison
equal
deleted
inserted
replaced
50856:e037af7de2ce | 50857:796b5d6693a4 |
---|---|
333 Ok(Some(_build_single_regex(&entry))) | 333 Ok(Some(_build_single_regex(&entry))) |
334 } | 334 } |
335 } | 335 } |
336 | 336 |
337 lazy_static! { | 337 lazy_static! { |
338 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { | 338 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { |
339 let mut m = FastHashMap::default(); | 339 let mut m = FastHashMap::default(); |
340 | 340 |
341 m.insert(b"re".as_ref(), b"relre:".as_ref()); | 341 m.insert(b"re:".as_ref(), PatternSyntax::Regexp); |
342 m.insert(b"regexp".as_ref(), b"relre:".as_ref()); | 342 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp); |
343 m.insert(b"glob".as_ref(), b"relglob:".as_ref()); | 343 m.insert(b"path:".as_ref(), PatternSyntax::Path); |
344 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); | 344 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath); |
345 m.insert(b"include".as_ref(), b"include:".as_ref()); | 345 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath); |
346 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); | 346 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles); |
347 m.insert(b"path".as_ref(), b"path:".as_ref()); | 347 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob); |
348 m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref()); | 348 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp); |
349 m.insert(b"glob:".as_ref(), PatternSyntax::Glob); | |
350 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob); | |
351 m.insert(b"include:".as_ref(), PatternSyntax::Include); | |
352 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude); | |
353 | |
349 m | 354 m |
350 }; | 355 }; |
351 } | 356 } |
352 | 357 |
353 #[derive(Debug)] | 358 #[derive(Debug)] |
356 InvalidSyntax(PathBuf, Vec<u8>), | 361 InvalidSyntax(PathBuf, Vec<u8>), |
357 /// File path | 362 /// File path |
358 NoSuchFile(PathBuf), | 363 NoSuchFile(PathBuf), |
359 } | 364 } |
360 | 365 |
366 pub fn parse_one_pattern( | |
367 pattern: &[u8], | |
368 source: &Path, | |
369 default: PatternSyntax, | |
370 ) -> IgnorePattern { | |
371 let mut pattern_bytes: &[u8] = pattern; | |
372 let mut syntax = default; | |
373 | |
374 for (s, val) in SYNTAXES.iter() { | |
375 if let Some(rest) = pattern_bytes.drop_prefix(s) { | |
376 syntax = val.clone(); | |
377 pattern_bytes = rest; | |
378 break; | |
379 } | |
380 } | |
381 | |
382 let pattern = pattern_bytes.to_vec(); | |
383 | |
384 IgnorePattern { | |
385 syntax, | |
386 pattern, | |
387 source: source.to_owned(), | |
388 } | |
389 } | |
390 | |
361 pub fn parse_pattern_file_contents( | 391 pub fn parse_pattern_file_contents( |
362 lines: &[u8], | 392 lines: &[u8], |
363 file_path: &Path, | 393 file_path: &Path, |
364 default_syntax_override: Option<&[u8]>, | 394 default_syntax_override: Option<PatternSyntax>, |
365 warn: bool, | 395 warn: bool, |
396 relativize: bool, | |
366 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { | 397 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
367 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); | 398 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); |
368 | 399 |
369 #[allow(clippy::trivial_regex)] | 400 #[allow(clippy::trivial_regex)] |
370 let comment_escape_regex = Regex::new(r"\\#").unwrap(); | 401 let comment_escape_regex = Regex::new(r"\\#").unwrap(); |
371 let mut inputs: Vec<IgnorePattern> = vec![]; | 402 let mut inputs: Vec<IgnorePattern> = vec![]; |
372 let mut warnings: Vec<PatternFileWarning> = vec![]; | 403 let mut warnings: Vec<PatternFileWarning> = vec![]; |
373 | 404 |
374 let mut current_syntax = | 405 let mut current_syntax = |
375 default_syntax_override.unwrap_or_else(|| b"relre:".as_ref()); | 406 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp); |
376 | 407 |
377 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { | 408 for mut line in lines.split(|c| *c == b'\n') { |
378 let line_number = line_number + 1; | |
379 | |
380 let line_buf; | 409 let line_buf; |
381 if line.contains(&b'#') { | 410 if line.contains(&b'#') { |
382 if let Some(cap) = comment_regex.captures(line) { | 411 if let Some(cap) = comment_regex.captures(line) { |
383 line = &line[..cap.get(1).unwrap().end()] | 412 line = &line[..cap.get(1).unwrap().end()] |
384 } | 413 } |
385 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); | 414 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); |
386 line = &line_buf; | 415 line = &line_buf; |
387 } | 416 } |
388 | 417 |
389 let mut line = line.trim_end(); | 418 let line = line.trim_end(); |
390 | 419 |
391 if line.is_empty() { | 420 if line.is_empty() { |
392 continue; | 421 continue; |
393 } | 422 } |
394 | 423 |
395 if let Some(syntax) = line.drop_prefix(b"syntax:") { | 424 if let Some(syntax) = line.drop_prefix(b"syntax:") { |
396 let syntax = syntax.trim(); | 425 let syntax = syntax.trim(); |
397 | 426 |
398 if let Some(rel_syntax) = SYNTAXES.get(syntax) { | 427 if let Some(parsed) = |
399 current_syntax = rel_syntax; | 428 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice()) |
429 { | |
430 current_syntax = parsed.clone(); | |
400 } else if warn { | 431 } else if warn { |
401 warnings.push(PatternFileWarning::InvalidSyntax( | 432 warnings.push(PatternFileWarning::InvalidSyntax( |
402 file_path.to_owned(), | 433 file_path.to_owned(), |
403 syntax.to_owned(), | 434 syntax.to_owned(), |
404 )); | 435 )); |
405 } | 436 } |
406 continue; | 437 } else { |
407 } | 438 let pattern = parse_one_pattern( |
408 | 439 line, |
409 let mut line_syntax: &[u8] = current_syntax; | 440 file_path, |
410 | 441 current_syntax.clone(), |
411 for (s, rels) in SYNTAXES.iter() { | 442 ); |
412 if let Some(rest) = line.drop_prefix(rels) { | 443 inputs.push(if relativize { |
413 line_syntax = rels; | 444 pattern.to_relative() |
414 line = rest; | 445 } else { |
415 break; | 446 pattern |
416 } | 447 }) |
417 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) { | 448 } |
418 line_syntax = rels; | |
419 line = rest; | |
420 break; | |
421 } | |
422 } | |
423 | |
424 inputs.push(IgnorePattern::new( | |
425 parse_pattern_syntax(line_syntax).map_err(|e| match e { | |
426 PatternError::UnsupportedSyntax(syntax) => { | |
427 PatternError::UnsupportedSyntaxInFile( | |
428 syntax, | |
429 file_path.to_string_lossy().into(), | |
430 line_number, | |
431 ) | |
432 } | |
433 _ => e, | |
434 })?, | |
435 line, | |
436 file_path, | |
437 )); | |
438 } | 449 } |
439 Ok((inputs, warnings)) | 450 Ok((inputs, warnings)) |
440 } | 451 } |
441 | 452 |
442 pub fn read_pattern_file( | 453 pub fn read_pattern_file( |
445 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), | 456 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), |
446 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { | 457 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { |
447 match std::fs::read(file_path) { | 458 match std::fs::read(file_path) { |
448 Ok(contents) => { | 459 Ok(contents) => { |
449 inspect_pattern_bytes(file_path, &contents); | 460 inspect_pattern_bytes(file_path, &contents); |
450 parse_pattern_file_contents(&contents, file_path, None, warn) | 461 parse_pattern_file_contents(&contents, file_path, None, warn, true) |
451 } | 462 } |
452 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( | 463 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( |
453 vec![], | 464 vec![], |
454 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())], | 465 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())], |
455 )), | 466 )), |
469 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self { | 480 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self { |
470 Self { | 481 Self { |
471 syntax, | 482 syntax, |
472 pattern: pattern.to_owned(), | 483 pattern: pattern.to_owned(), |
473 source: source.to_owned(), | 484 source: source.to_owned(), |
485 } | |
486 } | |
487 | |
488 pub fn to_relative(self) -> Self { | |
489 let Self { | |
490 syntax, | |
491 pattern, | |
492 source, | |
493 } = self; | |
494 Self { | |
495 syntax: match syntax { | |
496 PatternSyntax::Regexp => PatternSyntax::RelRegexp, | |
497 PatternSyntax::Glob => PatternSyntax::RelGlob, | |
498 x => x, | |
499 }, | |
500 pattern, | |
501 source, | |
474 } | 502 } |
475 } | 503 } |
476 } | 504 } |
477 | 505 |
478 pub type PatternResult<T> = Result<T, PatternError>; | 506 pub type PatternResult<T> = Result<T, PatternError>; |
637 assert_eq!( | 665 assert_eq!( |
638 parse_pattern_file_contents( | 666 parse_pattern_file_contents( |
639 lines, | 667 lines, |
640 Path::new("file_path"), | 668 Path::new("file_path"), |
641 None, | 669 None, |
642 false | 670 false, |
671 true, | |
643 ) | 672 ) |
644 .unwrap() | 673 .unwrap() |
645 .0, | 674 .0, |
646 vec![IgnorePattern::new( | 675 vec![IgnorePattern::new( |
647 PatternSyntax::RelGlob, | 676 PatternSyntax::RelGlob, |
655 assert_eq!( | 684 assert_eq!( |
656 parse_pattern_file_contents( | 685 parse_pattern_file_contents( |
657 lines, | 686 lines, |
658 Path::new("file_path"), | 687 Path::new("file_path"), |
659 None, | 688 None, |
660 false | 689 false, |
690 true, | |
661 ) | 691 ) |
662 .unwrap() | 692 .unwrap() |
663 .0, | 693 .0, |
664 vec![] | 694 vec![] |
665 ); | 695 ); |
667 assert_eq!( | 697 assert_eq!( |
668 parse_pattern_file_contents( | 698 parse_pattern_file_contents( |
669 lines, | 699 lines, |
670 Path::new("file_path"), | 700 Path::new("file_path"), |
671 None, | 701 None, |
672 false | 702 false, |
703 true, | |
673 ) | 704 ) |
674 .unwrap() | 705 .unwrap() |
675 .0, | 706 .0, |
676 vec![IgnorePattern::new( | 707 vec![IgnorePattern::new( |
677 PatternSyntax::RelGlob, | 708 PatternSyntax::RelGlob, |