comparison rust/hg-core/src/filepatterns.rs @ 50857:796b5d6693a4

rust: simplify pattern file parsing. Previously, we kept the pattern syntax as a &[u8] until the last possible moment, which meant we had to handle potential errors when parsing that &[u8]. But such errors could never actually occur, given the structure of the code. Now we parse it eagerly (in two places) and pass it around as a PatternSyntax, so we can delete some error-handling code. parse_one_pattern is particularly useful for parsing patterns passed on the command line, which we'll support later in this series.
author Spencer Baugh <sbaugh@janestreet.com>
date Wed, 09 Aug 2023 18:08:28 -0400
parents 1c31b343e514
children df6dfad5009a
comparison
equal deleted inserted replaced
50856:e037af7de2ce 50857:796b5d6693a4
333 Ok(Some(_build_single_regex(&entry))) 333 Ok(Some(_build_single_regex(&entry)))
334 } 334 }
335 } 335 }
336 336
337 lazy_static! { 337 lazy_static! {
338 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = { 338 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
339 let mut m = FastHashMap::default(); 339 let mut m = FastHashMap::default();
340 340
341 m.insert(b"re".as_ref(), b"relre:".as_ref()); 341 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
342 m.insert(b"regexp".as_ref(), b"relre:".as_ref()); 342 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
343 m.insert(b"glob".as_ref(), b"relglob:".as_ref()); 343 m.insert(b"path:".as_ref(), PatternSyntax::Path);
344 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref()); 344 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
345 m.insert(b"include".as_ref(), b"include:".as_ref()); 345 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
346 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref()); 346 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
347 m.insert(b"path".as_ref(), b"path:".as_ref()); 347 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
348 m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref()); 348 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
349 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
350 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
351 m.insert(b"include:".as_ref(), PatternSyntax::Include);
352 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
353
349 m 354 m
350 }; 355 };
351 } 356 }
352 357
353 #[derive(Debug)] 358 #[derive(Debug)]
356 InvalidSyntax(PathBuf, Vec<u8>), 361 InvalidSyntax(PathBuf, Vec<u8>),
357 /// File path 362 /// File path
358 NoSuchFile(PathBuf), 363 NoSuchFile(PathBuf),
359 } 364 }
360 365
366 pub fn parse_one_pattern(
367 pattern: &[u8],
368 source: &Path,
369 default: PatternSyntax,
370 ) -> IgnorePattern {
371 let mut pattern_bytes: &[u8] = pattern;
372 let mut syntax = default;
373
374 for (s, val) in SYNTAXES.iter() {
375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 syntax = val.clone();
377 pattern_bytes = rest;
378 break;
379 }
380 }
381
382 let pattern = pattern_bytes.to_vec();
383
384 IgnorePattern {
385 syntax,
386 pattern,
387 source: source.to_owned(),
388 }
389 }
390
361 pub fn parse_pattern_file_contents( 391 pub fn parse_pattern_file_contents(
362 lines: &[u8], 392 lines: &[u8],
363 file_path: &Path, 393 file_path: &Path,
364 default_syntax_override: Option<&[u8]>, 394 default_syntax_override: Option<PatternSyntax>,
365 warn: bool, 395 warn: bool,
396 relativize: bool,
366 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { 397 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
367 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); 398 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
368 399
369 #[allow(clippy::trivial_regex)] 400 #[allow(clippy::trivial_regex)]
370 let comment_escape_regex = Regex::new(r"\\#").unwrap(); 401 let comment_escape_regex = Regex::new(r"\\#").unwrap();
371 let mut inputs: Vec<IgnorePattern> = vec![]; 402 let mut inputs: Vec<IgnorePattern> = vec![];
372 let mut warnings: Vec<PatternFileWarning> = vec![]; 403 let mut warnings: Vec<PatternFileWarning> = vec![];
373 404
374 let mut current_syntax = 405 let mut current_syntax =
375 default_syntax_override.unwrap_or_else(|| b"relre:".as_ref()); 406 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
376 407
377 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() { 408 for mut line in lines.split(|c| *c == b'\n') {
378 let line_number = line_number + 1;
379
380 let line_buf; 409 let line_buf;
381 if line.contains(&b'#') { 410 if line.contains(&b'#') {
382 if let Some(cap) = comment_regex.captures(line) { 411 if let Some(cap) = comment_regex.captures(line) {
383 line = &line[..cap.get(1).unwrap().end()] 412 line = &line[..cap.get(1).unwrap().end()]
384 } 413 }
385 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); 414 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
386 line = &line_buf; 415 line = &line_buf;
387 } 416 }
388 417
389 let mut line = line.trim_end(); 418 let line = line.trim_end();
390 419
391 if line.is_empty() { 420 if line.is_empty() {
392 continue; 421 continue;
393 } 422 }
394 423
395 if let Some(syntax) = line.drop_prefix(b"syntax:") { 424 if let Some(syntax) = line.drop_prefix(b"syntax:") {
396 let syntax = syntax.trim(); 425 let syntax = syntax.trim();
397 426
398 if let Some(rel_syntax) = SYNTAXES.get(syntax) { 427 if let Some(parsed) =
399 current_syntax = rel_syntax; 428 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
429 {
430 current_syntax = parsed.clone();
400 } else if warn { 431 } else if warn {
401 warnings.push(PatternFileWarning::InvalidSyntax( 432 warnings.push(PatternFileWarning::InvalidSyntax(
402 file_path.to_owned(), 433 file_path.to_owned(),
403 syntax.to_owned(), 434 syntax.to_owned(),
404 )); 435 ));
405 } 436 }
406 continue; 437 } else {
407 } 438 let pattern = parse_one_pattern(
408 439 line,
409 let mut line_syntax: &[u8] = current_syntax; 440 file_path,
410 441 current_syntax.clone(),
411 for (s, rels) in SYNTAXES.iter() { 442 );
412 if let Some(rest) = line.drop_prefix(rels) { 443 inputs.push(if relativize {
413 line_syntax = rels; 444 pattern.to_relative()
414 line = rest; 445 } else {
415 break; 446 pattern
416 } 447 })
417 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) { 448 }
418 line_syntax = rels;
419 line = rest;
420 break;
421 }
422 }
423
424 inputs.push(IgnorePattern::new(
425 parse_pattern_syntax(line_syntax).map_err(|e| match e {
426 PatternError::UnsupportedSyntax(syntax) => {
427 PatternError::UnsupportedSyntaxInFile(
428 syntax,
429 file_path.to_string_lossy().into(),
430 line_number,
431 )
432 }
433 _ => e,
434 })?,
435 line,
436 file_path,
437 ));
438 } 449 }
439 Ok((inputs, warnings)) 450 Ok((inputs, warnings))
440 } 451 }
441 452
442 pub fn read_pattern_file( 453 pub fn read_pattern_file(
445 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), 456 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
446 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { 457 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
447 match std::fs::read(file_path) { 458 match std::fs::read(file_path) {
448 Ok(contents) => { 459 Ok(contents) => {
449 inspect_pattern_bytes(file_path, &contents); 460 inspect_pattern_bytes(file_path, &contents);
450 parse_pattern_file_contents(&contents, file_path, None, warn) 461 parse_pattern_file_contents(&contents, file_path, None, warn, true)
451 } 462 }
452 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( 463 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
453 vec![], 464 vec![],
454 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())], 465 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
455 )), 466 )),
469 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self { 480 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
470 Self { 481 Self {
471 syntax, 482 syntax,
472 pattern: pattern.to_owned(), 483 pattern: pattern.to_owned(),
473 source: source.to_owned(), 484 source: source.to_owned(),
485 }
486 }
487
488 pub fn to_relative(self) -> Self {
489 let Self {
490 syntax,
491 pattern,
492 source,
493 } = self;
494 Self {
495 syntax: match syntax {
496 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
497 PatternSyntax::Glob => PatternSyntax::RelGlob,
498 x => x,
499 },
500 pattern,
501 source,
474 } 502 }
475 } 503 }
476 } 504 }
477 505
478 pub type PatternResult<T> = Result<T, PatternError>; 506 pub type PatternResult<T> = Result<T, PatternError>;
637 assert_eq!( 665 assert_eq!(
638 parse_pattern_file_contents( 666 parse_pattern_file_contents(
639 lines, 667 lines,
640 Path::new("file_path"), 668 Path::new("file_path"),
641 None, 669 None,
642 false 670 false,
671 true,
643 ) 672 )
644 .unwrap() 673 .unwrap()
645 .0, 674 .0,
646 vec![IgnorePattern::new( 675 vec![IgnorePattern::new(
647 PatternSyntax::RelGlob, 676 PatternSyntax::RelGlob,
655 assert_eq!( 684 assert_eq!(
656 parse_pattern_file_contents( 685 parse_pattern_file_contents(
657 lines, 686 lines,
658 Path::new("file_path"), 687 Path::new("file_path"),
659 None, 688 None,
660 false 689 false,
690 true,
661 ) 691 )
662 .unwrap() 692 .unwrap()
663 .0, 693 .0,
664 vec![] 694 vec![]
665 ); 695 );
667 assert_eq!( 697 assert_eq!(
668 parse_pattern_file_contents( 698 parse_pattern_file_contents(
669 lines, 699 lines,
670 Path::new("file_path"), 700 Path::new("file_path"),
671 None, 701 None,
672 false 702 false,
703 true,
673 ) 704 )
674 .unwrap() 705 .unwrap()
675 .0, 706 .0,
676 vec![IgnorePattern::new( 707 vec![IgnorePattern::new(
677 PatternSyntax::RelGlob, 708 PatternSyntax::RelGlob,