diff rust/hg-core/src/utils/files.rs @ 44265:c18dd48cea4a

rust-pathauditor: add Rust implementation of the `pathauditor`. It does not offer the same flexibility as the Python implementation, but should check incoming paths just as well. Differential Revision: https://phab.mercurial-scm.org/D7866
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 05 Feb 2020 17:05:37 +0100
parents cf065c6a0197
children 0e9ac3968b56
line wrap: on
line diff
--- a/rust/hg-core/src/utils/files.rs	Wed Jan 22 03:17:06 2020 +0530
+++ b/rust/hg-core/src/utils/files.rs	Wed Feb 05 17:05:37 2020 +0100
@@ -12,6 +12,8 @@
 use crate::utils::hg_path::{HgPath, HgPathBuf};
 use std::iter::FusedIterator;
 
+use crate::utils::replace_slice;
+use lazy_static::lazy_static;
 use std::fs::Metadata;
 use std::path::Path;
 
@@ -85,6 +87,41 @@
     path.to_ascii_lowercase()
 }
 
+lazy_static! { // lazy_static: the table is initialised on first use
+    static ref IGNORED_CHARS: Vec<Vec<u8>> = { // UTF-8 bytes of each code point listed below, one Vec per code point
+        [
+            0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
+            0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
+        ]
+        .iter()
+        .map(|code| {
+            std::char::from_u32(*code)
+                .unwrap() // cannot fail: none of the literals above is a surrogate or out of range
+                .encode_utf8(&mut [0; 3]) // 3 bytes suffice: every code point above lies in U+0800..=U+FFFF
+                .bytes()
+                .collect()
+        })
+        .collect()
+    };
+}
+
+fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> { // returns an owned copy of `bytes` after the IGNORED_CHARS pass below
+    let mut buf = bytes.to_owned();
+    let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef'); // cheap pre-check: every IGNORED_CHARS entry encodes with lead byte 0xE2 or 0xEF
+    if needs_escaping {
+        for forbidden in IGNORED_CHARS.iter() {
+            replace_slice(&mut buf, forbidden, &[]) // NOTE(review): replace_slice is defined outside this excerpt - confirm it really removes matches when given an empty replacement
+        }
+        buf
+    } else {
+        buf // neither lead byte present: the copy is returned untouched
+    }
+}
+
+pub fn lower_clean(bytes: &[u8]) -> Vec<u8> { // ASCII-lowercases `bytes`, then passes the result through hfs_ignore_clean
+    hfs_ignore_clean(&bytes.to_ascii_lowercase()) // only ASCII 'A'..='Z' are folded; all other bytes are left unchanged
+}
+
 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
 pub struct HgMetadata {
     pub st_dev: u64,