Mercurial > hg
comparison rust/hg-core/src/config/layer.rs @ 46187:95d6f31e88db
hg-core: add basic config module
The config module exposes a `Config` struct, unused for now.
It only reads the config file local to the repository, but handles all valid
patterns and includes/unsets.
It is structured in layers instead of erasing by reverse order of precedence,
allowing us to transparently know more about the config for debugging purposes,
and potentially other things I haven't thought about yet.
This change also introduces `format_bytes!` to `hg-core`.
Differential Revision: https://phab.mercurial-scm.org/D9408
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 29 Dec 2020 10:53:45 +0100 |
parents | |
children | 2e2033081274 |
comparison
equal
deleted
inserted
replaced
46186:5f27924a201d | 46187:95d6f31e88db |
---|---|
1 // layer.rs | |
2 // | |
3 // Copyright 2020 | |
4 // Valentin Gatien-Baron, | |
5 // Raphaël Gomès <rgomes@octobus.net> | |
6 // | |
7 // This software may be used and distributed according to the terms of the | |
8 // GNU General Public License version 2 or any later version. | |
9 | |
10 use crate::utils::files::{ | |
11 get_bytes_from_path, get_path_from_bytes, read_whole_file, | |
12 }; | |
13 use format_bytes::format_bytes; | |
14 use lazy_static::lazy_static; | |
15 use regex::bytes::Regex; | |
16 use std::collections::HashMap; | |
17 use std::io; | |
18 use std::path::{Path, PathBuf}; | |
19 | |
20 lazy_static! { | |
21 static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]"); | |
22 static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)"); | |
23 /// Continuation whitespace | |
24 static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$"); | |
25 static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)"); | |
26 static ref COMMENT_RE: Regex = make_regex(r"^(;|#)"); | |
27 /// A directive that allows for removing previous entries | |
28 static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)"); | |
29 /// A directive that allows for including other config files | |
30 static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$"); | |
31 } | |
32 | |
33 /// All config values separated by layers of precedence. | |
34 /// Each config source may be split in multiple layers if `%include` directives | |
35 /// are used. | |
36 /// TODO detail the general precedence | |
37 #[derive(Clone)] | |
38 pub struct ConfigLayer { | |
39 /// Mapping of the sections to their items | |
40 sections: HashMap<Vec<u8>, ConfigItem>, | |
41 /// All sections (and their items/values) in a layer share the same origin | |
42 pub origin: ConfigOrigin, | |
43 /// Whether this layer comes from a trusted user or group | |
44 pub trusted: bool, | |
45 } | |
46 | |
47 impl ConfigLayer { | |
48 pub fn new(origin: ConfigOrigin) -> Self { | |
49 ConfigLayer { | |
50 sections: HashMap::new(), | |
51 trusted: true, // TODO check | |
52 origin, | |
53 } | |
54 } | |
55 | |
56 /// Add an entry to the config, overwriting the old one if already present. | |
57 pub fn add( | |
58 &mut self, | |
59 section: Vec<u8>, | |
60 item: Vec<u8>, | |
61 value: Vec<u8>, | |
62 line: Option<usize>, | |
63 ) { | |
64 self.sections | |
65 .entry(section) | |
66 .or_insert_with(|| HashMap::new()) | |
67 .insert(item, ConfigValue { bytes: value, line }); | |
68 } | |
69 | |
70 /// Returns the config value in `<section>.<item>` if it exists | |
71 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> { | |
72 Some(self.sections.get(section)?.get(item)?) | |
73 } | |
74 | |
75 pub fn is_empty(&self) -> bool { | |
76 self.sections.is_empty() | |
77 } | |
78 | |
79 /// Returns a `Vec` of layers in order of precedence (so, in read order), | |
80 /// recursively parsing the `%include` directives if any. | |
81 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> { | |
82 let mut layers = vec![]; | |
83 | |
84 // Discard byte order mark if any | |
85 let data = if data.starts_with(b"\xef\xbb\xbf") { | |
86 &data[3..] | |
87 } else { | |
88 data | |
89 }; | |
90 | |
91 // TODO check if it's trusted | |
92 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned())); | |
93 | |
94 let mut lines_iter = | |
95 data.split(|b| *b == b'\n').enumerate().peekable(); | |
96 let mut section = b"".to_vec(); | |
97 | |
98 while let Some((index, bytes)) = lines_iter.next() { | |
99 if let Some(m) = INCLUDE_RE.captures(&bytes) { | |
100 let filename_bytes = &m[1]; | |
101 let filename_to_include = get_path_from_bytes(&filename_bytes); | |
102 match read_include(&src, &filename_to_include) { | |
103 (include_src, Ok(data)) => { | |
104 layers.push(current_layer); | |
105 layers.extend(Self::parse(&include_src, &data)?); | |
106 current_layer = | |
107 Self::new(ConfigOrigin::File(src.to_owned())); | |
108 } | |
109 (_, Err(e)) => { | |
110 return Err(ConfigError::IncludeError { | |
111 path: filename_to_include.to_owned(), | |
112 io_error: e, | |
113 }) | |
114 } | |
115 } | |
116 } else if let Some(_) = EMPTY_RE.captures(&bytes) { | |
117 } else if let Some(m) = SECTION_RE.captures(&bytes) { | |
118 section = m[1].to_vec(); | |
119 } else if let Some(m) = ITEM_RE.captures(&bytes) { | |
120 let item = m[1].to_vec(); | |
121 let mut value = m[2].to_vec(); | |
122 loop { | |
123 match lines_iter.peek() { | |
124 None => break, | |
125 Some((_, v)) => { | |
126 if let Some(_) = COMMENT_RE.captures(&v) { | |
127 } else if let Some(_) = CONT_RE.captures(&v) { | |
128 value.extend(b"\n"); | |
129 value.extend(&m[1]); | |
130 } else { | |
131 break; | |
132 } | |
133 } | |
134 }; | |
135 lines_iter.next(); | |
136 } | |
137 current_layer.add( | |
138 section.clone(), | |
139 item, | |
140 value, | |
141 Some(index + 1), | |
142 ); | |
143 } else if let Some(m) = UNSET_RE.captures(&bytes) { | |
144 if let Some(map) = current_layer.sections.get_mut(§ion) { | |
145 map.remove(&m[1]); | |
146 } | |
147 } else { | |
148 return Err(ConfigError::Parse { | |
149 origin: ConfigOrigin::File(src.to_owned()), | |
150 line: Some(index + 1), | |
151 bytes: bytes.to_owned(), | |
152 }); | |
153 } | |
154 } | |
155 if !current_layer.is_empty() { | |
156 layers.push(current_layer); | |
157 } | |
158 Ok(layers) | |
159 } | |
160 } | |
161 | |
162 impl std::fmt::Debug for ConfigLayer { | |
163 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
164 let mut sections: Vec<_> = self.sections.iter().collect(); | |
165 sections.sort_by(|e0, e1| e0.0.cmp(e1.0)); | |
166 | |
167 for (section, items) in sections.into_iter() { | |
168 let mut items: Vec<_> = items.into_iter().collect(); | |
169 items.sort_by(|e0, e1| e0.0.cmp(e1.0)); | |
170 | |
171 for (item, config_entry) in items { | |
172 writeln!( | |
173 f, | |
174 "{}", | |
175 String::from_utf8_lossy(&format_bytes!( | |
176 b"{}.{}={} # {}", | |
177 section, | |
178 item, | |
179 &config_entry.bytes, | |
180 &self.origin.to_bytes(), | |
181 )) | |
182 )? | |
183 } | |
184 } | |
185 Ok(()) | |
186 } | |
187 } | |
188 | |
189 /// Mapping of section item to value. | |
190 /// In the following: | |
191 /// ```text | |
192 /// [ui] | |
193 /// paginate=no | |
194 /// ``` | |
195 /// "paginate" is the section item and "no" the value. | |
196 pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>; | |
197 | |
/// A single config value together with the place it was read from.
#[derive(Clone, Debug, PartialEq)]
pub struct ConfigValue {
    /// The value as raw bytes, whether it comes from the CLI, the
    /// environment or a file
    pub bytes: Vec<u8>,
    /// 1-indexed line number; only present if the value comes from a file.
    pub line: Option<usize>,
}
205 | |
/// Where a config layer (and so every value in it) comes from.
#[derive(Clone, Debug)]
pub enum ConfigOrigin {
    /// A configuration file, identified by its path
    File(PathBuf),
    /// The environment, like `$PAGER` or `$EDITOR`
    Environment(Vec<u8>),
    /* TODO cli
     * TODO defaults (configitems.py)
     * TODO extensions
     * TODO Python resources?
     * Others? */
}
218 | |
219 impl ConfigOrigin { | |
220 /// TODO use some kind of dedicated trait? | |
221 pub fn to_bytes(&self) -> Vec<u8> { | |
222 match self { | |
223 ConfigOrigin::File(p) => get_bytes_from_path(p), | |
224 ConfigOrigin::Environment(e) => e.to_owned(), | |
225 } | |
226 } | |
227 } | |
228 | |
229 #[derive(Debug)] | |
230 pub enum ConfigError { | |
231 Parse { | |
232 origin: ConfigOrigin, | |
233 line: Option<usize>, | |
234 bytes: Vec<u8>, | |
235 }, | |
236 /// Failed to include a sub config file | |
237 IncludeError { | |
238 path: PathBuf, | |
239 io_error: std::io::Error, | |
240 }, | |
241 /// Any IO error that isn't expected | |
242 IO(std::io::Error), | |
243 } | |
244 | |
245 impl From<std::io::Error> for ConfigError { | |
246 fn from(e: std::io::Error) -> Self { | |
247 Self::IO(e) | |
248 } | |
249 } | |
250 | |
251 fn make_regex(pattern: &'static str) -> Regex { | |
252 Regex::new(pattern).expect("expected a valid regex") | |
253 } | |
254 | |
255 /// Includes are relative to the file they're defined in, unless they're | |
256 /// absolute. | |
257 fn read_include( | |
258 old_src: &Path, | |
259 new_src: &Path, | |
260 ) -> (PathBuf, io::Result<Vec<u8>>) { | |
261 if new_src.is_absolute() { | |
262 (new_src.to_path_buf(), read_whole_file(&new_src)) | |
263 } else { | |
264 let dir = old_src.parent().unwrap(); | |
265 let new_src = dir.join(&new_src); | |
266 (new_src.to_owned(), read_whole_file(&new_src)) | |
267 } | |
268 } |