comparison rust/hg-core/src/config/layer.rs @ 46187:95d6f31e88db

hg-core: add basic config module The config module exposes a `Config` struct, unused for now. It only reads the config file local to the repository, but handles all valid patterns and includes/unsets. It is structured in layers instead of erasing by reverse order of precedence, allowing us to transparently know more about the config for debugging purposes, and potentially other things I haven't thought about yet. This change also introduces `format_bytes!` to `hg-core`. Differential Revision: https://phab.mercurial-scm.org/D9408
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 29 Dec 2020 10:53:45 +0100
parents
children 2e2033081274
comparison
equal deleted inserted replaced
46186:5f27924a201d 46187:95d6f31e88db
1 // layer.rs
2 //
3 // Copyright 2020
4 // Valentin Gatien-Baron,
5 // Raphaël Gomès <rgomes@octobus.net>
6 //
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
9
10 use crate::utils::files::{
11 get_bytes_from_path, get_path_from_bytes, read_whole_file,
12 };
13 use format_bytes::format_bytes;
14 use lazy_static::lazy_static;
15 use regex::bytes::Regex;
16 use std::collections::HashMap;
17 use std::io;
18 use std::path::{Path, PathBuf};
19
// Line-oriented patterns used by `ConfigLayer::parse`. Each one is applied
// to a single line of the config file (the input is split on `\n` first).
lazy_static! {
    /// Section header such as `[ui]`; group 1 is the section name.
    static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
    /// `name = value` line; group 1 is the item name and group 2 the value
    /// with the whitespace around it removed (group 2 may be empty).
    static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
    /// Continuation line: leading whitespace followed by content.
    /// Group 1 is the content without leading/trailing whitespace.
    static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
    /// Line that starts with `;` or `#`, or that is empty or only whitespace
    static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
    /// Line that starts with `;` or `#`
    static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
    /// A directive that allows for removing previous entries
    /// Group 1 is the name of the item to remove.
    static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
    /// A directive that allows for including other config files
    /// Group 1 is the file name without leading/trailing whitespace.
    static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
}
32
/// All config values separated by layers of precedence.
/// Each config source may be split in multiple layers if `%include` directives
/// are used.
///
/// A layer holds the `section -> item -> value` entries read from a single
/// origin.
/// TODO detail the general precedence
#[derive(Clone)]
pub struct ConfigLayer {
    /// Mapping of the sections to their items
    sections: HashMap<Vec<u8>, ConfigItem>,
    /// All sections (and their items/values) in a layer share the same origin
    pub origin: ConfigOrigin,
    /// Whether this layer comes from a trusted user or group
    pub trusted: bool,
}
46
47 impl ConfigLayer {
48 pub fn new(origin: ConfigOrigin) -> Self {
49 ConfigLayer {
50 sections: HashMap::new(),
51 trusted: true, // TODO check
52 origin,
53 }
54 }
55
56 /// Add an entry to the config, overwriting the old one if already present.
57 pub fn add(
58 &mut self,
59 section: Vec<u8>,
60 item: Vec<u8>,
61 value: Vec<u8>,
62 line: Option<usize>,
63 ) {
64 self.sections
65 .entry(section)
66 .or_insert_with(|| HashMap::new())
67 .insert(item, ConfigValue { bytes: value, line });
68 }
69
70 /// Returns the config value in `<section>.<item>` if it exists
71 pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
72 Some(self.sections.get(section)?.get(item)?)
73 }
74
75 pub fn is_empty(&self) -> bool {
76 self.sections.is_empty()
77 }
78
79 /// Returns a `Vec` of layers in order of precedence (so, in read order),
80 /// recursively parsing the `%include` directives if any.
81 pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
82 let mut layers = vec![];
83
84 // Discard byte order mark if any
85 let data = if data.starts_with(b"\xef\xbb\xbf") {
86 &data[3..]
87 } else {
88 data
89 };
90
91 // TODO check if it's trusted
92 let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
93
94 let mut lines_iter =
95 data.split(|b| *b == b'\n').enumerate().peekable();
96 let mut section = b"".to_vec();
97
98 while let Some((index, bytes)) = lines_iter.next() {
99 if let Some(m) = INCLUDE_RE.captures(&bytes) {
100 let filename_bytes = &m[1];
101 let filename_to_include = get_path_from_bytes(&filename_bytes);
102 match read_include(&src, &filename_to_include) {
103 (include_src, Ok(data)) => {
104 layers.push(current_layer);
105 layers.extend(Self::parse(&include_src, &data)?);
106 current_layer =
107 Self::new(ConfigOrigin::File(src.to_owned()));
108 }
109 (_, Err(e)) => {
110 return Err(ConfigError::IncludeError {
111 path: filename_to_include.to_owned(),
112 io_error: e,
113 })
114 }
115 }
116 } else if let Some(_) = EMPTY_RE.captures(&bytes) {
117 } else if let Some(m) = SECTION_RE.captures(&bytes) {
118 section = m[1].to_vec();
119 } else if let Some(m) = ITEM_RE.captures(&bytes) {
120 let item = m[1].to_vec();
121 let mut value = m[2].to_vec();
122 loop {
123 match lines_iter.peek() {
124 None => break,
125 Some((_, v)) => {
126 if let Some(_) = COMMENT_RE.captures(&v) {
127 } else if let Some(_) = CONT_RE.captures(&v) {
128 value.extend(b"\n");
129 value.extend(&m[1]);
130 } else {
131 break;
132 }
133 }
134 };
135 lines_iter.next();
136 }
137 current_layer.add(
138 section.clone(),
139 item,
140 value,
141 Some(index + 1),
142 );
143 } else if let Some(m) = UNSET_RE.captures(&bytes) {
144 if let Some(map) = current_layer.sections.get_mut(&section) {
145 map.remove(&m[1]);
146 }
147 } else {
148 return Err(ConfigError::Parse {
149 origin: ConfigOrigin::File(src.to_owned()),
150 line: Some(index + 1),
151 bytes: bytes.to_owned(),
152 });
153 }
154 }
155 if !current_layer.is_empty() {
156 layers.push(current_layer);
157 }
158 Ok(layers)
159 }
160 }
161
162 impl std::fmt::Debug for ConfigLayer {
163 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164 let mut sections: Vec<_> = self.sections.iter().collect();
165 sections.sort_by(|e0, e1| e0.0.cmp(e1.0));
166
167 for (section, items) in sections.into_iter() {
168 let mut items: Vec<_> = items.into_iter().collect();
169 items.sort_by(|e0, e1| e0.0.cmp(e1.0));
170
171 for (item, config_entry) in items {
172 writeln!(
173 f,
174 "{}",
175 String::from_utf8_lossy(&format_bytes!(
176 b"{}.{}={} # {}",
177 section,
178 item,
179 &config_entry.bytes,
180 &self.origin.to_bytes(),
181 ))
182 )?
183 }
184 }
185 Ok(())
186 }
187 }
188
/// Mapping of the item names of one section to their values.
/// In the following:
/// ```text
/// [ui]
/// paginate=no
/// ```
/// "paginate" is the section item and "no" the value.
pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
197
/// A single config value together with the line it was defined on, if any.
#[derive(Clone, Debug, PartialEq)]
pub struct ConfigValue {
    /// The raw bytes of the value (be it from the CLI, env or from a file)
    pub bytes: Vec<u8>,
    /// Only present if the value comes from a file, 1-indexed.
    /// For a multi-line value this is the line the item starts on.
    pub line: Option<usize>,
}
205
/// Where the values of a config layer come from.
#[derive(Clone, Debug)]
pub enum ConfigOrigin {
    /// The value comes from a configuration file
    /// (the payload is the path of that file)
    File(PathBuf),
    /// The value comes from the environment like `$PAGER` or `$EDITOR`
    /// (presumably the payload names the variable; verify against callers)
    Environment(Vec<u8>),
    /* TODO cli
     * TODO defaults (configitems.py)
     * TODO extensions
     * TODO Python resources?
     * Others? */
}
218
219 impl ConfigOrigin {
220 /// TODO use some kind of dedicated trait?
221 pub fn to_bytes(&self) -> Vec<u8> {
222 match self {
223 ConfigOrigin::File(p) => get_bytes_from_path(p),
224 ConfigOrigin::Environment(e) => e.to_owned(),
225 }
226 }
227 }
228
/// Errors that can happen while reading a configuration.
#[derive(Debug)]
pub enum ConfigError {
    /// A line could not be recognized as a directive, a comment, an empty
    /// line, a section header or an item.
    Parse {
        /// Where the offending line comes from
        origin: ConfigOrigin,
        /// 1-indexed line number of the offending line, if known
        line: Option<usize>,
        /// Raw content of the offending line
        bytes: Vec<u8>,
    },
    /// Failed to include a sub config file
    IncludeError {
        /// File name as written in the `%include` directive
        path: PathBuf,
        /// The error returned when reading the included file
        io_error: std::io::Error,
    },
    /// Any IO error that isn't expected
    IO(std::io::Error),
}
244
245 impl From<std::io::Error> for ConfigError {
246 fn from(e: std::io::Error) -> Self {
247 Self::IO(e)
248 }
249 }
250
251 fn make_regex(pattern: &'static str) -> Regex {
252 Regex::new(pattern).expect("expected a valid regex")
253 }
254
255 /// Includes are relative to the file they're defined in, unless they're
256 /// absolute.
257 fn read_include(
258 old_src: &Path,
259 new_src: &Path,
260 ) -> (PathBuf, io::Result<Vec<u8>>) {
261 if new_src.is_absolute() {
262 (new_src.to_path_buf(), read_whole_file(&new_src))
263 } else {
264 let dir = old_src.parent().unwrap();
265 let new_src = dir.join(&new_src);
266 (new_src.to_owned(), read_whole_file(&new_src))
267 }
268 }