hg-core: add basic config module
The config module exposes a `Config` struct, unused for now.
It only reads the config file local to the repository, but handles all valid
patterns and includes/unsets.
It is structured in layers instead of erasing by reverse order of precedence,
allowing us to transparently know more about the config for debugging purposes,
and potentially other things I haven't thought about yet.
This change also introduces `format_bytes!` to `hg-core`.
Differential Revision: https://phab.mercurial-scm.org/D9408
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config.rs Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,14 @@
+// config.rs
+//
+// Copyright 2020
+// Valentin Gatien-Baron,
+// Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Mercurial config parsing and interfaces.
+
+mod config;
+mod layer;
+pub use config::Config;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config/config.rs Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,197 @@
+// config.rs
+//
+// Copyright 2020
+// Valentin Gatien-Baron,
+// Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use super::layer;
+use crate::config::layer::{ConfigError, ConfigLayer, ConfigValue};
+use std::path::PathBuf;
+
+use crate::operations::find_root;
+use crate::utils::files::read_whole_file;
+
+/// Holds the config values for the current repository as a stack of layers
+/// TODO update this docstring once we support more sources
+pub struct Config {
+ layers: Vec<layer::ConfigLayer>, // lookups search this back to front
+}
+
+impl std::fmt::Debug for Config {
+    /// Dumps every layer under a numbered banner, last-loaded layer first.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.layers.iter().rev().enumerate().try_for_each(|(rank, entry)| {
+            write!(
+                f,
+                "==== Layer {} (trusted: {}) ====\n{:?}",
+                rank, entry.trusted, entry
+            )
+        })
+    }
+}
+
+pub enum ConfigSource {
+ /// Absolute path to a config file (skipped at load time if unreadable)
+ AbsPath(PathBuf),
+ /// Already parsed (from the CLI, env, Python resources, etc.); used as-is
+ Parsed(layer::ConfigLayer),
+}
+
+pub fn parse_bool(v: &[u8]) -> Option<bool> {
+    Some(match &v.to_ascii_lowercase()[..] { // ASCII case-insensitive
+        b"1" | b"yes" | b"true" | b"on" | b"always" => true,
+        b"0" | b"no" | b"false" | b"off" | b"never" => false,
+        _ => return None, // anything else is not a boolean spelling
+    })
+}
+
+impl Config {
+    /// Builds a `Config` from `sources`, keeping their order: layers from
+    /// later sources take precedence. Unreadable files are skipped.
+    pub fn load_from_explicit_sources(
+        sources: Vec<ConfigSource>,
+    ) -> Result<Self, ConfigError> {
+        let mut layers = Vec::new();
+
+        for source in sources {
+            let path = match source {
+                ConfigSource::Parsed(layer) => {
+                    layers.push(layer);
+                    continue;
+                }
+                ConfigSource::AbsPath(path) => path,
+            };
+            // TODO check if it should be trusted
+            // mercurial/ui.py:427
+            if let Ok(data) = read_whole_file(&path) {
+                layers.extend(ConfigLayer::parse(&path, &data)?);
+            } // an unreadable file is skipped, same as the python code
+        }
+
+        Ok(Config { layers })
+    }
+
+    /// Loads the repository-local `.hg/hgrc`. In a future version, this
+    /// will also load `$HOME/.hgrc` and more to mirror the Python code.
+    pub fn load() -> Result<Self, ConfigError> {
+        // NOTE: still panics when `find_root` fails, no error variant fits.
+        let hgrc = find_root().unwrap().join(".hg/hgrc");
+        let sources = vec![ConfigSource::AbsPath(hgrc)];
+        Self::load_from_explicit_sources(sources)
+    }
+
+    /// Looks up `section.item` and parses it as a boolean.
+    /// Returns `Ok(None)` when no trusted layer defines the item, and an `Err`
+    /// when the first value found is not a valid boolean.
+    pub fn get_option(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<Option<bool>, ConfigError> {
+        match self.get_inner(section, item) {
+            None => Ok(None),
+            Some((layer, v)) => match parse_bool(&v.bytes) {
+                Some(b) => Ok(Some(b)),
+                None => Err(ConfigError::Parse {
+                    origin: layer.origin.to_owned(),
+                    line: v.line,
+                    bytes: v.bytes.to_owned(),
+                }),
+            },
+        }
+    }
+
+    /// Returns the corresponding boolean in the config. Returns `Ok(false)`
+    /// if the value is not found, an `Err` if it's not a valid boolean.
+    pub fn get_bool(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<bool, ConfigError> {
+        Ok(self.get_option(section, item)?.unwrap_or(false))
+    }
+
+    /// Returns the raw value bytes of the first one found, or `None`.
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
+        let (_, value) = self.get_inner(section, item)?;
+        Some(value.bytes.as_slice())
+    }
+
+    /// Returns the first trusted layer (searching from the last one loaded)
+    /// that defines `section.item`, together with the value, or `None`.
+    fn get_inner(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Option<(&ConfigLayer, &ConfigValue)> {
+        self.layers
+            .iter()
+            .rev()
+            // Untrusted layers never contribute a value.
+            .filter(|layer| layer.trusted)
+            .find_map(|layer| {
+                layer.get(section, item).map(|value| (layer, value))
+            })
+    }
+
+    /// Get raw values bytes from all layers (even untrusted ones) in order
+    /// of precedence.
+    #[cfg(test)]
+    fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
+        // Walk back to front: the last layer loaded has the highest precedence.
+        self.layers
+            .iter()
+            .rev()
+            .filter_map(|layer| layer.get(section, item))
+            .map(|value| value.bytes.as_slice())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use pretty_assertions::assert_eq;
+ use std::fs::File;
+ use std::io::Write;
+
+ #[test]
+ fn test_include_layer_ordering() { // base.rc sets the item before and after including included.rc
+ let tmpdir = tempfile::tempdir().unwrap();
+ let tmpdir_path = tmpdir.path();
+ let mut included_file =
+ File::create(&tmpdir_path.join("included.rc")).unwrap();
+
+ included_file.write_all(b"[section]\nitem=value1").unwrap();
+ let base_config_path = tmpdir_path.join("base.rc");
+ let mut config_file = File::create(&base_config_path).unwrap();
+ let data =
+ b"[section]\nitem=value0\n%include included.rc\nitem=value2";
+ config_file.write_all(data).unwrap();
+
+ let sources = vec![ConfigSource::AbsPath(base_config_path)];
+ let config = Config::load_from_explicit_sources(sources)
+ .expect("expected valid config");
+
+ dbg!(&config);
+
+ let (_, value) = config.get_inner(b"section", b"item").unwrap();
+ assert_eq!(
+ value,
+ &ConfigValue {
+ bytes: b"value2".to_vec(),
+ line: Some(4) // "item=value2" is the 4th line of base.rc
+ }
+ );
+
+ let value = config.get(b"section", b"item").unwrap();
+ assert_eq!(value, b"value2",);
+ assert_eq!(
+ config.get_all(b"section", b"item"),
+ [b"value2", b"value1", b"value0"] // highest precedence first
+ );
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config/layer.rs Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,268 @@
+// layer.rs
+//
+// Copyright 2020
+// Valentin Gatien-Baron,
+// Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use crate::utils::files::{
+ get_bytes_from_path, get_path_from_bytes, read_whole_file,
+};
+use format_bytes::format_bytes;
+use lazy_static::lazy_static;
+use regex::bytes::Regex;
+use std::collections::HashMap;
+use std::io;
+use std::path::{Path, PathBuf};
+
+lazy_static! {
+ static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]"); // `[name]` header, group 1 is the name
+ static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)"); // `item = value`, groups 1 and 2
+ /// A continuation line: leading whitespace, then the text kept in group 1
+ static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
+ static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)"); // comment line or whitespace-only line
+ static ref COMMENT_RE: Regex = make_regex(r"^(;|#)"); // line starting with `;` or `#`
+ /// A directive that allows for removing previous entries
+ static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
+ /// A directive that allows for including other config files
+ static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
+}
+
+/// One layer of config values; a config is made of layers of precedence.
+/// Each config source may be split in multiple layers if `%include` directives
+/// are used.
+/// TODO detail the general precedence
+#[derive(Clone)]
+pub struct ConfigLayer {
+ /// Mapping of the section names to their items
+ sections: HashMap<Vec<u8>, ConfigItem>,
+ /// All sections (and their items/values) in a layer share the same origin
+ pub origin: ConfigOrigin,
+ /// Whether this layer comes from a trusted user or group (`true` for now)
+ pub trusted: bool,
+}
+
+impl ConfigLayer {
+    /// Creates an empty layer for `origin`, trusted by default for now.
+    pub fn new(origin: ConfigOrigin) -> Self {
+        let sections = HashMap::new();
+        // TODO check
+        let trusted = true;
+        Self { sections, origin, trusted }
+    }
+
+    /// Add an entry to the config, overwriting the old one if already present.
+    pub fn add(
+        &mut self,
+        section: Vec<u8>,
+        item: Vec<u8>,
+        value: Vec<u8>,
+        line: Option<usize>,
+    ) {
+        let entry = ConfigValue { bytes: value, line };
+        // The section's item map is created on first use.
+        let items = self.sections.entry(section).or_insert_with(HashMap::new);
+        items.insert(item, entry);
+    }
+
+    /// Returns the config value in `<section>.<item>` if it exists
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
+        self.sections.get(section)?.get(item)
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.sections.is_empty() // a section emptied by `%unset` still counts
+    }
+
+    /// Returns a `Vec` of layers in order of precedence (so, in read order),
+    /// recursively parsing the `%include` directives if any.
+    pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
+        let mut layers = vec![];
+
+        // Discard byte order mark if any
+        let data = if data.starts_with(b"\xef\xbb\xbf") {
+            &data[3..]
+        } else {
+            data
+        };
+
+        // TODO check if it's trusted
+        let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
+
+        let mut lines_iter =
+            data.split(|b| *b == b'\n').enumerate().peekable();
+        let mut section = b"".to_vec();
+
+        while let Some((index, bytes)) = lines_iter.next() {
+            if let Some(m) = INCLUDE_RE.captures(&bytes) {
+                let filename_bytes = &m[1];
+                let filename_to_include = get_path_from_bytes(&filename_bytes);
+                match read_include(&src, &filename_to_include) {
+                    (include_src, Ok(data)) => {
+                        layers.push(current_layer); // close the layer read so far
+                        layers.extend(Self::parse(&include_src, &data)?);
+                        current_layer = // later entries go in a fresh layer
+                            Self::new(ConfigOrigin::File(src.to_owned()));
+                    }
+                    (_, Err(e)) => {
+                        return Err(ConfigError::IncludeError {
+                            path: filename_to_include.to_owned(),
+                            io_error: e,
+                        })
+                    }
+                }
+            } else if EMPTY_RE.captures(&bytes).is_some() { // blank or comment
+            } else if let Some(m) = SECTION_RE.captures(&bytes) {
+                section = m[1].to_vec();
+            } else if let Some(m) = ITEM_RE.captures(&bytes) {
+                let item = m[1].to_vec();
+                let mut value = m[2].to_vec();
+                loop { // consume the continuation/comment lines that follow
+                    match lines_iter.peek() {
+                        None => break,
+                        Some((_, v)) => {
+                            if COMMENT_RE.captures(&v).is_some() {
+                            } else if let Some(cont) = CONT_RE.captures(&v) {
+                                value.extend(b"\n");
+                                value.extend(&cont[1]); // the continued text
+                            } else {
+                                break;
+                            }
+                        }
+                    };
+                    lines_iter.next();
+                }
+                current_layer.add(
+                    section.clone(),
+                    item,
+                    value,
+                    Some(index + 1),
+                );
+            } else if let Some(m) = UNSET_RE.captures(&bytes) {
+                if let Some(map) = current_layer.sections.get_mut(&section) {
+                    map.remove(&m[1]); // only the layer being built is affected
+                }
+            } else { // unrecognised syntax
+                return Err(ConfigError::Parse {
+                    origin: ConfigOrigin::File(src.to_owned()),
+                    line: Some(index + 1),
+                    bytes: bytes.to_owned(),
+                });
+            }
+        }
+        if !current_layer.is_empty() {
+            layers.push(current_layer);
+        }
+        Ok(layers)
+    }
+}
+
+impl std::fmt::Debug for ConfigLayer {
+    /// Writes one `section.item=value # origin` line per entry, with
+    /// sections and items sorted so the output is deterministic.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let origin = self.origin.to_bytes();
+        let mut sections: Vec<_> = self.sections.iter().collect();
+        sections.sort_by_key(|(name, _)| *name);
+
+        for (section, items) in sections {
+            let mut items: Vec<_> = items.iter().collect();
+            items.sort_by_key(|(name, _)| *name);
+
+            for (item, config_entry) in items {
+                let line = format_bytes!(
+                    b"{}.{}={} # {}",
+                    section,
+                    item,
+                    &config_entry.bytes,
+                    &origin,
+                );
+                writeln!(f, "{}", String::from_utf8_lossy(&line))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Mapping of section item to value.
+/// In the following:
+/// ```text
+/// [ui]
+/// paginate=no
+/// ```
+/// "paginate" is the section item and "no" the value.
+pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct ConfigValue {
+ /// The raw bytes of the value (be it from the CLI, env or from a file)
+ pub bytes: Vec<u8>,
+ /// Only present if the value comes from a file, 1-indexed.
+ pub line: Option<usize>,
+}
+
+#[derive(Clone, Debug)]
+pub enum ConfigOrigin {
+ /// The value comes from a configuration file
+ File(PathBuf),
+ /// The value comes from the environment like `$PAGER` or `$EDITOR`
+ Environment(Vec<u8>),
+ /* TODO cli
+ * TODO defaults (configitems.py)
+ * TODO extensions
+ * TODO Python resources?
+ * Others? */
+}
+
+impl ConfigOrigin {
+    /// Path or environment bytes of this origin. TODO use a dedicated trait?
+    pub fn to_bytes(&self) -> Vec<u8> {
+        match self {
+            Self::Environment(raw) => raw.clone(),
+            Self::File(path) => get_bytes_from_path(path),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum ConfigError {
+ Parse { // a config line or value that could not be interpreted
+ origin: ConfigOrigin, // where the offending bytes come from
+ line: Option<usize>, // 1-indexed line, when known
+ bytes: Vec<u8>, // the offending line or value
+ },
+ /// Failed to include a sub config file
+ IncludeError {
+ path: PathBuf, // include target as written in the directive
+ io_error: std::io::Error,
+ },
+ /// Any IO error that isn't expected
+ IO(std::io::Error),
+}
+
+impl From<std::io::Error> for ConfigError {
+ fn from(e: std::io::Error) -> Self {
+ Self::IO(e) // wrapped unchanged in the catch-all IO variant
+ }
+}
+
+fn make_regex(pattern: &'static str) -> Regex {
+ Regex::new(pattern).expect("expected a valid regex") // panics on an invalid pattern; callers pass hard-coded literals
+}
+
+/// Includes are relative to the file they're defined in, unless they're
+/// absolute. Returns the resolved path along with the result of reading it.
+fn read_include(
+    old_src: &Path,
+    new_src: &Path,
+) -> (PathBuf, io::Result<Vec<u8>>) {
+    // `join` yields `new_src` itself when it is absolute; a source without a
+    // parent directory uses the include path as given instead of panicking.
+    let resolved = old_src
+        .parent()
+        .map_or_else(|| new_src.to_path_buf(), |dir| dir.join(new_src));
+    let data = read_whole_file(&resolved);
+    (resolved, data)
+}
--- a/rust/hg-core/src/lib.rs Mon Dec 14 12:08:56 2020 +0100
+++ b/rust/hg-core/src/lib.rs Tue Dec 29 10:53:45 2020 +0100
@@ -26,6 +26,7 @@
pub mod repo;
pub mod revlog;
pub use revlog::*;
+pub mod config;
pub mod operations;
pub mod utils;
--- a/rust/hg-core/src/utils/files.rs Mon Dec 14 12:08:56 2020 +0100
+++ b/rust/hg-core/src/utils/files.rs Tue Dec 29 10:53:45 2020 +0100
@@ -18,6 +18,7 @@
use same_file::is_same_file;
use std::borrow::{Cow, ToOwned};
use std::fs::Metadata;
+use std::io::Read;
use std::iter::FusedIterator;
use std::ops::Deref;
use std::path::{Path, PathBuf};
@@ -308,6 +309,17 @@
}
}
+/// Reads a file in one big chunk instead of doing multiple reads
+pub fn read_whole_file(filepath: &Path) -> std::io::Result<Vec<u8>> {
+    let mut file = std::fs::File::open(filepath)?;
+    // The reported size only pre-sizes the buffer (a truncating cast is
+    // harmless here): `read_to_end` returns the real contents even if the
+    // file grows, shrinks or reports a length of zero.
+    let mut res = Vec::with_capacity(file.metadata()?.len() as usize);
+    file.read_to_end(&mut res)?;
+    Ok(res)
+}
+
#[cfg(test)]
mod tests {
use super::*;