parselist: move the function from config to stringutil
We move the function to a lower-level module to avoid an import cycle. It now
lives next to `parsebool`, which had to migrate for the same reason.
Differential Revision: https://phab.mercurial-scm.org/D10449
--- a/mercurial/config.py Fri Apr 16 02:14:21 2021 +0200
+++ b/mercurial/config.py Fri Apr 16 01:18:28 2021 +0200
@@ -258,93 +258,3 @@
self.parse(
path, fp.read(), sections=sections, remap=remap, include=include
)
-
-
-def parselist(value):
- """parse a configuration value as a list of comma/space separated strings
-
- >>> parselist(b'this,is "a small" ,test')
- ['this', 'is', 'a small', 'test']
- """
-
- def _parse_plain(parts, s, offset):
- whitespace = False
- while offset < len(s) and (
- s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
- ):
- whitespace = True
- offset += 1
- if offset >= len(s):
- return None, parts, offset
- if whitespace:
- parts.append(b'')
- if s[offset : offset + 1] == b'"' and not parts[-1]:
- return _parse_quote, parts, offset + 1
- elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
- parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
- return _parse_plain, parts, offset + 1
- parts[-1] += s[offset : offset + 1]
- return _parse_plain, parts, offset + 1
-
- def _parse_quote(parts, s, offset):
- if offset < len(s) and s[offset : offset + 1] == b'"': # ""
- parts.append(b'')
- offset += 1
- while offset < len(s) and (
- s[offset : offset + 1].isspace()
- or s[offset : offset + 1] == b','
- ):
- offset += 1
- return _parse_plain, parts, offset
-
- while offset < len(s) and s[offset : offset + 1] != b'"':
- if (
- s[offset : offset + 1] == b'\\'
- and offset + 1 < len(s)
- and s[offset + 1 : offset + 2] == b'"'
- ):
- offset += 1
- parts[-1] += b'"'
- else:
- parts[-1] += s[offset : offset + 1]
- offset += 1
-
- if offset >= len(s):
- real_parts = _configlist(parts[-1])
- if not real_parts:
- parts[-1] = b'"'
- else:
- real_parts[0] = b'"' + real_parts[0]
- parts = parts[:-1]
- parts.extend(real_parts)
- return None, parts, offset
-
- offset += 1
- while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
- offset += 1
-
- if offset < len(s):
- if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
- parts[-1] += b'"'
- offset += 1
- else:
- parts.append(b'')
- else:
- return None, parts, offset
-
- return _parse_plain, parts, offset
-
- def _configlist(s):
- s = s.rstrip(b' ,')
- if not s:
- return []
- parser, parts, offset = _parse_plain, [b''], 0
- while parser:
- parser, parts, offset = parser(parts, s, offset)
- return parts
-
- if value is not None and isinstance(value, bytes):
- result = _configlist(value.lstrip(b' ,\n'))
- else:
- result = value
- return result or []
--- a/mercurial/ui.py Fri Apr 16 02:14:21 2021 +0200
+++ b/mercurial/ui.py Fri Apr 16 01:18:28 2021 +0200
@@ -887,10 +887,10 @@
"""
# default is not always a list
v = self.configwith(
- config.parselist, section, name, default, b'list', untrusted
+ stringutil.parselist, section, name, default, b'list', untrusted
)
if isinstance(v, bytes):
- return config.parselist(v)
+ return stringutil.parselist(v)
elif v is None:
return []
return v
--- a/mercurial/utils/stringutil.py Fri Apr 16 02:14:21 2021 +0200
+++ b/mercurial/utils/stringutil.py Fri Apr 16 01:18:28 2021 +0200
@@ -868,6 +868,96 @@
return _booleans.get(s.lower(), None)
+def parselist(value):
+ """parse a configuration value as a list of comma/space separated strings
+
+ >>> parselist(b'this,is "a small" ,test')
+ ['this', 'is', 'a small', 'test']
+ """
+
+ def _parse_plain(parts, s, offset):
+ whitespace = False
+ while offset < len(s) and (
+ s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
+ ):
+ whitespace = True
+ offset += 1
+ if offset >= len(s):
+ return None, parts, offset
+ if whitespace:
+ parts.append(b'')
+ if s[offset : offset + 1] == b'"' and not parts[-1]:
+ return _parse_quote, parts, offset + 1
+ elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
+ parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
+ return _parse_plain, parts, offset + 1
+ parts[-1] += s[offset : offset + 1]
+ return _parse_plain, parts, offset + 1
+
+ def _parse_quote(parts, s, offset):
+ if offset < len(s) and s[offset : offset + 1] == b'"': # ""
+ parts.append(b'')
+ offset += 1
+ while offset < len(s) and (
+ s[offset : offset + 1].isspace()
+ or s[offset : offset + 1] == b','
+ ):
+ offset += 1
+ return _parse_plain, parts, offset
+
+ while offset < len(s) and s[offset : offset + 1] != b'"':
+ if (
+ s[offset : offset + 1] == b'\\'
+ and offset + 1 < len(s)
+ and s[offset + 1 : offset + 2] == b'"'
+ ):
+ offset += 1
+ parts[-1] += b'"'
+ else:
+ parts[-1] += s[offset : offset + 1]
+ offset += 1
+
+ if offset >= len(s):
+ real_parts = _configlist(parts[-1])
+ if not real_parts:
+ parts[-1] = b'"'
+ else:
+ real_parts[0] = b'"' + real_parts[0]
+ parts = parts[:-1]
+ parts.extend(real_parts)
+ return None, parts, offset
+
+ offset += 1
+ while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
+ offset += 1
+
+ if offset < len(s):
+ if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
+ parts[-1] += b'"'
+ offset += 1
+ else:
+ parts.append(b'')
+ else:
+ return None, parts, offset
+
+ return _parse_plain, parts, offset
+
+ def _configlist(s):
+ s = s.rstrip(b' ,')
+ if not s:
+ return []
+ parser, parts, offset = _parse_plain, [b''], 0
+ while parser:
+ parser, parts, offset = parser(parts, s, offset)
+ return parts
+
+ if value is not None and isinstance(value, bytes):
+ result = _configlist(value.lstrip(b' ,\n'))
+ else:
+ result = value
+ return result or []
+
+
def evalpythonliteral(s):
"""Evaluate a string containing a Python literal expression"""
# We could backport our tokenizer hack to rewrite '' to u'' if we want
--- a/rust/hg-core/src/config/config.rs Fri Apr 16 02:14:21 2021 +0200
+++ b/rust/hg-core/src/config/config.rs Fri Apr 16 01:18:28 2021 +0200
@@ -361,10 +361,11 @@
///
/// This is appropriate for new configuration keys. The value syntax is
/// **not** the same as most existing list-valued config, which has Python
- /// parsing implemented in `parselist()` in `mercurial/config.py`.
- /// Faithfully porting that parsing algorithm to Rust (including behavior
- /// that are arguably bugs) turned out to be non-trivial and hasn’t been
- /// completed as of this writing.
+ /// parsing implemented in `parselist()` in
+ /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing
+ /// algorithm to Rust (including behaviors that are arguably bugs)
+ /// turned out to be non-trivial and hasn’t been completed as of this
+ /// writing.
///
/// Instead, the "simple" syntax is: split on comma, then trim leading and
/// trailing whitespace of each component. Quotes or backslashes are not
--- a/tests/test-doctest.py Fri Apr 16 02:14:21 2021 +0200
+++ b/tests/test-doctest.py Fri Apr 16 01:18:28 2021 +0200
@@ -131,7 +131,6 @@
('mercurial.changelog', '{}'),
('mercurial.cmdutil', '{}'),
('mercurial.color', '{}'),
- ('mercurial.config', '{}'),
('mercurial.dagparser', "{'optionflags': 4}"),
('mercurial.encoding', '{}'),
('mercurial.fancyopts', '{}'),