Mercurial > hg
changeset 47189:b0e92313107e
parselist: move the function from config to stringutil
We move the function into a lower-level module to avoid an import cycle. It
now lives next to `parsebool`, which had to migrate for the same reason.
Differential Revision: https://phab.mercurial-scm.org/D10449
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 16 Apr 2021 01:18:28 +0200 |
parents | 353718f741a8 |
children | 294a0aa51b8b |
files | mercurial/config.py mercurial/ui.py mercurial/utils/stringutil.py rust/hg-core/src/config/config.rs tests/test-doctest.py |
diffstat | 5 files changed, 97 insertions(+), 97 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/config.py Fri Apr 16 02:14:21 2021 +0200 +++ b/mercurial/config.py Fri Apr 16 01:18:28 2021 +0200 @@ -258,93 +258,3 @@ self.parse( path, fp.read(), sections=sections, remap=remap, include=include ) - - -def parselist(value): - """parse a configuration value as a list of comma/space separated strings - - >>> parselist(b'this,is "a small" ,test') - ['this', 'is', 'a small', 'test'] - """ - - def _parse_plain(parts, s, offset): - whitespace = False - while offset < len(s) and ( - s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' - ): - whitespace = True - offset += 1 - if offset >= len(s): - return None, parts, offset - if whitespace: - parts.append(b'') - if s[offset : offset + 1] == b'"' and not parts[-1]: - return _parse_quote, parts, offset + 1 - elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': - parts[-1] = parts[-1][:-1] + s[offset : offset + 1] - return _parse_plain, parts, offset + 1 - parts[-1] += s[offset : offset + 1] - return _parse_plain, parts, offset + 1 - - def _parse_quote(parts, s, offset): - if offset < len(s) and s[offset : offset + 1] == b'"': # "" - parts.append(b'') - offset += 1 - while offset < len(s) and ( - s[offset : offset + 1].isspace() - or s[offset : offset + 1] == b',' - ): - offset += 1 - return _parse_plain, parts, offset - - while offset < len(s) and s[offset : offset + 1] != b'"': - if ( - s[offset : offset + 1] == b'\\' - and offset + 1 < len(s) - and s[offset + 1 : offset + 2] == b'"' - ): - offset += 1 - parts[-1] += b'"' - else: - parts[-1] += s[offset : offset + 1] - offset += 1 - - if offset >= len(s): - real_parts = _configlist(parts[-1]) - if not real_parts: - parts[-1] = b'"' - else: - real_parts[0] = b'"' + real_parts[0] - parts = parts[:-1] - parts.extend(real_parts) - return None, parts, offset - - offset += 1 - while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: - offset += 1 - - if offset < len(s): - if offset + 1 == len(s) and s[offset : offset + 1] == 
b'"': - parts[-1] += b'"' - offset += 1 - else: - parts.append(b'') - else: - return None, parts, offset - - return _parse_plain, parts, offset - - def _configlist(s): - s = s.rstrip(b' ,') - if not s: - return [] - parser, parts, offset = _parse_plain, [b''], 0 - while parser: - parser, parts, offset = parser(parts, s, offset) - return parts - - if value is not None and isinstance(value, bytes): - result = _configlist(value.lstrip(b' ,\n')) - else: - result = value - return result or []
--- a/mercurial/ui.py Fri Apr 16 02:14:21 2021 +0200 +++ b/mercurial/ui.py Fri Apr 16 01:18:28 2021 +0200 @@ -887,10 +887,10 @@ """ # default is not always a list v = self.configwith( - config.parselist, section, name, default, b'list', untrusted + stringutil.parselist, section, name, default, b'list', untrusted ) if isinstance(v, bytes): - return config.parselist(v) + return stringutil.parselist(v) elif v is None: return [] return v
--- a/mercurial/utils/stringutil.py Fri Apr 16 02:14:21 2021 +0200 +++ b/mercurial/utils/stringutil.py Fri Apr 16 01:18:28 2021 +0200 @@ -868,6 +868,96 @@ return _booleans.get(s.lower(), None) +def parselist(value): + """parse a configuration value as a list of comma/space separated strings + + >>> parselist(b'this,is "a small" ,test') + ['this', 'is', 'a small', 'test'] + """ + + def _parse_plain(parts, s, offset): + whitespace = False + while offset < len(s) and ( + s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' + ): + whitespace = True + offset += 1 + if offset >= len(s): + return None, parts, offset + if whitespace: + parts.append(b'') + if s[offset : offset + 1] == b'"' and not parts[-1]: + return _parse_quote, parts, offset + 1 + elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': + parts[-1] = parts[-1][:-1] + s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + parts[-1] += s[offset : offset + 1] + return _parse_plain, parts, offset + 1 + + def _parse_quote(parts, s, offset): + if offset < len(s) and s[offset : offset + 1] == b'"': # "" + parts.append(b'') + offset += 1 + while offset < len(s) and ( + s[offset : offset + 1].isspace() + or s[offset : offset + 1] == b',' + ): + offset += 1 + return _parse_plain, parts, offset + + while offset < len(s) and s[offset : offset + 1] != b'"': + if ( + s[offset : offset + 1] == b'\\' + and offset + 1 < len(s) + and s[offset + 1 : offset + 2] == b'"' + ): + offset += 1 + parts[-1] += b'"' + else: + parts[-1] += s[offset : offset + 1] + offset += 1 + + if offset >= len(s): + real_parts = _configlist(parts[-1]) + if not real_parts: + parts[-1] = b'"' + else: + real_parts[0] = b'"' + real_parts[0] + parts = parts[:-1] + parts.extend(real_parts) + return None, parts, offset + + offset += 1 + while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: + offset += 1 + + if offset < len(s): + if offset + 1 == len(s) and s[offset : offset + 1] == b'"': + parts[-1] += b'"' + 
offset += 1 + else: + parts.append(b'') + else: + return None, parts, offset + + return _parse_plain, parts, offset + + def _configlist(s): + s = s.rstrip(b' ,') + if not s: + return [] + parser, parts, offset = _parse_plain, [b''], 0 + while parser: + parser, parts, offset = parser(parts, s, offset) + return parts + + if value is not None and isinstance(value, bytes): + result = _configlist(value.lstrip(b' ,\n')) + else: + result = value + return result or [] + + def evalpythonliteral(s): """Evaluate a string containing a Python literal expression""" # We could backport our tokenizer hack to rewrite '' to u'' if we want
--- a/rust/hg-core/src/config/config.rs Fri Apr 16 02:14:21 2021 +0200 +++ b/rust/hg-core/src/config/config.rs Fri Apr 16 01:18:28 2021 +0200 @@ -361,10 +361,11 @@ /// /// This is appropriate for new configuration keys. The value syntax is /// **not** the same as most existing list-valued config, which has Python - /// parsing implemented in `parselist()` in `mercurial/config.py`. - /// Faithfully porting that parsing algorithm to Rust (including behavior - /// that are arguably bugs) turned out to be non-trivial and hasn’t been - /// completed as of this writing. + /// parsing implemented in `parselist()` in + /// `mercurial/utils/stringutil.py`. Faithfully porting that parsing + /// algorithm to Rust (including behavior that are arguably bugs) + /// turned out to be non-trivial and hasn’t been completed as of this + /// writing. /// /// Instead, the "simple" syntax is: split on comma, then trim leading and /// trailing whitespace of each component. Quotes or backslashes are not
--- a/tests/test-doctest.py Fri Apr 16 02:14:21 2021 +0200 +++ b/tests/test-doctest.py Fri Apr 16 01:18:28 2021 +0200 @@ -131,7 +131,6 @@ ('mercurial.changelog', '{}'), ('mercurial.cmdutil', '{}'), ('mercurial.color', '{}'), - ('mercurial.config', '{}'), ('mercurial.dagparser', "{'optionflags': 4}"), ('mercurial.encoding', '{}'), ('mercurial.fancyopts', '{}'),