mercurial/utils/stringutil.py
author Raphaël Gomès <rgomes@octobus.net>
Mon, 29 Jul 2024 10:07:53 +0200
changeset 51737 48eb51494a7a
parent 51729 278af66e6595
child 51832 4eccb65e444f
permissions -rw-r--r--
rustfmt: update expected Rust edition In this case it doesn't change anything, but we've been using 2021 for a while now.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
     1
# stringutil.py - utility for generic string formatting, parsing, etc.
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     2
#
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     3
#  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 45942
diff changeset
     4
#  Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     5
#  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     6
#
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     7
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9996
diff changeset
     8
# GNU General Public License version 2 or any later version.
1082
ce96e316278a Update util.py docstrings, fix walk test
mpm@selenic.com
parents: 1081
diff changeset
     9
ce96e316278a Update util.py docstrings, fix walk test
mpm@selenic.com
parents: 1081
diff changeset
    10
37476
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
    11
import ast
31453
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
    12
import codecs
21907
7e5dfa00e3c2 util: rename 're' to 'remod'
Siddharth Agarwal <sid0@fb.com>
parents: 21857
diff changeset
    13
import re as remod
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    14
import textwrap
39296
ce145f8eface stringutil: teach pprint() to recognize generators
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39063
diff changeset
    15
import types
51729
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    16
import typing
3769
96095d9ff1f8 Add encoding detection
Matt Mackall <mpm@selenic.com>
parents: 3767
diff changeset
    17
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    18
from typing import (
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    19
    Optional,
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    20
    overload,
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    21
)
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    22
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    23
from ..i18n import _
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
    24
from ..thirdparty import attr
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    25
51729
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    26
# Force pytype to use the non-vendored package
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    27
if typing.TYPE_CHECKING:
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    28
    # noinspection PyPackageRequirements
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    29
    import attr
278af66e6595 typing: induce pytype to use the standard `attr` instead of the vendored copy
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    30
37083
f99d64e8a4e4 stringutil: move generic string helpers to new module
Yuya Nishihara <yuya@tcha.org>
parents: 37082
diff changeset
    31
from .. import (
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    32
    encoding,
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    33
    error,
28818
6041fb8f2da8 pycompat: add empty and queue to handle py3 divergence
timeless <timeless@mozdev.org>
parents: 28497
diff changeset
    34
    pycompat,
27358
ac839ee45b6a util: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 27357
diff changeset
    35
)
37010
8453699a1f21 util: observable proxy objects for sockets
Gregory Szorc <gregory.szorc@gmail.com>
parents: 36991
diff changeset
    36
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    37
# regex special chars pulled from https://bugs.python.org/issue29995
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    38
# which was part of Python 3.7.
38477
de275ab362cb stringutil: update list of re-special characters to include &~
Augie Fackler <augie@google.com>
parents: 38474
diff changeset
    39
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    40
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
40684
e6c9ef5e11a0 match: provide and use a quick way to escape a single byte
Boris Feld <boris.feld@octobus.net>
parents: 40276
diff changeset
    41
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    42
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    43
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    44
@overload
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    45
def reescape(pat: bytes) -> bytes:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    46
    ...
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    47
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    48
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    49
@overload
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    50
def reescape(pat: str) -> str:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    51
    ...
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    52
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    53
38474
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    54
def reescape(pat):
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    55
    """Drop-in replacement for re.escape."""
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    56
    # NOTE: it is intentional that this works on unicodes and not
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    57
    # bytes, as it's only possible to do the escaping with
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    58
    # unicode.translate, not bytes.translate. Sigh.
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    59
    wantuni = True
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    60
    if isinstance(pat, bytes):
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    61
        wantuni = False
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    62
        pat = pat.decode('latin1')
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    63
    pat = pat.translate(_regexescapemap)
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    64
    if wantuni:
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    65
        return pat
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    66
    return pat.encode('latin1')
96f65bdf0bf4 stringutil: add a new function to do minimal regex escaping
Augie Fackler <augie@google.com>
parents: 38264
diff changeset
    67
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    68
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    69
def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
    70
    """Pretty print an object."""
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    71
    return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    72
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
    73
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
    74
def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    75
    """Pretty print an object to a generator of atoms.
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    76
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    77
    ``bprefix`` is a flag influencing whether bytestrings are preferred with
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    78
    a ``b''`` prefix.
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    79
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    80
    ``indent`` controls whether collections and nested data structures
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    81
    span multiple lines via the indentation amount in spaces. By default,
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    82
    no newlines are emitted.
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    83
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
    84
    ``level`` specifies the initial indent level. Used if ``indent > 0``.
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
    85
    """
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    86
37619
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    87
    if isinstance(o, bytes):
37750
f7194c925003 stringutil: make b prefixes on string output optional
Augie Fackler <augie@google.com>
parents: 37749
diff changeset
    88
        if bprefix:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    89
            yield b"b'%s'" % escapestr(o)
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
    90
        else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    91
            yield b"'%s'" % escapestr(o)
37619
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    92
    elif isinstance(o, bytearray):
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    93
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
68132a95df31 stringutil: support more types with pprint()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37476
diff changeset
    94
        # without coercion.
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    95
        yield b"bytearray['%s']" % escapestr(bytes(o))
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
    96
    elif isinstance(o, list):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
    97
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    98
            yield b'[]'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
    99
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   100
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   101
        yield b'['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   102
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   103
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   104
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   105
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   106
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   107
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   108
        for i, a in enumerate(o):
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   109
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   110
                a, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   111
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   112
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   113
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   114
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   115
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   116
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   117
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   118
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   119
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   120
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   121
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   122
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   123
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   124
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   125
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   126
        yield b']'
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   127
    elif isinstance(o, dict):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   128
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   129
            yield b'{}'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   130
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   131
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   132
        yield b'{'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   133
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   134
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   135
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   136
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   137
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   138
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   139
        for i, (k, v) in enumerate(sorted(o.items())):
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   140
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   141
                k, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   142
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   143
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   144
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   145
            yield b': '
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   146
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   147
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   148
                v, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   149
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   150
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   151
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   152
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   153
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   154
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   155
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   156
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   157
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   158
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   159
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   160
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   161
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   162
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   163
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   164
        yield b'}'
39051
2aebe138ef6e stringutil: teach pprint about sets
Augie Fackler <augie@google.com>
parents: 38783
diff changeset
   165
    elif isinstance(o, set):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   166
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   167
            yield b'set([])'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   168
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   169
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   170
        yield b'set(['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   171
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   172
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   173
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   174
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   175
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   176
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   177
        for i, k in enumerate(sorted(o)):
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   178
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   179
                k, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   180
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   181
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   182
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   183
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   184
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   185
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   186
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   187
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   188
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   189
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   190
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   191
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   192
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   193
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   194
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   195
        yield b'])'
37932
bf6bb710b40f stringutil: teach pprint about tuples
Augie Fackler <augie@google.com>
parents: 37750
diff changeset
   196
    elif isinstance(o, tuple):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   197
        if not o:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   198
            yield b'()'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   199
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   200
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   201
        yield b'('
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   202
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   203
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   204
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   205
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   206
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   207
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   208
        for i, a in enumerate(o):
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   209
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   210
                a, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   211
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   212
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   213
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   214
            if i + 1 < len(o):
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   215
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   216
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   217
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   218
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   219
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   220
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   221
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   222
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   223
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   224
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   225
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   226
        yield b')'
39296
ce145f8eface stringutil: teach pprint() to recognize generators
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39063
diff changeset
   227
    elif isinstance(o, types.GeneratorType):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   228
        # Special case of empty generator.
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   229
        try:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   230
            nextitem = next(o)
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   231
        except StopIteration:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   232
            yield b'gen[]'
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   233
            return
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   234
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   235
        yield b'gen['
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   236
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   237
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   238
            level += 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   239
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   240
            yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   241
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   242
        last = False
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   243
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   244
        while not last:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   245
            current = nextitem
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   246
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   247
            try:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   248
                nextitem = next(o)
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   249
            except StopIteration:
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   250
                last = True
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   251
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   252
            for chunk in pprintgen(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   253
                current, bprefix=bprefix, indent=indent, level=level
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   254
            ):
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   255
                yield chunk
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   256
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   257
            if not last:
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   258
                if indent:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   259
                    yield b',\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   260
                    yield b' ' * (level * indent)
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   261
                else:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   262
                    yield b', '
39378
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   263
0f549da54379 stringutil: teach pprint() to indent
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39354
diff changeset
   264
        if indent:
40276
be57c7019c70 stringutil: allow to specify initial indent level of pprint()
Yuya Nishihara <yuya@tcha.org>
parents: 40275
diff changeset
   265
            level -= 1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   266
            yield b'\n'
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   267
            yield b' ' * (level * indent)
39354
5ed7c6caf24d stringutil: emit multiple chunks when pretty printing
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39353
diff changeset
   268
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   269
        yield b']'
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   270
    else:
39353
0d21b1f1722c stringutil: refactor core of pprint so it emits chunks
Gregory Szorc <gregory.szorc@gmail.com>
parents: 39296
diff changeset
   271
        yield pycompat.byterepr(o)
37300
2f859ad7ed8c stringutil: add function to pretty print an object
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37290
diff changeset
   272
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   273
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   274
def prettyrepr(o) -> bytes:
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   275
    """Pretty print a representation of a possibly-nested object"""
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   276
    lines = []
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   277
    rs = pycompat.byterepr(o)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   278
    p0 = p1 = 0
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   279
    while p0 < len(rs):
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   280
        # '... field=<type ... field=<type ...'
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   281
        #      ~~~~~~~~~~~~~~~~
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   282
        #      p0    p1        q0    q1
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   283
        q0 = -1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   284
        q1 = rs.find(b'<', p1 + 1)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   285
        if q1 < 0:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   286
            q1 = len(rs)
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   287
            # pytype: disable=wrong-arg-count
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   288
            # TODO: figure out why pytype doesn't recognize the optional start
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   289
            #  arg
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   290
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   291
            # pytype: enable=wrong-arg-count
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   292
            # backtrack for ' field=<'
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   293
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   294
        if q0 < 0:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   295
            q0 = q1
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   296
        else:
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   297
            q0 += 1  # skip ' '
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   298
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   299
        assert l >= 0
38264
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   300
        lines.append((l, rs[p0:q0].rstrip()))
fbb2eddea4d2 stringutil: fix prettyrepr() to not orphan foo=<...> line
Yuya Nishihara <yuya@tcha.org>
parents: 38261
diff changeset
   301
        p0, p1 = q0, q1
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   302
    return b'\n'.join(b'  ' * l + s for l, s in lines)
38261
f3033692ccef stringutil: promote smartset.prettyformat() to utility function
Yuya Nishihara <yuya@tcha.org>
parents: 37942
diff changeset
   303
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   304
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   305
def buildrepr(r) -> bytes:
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   306
    """Format an optional printable representation from unexpanded bits
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   307
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   308
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   309
    type(r)   example
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   310
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   311
    tuple     ('<not %r>', other)
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   312
    bytes     '<branch closed>'
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   313
    callable  lambda: '<branch %r>' % sorted(b)
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   314
    object    other
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   315
    ========  =================================
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   316
    """
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   317
    if r is None:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   318
        return b''
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   319
    elif isinstance(r, tuple):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   320
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   321
    elif isinstance(r, bytes):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   322
        return r
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   323
    elif callable(r):
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   324
        return r()
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   325
    else:
39052
38409be2f521 stringutil: have buildrepr delegate to pprint for unknown types
Augie Fackler <augie@google.com>
parents: 39051
diff changeset
   326
        return pprint(r)
38576
a3130208db1c stringutil: move _formatsetrepr() from smartset
Yuya Nishihara <yuya@tcha.org>
parents: 38477
diff changeset
   327
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   328
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   329
def binary(s: bytes) -> bool:
6507
9699864de219 Let util.binary check entire data for \0 (issue1066, issue1079)
Christian Ebert <blacktrash@gmx.net>
parents: 6501
diff changeset
   330
    """return true if a string is binary data"""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   331
    return bool(s and b'\0' in s)
6762
f67d1468ac50 util: add sort helper
Matt Mackall <mpm@selenic.com>
parents: 6746
diff changeset
   332
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   333
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   334
def _splitpattern(pattern: bytes):
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   335
    if pattern.startswith(b're:'):
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   336
        return b're', pattern[3:]
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   337
    elif pattern.startswith(b'literal:'):
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   338
        return b'literal', pattern[8:]
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   339
    return b'literal', pattern
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   340
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   341
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   342
def stringmatcher(pattern: bytes, casesensitive: bool = True):
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   343
    """
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   344
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   345
    returns the matcher name, pattern, and matcher function.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   346
    missing or unknown prefixes are treated as literal matches.
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   347
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   348
    helper for tests:
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   349
    >>> def test(pattern, *tests):
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   350
    ...     kind, pattern, matcher = stringmatcher(pattern)
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   351
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   352
    >>> def itest(pattern, *tests):
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   353
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   354
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   355
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   356
    exact matching (no prefix):
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   357
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   358
    ('literal', 'abcdefg', [False, False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   359
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   360
    regex matching ('re:' prefix)
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   361
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   362
    ('re', 'a.+b', [False, False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   363
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   364
    force exact matches ('literal:' prefix)
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   365
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   366
    ('literal', 're:foobar', [False, True])
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   367
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   368
    unknown prefixes are ignored and treated as literals
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   369
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   370
    ('literal', 'foo:bar', [False, False, True])
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   371
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   372
    case insensitive regex matches
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   373
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   374
    ('re', 'A.+b', [False, False, True])
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   375
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   376
    case insensitive literal matches
34131
0fa781320203 doctest: bulk-replace string literals with b'' for Python 3
Yuya Nishihara <yuya@tcha.org>
parents: 34084
diff changeset
   377
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   378
    ('literal', 'ABCDEFG', [False, False, True])
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   379
    """
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   380
    kind, pattern = _splitpattern(pattern)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   381
    if kind == b're':
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   382
        try:
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   383
            flags = 0
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   384
            if not casesensitive:
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   385
                flags = remod.I
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   386
            regex = remod.compile(pattern, flags)
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   387
        except remod.error as e:
45723
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   388
            raise error.ParseError(
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   389
                _(b'invalid regular expression: %s') % forcebytestr(e)
edfc5820aae7 py3: fix stringmatcher() to byte-stringify exception message
Yuya Nishihara <yuya@tcha.org>
parents: 45722
diff changeset
   390
            )
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   391
        return kind, pattern, regex.search
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   392
    elif kind == b'literal':
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   393
        if casesensitive:
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   394
            match = pattern.__eq__
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   395
        else:
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   396
            ipat = encoding.lower(pattern)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   397
            match = lambda s: ipat == encoding.lower(s)
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   398
        return kind, pattern, match
30773
c390b40fe1d7 util: teach stringmatcher to handle forced case insensitive matches
Matt Harbison <matt_harbison@yahoo.com>
parents: 30761
diff changeset
   399
45722
d502caab76bc stringutil: extract helper function that splits stringmatcher() pattern
Yuya Nishihara <yuya@tcha.org>
parents: 44022
diff changeset
   400
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
26481
7d132557e44a util: extract stringmatcher() from revset
Matt Harbison <matt_harbison@yahoo.com>
parents: 26480
diff changeset
   401
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   402
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   403
def substringregexp(pattern: bytes, flags: int = 0):
45724
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   404
    """Build a regexp object from a string pattern possibly starting with
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   405
    're:' or 'literal:' prefix.
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   406
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   407
    helper for tests:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   408
    >>> def test(pattern, *tests):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   409
    ...     regexp = substringregexp(pattern)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   410
    ...     return [bool(regexp.search(t)) for t in tests]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   411
    >>> def itest(pattern, *tests):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   412
    ...     regexp = substringregexp(pattern, remod.I)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   413
    ...     return [bool(regexp.search(t)) for t in tests]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   414
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   415
    substring matching (no prefix):
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   416
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   417
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   418
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   419
    substring pattern should be escaped:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   420
    >>> substringregexp(b'.bc').pattern
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   421
    '\\\\.bc'
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   422
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   423
    [False, False, False]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   424
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   425
    regex matching ('re:' prefix)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   426
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   427
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   428
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   429
    force substring matches ('literal:' prefix)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   430
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   431
    [False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   432
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   433
    case insensitive literal matches
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   434
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   435
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   436
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   437
    case insensitive regex matches
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   438
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   439
    [False, False, True]
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   440
    """
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   441
    kind, pattern = _splitpattern(pattern)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   442
    if kind == b're':
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   443
        try:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   444
            return remod.compile(pattern, flags)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   445
        except remod.error as e:
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   446
            raise error.ParseError(
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   447
                _(b'invalid regular expression: %s') % forcebytestr(e)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   448
            )
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   449
    elif kind == b'literal':
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   450
        return remod.compile(remod.escape(pattern), flags)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   451
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   452
    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   453
ac39a8a214b1 stringutil: add function to compile stringmatcher pattern into regexp
Yuya Nishihara <yuya@tcha.org>
parents: 45723
diff changeset
   454
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   455
def shortuser(user: bytes) -> bytes:
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   456
    """Return a short representation of a user name or email address."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   457
    f = user.find(b'@')
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   458
    if f >= 0:
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   459
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   460
    f = user.find(b'<')
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   461
    if f >= 0:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   462
        user = user[f + 1 :]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   463
    f = user.find(b' ')
3176
7492b33bdd9f shortuser should stop before the first space character.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3147
diff changeset
   464
    if f >= 0:
7492b33bdd9f shortuser should stop before the first space character.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3147
diff changeset
   465
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   466
    f = user.find(b'.')
3533
bb44489b901f shortname: truncate at '.' too
Matt Mackall <mpm@selenic.com>
parents: 3466
diff changeset
   467
    if f >= 0:
bb44489b901f shortname: truncate at '.' too
Matt Mackall <mpm@selenic.com>
parents: 3466
diff changeset
   468
        user = user[:f]
1903
e4abeafd6eb1 move shortuser into util module.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1635
diff changeset
   469
    return user
1920
b7cc0f323a4c merge with crew.
Vadim Gelfer <vadim.gelfer@gmail.com>
parents: 1903 1882
diff changeset
   470
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   471
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   472
def emailuser(user: bytes) -> bytes:
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   473
    """Return the user portion of an email address."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   474
    f = user.find(b'@')
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   475
    if f >= 0:
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   476
        user = user[:f]
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   477
    f = user.find(b'<')
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   478
    if f >= 0:
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   479
        user = user[f + 1 :]
16360
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   480
    return user
e5788269741a templates/filters: extracting the user portion of an email address
Matteo Capobianco <m.capobianco@gmail.com>
parents: 15720
diff changeset
   481
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   482
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   483
def email(author: bytes) -> bytes:
5975
75d9fe70c654 templater: move email function to util
Matt Mackall <mpm@selenic.com>
parents: 5949
diff changeset
   484
    '''get email of author.'''
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   485
    r = author.find(b'>')
10282
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10264
diff changeset
   486
    if r == -1:
08a0f04b56bd many, many trivial check-code fixups
Matt Mackall <mpm@selenic.com>
parents: 10264
diff changeset
   487
        r = None
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   488
    return author[author.find(b'<') + 1 : r]
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   489
5975
75d9fe70c654 templater: move email function to util
Matt Mackall <mpm@selenic.com>
parents: 5949
diff changeset
   490
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   491
def person(author: bytes) -> bytes:
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   492
    """Returns the name before an email address,
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   493
    interpreting it as per RFC 5322
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   494
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   495
    >>> person(b'foo@bar')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   496
    'foo'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   497
    >>> person(b'Foo Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   498
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   499
    >>> person(b'"Foo Bar" <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   500
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   501
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   502
    'Foo "buz" Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   503
    >>> # The following are invalid, but do exist in real-life
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   504
    ...
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   505
    >>> person(b'Foo "buz" Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   506
    'Foo "buz" Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   507
    >>> person(b'"Foo Bar <foo@bar>')
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   508
    'Foo Bar'
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   509
    """
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   510
    if b'@' not in author:
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   511
        return author
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   512
    f = author.find(b'<')
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   513
    if f != -1:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   514
        return author[:f].strip(b' "').replace(b'\\"', b'"')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   515
    f = author.find(b'@')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   516
    return author[:f].replace(b'.', b' ')
37155
fb7140f1d09d stringutil: move person function from templatefilters
Connor Sheehan <sheehan@mozilla.com>
parents: 37154
diff changeset
   517
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   518
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   519
@attr.s(hash=True)
48946
642e31cb55f0 py3: use class X: instead of class X(object):
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48911
diff changeset
   520
class mailmapping:
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   521
    """Represents a username/email key or value in
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   522
    a mailmap file"""
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   523
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   524
    email = attr.ib()
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   525
    name = attr.ib(default=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   526
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   527
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   528
def _ismailmaplineinvalid(names, emails):
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   529
    """Returns True if the parsed names and emails
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   530
    in a mailmap entry are invalid.
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   531
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   532
    >>> # No names or emails fails
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   533
    >>> names, emails = [], []
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   534
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   535
    True
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   536
    >>> # Only one email fails
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   537
    >>> emails = [b'email@email.com']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   538
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   539
    True
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   540
    >>> # One email and one name passes
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   541
    >>> names = [b'Test Name']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   542
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   543
    False
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   544
    >>> # No names but two emails passes
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   545
    >>> names = []
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   546
    >>> emails = [b'proper@email.com', b'commit@email.com']
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   547
    >>> _ismailmaplineinvalid(names, emails)
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   548
    False
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   549
    """
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   550
    return not emails or not names and len(emails) < 2
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   551
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   552
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   553
def parsemailmap(mailmapcontent):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   554
    """Parses data in the .mailmap format
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   555
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   556
    >>> mmdata = b"\\n".join([
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   557
    ... b'# Comment',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   558
    ... b'Name <commit1@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   559
    ... b'<name@email.xx> <commit2@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   560
    ... b'Name <proper@email.xx> <commit3@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   561
    ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   562
    ... ])
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   563
    >>> mm = parsemailmap(mmdata)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   564
    >>> for key in sorted(mm.keys()):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   565
    ...     print(key)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   566
    mailmapping(email='commit1@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   567
    mailmapping(email='commit2@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   568
    mailmapping(email='commit3@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   569
    mailmapping(email='commit4@email.xx', name='Commit')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   570
    >>> for val in sorted(mm.values()):
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   571
    ...     print(val)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   572
    mailmapping(email='commit1@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   573
    mailmapping(email='name@email.xx', name=None)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   574
    mailmapping(email='proper@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   575
    mailmapping(email='proper@email.xx', name='Name')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   576
    """
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   577
    mailmap = {}
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   578
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   579
    if mailmapcontent is None:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   580
        return mailmap
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   581
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   582
    for line in mailmapcontent.splitlines():
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   583
        # Don't bother checking the line if it is a comment or
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   584
        # is an improperly formed author field
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   585
        if line.lstrip().startswith(b'#'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   586
            continue
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   587
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   588
        # names, emails hold the parsed emails and names for each line
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   589
        # name_builder holds the words in a persons name
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   590
        names, emails = [], []
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   591
        namebuilder = []
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   592
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   593
        for element in line.split():
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   594
            if element.startswith(b'#'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   595
                # If we reach a comment in the mailmap file, move on
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   596
                break
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   597
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   598
            elif element.startswith(b'<') and element.endswith(b'>'):
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   599
                # We have found an email.
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   600
                # Parse it, and finalize any names from earlier
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   601
                emails.append(element[1:-1])  # Slice off the "<>"
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   602
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   603
                if namebuilder:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   604
                    names.append(b' '.join(namebuilder))
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   605
                    namebuilder = []
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   606
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   607
                # Break if we have found a second email, any other
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   608
                # data does not fit the spec for .mailmap
37245
54b896f195d1 stringutil: rename local email/names variables to their plural forms
Connor Sheehan <sheehan@mozilla.com>
parents: 37210
diff changeset
   609
                if len(emails) > 1:
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   610
                    break
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   611
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   612
            else:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   613
                # We have found another word in the committers name
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   614
                namebuilder.append(element)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   615
37246
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   616
        # Check to see if we have parsed the line into a valid form
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   617
        # We require at least one email, and either at least one
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   618
        # name or a second email
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   619
        if _ismailmaplineinvalid(names, emails):
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   620
            continue
0e7550b0964c stringutil: improve check for failed mailmap line parsing
Connor Sheehan <sheehan@mozilla.com>
parents: 37245
diff changeset
   621
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   622
        mailmapkey = mailmapping(
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   623
            email=emails[-1],
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   624
            name=names[-1] if len(names) == 2 else None,
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   625
        )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   626
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   627
        mailmap[mailmapkey] = mailmapping(
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   628
            email=emails[0],
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   629
            name=names[0] if names else None,
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   630
        )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   631
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   632
    return mailmap
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   633
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   634
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   635
def mapname(mailmap, author: bytes) -> bytes:
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   636
    """Returns the author field according to the mailmap cache, or
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   637
    the original author field.
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   638
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   639
    >>> mmdata = b"\\n".join([
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   640
    ...     b'# Comment',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   641
    ...     b'Name <commit1@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   642
    ...     b'<name@email.xx> <commit2@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   643
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   644
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   645
    ... ])
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   646
    >>> m = parsemailmap(mmdata)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   647
    >>> mapname(m, b'Commit <commit1@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   648
    'Name <commit1@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   649
    >>> mapname(m, b'Name <commit2@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   650
    'Name <name@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   651
    >>> mapname(m, b'Commit <commit3@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   652
    'Name <proper@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   653
    >>> mapname(m, b'Commit <commit4@email.xx>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   654
    'Name <proper@email.xx>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   655
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   656
    'Unknown Name <unknown@email.com>'
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   657
    """
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   658
    # If the author field coming in isn't in the correct format,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   659
    # or the mailmap is empty just return the original author field
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   660
    if not isauthorwellformed(author) or not mailmap:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   661
        return author
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   662
37247
2ed180117f76 stringutil: edit comment to reflect actual data type name
Connor Sheehan <sheehan@mozilla.com>
parents: 37246
diff changeset
   663
    # Turn the user name into a mailmapping
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   664
    commit = mailmapping(name=person(author), email=email(author))
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   665
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   666
    try:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   667
        # Try and use both the commit email and name as the key
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   668
        proper = mailmap[commit]
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   669
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   670
    except KeyError:
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   671
        # If the lookup fails, use just the email as the key instead
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   672
        # We call this commit2 as not to erase original commit fields
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   673
        commit2 = mailmapping(email=commit.email)
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   674
        proper = mailmap.get(commit2, mailmapping(None, None))
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   675
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   676
    # Return the author field with proper values filled in
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   677
    return b'%s <%s>' % (
37210
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   678
        proper.name if proper.name else commit.name,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   679
        proper.email if proper.email else commit.email,
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   680
    )
2a2ce93e12f4 templatefuncs: add mailmap template function
Connor Sheehan <sheehan@mozilla.com>
parents: 37155
diff changeset
   681
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   682
44022
c1ccefb513e4 cleanup: drop redundant character escapes outside of `[]`
Matt Harbison <matt_harbison@yahoo.com>
parents: 43506
diff changeset
   683
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   684
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   685
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   686
def isauthorwellformed(author: bytes) -> bool:
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   687
    """Return True if the author field is well formed
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   688
    (ie "Contributor Name <contrib@email.dom>")
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   689
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   690
    >>> isauthorwellformed(b'Good Author <good@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   691
    True
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   692
    >>> isauthorwellformed(b'Author <good@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   693
    True
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   694
    >>> isauthorwellformed(b'Bad Author')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   695
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   696
    >>> isauthorwellformed(b'Bad Author <author@author.com')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   697
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   698
    >>> isauthorwellformed(b'Bad Author author@author.com')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   699
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   700
    >>> isauthorwellformed(b'<author@author.com>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   701
    False
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   702
    >>> isauthorwellformed(b'Bad Author <author>')
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   703
    False
45942
89a2afe31e82 formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents: 45724
diff changeset
   704
    """
37154
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   705
    return _correctauthorformat.match(author) is not None
f8e1f48de118 stringutil: add isauthorwellformed function
Connor Sheehan <sheehan@mozilla.com>
parents: 37083
diff changeset
   706
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   707
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   708
def firstline(text: bytes) -> bytes:
49021
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   709
    """Return the first line of the input"""
49030
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   710
    # Try to avoid running splitlines() on the whole string
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   711
    i = text.find(b'\n')
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   712
    if i != -1:
75794847ef62 stringutil: try to avoid running `splitlines()` only to get first line
Martin von Zweigbergk <martinvonz@google.com>
parents: 49021
diff changeset
   713
        text = text[:i]
49021
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   714
    try:
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   715
        return text.splitlines()[0]
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   716
    except IndexError:
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   717
        return b''
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   718
51aed118f9dc templates: extract function to `stringutil` for getting first line of text
Martin von Zweigbergk <martinvonz@google.com>
parents: 48946
diff changeset
   719
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   720
def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
21857
86c2d792a4b7 util: replace 'ellipsis' implementation by 'encoding.trim'
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 21813
diff changeset
   721
    """Trim string to at most maxlength (default: 400) columns in display."""
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   722
    return encoding.trim(text, maxlength, ellipsis=b'...')
3767
1861fa38a6a7 Move ellipsis code to util.ellipsis() and improve maxlength handling.
Thomas Arendsen Hein <thomas@intevation.de>
parents: 3721
diff changeset
   723
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   724
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   725
def escapestr(s: bytes) -> bytes:
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   726
    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
39063
1419ba5e3b56 stringutil: if we get a memoryview in escapestr, coerce it to bytes
Augie Fackler <augie@google.com>
parents: 39052
diff changeset
   727
    if isinstance(s, memoryview):
1419ba5e3b56 stringutil: if we get a memoryview in escapestr, coerce it to bytes
Augie Fackler <augie@google.com>
parents: 39052
diff changeset
   728
        s = bytes(s)
31453
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
   729
    # call underlying function of s.encode('string_escape') directly for
3b7a6941a6ef py3: call codecs.escape_encode() directly
Yuya Nishihara <yuya@tcha.org>
parents: 31451
diff changeset
   730
    # Python 3 compatibility
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   731
    # pytype: disable=bad-return-type
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   732
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   733
    # pytype: enable=bad-return-type
31451
53865692a354 util: wrap s.encode('string_escape') call for future py3 compatibility
Yuya Nishihara <yuya@tcha.org>
parents: 31449
diff changeset
   734
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   735
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   736
def unescapestr(s: bytes) -> bytes:
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   737
    # pytype: disable=bad-return-type
48479
dcdecec401ca pytype: stop excluding stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 47189
diff changeset
   738
    return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
49648
9be765b82a90 typing: minor tweaks to allow updating to pytype 2022.11.18
Matt Harbison <matt_harbison@yahoo.com>
parents: 49575
diff changeset
   739
    # pytype: enable=bad-return-type
31484
afb335353d28 util: wrap s.decode('string_escape') calls for future py3 compatibility
Yuya Nishihara <yuya@tcha.org>
parents: 31465
diff changeset
   740
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   741
33682
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   742
def forcebytestr(obj):
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   743
    """Portably format an arbitrary object (e.g. exception) into a byte
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   744
    string."""
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   745
    try:
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   746
        return pycompat.bytestr(obj)
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   747
    except UnicodeEncodeError:
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   748
        # non-ascii string, may be lossy
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   749
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
1d5e497c08b3 py3: convert arbitrary exception object to byte string more reliably
Yuya Nishihara <yuya@tcha.org>
parents: 33619
diff changeset
   750
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   751
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   752
def uirepr(s: bytes) -> bytes:
5291
23651848d638 extdiff: avoid repr() doubling paths backslashes under Windows
Patrick Mezard <pmezard@gmail.com>
parents: 5213
diff changeset
   753
    # Avoid double backslash in Windows path repr()
36266
1fa33bd848ee py3: fix bytes-unicode dance while building docstring of extdiff
Yuya Nishihara <yuya@tcha.org>
parents: 36235
diff changeset
   754
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
7547
4949729ee9ee python implementation of diffstat
Alexander Solovyov <piranha@piranha.org.ua>
parents: 7537
diff changeset
   755
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   756
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   757
# delay import of textwrap
37079
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   758
def _MBTextWrapper(**kwargs):
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   759
    class tw(textwrap.TextWrapper):
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   760
        """
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   761
        Extend TextWrapper for width-awareness.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   762
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   763
        Neither number of 'bytes' in any encoding nor 'characters' is
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   764
        appropriate to calculate terminal columns for specified string.
12957
9f2ac318b92e util: clarify purpose of MBTextWrapper class
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 12938
diff changeset
   765
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   766
        Original TextWrapper implementation uses built-in 'len()' directly,
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   767
        so overriding is needed to use width information of each characters.
12957
9f2ac318b92e util: clarify purpose of MBTextWrapper class
Nicolas Dumazet <nicdumz.commits@gmail.com>
parents: 12938
diff changeset
   768
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   769
        In addition, characters classified into 'ambiguous' width are
17424
e7cfe3587ea4 fix trivial spelling errors
Mads Kiilerich <mads@kiilerich.com>
parents: 17391
diff changeset
   770
        treated as wide in East Asian area, but as narrow in other.
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   771
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   772
        This requires use decision to determine width of such characters.
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   773
        """
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   774
15065
24a6c3f903bb util: wrap lines with multi-byte characters correctly (issue2943)
Mads Kiilerich <mads@kiilerich.com>
parents: 15024
diff changeset
   775
        def _cutdown(self, ucstr, space_left):
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   776
            l = 0
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   777
            colwidth = encoding.ucolwidth
49284
d44e3c45f0e4 py3: replace `pycompat.xrange` by `range`
Manuel Jacob <me@manueljacob.de>
parents: 49030
diff changeset
   778
            for i in range(len(ucstr)):
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   779
                l += colwidth(ucstr[i])
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   780
                if space_left < l:
15065
24a6c3f903bb util: wrap lines with multi-byte characters correctly (issue2943)
Mads Kiilerich <mads@kiilerich.com>
parents: 15024
diff changeset
   781
                    return (ucstr[:i], ucstr[i:])
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   782
            return ucstr, b''
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   783
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   784
        # overriding of base class
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   785
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   786
            space_left = max(width - cur_len, 1)
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   787
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   788
            if self.break_long_words:
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   789
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   790
                cur_line.append(cut)
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   791
                reversed_chunks[-1] = res
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   792
            elif not cur_line:
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   793
                cur_line.append(reversed_chunks.pop())
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   794
26201
c5b2074ae8c0 util: capitalize Python in MBTextWrapper._wrap_chunks comment
timeless@mozdev.org
parents: 26126
diff changeset
   795
        # this overriding code is imported from TextWrapper of Python 2.6
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   796
        # to calculate columns of string by 'encoding.ucolwidth()'
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   797
        def _wrap_chunks(self, chunks):
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   798
            colwidth = encoding.ucolwidth
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   799
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   800
            lines = []
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   801
            if self.width <= 0:
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   802
                raise ValueError(b"invalid width %r (must be > 0)" % self.width)
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   803
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   804
            # Arrange in reverse order so items can be efficiently popped
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   805
            # from a stack of chucks.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   806
            chunks.reverse()
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   807
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   808
            while chunks:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   809
                # Start the list of chunks that will make up the current line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   810
                # cur_len is just the length of all the chunks in cur_line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   811
                cur_line = []
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   812
                cur_len = 0
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   813
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   814
                # Figure out which static string will prefix this line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   815
                if lines:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   816
                    indent = self.subsequent_indent
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   817
                else:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   818
                    indent = self.initial_indent
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   819
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   820
                # Maximum width for this line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   821
                width = self.width - len(indent)
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   822
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   823
                # First chunk on line is whitespace -- drop it, unless this
17424
e7cfe3587ea4 fix trivial spelling errors
Mads Kiilerich <mads@kiilerich.com>
parents: 17391
diff changeset
   824
                # is the very beginning of the text (i.e. no lines started yet).
43506
9f70512ae2cf cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents: 43077
diff changeset
   825
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   826
                    del chunks[-1]
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   827
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   828
                while chunks:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   829
                    l = colwidth(chunks[-1])
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   830
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   831
                    # Can at least squeeze this chunk onto the current line.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   832
                    if cur_len + l <= width:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   833
                        cur_line.append(chunks.pop())
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   834
                        cur_len += l
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   835
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   836
                    # Nope, this line is full.
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   837
                    else:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   838
                        break
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   839
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   840
                # The current line is full, and the next chunk is too big to
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   841
                # fit on *any* line (not just this one).
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   842
                if chunks and colwidth(chunks[-1]) > width:
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   843
                    self._handle_long_word(chunks, cur_line, cur_len, width)
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   844
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   845
                # If the last chunk on this line is all whitespace, drop it.
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   846
                if (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   847
                    self.drop_whitespace
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   848
                    and cur_line
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   849
                    and cur_line[-1].strip() == r''
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   850
                ):
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   851
                    del cur_line[-1]
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   852
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   853
                # Convert current line back to a string and store it in list
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   854
                # of all lines (return value).
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   855
                if cur_line:
43506
9f70512ae2cf cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents: 43077
diff changeset
   856
                    lines.append(indent + ''.join(cur_line))
15066
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   857
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   858
            return lines
24efa83d81cb i18n: calculate terminal columns by width information of each characters
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 15065
diff changeset
   859
37079
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   860
    global _MBTextWrapper
736024df4498 util: mark MBTextWrapper as private
Yuya Nishihara <yuya@tcha.org>
parents: 37078
diff changeset
   861
    _MBTextWrapper = tw
13316
d119403fd266 util: delay loading of textwrap
Matt Mackall <mpm@selenic.com>
parents: 13313
diff changeset
   862
    return tw(**kwargs)
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   863
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   864
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   865
def wrap(
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   866
    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   867
) -> bytes:
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   868
    maxindent = max(len(hangindent), len(initindent))
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   869
    if width <= maxindent:
9417
4c3fb45123e5 util, minirst: do not crash with COLUMNS=0
Martin Geisler <mg@lazybytes.net>
parents: 9397
diff changeset
   870
        # adjust for weird terminal size
11297
d320e70442a5 replace Python standard textwrap by MBCS sensitive one for i18n text
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents: 11256
diff changeset
   871
        width = max(78, maxindent + 1)
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   872
    line = line.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   873
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   874
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   875
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   876
    initindent = initindent.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   877
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   878
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   879
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   880
    hangindent = hangindent.decode(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   881
        pycompat.sysstr(encoding.encoding),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   882
        pycompat.sysstr(encoding.encodingmode),
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   883
    )
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   884
    wrapper = _MBTextWrapper(
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   885
        width=width, initial_indent=initindent, subsequent_indent=hangindent
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   886
    )
31338
a9a28ca17615 util: pass encoding.[encoding|encodingmode] as unicodes
Pulkit Goyal <7895pulkit@gmail.com>
parents: 31315
diff changeset
   887
    return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
8938
9b8c9266c59d commands: wrap short descriptions in 'hg help'
Martin Geisler <mg@lazybytes.net>
parents: 8785
diff changeset
   888
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   889
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   890
_booleans = {
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   891
    b'1': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   892
    b'yes': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   893
    b'true': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   894
    b'on': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   895
    b'always': True,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   896
    b'0': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   897
    b'no': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   898
    b'false': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   899
    b'off': False,
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   900
    b'never': False,
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   901
}
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   902
12087
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   903
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   904
def parsebool(s: bytes) -> Optional[bool]:
12087
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   905
    """Parse s into a boolean.
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   906
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   907
    If s is not a valid boolean, returns None.
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   908
    """
a88a4720c2f0 parsebool: create new function and use it for config parsing
Augie Fackler <durin42@gmail.com>
parents: 12086
diff changeset
   909
    return _booleans.get(s.lower(), None)
37290
cc5a040fe150 wireproto: syntax for encoding CBOR into frames
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37247
diff changeset
   910
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40684
diff changeset
   911
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   912
# TODO: make arg mandatory (and fix code below?)
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
   913
def parselist(value: Optional[bytes]):
47189
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   914
    """parse a configuration value as a list of comma/space separated strings
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   915
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   916
    >>> parselist(b'this,is "a small" ,test')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   917
    ['this', 'is', 'a small', 'test']
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   918
    """
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   919
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   920
    def _parse_plain(parts, s, offset):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   921
        whitespace = False
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   922
        while offset < len(s) and (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   923
            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   924
        ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   925
            whitespace = True
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   926
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   927
        if offset >= len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   928
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   929
        if whitespace:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   930
            parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   931
        if s[offset : offset + 1] == b'"' and not parts[-1]:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   932
            return _parse_quote, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   933
        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   934
            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   935
            return _parse_plain, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   936
        parts[-1] += s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   937
        return _parse_plain, parts, offset + 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   938
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   939
    def _parse_quote(parts, s, offset):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   940
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   941
            parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   942
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   943
            while offset < len(s) and (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   944
                s[offset : offset + 1].isspace()
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   945
                or s[offset : offset + 1] == b','
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   946
            ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   947
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   948
            return _parse_plain, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   949
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   950
        while offset < len(s) and s[offset : offset + 1] != b'"':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   951
            if (
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   952
                s[offset : offset + 1] == b'\\'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   953
                and offset + 1 < len(s)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   954
                and s[offset + 1 : offset + 2] == b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   955
            ):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   956
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   957
                parts[-1] += b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   958
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   959
                parts[-1] += s[offset : offset + 1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   960
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   961
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   962
        if offset >= len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   963
            real_parts = _configlist(parts[-1])
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   964
            if not real_parts:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   965
                parts[-1] = b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   966
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   967
                real_parts[0] = b'"' + real_parts[0]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   968
                parts = parts[:-1]
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   969
                parts.extend(real_parts)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   970
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   971
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   972
        offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   973
        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   974
            offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   975
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   976
        if offset < len(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   977
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   978
                parts[-1] += b'"'
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   979
                offset += 1
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   980
            else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   981
                parts.append(b'')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   982
        else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   983
            return None, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   984
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   985
        return _parse_plain, parts, offset
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   986
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   987
    def _configlist(s):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   988
        s = s.rstrip(b' ,')
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   989
        if not s:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   990
            return []
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   991
        parser, parts, offset = _parse_plain, [b''], 0
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   992
        while parser:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   993
            parser, parts, offset = parser(parts, s, offset)
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   994
        return parts
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   995
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   996
    if value is not None and isinstance(value, bytes):
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   997
        result = _configlist(value.lstrip(b' ,\n'))
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   998
    else:
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   999
        result = value
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
  1000
    return result or []
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
  1001
b0e92313107e parselist: move the function from config to stringutil
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
  1002
49575
bbbb5213d043 typing: add basic type hints to stringutil.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 49284
diff changeset
  1003
def evalpythonliteral(s: bytes):
37476
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
  1004
    """Evaluate a string containing a Python literal expression"""
e9dea82ea1f3 wireproto: convert python literal to object without using unsafe eval()
Yuya Nishihara <yuya@tcha.org>
parents: 37322
diff changeset
  1005
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
48911
46b3ecfb16e2 stringutil: remove Python 2 support code
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48875
diff changeset
  1006
    return ast.literal_eval(s.decode('latin1'))