Mercurial > hg
changeset 38389:1d68fd5f614a
byteify-strings: do not rewrite system string literals to u''
Rewriting them would make things worse on Python 2 because unicode processing is
generally slower than byte-string processing. We should just leave system strings
unmodified.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Thu, 31 May 2018 23:44:35 +0900 |
parents | f701bc936e7f |
children | 47dd23e6b116 |
files | contrib/byteify-strings.py |
diffstat | 1 files changed, 10 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/byteify-strings.py Thu May 31 22:34:23 2018 +0900
+++ b/contrib/byteify-strings.py Thu May 31 23:44:35 2018 +0900
@@ -27,6 +27,8 @@
     The input token list may be mutated as part of processing. However,
     its changes do not necessarily match the output token stream.
     """
+    sysstrtokens = set()
+
     # The following utility functions access the tokens list and i index of
     # the for i, t enumerate(tokens) loop below
     def _isop(j, *o):
@@ -62,11 +64,11 @@
 
         return None
 
-    def _ensureunicode(j):
-        """Make sure the token at j is a unicode string
+    def _ensuresysstr(j):
+        """Make sure the token at j is a system string
 
-        This rewrites a string token to include the unicode literal prefix
-        so the string transformer won't add the byte prefix.
+        Remember the given token so the string transformer won't add
+        the byte prefix.
 
         Ignores tokens that are not strings. Assumes bounds checking has
         already been done.
@@ -74,7 +76,7 @@
         """
         st = tokens[j]
         if st.type == token.STRING and st.string.startswith(("'", '"')):
-            tokens[j] = st._replace(string='u%s' % st.string)
+            sysstrtokens.add(st)
 
     for i, t in enumerate(tokens):
         # Convert most string literals to byte literals. String literals
@@ -83,7 +85,7 @@
         # Rather than rewrite all string literals to use ``b''`` to indicate
         # byte strings, we apply this token transformer to insert the ``b``
         # prefix nearly everywhere.
-        if t.type == token.STRING:
+        if t.type == token.STRING and t not in sysstrtokens:
             s = t.string
 
             # Preserve docstrings as string literals. This is inconsistent
@@ -117,7 +119,7 @@
                     not _isop(i - 1, '.')):
                 arg1idx = _findargnofcall(1)
                 if arg1idx is not None:
-                    _ensureunicode(arg1idx)
+                    _ensuresysstr(arg1idx)
 
             # .encode() and .decode() on str/bytes/unicode don't accept
             # byte strings on Python 3.
@@ -125,7 +127,7 @@
                 for argn in range(2):
                     argidx = _findargnofcall(argn)
                     if argidx is not None:
-                        _ensureunicode(argidx)
+                        _ensuresysstr(argidx)
 
         # It changes iteritems/values to items/values as they are not
         # present in Python 3 world.