Mercurial > hg
changeset 38391:f77bbd34a1df
byteify-strings: remove superfluous "if True" block
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 03 Jun 2018 18:19:54 +0900 |
parents | 47dd23e6b116 |
children | 81a4be7099fa |
files | contrib/byteify-strings.py |
diffstat | 1 files changed, 128 insertions(+), 129 deletions(-) [+] |
line wrap: on
line diff
--- a/contrib/byteify-strings.py Fri Jun 01 00:13:55 2018 +0900 +++ b/contrib/byteify-strings.py Sun Jun 03 18:19:54 2018 +0900 @@ -23,155 +23,154 @@ return t._replace(start=(t.start[0], t.start[1] + ofs), end=(t.end[0], t.end[1] + ofs)) -if True: - def replacetokens(tokens, opts): - """Transform a stream of tokens from raw to Python 3. +def replacetokens(tokens, opts): + """Transform a stream of tokens from raw to Python 3. + + Returns a generator of possibly rewritten tokens. + + The input token list may be mutated as part of processing. However, + its changes do not necessarily match the output token stream. + """ + sysstrtokens = set() - Returns a generator of possibly rewritten tokens. + # The following utility functions access the tokens list and i index of + # the for i, t enumerate(tokens) loop below + def _isop(j, *o): + """Assert that tokens[j] is an OP with one of the given values""" + try: + return tokens[j].type == token.OP and tokens[j].string in o + except IndexError: + return False - The input token list may be mutated as part of processing. However, - its changes do not necessarily match the output token stream. - """ - sysstrtokens = set() + def _findargnofcall(n): + """Find arg n of a call expression (start at 0) + + Returns index of the first token of that argument, or None if + there is not that many arguments. + + Assumes that token[i + 1] is '('. - # The following utility functions access the tokens list and i index of - # the for i, t enumerate(tokens) loop below - def _isop(j, *o): - """Assert that tokens[j] is an OP with one of the given values""" - try: - return tokens[j].type == token.OP and tokens[j].string in o - except IndexError: - return False + """ + nested = 0 + for j in range(i + 2, len(tokens)): + if _isop(j, ')', ']', '}'): + # end of call, tuple, subscription or dict / set + nested -= 1 + if nested < 0: + return None + elif n == 0: + # this is the starting position of arg + return j + elif _isop(j, '(', '[', '{'): + nested += 1 + elif _isop(j, ',') and nested == 0: + n -= 1 - def _findargnofcall(n): - """Find arg n of a call expression (start at 0) + return None + + def _ensuresysstr(j): + """Make sure the token at j is a system string - Returns index of the first token of that argument, or None if - there is not that many arguments. + Remember the given token so the string transformer won't add + the byte prefix. - Assumes that token[i + 1] is '('. + Ignores tokens that are not strings. Assumes bounds checking has + already been done. - """ - nested = 0 - for j in range(i + 2, len(tokens)): - if _isop(j, ')', ']', '}'): - # end of call, tuple, subscription or dict / set - nested -= 1 - if nested < 0: - return None - elif n == 0: - # this is the starting position of arg - return j - elif _isop(j, '(', '[', '{'): - nested += 1 - elif _isop(j, ',') and nested == 0: - n -= 1 + """ + st = tokens[j] + if st.type == token.STRING and st.string.startswith(("'", '"')): + sysstrtokens.add(st) - return None - - def _ensuresysstr(j): - """Make sure the token at j is a system string - - Remember the given token so the string transformer won't add - the byte prefix. - - Ignores tokens that are not strings. Assumes bounds checking has - already been done. + coldelta = 0 # column increment for new opening parens + coloffset = -1 # column offset for the current line (-1: TBD) + parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) + for i, t in enumerate(tokens): + # Compute the column offset for the current line, such that + # the current line will be aligned to the last opening paren + # as before. + if coloffset < 0: + if t.start[1] == parens[-1][1]: + coloffset = parens[-1][2] + elif t.start[1] + 1 == parens[-1][1]: + # fix misaligned indent of s/util.Abort/error.Abort/ + coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) + else: + coloffset = 0 - """ - st = tokens[j] - if st.type == token.STRING and st.string.startswith(("'", '"')): - sysstrtokens.add(st) + # Reset per-line attributes at EOL. + if t.type in (token.NEWLINE, tokenize.NL): + yield adjusttokenpos(t, coloffset) + coldelta = 0 + coloffset = -1 + continue + + # Remember the last paren position. + if _isop(i, '(', '[', '{'): + parens.append(t.end + (coloffset + coldelta,)) + elif _isop(i, ')', ']', '}'): + parens.pop() - coldelta = 0 # column increment for new opening parens - coloffset = -1 # column offset for the current line (-1: TBD) - parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) - for i, t in enumerate(tokens): - # Compute the column offset for the current line, such that - # the current line will be aligned to the last opening paren - # as before. - if coloffset < 0: - if t.start[1] == parens[-1][1]: - coloffset = parens[-1][2] - elif t.start[1] + 1 == parens[-1][1]: - # fix misaligned indent of s/util.Abort/error.Abort/ - coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) - else: - coloffset = 0 + # Convert most string literals to byte literals. String literals + # in Python 2 are bytes. String literals in Python 3 are unicode. + # Most strings in Mercurial are bytes and unicode strings are rare. + # Rather than rewrite all string literals to use ``b''`` to indicate + # byte strings, we apply this token transformer to insert the ``b`` + # prefix nearly everywhere. + if t.type == token.STRING and t not in sysstrtokens: + s = t.string - # Reset per-line attributes at EOL. - if t.type in (token.NEWLINE, tokenize.NL): + # Preserve docstrings as string literals. This is inconsistent + # with regular unprefixed strings. However, the + # "from __future__" parsing (which allows a module docstring to + # exist before it) doesn't properly handle the docstring if it + # is b''' prefixed, leading to a SyntaxError. We leave all + # docstrings as unprefixed to avoid this. This means Mercurial + # components touching docstrings need to handle unicode, + # unfortunately. + if s[0:3] in ("'''", '"""'): yield adjusttokenpos(t, coloffset) - coldelta = 0 - coloffset = -1 continue - # Remember the last paren position. - if _isop(i, '(', '[', '{'): - parens.append(t.end + (coloffset + coldelta,)) - elif _isop(i, ')', ']', '}'): - parens.pop() - - # Convert most string literals to byte literals. String literals - # in Python 2 are bytes. String literals in Python 3 are unicode. - # Most strings in Mercurial are bytes and unicode strings are rare. - # Rather than rewrite all string literals to use ``b''`` to indicate - # byte strings, we apply this token transformer to insert the ``b`` - # prefix nearly everywhere. - if t.type == token.STRING and t not in sysstrtokens: - s = t.string - - # Preserve docstrings as string literals. This is inconsistent - # with regular unprefixed strings. However, the - # "from __future__" parsing (which allows a module docstring to - # exist before it) doesn't properly handle the docstring if it - # is b''' prefixed, leading to a SyntaxError. We leave all - # docstrings as unprefixed to avoid this. This means Mercurial - # components touching docstrings need to handle unicode, - # unfortunately. - if s[0:3] in ("'''", '"""'): - yield adjusttokenpos(t, coloffset) - continue - - # If the first character isn't a quote, it is likely a string - # prefixing character (such as 'b', 'u', or 'r'. Ignore. - if s[0] not in ("'", '"'): - yield adjusttokenpos(t, coloffset) - continue - - # String literal. Prefix to make a b'' string. - yield adjusttokenpos(t._replace(string='b%s' % t.string), - coloffset) - coldelta += 1 + # If the first character isn't a quote, it is likely a string + # prefixing character (such as 'b', 'u', or 'r'. Ignore. + if s[0] not in ("'", '"'): + yield adjusttokenpos(t, coloffset) continue - # This looks like a function call. - if t.type == token.NAME and _isop(i + 1, '('): - fn = t.string + # String literal. Prefix to make a b'' string. + yield adjusttokenpos(t._replace(string='b%s' % t.string), + coloffset) + coldelta += 1 + continue - # *attr() builtins don't accept byte strings to 2nd argument. - if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and - not _isop(i - 1, '.')): - arg1idx = _findargnofcall(1) - if arg1idx is not None: - _ensuresysstr(arg1idx) + # This looks like a function call. + if t.type == token.NAME and _isop(i + 1, '('): + fn = t.string + + # *attr() builtins don't accept byte strings to 2nd argument. + if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and + not _isop(i - 1, '.')): + arg1idx = _findargnofcall(1) + if arg1idx is not None: + _ensuresysstr(arg1idx) - # .encode() and .decode() on str/bytes/unicode don't accept - # byte strings on Python 3. - elif fn in ('encode', 'decode') and _isop(i - 1, '.'): - for argn in range(2): - argidx = _findargnofcall(argn) - if argidx is not None: - _ensuresysstr(argidx) + # .encode() and .decode() on str/bytes/unicode don't accept + # byte strings on Python 3. + elif fn in ('encode', 'decode') and _isop(i - 1, '.'): + for argn in range(2): + argidx = _findargnofcall(argn) + if argidx is not None: + _ensuresysstr(argidx) - # It changes iteritems/values to items/values as they are not - # present in Python 3 world. - elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): - yield adjusttokenpos(t._replace(string=fn[4:]), coloffset) - continue + # It changes iteritems/values to items/values as they are not + # present in Python 3 world. + elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): + yield adjusttokenpos(t._replace(string=fn[4:]), coloffset) + continue - # Emit unmodified token. - yield adjusttokenpos(t, coloffset) + # Emit unmodified token. + yield adjusttokenpos(t, coloffset) def process(fin, fout, opts): tokens = tokenize.tokenize(fin.readline)