# HG changeset patch # User Yuya Nishihara # Date 1527779635 -32400 # Node ID 47dd23e6b116fa4dec5be612169332896c936c5b # Parent 1d68fd5f614a56a2a8df9c172c1b139b4f44f79d byteify-strings: try to preserve column alignment diff -r 1d68fd5f614a -r 47dd23e6b116 contrib/byteify-strings.py --- a/contrib/byteify-strings.py Thu May 31 23:44:35 2018 +0900 +++ b/contrib/byteify-strings.py Fri Jun 01 00:13:55 2018 +0900 @@ -18,6 +18,11 @@ import token import tokenize +def adjusttokenpos(t, ofs): + """Adjust start/end column of the given token""" + return t._replace(start=(t.start[0], t.start[1] + ofs), + end=(t.end[0], t.end[1] + ofs)) + if True: def replacetokens(tokens, opts): """Transform a stream of tokens from raw to Python 3. @@ -78,7 +83,35 @@ if st.type == token.STRING and st.string.startswith(("'", '"')): sysstrtokens.add(st) + coldelta = 0 # column increment for new opening parens + coloffset = -1 # column offset for the current line (-1: TBD) + parens = [(0, 0, 0)] # stack of (line, end-column, column-offset) for i, t in enumerate(tokens): + # Compute the column offset for the current line, such that + # the current line will be aligned to the last opening paren + # as before. + if coloffset < 0: + if t.start[1] == parens[-1][1]: + coloffset = parens[-1][2] + elif t.start[1] + 1 == parens[-1][1]: + # fix misaligned indent of s/util.Abort/error.Abort/ + coloffset = parens[-1][2] + (parens[-1][1] - t.start[1]) + else: + coloffset = 0 + + # Reset per-line attributes at EOL. + if t.type in (token.NEWLINE, tokenize.NL): + yield adjusttokenpos(t, coloffset) + coldelta = 0 + coloffset = -1 + continue + + # Remember the last paren position. + if _isop(i, '(', '[', '{'): + parens.append(t.end + (coloffset + coldelta,)) + elif _isop(i, ')', ']', '}'): + parens.pop() + # Convert most string literals to byte literals. String literals # in Python 2 are bytes. String literals in Python 3 are unicode. # Most strings in Mercurial are bytes and unicode strings are rare. @@ -97,17 +130,19 @@ # components touching docstrings need to handle unicode, # unfortunately. if s[0:3] in ("'''", '"""'): - yield t + yield adjusttokenpos(t, coloffset) continue # If the first character isn't a quote, it is likely a string # prefixing character (such as 'b', 'u', or 'r'. Ignore. if s[0] not in ("'", '"'): - yield t + yield adjusttokenpos(t, coloffset) continue # String literal. Prefix to make a b'' string. - yield t._replace(string='b%s' % t.string) + yield adjusttokenpos(t._replace(string='b%s' % t.string), + coloffset) + coldelta += 1 continue # This looks like a function call. @@ -132,11 +167,11 @@ # It changes iteritems/values to items/values as they are not # present in Python 3 world. elif opts['dictiter'] and fn in ('iteritems', 'itervalues'): - yield t._replace(string=fn[4:]) + yield adjusttokenpos(t._replace(string=fn[4:]), coloffset) continue # Emit unmodified token. - yield t + yield adjusttokenpos(t, coloffset) def process(fin, fout, opts): tokens = tokenize.tokenize(fin.readline)