comparison mercurial/__init__.py @ 43103:c95b2f40db7c

py3: stop normalizing 2nd argument of *attr() to unicode

Now that we don't byteify strings, we can stop normalizing the 2nd string argument to getattr() and remove explicit overrides we were using in the code base.

We no longer use some helper functions in the source transformer, so we remove those as well.

Differential Revision: https://phab.mercurial-scm.org/D7012
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 06 Oct 2019 17:45:05 -0400
parents 127cc1f72e70
children 74802979dd9d
comparison
equal deleted inserted replaced
43102:829088e87032 43103:c95b2f40db7c
108 try: 108 try:
109 return tokens[j].type == token.OP and tokens[j].string in o 109 return tokens[j].type == token.OP and tokens[j].string in o
110 except IndexError: 110 except IndexError:
111 return False 111 return False
112 112
113 def _findargnofcall(n):
114 """Find arg n of a call expression (start at 0)
115
116 Returns index of the first token of that argument, or None if
117 there is not that many arguments.
118
119 Assumes that token[i + 1] is '('.
120
121 """
122 nested = 0
123 for j in range(i + 2, len(tokens)):
124 if _isop(j, ')', ']', '}'):
125 # end of call, tuple, subscription or dict / set
126 nested -= 1
127 if nested < 0:
128 return None
129 elif n == 0:
130 # this is the starting position of arg
131 return j
132 elif _isop(j, '(', '[', '{'):
133 nested += 1
134 elif _isop(j, ',') and nested == 0:
135 n -= 1
136
137 return None
138
139 def _ensureunicode(j):
140 """Make sure the token at j is a unicode string
141
142 This rewrites a string token to include the unicode literal prefix
143 so the string transformer won't add the byte prefix.
144
145 Ignores tokens that are not strings. Assumes bounds checking has
146 already been done.
147
148 """
149 st = tokens[j]
150 if st.type == token.STRING and st.string.startswith(("'", '"')):
151 tokens[j] = st._replace(string='u%s' % st.string)
152
153 for i, t in enumerate(tokens): 113 for i, t in enumerate(tokens):
154 # This looks like a function call. 114 # This looks like a function call.
155 if t.type == token.NAME and _isop(i + 1, '('): 115 if t.type == token.NAME and _isop(i + 1, '('):
156 fn = t.string 116 fn = t.string
157 117
158 # *attr() builtins don't accept byte strings to 2nd argument.
159 if fn in (
160 'getattr',
161 'setattr',
162 'hasattr',
163 'safehasattr',
164 ) and not _isop(i - 1, '.'):
165 arg1idx = _findargnofcall(1)
166 if arg1idx is not None:
167 _ensureunicode(arg1idx)
168
169 # It changes iteritems/values to items/values as they are not 118 # It changes iteritems/values to items/values as they are not
170 # present in Python 3 world. 119 # present in Python 3 world.
171 elif fn in ('iteritems', 'itervalues') and not ( 120 if fn in ('iteritems', 'itervalues') and not (
172 tokens[i - 1].type == token.NAME 121 tokens[i - 1].type == token.NAME
173 and tokens[i - 1].string == 'def' 122 and tokens[i - 1].string == 'def'
174 ): 123 ):
175 yield t._replace(string=fn[4:]) 124 yield t._replace(string=fn[4:])
176 continue 125 continue
180 129
181 # Header to add to bytecode files. This MUST be changed when 130 # Header to add to bytecode files. This MUST be changed when
182 # ``replacetoken`` or any mechanism that changes semantics of module 131 # ``replacetoken`` or any mechanism that changes semantics of module
183 # loading is changed. Otherwise cached bytecode may get loaded without 132 # loading is changed. Otherwise cached bytecode may get loaded without
184 # the new transformation mechanisms applied. 133 # the new transformation mechanisms applied.
185 BYTECODEHEADER = b'HG\x00\x13' 134 BYTECODEHEADER = b'HG\x00\x14'
186 135
187 class hgloader(importlib.machinery.SourceFileLoader): 136 class hgloader(importlib.machinery.SourceFileLoader):
188 """Custom module loader that transforms source code. 137 """Custom module loader that transforms source code.
189 138
190 When the source code is converted to a code object, we transform 139 When the source code is converted to a code object, we transform