py3kcompat: added a "compatibility layer" for py3k
This patch adds some ugly constructs. The first of them is bytesformatter, a
function that formats strings the same way the '%' operator does. The main
motivation for this function is py3k's strange behavior:
>>> 'foo %s' % b'bar'
"foo b'bar'"
>>> b'foo %s' % b'bar'
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for %: 'bytes' and 'bytes'
>>> b'foo %s' % 'bar'
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for %: 'bytes' and 'str'
In other words, bytes can't be formatted with '%' in py3k. Since all mercurial
strings will be converted to bytes by a fixer, things will break badly unless
we provide our own formatting function.
The other addition in this patch is that the os.environ dictionary is
monkeypatched to have bytes items. Hopefully this won't be needed in the
future, as python 3.2 might get an os.environb dictionary that holds bytes
items.
"""Fixer that changes plain strings to bytes strings."""
import re
from lib2to3 import fixer_base
from lib2to3.pgen2 import token
from lib2to3.fixer_util import Name
from lib2to3.pygram import python_symbols as syms
# Matches the beginning of a plain string literal: an optional raw prefix
# (r or R) immediately followed by a single or double quote.
_re = re.compile(r'[rR]?[\'\"]')

# XXX: Implementing a blacklist in 2to3 turned out to be more troublesome than
# blacklisting some modules inside the fixers. So, this is what I came up with.
# Files listed here are left untouched by the fixer.
blacklist = [
    'mercurial/demandimport.py',
    'mercurial/py3kcompat.py',  # valid python 3 already
    'mercurial/i18n.py',
]
def isdocstring(node):
    """Tell whether the string leaf *node* is documentation rather than data.

    The parent of *node* is examined and True is returned when any of the
    following holds (checked in this order):

    * the parent is a simple statement sitting directly under file_input,
      i.e. the string is a lone string at module level;
    * the parent, or one of its ancestors, is an expression statement that
      has the name ``__doc__`` among its direct children;
    * the parent is a simple statement located somewhere inside a function
      or class definition.

    Otherwise False is returned.
    """
    definitions = (syms.funcdef, syms.classdef)

    def lonemodulestring(stmt):
        # A simple statement whose parent is file_input. Running out of
        # parents (None has no .type) means "no".
        try:
            return (stmt.type == syms.simple_stmt and
                    stmt.parent.type == syms.file_input)
        except AttributeError:
            return False

    def assignedtodoc(current):
        # Climb towards the root until an expression statement mentioning
        # __doc__ is met; stepping past the root raises AttributeError,
        # which means no such statement encloses the string.
        try:
            while not (current.type == syms.expr_stmt and
                       Name('__doc__') in current.children):
                current = current.parent
        except AttributeError:
            return False
        return True

    def lonestringindefinition(current):
        # Only a string that is a statement on its own qualifies.
        if current.type != syms.simple_stmt:
            return False
        # Climb until a function or class definition is found, or until we
        # step past the root.
        try:
            while current.type not in definitions:
                current = current.parent
        except AttributeError:
            return False
        return True

    parent = node.parent
    checks = (lonemodulestring, assignedtodoc, lonestringindefinition)
    return any(check(parent) for check in checks)
def shouldtransform(node):
    """Tell whether the string literal *node* may be turned into bytes.

    Returns False for the special names (currently only ``__main__``) and
    for any literal whose great-grandparent's source text mentions getattr,
    hasattr, setattr, encode or decode. Returns True otherwise.

    ``node.value`` is the literal as written in the source, prefix and
    quotes included (e.g. ``"'__main__'"``), so it is unwrapped before being
    compared against the special names. The raw value is still checked as
    well, so a value that is already a bare name keeps being recognised.
    """
    specialnames = ['__main__']

    literal = node.value
    contents = literal
    # Drop any string prefix, then peel one matching pair of quotes. Triple
    # quotes are tried first so '''x''' is not mistaken for 'x' with extra
    # quote characters inside.
    unprefixed = literal.lstrip('rRuUbB')
    for quote in ('"""', "'''", '"', "'"):
        if (len(unprefixed) >= 2 * len(quote) and
                unprefixed.startswith(quote) and
                unprefixed.endswith(quote)):
            contents = unprefixed[len(quote):-len(quote)]
            break

    if literal in specialnames or contents in specialnames:
        return False

    # Crude textual heuristic: look at the source of the node three levels up
    # and leave the literal alone when it appears next to attribute access
    # helpers or encode/decode calls.
    ggparent = node.parent.parent.parent
    sggparent = str(ggparent)

    keepasstr = ('getattr', 'hasattr', 'setattr', 'encode', 'decode')
    if any(word in sggparent for word in keepasstr):
        return False

    return True
class FixBytes(fixer_base.BaseFix):
    """Fixer that prefixes plain string literals with ``b``."""

    PATTERN = 'STRING'

    def transform(self, node, results):
        """Return a bytes-prefixed copy of *node*, or None to leave it alone.

        The literal is left untouched when the current file is blacklisted,
        when the node is not a STRING token or does not start like a plain
        (optionally raw) string literal, when it is a docstring, or when
        shouldtransform() rejects it. *results* is not used.
        """
        # Whole files can opt out of the conversion.
        if self.filename in blacklist:
            return None

        # Only plain or raw literals qualify; anything with another prefix
        # fails the pattern match.
        if node.type != token.STRING or not _re.match(node.value):
            return None

        # Documentation and the special cases stay as str.
        if isdocstring(node) or not shouldtransform(node):
            return None

        converted = node.clone()
        converted.value = 'b' + converted.value
        return converted