comparison contrib/hgfixes/fix_bytes.py @ 11747:40d5633889bb

hgfixes: add a fixer to convert plain strings to bytestrings. This patch implements a 2to3 fixer that converts all plain strings in a Python source file to byte-string syntax. Example: foo = 'Normal string' would become foo = b'Normal string'. The motivation behind this fixer can be found in http://selenic.com/pipermail/mercurial-devel/2010-June/022363.html or, in other words: the current hg source assumes that _most_ strings are "meant" to be byte sequences, so it makes sense to perform the conversion implemented by this patch. As mentioned above, not all Mercurial modules want to use strings as bytes; examples include i18n (which uses unicode) and demandimport (in py3k, module names are normal strings, thus unicode, and there is no need for a conversion). Therefore, these modules are blacklisted in the fixer. There are also a few functions that can take only unicode arguments, so the conversion shouldn't be done for those.
author Renato Cunha <renatoc@gmail.com>
date Tue, 03 Aug 2010 13:41:47 -0300
parents
children 37a70a784397
comparison
equal deleted inserted replaced
11746:46ac30b17978 11747:40d5633889bb
1 """Fixer that changes plain strings to bytes strings."""
2
3 import re
4
5 from lib2to3 import fixer_base
6 from lib2to3.pgen2 import token
7 from lib2to3.fixer_util import Name
8 from lib2to3.pygram import python_symbols as syms
9
# Matches the start of a plain string literal: an optional raw prefix (r/R)
# immediately followed by an opening quote.  Because it is applied with
# match(), literals that begin with any other prefix do not match.
_re = re.compile(r'[rR]?[\'\"]')

# XXX: Implementing a blacklist in 2to3 turned out to be more troublesome than
# blacklisting some modules inside the fixers. So, this is what I came up with.

# Files whose string literals must be left untouched by the fixer.
blacklist = [
    'mercurial/demandimport.py',
    'mercurial/i18n.py',
]
18
def isdocstring(node):
    """Tell whether the string leaf ``node`` should be treated as documentation.

    ``node`` is considered a docstring when any of the following holds:

    * its parent is a simple statement sitting directly under ``file_input``
      (a lone string at module level);
    * its parent is a simple statement nested, at any depth, inside a
      function or class definition (a lone string in a def/class body);
    * some enclosing expression statement has the name ``__doc__`` among its
      direct children (an assignment to ``__doc__``).

    A node without a parent is never a docstring.  Returns True or False.
    """
    def ancestors(start):
        # Yield ``start`` and then each successive parent up to the root.
        # The walk stops explicitly at the root (parent is None) instead of
        # relying on an AttributeError to end it.
        current = start
        while current is not None:
            yield current
            current = current.parent

    def isclassorfunction(ancestor):
        # A lone string statement located somewhere inside a function or
        # class definition.
        if ancestor is None or ancestor.type != syms.simple_stmt:
            return False
        symbols = (syms.funcdef, syms.classdef)
        for candidate in ancestors(ancestor):
            if candidate.type in symbols:
                return True
        return False

    def ismodule(ancestor):
        # A lone string statement whose enclosing node is the file itself.
        if ancestor is None or ancestor.type != syms.simple_stmt:
            return False
        owner = ancestor.parent
        return owner is not None and owner.type == syms.file_input

    def isdocassignment(ancestor):
        # Assigning to __doc__, definitely a string.  The reference leaf is
        # built once rather than on every step of the walk.
        docname = Name('__doc__')
        for candidate in ancestors(ancestor):
            if (candidate.type == syms.expr_stmt and
                    docname in candidate.children):
                return True
        return False

    parent = node.parent
    return (ismodule(parent) or
            isdocassignment(parent) or
            isclassorfunction(parent))
61
def shouldtransform(node):
    """Decide whether the string leaf ``node`` may become a bytes literal.

    Returns False when the literal must stay a native string:

    * its text (without prefix and quotes) is one of the special names,
      currently only ``__main__``;
    * the source text of its great-grandparent node contains ``getattr``,
      ``hasattr``, ``setattr``, ``encode`` or ``decode``.  This is a plain
      substring test on the rendered code, so it is deliberately coarse.

    Returns True otherwise.
    """
    specialnames = ['__main__']

    # A STRING token's value is its source spelling: optional prefix plus the
    # surrounding quotes.  Comparing that raw value against the bare names
    # can never succeed, so strip prefix and quotes first.
    text = node.value.lstrip('rRuUbB')
    for quote in ('"""', "'''", '"', "'"):
        if (len(text) >= 2 * len(quote) and
                text.startswith(quote) and text.endswith(quote)):
            text = text[len(quote):-len(quote)]
            break

    if node.value in specialnames or text in specialnames:
        return False

    # NOTE(review): three levels up is presumably the whole call expression
    # when the literal is a call argument (argument list -> call trailer ->
    # call) -- confirm against the lib2to3 grammar.
    ggparent = node.parent.parent.parent
    sggparent = str(ggparent)

    for marker in ('getattr', 'hasattr', 'setattr', 'encode', 'decode'):
        if marker in sggparent:
            return False

    return True
79
class FixBytes(fixer_base.BaseFix):
    """2to3 fixer that prefixes plain string literals with ``b``."""

    PATTERN = 'STRING'

    def transform(self, node, results):
        """Return a ``b``-prefixed clone of ``node``, or None to skip it.

        The node is skipped (None is returned) when the file being processed
        is blacklisted, when the node is not a plain string literal, when it
        is a docstring, or when shouldtransform() rejects it.  ``results`` is
        not used.
        """
        # The filename may be prefixed with a directory (a build directory,
        # './', an absolute path), so blacklist entries are matched as path
        # suffixes; an exact match still qualifies.
        filename = self.filename
        if filename and filename.endswith(tuple(blacklist)):
            return
        if node.type != token.STRING:
            return
        # Only unprefixed or raw-prefixed literals are candidates.
        if not _re.match(node.value):
            return
        if isdocstring(node):
            return
        if not shouldtransform(node):
            return
        # Work on a copy so the original leaf is left untouched.
        new = node.clone()
        new.value = 'b' + new.value
        return new
96