author | Siddharth Agarwal <sid0@fb.com> |
Wed, 13 Aug 2014 15:51:33 -0700 | |
branch | stable |
changeset 22170 | 0e1b02f984c7 |
parent 20391 | 466e4c574db0 |
child 22974 | 6bd43614d387 |
permissions | -rw-r--r-- |
20036 | 1 |
import ast |
2 |
import os |
|
3 |
import sys |
|
4 |
||
20198
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
5 |
# Import a minimal set of stdlib modules needed for list_stdlib_modules() |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
6 |
# to work when run from a virtualenv. The modules were chosen empirically |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
7 |
# so that the return value matches the return value without virtualenv. |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
8 |
import BaseHTTPServer |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
9 |
import zlib |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
10 |
|
20036 | 11 |
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Everything after the first '.' of the file name is dropped, which
    removes '.py', '.so' and '.ARCH.so' alike.  A trailing 'module' on
    the file name (as in 'zlibmodule.so') is dropped too, unless
    the file name is exactly 'module'.

    Args:
      path: Relative path of the file, using either '/' or the
          platform's own separator (os.sep).

    Returns:
      The path components joined with '.'.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    >>> dotted_name_of_path('pkg/module.py')
    'pkg.module'
    """
    # Callers build paths with os.path.join(), so accept the native
    # separator as well as '/'.
    if os.sep != '/':
        path = path.replace(os.sep, '/')
    parts = path.split('/')
    parts[-1] = parts[-1].split('.', 1)[0] # remove .py and .so and .ARCH.so
    leaf = parts[-1]
    # Strip the 'module' suffix, but never reduce the name to nothing.
    if leaf.endswith('module') and leaf != 'module':
        parts[-1] = leaf[:-len('module')]
    return '.'.join(parts)
24 |
||
25 |
||
26 |
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator.  It yields, in order: the interpreter's
    built-in module names, a few hard-coded names, and then one dotted
    name for every '.py'/'.so' file (and every package directory)
    found under the sys.path entries that live below a stdlib prefix.
    Anything in a 'site-packages' directory is skipped.  Names may be
    yielded more than once; callers are expected to build a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins' # python3 only
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        # Only add the directory when no known prefix already covers it.
        if not any(dirname.startswith(p)
                   for p in stdlib_prefixes): # no-py24
            stdlib_prefixes.add(dirname)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in stdlib_prefixes. check-code suppressed because
        # the ast module used by this script implies the availability
        # of any().
        if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune third-party trees in place so os.walk() never
            # descends into them, instead of filtering file by file.
            dirs[:] = [d for d in dirs if 'site-packages' not in d]
            for name in files:
                if not name.endswith(('.py', '.so')):
                    continue
                if 'site-packages' in name:
                    continue
                if name == '__init__.py':
                    if top == libpath:
                        # libpath itself is not a package.
                        continue
                    # Report the package under its directory name.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                # relpath() is correct even when libpath ends with a
                # separator, where slicing by len(libpath) + 1 would
                # drop the first character of the name.
                rel_path = os.path.relpath(full_path, libpath)
                yield dotted_name_of_path(rel_path)
|
97 |
||
98 |
# Names treated as belonging to the standard library, computed once at
# import time.
stdlib_modules = set(list_stdlib_modules())
|
99 |
||
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
100 |
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    'import a.b' yields 'a.b'.  'from a import b' yields 'a.b'.  A
    relative import without a module part ('from . import b') yields
    just 'b'.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.  This also drops an
                     import that follows ';' on the same line.

    Returns:
      A generator over the module names imported by the given source.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) count as
        # column zero and are never skipped.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating
            # it with '.' would raise TypeError.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
|
132 |
||
133 |
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    Only plain 'import a, b' statements are examined; 'from x import y'
    is not.  This is a generator yielding one message per offending
    statement.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # Bucket the imported names by "is it in stdlib_modules?".
        # NOTE(review): the test is on the full dotted name, so a name
        # missing from stdlib_modules (e.g. 'os.path') lands in the
        # relative bucket -- confirm that is intended.
        from_stdlib = {False: [], True: []}
        for n in node.names:
            from_stdlib[n.name in stdlib_modules].append(n.name)
        if from_stdlib[True] and from_stdlib[False]:
            # NOTE(review): the spacing after each '\n' is reproduced as
            # it appears here; check it against the repository copy.
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(sorted(from_stdlib[True])),
                    ', '.join(sorted(from_stdlib[False]))))
20036 | 157 |
|
158 |
class CircularImport(Exception):
    """Raised when an import cycle is found.

    The first argument (args[0]) is the cycle as a list of module names.
    """
|
160 |
||
161 |
||
162 |
def cyclekey(names):
    """Return a hashable key for the set of modules in a cycle.

    The key is the sorted tuple of distinct names, so it does not
    depend on where the cycle starts or on repeated names.

    >>> cyclekey(['foo', 'bar', 'foo'])
    ('bar', 'foo')
    """
    return tuple(sorted(set(names)))
|
164 |
||
165 |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of mod depth-first, looking for a cycle.

    Args:
      mod: Dotted name of the module to start from.
      imports: Dict mapping a dotted module name to the names it
          imports.  Modules missing from the dict have no imports.
      path: Modules already on the current import chain (internal,
          used by the recursion).
      ignore: Container of cyclekey() values for cycles that must not
          be reported again.

    Raises:
      CircularImport: on the first cycle whose key is not in ignore.
          args[0] is the cycle as a list that starts and ends with
          the same module.
    """
    if path is None:
        path = []
    if ignore is None:
        ignore = []
    # Build a new list so sibling branches do not see each other's path.
    path = path + [mod]
    # Names outside the stdlib are taken as relative to mod's package.
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            target = package + '.' + name
        if target not in path:
            check_one_mod(target, imports, path=path, ignore=ignore)
            continue
        # target is already on the chain: cut the cycle out of the path.
        cycle = path[path.index(target):] + [target]
        if cyclekey(cycle) not in ignore:
            raise CircularImport(cycle)
|
181 |
||
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
182 |
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    cycle is a closed list: its last element repeats its first, as
    built by check_one_mod().  The result is closed too and has the
    same length.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    # Drop the closing duplicate before rotating and append a fresh one.
    # Otherwise a cycle that already starts at the lowest module gets
    # its last element doubled ("a -> b -> a -> a").
    return cycle[idx:-1] + cycle[:idx] + [lowest]


def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: Dict mapping a dotted module name to the names it
          imports.

    Returns:
      A list of strings such as 'a -> b -> a', one for each distinct
      set of modules that forms a cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles found so far are passed as "ignore", so every
            # CircularImport raised here is a new one.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
|
211 |
||
212 |
def _cycle_sortkey(c): |
|
213 |
return len(c), c |
|
214 |
||
215 |
def main(argv):
    """Check the given source files for import problems.

    Two checks are run: stdlib and relative imports mixed in one
    'import' statement, and import cycles between the given files
    (only imports starting in column zero are considered for cycles).
    Every problem is printed to stdout.

    Args:
      argv: Command line; argv[0] is the program name and the rest
          are paths of python source files.

    Returns:
      A true value if the usage was wrong or any problem was reported,
      a false value otherwise, so int() of the result can be used as
      the process exit status.
    """
    if len(argv) < 2:
        if argv:
            prog = argv[0]
        else:
            prog = 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        f = open(source_path)
        try:
            src = f.read()
        finally:
            # Close the file even if reading fails.
            f.close()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # True means failure, matching the "return 1" of the usage path.
    return any_errors
|
245 |
||
246 |
if __name__ == '__main__':
    # main() returns 1 or a bool; int() turns it into an exit status.
    sys.exit(int(main(sys.argv)))