Mercurial > hg
annotate contrib/import-checker.py @ 20038:c65a6937b828
import-checker: try a little harder to show fewer cycles
This makes sure that all cycles begin with the lexicographically first
module, so that we're less likely to show overlapping cycles in the
final analysis.
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Sun, 17 Nov 2013 13:33:20 -0500 |
parents | 957b43371928 |
children | 761f2929a6ad |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The extension of the last path component (``.py``, ``.so``, ...) is
    dropped, and the remaining components are joined with dots. Both '/'
    and the platform's own separator (``os.sep``) are accepted, because
    callers pass in paths assembled with ``os.path.join``. A final
    component without any extension is kept as it is.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('lib-dynload/zlib.so')
    'lib-dynload.zlib'
    """
    # Normalise the native separator first so one split handles every platform.
    parts = path.replace(os.sep, '/').split('/')
    # splitext() only removes a real extension; slicing off a fixed three
    # characters would mangle names that have none.
    parts[-1] = os.path.splitext(parts[-1])[0]
    return '.'.join(parts)
15 | |
16 | |
def list_stdlib_modules():
    """Generate the names of the modules that make up the stdlib.

    The names come from three places, in this order: the interpreter's
    built-in modules, a short hard-coded list of modules the directory
    walk would not find, and every ``.py``/``.so`` file found under the
    sys.path entries that live below sys.prefix or sys.exec_prefix
    (anything inside a ``site-packages`` directory is skipped).

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    always_stdlib = (
        # These modules only exist on windows, but we should always
        # consider them stdlib.
        'msvcrt', '_winreg',
        # These get missed too
        'ctypes', 'email',
        # python3 only
        'builtins',
    )
    for builtin in sys.builtin_module_names:
        yield builtin
    for extra in always_stdlib:
        yield extra

    # Only sys.path entries below one of these roots are walked.
    roots = (sys.prefix, sys.exec_prefix)
    for libpath in sys.path:
        if 'site-packages' in libpath or not libpath.startswith(roots):
            continue
        # Number of leading characters to drop to make a path relative
        # to libpath (the directory itself plus one separator).
        skip = len(libpath) + 1
        for top, _dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py' or not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' not in full_path:
                    yield dotted_name_of_path(full_path[skip:])
71 | |
# Every module name produced by list_stdlib_modules(), collected into a set
# once when this file is loaded; the checks below test membership in it.
stdlib_modules = set(list_stdlib_modules())
73 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      The module names imported by the given source. ``import a.b``
      yields ``'a.b'``; ``from a import b`` yields ``'a.b'``; and
      ``from . import b``, which names no module, yields just ``'b'``.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module itself) count as
        # column zero so that the walk is not cut short.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating it
            # with '.' would raise TypeError, so use an empty prefix.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
106 | |
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    Every ``import a, b, ...`` statement in the source is examined (at
    any nesting level); one message is yielded for each statement that
    names both stdlib and non-stdlib modules. ``from ... import``
    statements are not examined.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # Map each imported name to whether it is a stdlib module; a dict
        # collapses a name that is repeated within one statement.
        from_stdlib = {}
        for n in node.names:
            from_stdlib[n.name] = n.name in stdlib_modules
        num_std = len([flag for flag in from_stdlib.values() if flag])
        # All-stdlib or no-stdlib statements are fine; anything else mixes.
        if num_std not in (len(from_stdlib), 0):
            # Iterate the dict directly rather than via iterkeys(), which
            # does not exist on Python 3.
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(from_stdlib)))
131 | |
class CircularImport(Exception):
    """Raised by check_one_mod() when it walks into an import cycle.

    The single exception argument is the cycle itself: a list of module
    names that starts and ends with the same module.
    """
    pass
134 | |
135 | |
def cyclekey(names):
    """Return a key identifying a cycle by the set of modules in it.

    The key is a tuple of the distinct names in sorted order, so the same
    modules produce the same key regardless of where the cycle starts or
    how often a name is repeated.

    >>> cyclekey(['b', 'a', 'b'])
    ('a', 'b')
    """
    members = list(set(names))
    members.sort()
    return tuple(members)
138 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Walk the imports reachable from ``mod`` looking for a cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: modules already visited on the way to ``mod`` (used by the
            recursion; callers normally leave it unset).
      ignore: container of cyclekey() values for cycles that should not
              be reported again.

    Raises:
      CircularImport: for the first cycle found whose cyclekey() is not
        in ``ignore``; the exception argument is the cycle as a list.

    An imported name that is not in stdlib_modules is taken to be a
    sibling of ``mod``: it is prefixed with everything before the last
    dot of ``mod``.
    """
    trail = ([] if path is None else path) + [mod]
    known = [] if ignore is None else ignore
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            target = package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known)
            continue
        # target was already visited: the cycle runs from its first
        # occurrence to the end of the trail and back to target.
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in known:
            raise CircularImport(cycle)
155 | |
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module is listed first

    ``cycle`` is a list of module names describing a closed walk, i.e. its
    last entry repeats its first one. The result describes the same walk
    but starts (and therefore also ends) at the smallest name. The input
    list is not modified. A list whose ends differ is treated as a cycle
    whose closing entry was left out; an empty list yields an empty list.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    if not cycle:
        return []
    nodes = list(cycle)
    # Drop the closing repeat before rotating; otherwise it is emitted a
    # second time whenever the lowest module is already in first position.
    if len(nodes) > 1 and nodes[0] == nodes[-1]:
        nodes.pop()
    idx = nodes.index(min(nodes))
    return nodes[idx:] + nodes[:idx] + [nodes[idx]]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping a dotted module name to the names imported
               by that module.

    Returns:
      A list of strings of the form ``a -> b -> a``, one for each
      distinct set of modules found to form a cycle, each starting with
      its lexicographically first module.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles already recorded are passed as ``ignore`` so the walk
            # steps over them and can surface a different one.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
185 | |
def _cycle_sortkey(c):
    """Sort key for a rendered cycle: shorter strings first, ties broken
    by the string itself."""
    size = len(c)
    return (size, c)
188 | |
def main(argv):
    """Check the given source files for import problems.

    Args:
      argv: the command line; argv[0] is the program name and every
            further entry is the relative path of a python source file.

    Returns:
      1 if no files were given (after printing a usage message),
      otherwise True if any problem was reported and False if none was.
      The value is meant to be used as the process exit status, so a
      clean run maps to 0 and any failure to 1.

    Two kinds of problems are printed: ``import`` statements that mix
    stdlib and local modules, and import cycles between the given files.
    """
    if len(argv) < 2:
        if argv:
            prog = argv[0]
        else:
            prog = 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # The with-block closes the file even if reading it fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Truthy means failure, matching the usage path above and the
    # convention that a non-zero exit status signals an error.
    return any_errors
219 | |
# Script entry point: hand the command line to main() and use its result,
# coerced to an integer, as the process exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))