Mercurial > hg
annotate contrib/import-checker.py @ 20301:4988e4246537
record: re-enable whitespace-ignoring options
It looks like somewhere down the line, patch.diffopts changed the
names of the options that it recognises, but record.recordfunc wasn't
updated to the new names. Instead of trying to write down names at
all, we now use whatever names are provided in commands.diffwsopts and
pass that along to patch.diffopts, along with a couple of custom
options.
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Thu, 16 Jan 2014 15:05:03 -0500 |
parents | 81e905790b30 |
children | 4990abb4729d |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
20198
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules() |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
6 # to work when run from a virtualenv. The modules were chosen empirically |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
7 # so that the return value matches the return value without virtualenv. |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
8 import BaseHTTPServer |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
9 import zlib |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
10 |
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    The file extension, whatever its length, is removed and every path
    component becomes one component of the dotted name. Both '/' and the
    platform separator (os.sep) are accepted between components, so paths
    produced by os.walk() work on any platform. A path without an
    extension is left intact rather than truncated.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/error')
    'mercurial.error'
    """
    # splitext only looks at the last path component, so dots in
    # directory names are never mistaken for an extension.
    root = os.path.splitext(path)[0]
    if os.sep != '/':
        root = root.replace(os.sep, '/')
    return '.'.join(root.split('/'))
21 | |
22 | |
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator. It yields, in order: the interpreter's builtin
    module names, a few hard-coded names, and then one dotted name for
    every '.py' or '.so' file found under each sys.path entry that lives
    below a stdlib prefix and is not part of a 'site-packages' tree.
    Names may be yielded more than once; callers are expected to put the
    result in a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    # These get missed too
    # NOTE(review): the walk below skips every __init__.py, so a package
    # name itself is never produced from the filesystem; only its
    # submodules are. Other stdlib packages are therefore absent too.
    for m in 'ctypes', 'email':
        yield m
    yield 'builtins' # python3 only
    stdlib_prefixes = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            stdlib_prefixes.add(dirname)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in stdlib_prefixes. check-code suppressed because
        # the ast module used by this script implies the availability
        # of any().
        if not any(libpath.startswith(p) for p in stdlib_prefixes): # no-py24
            continue
        if 'site-packages' in libpath:
            continue
        for top, dirs, files in os.walk(libpath):
            # Every file below a site-packages directory would be
            # rejected further down, so do not descend into it at all.
            dirs[:] = [d for d in dirs if 'site-packages' not in d]
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith(('.py', '.so')):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                # relpath instead of slicing by len(libpath) + 1: the
                # slice drops the first character of the module name
                # when the sys.path entry ends with a path separator.
                rel_path = os.path.relpath(full_path, libpath)
                mod = dotted_name_of_path(rel_path)
                yield mod
94 | |
# Computed once at import time: every name produced by
# list_stdlib_modules(), collected in a set for membership tests.
stdlib_modules = set(list_stdlib_modules())
96 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One dotted name per imported name. 'import a.b' yields 'a.b' and
      'from a import b' yields 'a.b'. A relative import without a module
      part ('from . import b') yields just 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import name"; there is no
            # module part to prepend in that case.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
129 | |
def verify_stdlib_on_own_line(source):
    """Check that no single 'import' statement mixes stdlib modules
    with other (relative local) modules.

    This is a generator: it yields one message string for every
    offending 'import a, b, ...' statement found anywhere in the given
    python source. 'from ... import' statements are not examined.

    Keeping the two kinds apart works around an annoying lib2to3 bug
    in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for stmt in ast.walk(ast.parse(source)):
        if not isinstance(stmt, ast.Import):
            continue
        names = set(alias.name for alias in stmt.names)
        stdlib_count = sum(1 for name in names if name in stdlib_modules)
        # Mixed means some, but not all, of the names are stdlib.
        if 0 < stdlib_count < len(names):
            listing = ', '.join(sorted(names))
            yield 'mixed stdlib and relative imports:\n %s' % listing
154 | |
class CircularImport(Exception):
    """Raised by check_one_mod() when it runs into an import cycle.

    The only exception argument is the cycle itself: a list of module
    names whose last entry repeats its first.
    """
    pass
157 | |
158 | |
def cyclekey(names):
    """Return an order-independent key for a cycle.

    The key is a tuple of the distinct module names in sorted order, so
    the same cycle entered from different modules gets the same key.
    """
    distinct = set(names)
    ordered = sorted(distinct)
    return tuple(ordered)
161 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of ``mod`` depth-first, looking for a cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: modules already visited on the way to ``mod`` (used by the
            recursion; callers normally leave it unset).
      ignore: container of cyclekey() values for cycles that should not
              be reported again.

    Raises:
      CircularImport: for the first cycle found whose key is not in
          ``ignore``; its argument is the list of modules in the cycle.
    """
    known = [] if ignore is None else ignore
    trail = ([] if path is None else path) + [mod]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            # Anything else is taken to be a sibling inside the package
            # that contains mod.
            target = mod.rsplit('.', 1)[0] + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=known)
            continue
        loop = trail[trail.index(target):] + [target]
        if cyclekey(loop) not in known:
            raise CircularImport(loop)
178 | |
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    ``cycle`` is a list of module names describing a closed path, i.e.
    its last entry repeats its first. The result describes the same
    cycle, starts and ends with the lexicographically smallest module,
    and lists every other module exactly once. A list without the
    closing repeat is accepted too. An empty list gives an empty list.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    if not cycle:
        return []
    # Work on the cycle without its closing repeat. Otherwise a cycle
    # that already starts with the lowest module would end up with that
    # module listed twice at the end.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        body = cycle[:-1]
    else:
        body = list(cycle)
    lowest = min(body)
    idx = body.index(lowest)
    return body[idx:] + body[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    ``imports`` maps a dotted module name to the list of names it
    imports. Returns a list of strings, one per distinct cycle, each of
    the form 'a -> b -> a' and rotated by rotatecycle(). Each starting
    module contributes at most one new cycle, because check_one_mod()
    stops at the first cycle that has not been recorded yet.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Passing the dict lets check_one_mod() skip cycles whose
            # key has already been recorded.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
208 | |
209 def _cycle_sortkey(c): | |
210 return len(c), c | |
211 | |
def main(argv):
    """Check the import statements of the given python source files.

    Args:
      argv: command line; argv[0] is the program name and every further
            item is the path of a source file to check.

    Prints one line for every statement that mixes stdlib and relative
    imports, and one 'Import cycle:' line per reported cycle.

    Returns:
      1 if no files were given (after printing a usage message).
      Otherwise True if any problem was reported and False if none was,
      so that int(main(argv)) is a conventional exit status: zero for
      success, non-zero for failure.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else 'import-checker.py'
        print('Usage: %s file [file] [file] ...' % prog)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        # The with block closes the file even if reading it fails.
        with open(source_path) as f:
            src = f.read()
        modname = dotted_name_of_path(source_path)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    # Truthy (non-zero exit status) only when something was reported.
    return any_errors
242 | |
# Script entry point: run main() on the command line and use its result,
# coerced to an int, as the process exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))