Mercurial > hg
annotate contrib/import-checker.py @ 23642:7fd1a6c27e60
largefiles: don't duplicate 'actions' into 'actionbyfile'
author | Martin von Zweigbergk <martinvonz@google.com> |
---|---|
date | Thu, 11 Dec 2014 22:51:29 -0800 |
parents | 461342e1c8aa |
children | 642d245ff537 |
rev | line source |
---|---|
20036 | 1 import ast |
2 import os | |
3 import sys | |
4 | |
20198
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules() |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
6 # to work when run from a virtualenv. The modules were chosen empirically |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
7 # so that the return value matches the return value without virtualenv. |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
8 import BaseHTTPServer |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
9 import zlib |
f5393a9dc4e5
import-checker: make test-module-imports.t work using virtualenv (issue4129)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20197
diff
changeset
|
10 |
22975
461342e1c8aa
import-checker: check modules for pure Python build correctly
FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
parents:
22974
diff
changeset
|
def dotted_name_of_path(path, trimpure=False):
    """Turn a '/'-separated relative file path into a dotted module name.

    The file name loses everything from its first dot onwards, and a
    trailing 'module' (as in 'zlibmodule.so') is removed. With
    trimpure=True every path component equal to 'pure' is left out of
    the result.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('mercurial/pure/parsers.py', trimpure=True)
    'mercurial.parsers'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    components = path.split('/')
    # Cutting at the first dot handles .py, .so and .ARCH.so in one go.
    leaf = components.pop().split('.', 1)[0]
    if leaf.endswith('module'):
        leaf = leaf[:-len('module')]
    components.append(leaf)
    if trimpure:
        components = [c for c in components if c != 'pure']
    return '.'.join(components)
28 | |
29 | |
def list_stdlib_modules():
    """Yield the names of the modules that make up the stdlib.

    Names come from three places: sys.builtin_module_names, a short
    hard-coded list, and a walk over every sys.path entry that lives
    under one of the interpreter's prefixes. The same name may be
    yielded more than once; callers are expected to collect the result
    into a set.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin_name in sys.builtin_module_names:
        yield builtin_name
    # msvcrt and _winreg only exist on windows, but we should always
    # consider them stdlib. ctypes and email get missed by the walk
    # below, and builtins is python3 only.
    for extra in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield extra

    search_roots = set([sys.prefix, sys.exec_prefix])
    # Inside a virtualenv the two prefixes above are not enough, so the
    # directories holding a couple of known stdlib modules are added
    # unless one of the existing roots already covers them.
    for probe in (BaseHTTPServer, zlib):
        try:
            # Not all module objects have a __file__ attribute.
            probe_file = probe.__file__
        except AttributeError:
            continue
        probe_dir = os.path.dirname(probe_file)
        covered = False
        for root in search_roots:
            if probe_dir.startswith(root):
                covered = True
                break
        if not covered:
            search_roots.add(probe_dir)

    for libpath in sys.path:
        if 'site-packages' in libpath:
            continue
        # Only sys.path entries below one of the roots are walked.
        # check-code is suppressed because the ast module used by this
        # script implies the availability of any().
        if not any(libpath.startswith(r) for r in search_roots): # no-py24
            continue
        # Number of leading characters to drop to get a path relative to
        # libpath (the entry itself plus one separator).
        strip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            for name in files:
                # Package __init__ files are never reported.
                if name == '__init__.py':
                    continue
                if name.endswith('.py') or name.endswith('.so'):
                    full_path = os.path.join(top, name)
                    if 'site-packages' not in full_path:
                        yield dotted_name_of_path(full_path[strip:])
101 | |
# Every name produced by list_stdlib_modules(), collected once when the
# module is loaded so that membership tests are cheap.
stdlib_modules = set()
stdlib_modules.update(list_stdlib_modules())
103 | |
20037
957b43371928
import-checker: ignore nested imports
Augie Fackler <raf@durin42.com>
parents:
20036
diff
changeset
|
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      One string per imported item: the dotted name for 'import a.b',
      'mod.name' for 'from mod import name', and the bare 'name' for
      'from . import name' (which has no module part).

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # Nodes without a col_offset (e.g. the Module node) count as
        # being in column zero.
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; concatenating
            # it with '.' would raise TypeError, so use no prefix then.
            if node.module:
                prefix = node.module + '.'
            else:
                prefix = ''
            for n in node.names:
                yield prefix + n.name
136 | |
def verify_stdlib_on_own_line(source):
    """Yield a message for every 'import a, b' statement in the given
    python source that names both stdlib modules and other modules.

    Keeping the two kinds in separate statements works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    Only plain 'import' statements are examined; a name counts as
    stdlib when it is in stdlib_modules.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed imports\\n stdlib: sys\\n relative: foo']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        stdlib_names = []
        other_names = []
        for alias in node.names:
            if alias.name in stdlib_modules:
                stdlib_names.append(alias.name)
            else:
                other_names.append(alias.name)
        if stdlib_names and other_names:
            yield ('mixed imports\n stdlib: %s\n relative: %s' %
                   (', '.join(sorted(stdlib_names)),
                    ', '.join(sorted(other_names))))
20036 | 161 |
class CircularImport(Exception):
    """Signals an import cycle; args[0] is the list of module names
    along the cycle, ending with the module it started from."""
164 | |
165 | |
def cyclekey(names):
    """Return the distinct entries of names as a sorted tuple.

    Two cycles visiting the same modules get the same key regardless of
    where they start, and the tuple can be used as a dict key.
    """
    distinct = set(names)
    return tuple(sorted(distinct))
168 | |
def check_one_mod(mod, imports, path=None, ignore=None):
    """Walk the imports of mod depth-first and raise on the first cycle.

    Args:
      mod: dotted name of the module to start from.
      imports: dict mapping a dotted module name to the names it imports.
      path: modules already on the current import chain (used by the
            recursion; callers normally leave it unset).
      ignore: container of cyclekey() values for cycles that should not
              be reported again.

    Raises:
      CircularImport: carrying the list of modules on the cycle, when a
        cycle whose key is not in ignore is reached.
    """
    if ignore is None:
        ignore = []
    if path is None:
        trail = [mod]
    else:
        trail = path + [mod]
    # Names that are neither stdlib nor 'mercurial.'-qualified are taken
    # to be siblings of mod, so they get mod's parent as prefix.
    parent = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules or name.startswith('mercurial.'):
            target = name
        else:
            target = parent + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=ignore)
            continue
        # target is already on the chain: everything from its first
        # occurrence onwards, closed with target itself, is the cycle.
        cycle = trail[trail.index(target):] + [target]
        if cyclekey(cycle) not in ignore:
            raise CircularImport(cycle)
185 | |
20038
c65a6937b828
import-checker: try a little harder to show fewer cycles
Augie Fackler <raf@durin42.com>
parents:
20037
diff
changeset
|
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    The cycle is expected in the closed form built by check_one_mod,
    where the last entry repeats the first. The result is closed too:
    it starts and ends with the lexicographically lowest module and
    lists every other module exactly once.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    # Drop the closing repeat before rotating; otherwise a cycle that
    # already starts with its lowest module would come back with that
    # module duplicated at the end.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        members = cycle[:-1]
    else:
        members = cycle
    lowest = min(members)
    idx = members.index(lowest)
    return members[idx:] + members[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    Args:
      imports: dict mapping a dotted module name to the names it imports.

    Returns:
      A list of strings, one per distinct cycle found, each of the form
      'a -> b -> a'. At most one new cycle is recorded per starting
      module, because the search for that module stops at the first
      cycle that was not seen before.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # Cycles recorded so far are passed as the ignore set so the
            # same cycle is not raised again from another entry point.
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport as e:
            cycle = e.args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return list(cycles.values())
215 | |
def _cycle_sortkey(c):
    """Sort key for a cycle description: shorter ones first, ties
    broken by the text itself."""
    size = len(c)
    return (size, c)
218 | |
def main(argv):
    """Check the import statements of the files named on the command line.

    For every file this prints one line per statement that mixes stdlib
    and other imports, then prints the import cycles found across all
    the files.

    Args:
      argv: program name followed by the paths of the source files.

    Returns:
      1 when no file was given, otherwise True if any problem was
      reported and False if none was. The value is used as the process
      exit status, so a clean run exits with 0.
    """
    if len(argv) < 2:
        progname = (argv or ['import-checker.py'])[0]
        print('Usage: %s file [file] [file] ...' % progname)
        return 1
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        f = open(source_path)
        try:
            src = f.read()
        finally:
            # Close the file even if reading it fails.
            f.close()
        modname = dotted_name_of_path(source_path, trimpure=True)
        used_imports[modname] = sorted(
            imported_modules(src, ignore_nested=True))
        for error in verify_stdlib_on_own_line(src):
            any_errors = True
            print('%s %s' % (source_path, error))
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle: %s' % c)
            firstmods.add(first)
            any_errors = True
    return any_errors

if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))