--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/import-checker.py Sun Nov 17 13:04:18 2013 -0500
@@ -0,0 +1,195 @@
+import ast
+import os
+import sys
+
+def dotted_name_of_path(path):
+    """Given a relative path to a source file, return its dotted module name.
+
+    The file extension (if any) is dropped and every path separator
+    becomes a dot. Both '/' and the platform's os.sep are accepted as
+    separators, so paths built with os.path.join() work on every
+    platform.
+
+    >>> dotted_name_of_path('mercurial/error.py')
+    'mercurial.error'
+    >>> dotted_name_of_path('mercurial/pure/osutil.py')
+    'mercurial.pure.osutil'
+    >>> dotted_name_of_path('setup')
+    'setup'
+    """
+    # splitext() strips only a real extension, so a name without one
+    # survives intact instead of losing its last three characters.
+    base = os.path.splitext(path)[0]
+    return '.'.join(base.replace(os.sep, '/').split('/'))
+
+
+def list_stdlib_modules():
+    """List the modules present in the stdlib.
+
+    Yields dotted module names, possibly with repeats: the
+    interpreter's builtin modules, a few hard-coded names, and every
+    .py/.so module and every package found under the sys.path entries
+    that start with sys.prefix or sys.exec_prefix. Anything inside a
+    site-packages directory is left out.
+
+    >>> mods = set(list_stdlib_modules())
+    >>> 'BaseHTTPServer' in mods
+    True
+
+    os.path isn't really a module, so it's missing:
+
+    >>> 'os.path' in mods
+    False
+
+    sys requires special treatment, because it's baked into the
+    interpreter, but it should still appear:
+
+    >>> 'sys' in mods
+    True
+
+    >>> 'collections' in mods
+    True
+
+    >>> 'cStringIO' in mods
+    True
+
+    Packages are listed under their directory name:
+
+    >>> 'json' in mods
+    True
+    """
+    for m in sys.builtin_module_names:
+        yield m
+    # These modules only exist on windows, but we should always
+    # consider them stdlib.
+    for m in ['msvcrt', '_winreg']:
+        yield m
+    # These get missed too
+    for m in 'ctypes', 'email':
+        yield m
+    yield 'builtins' # python3 only
+    for libpath in sys.path:
+        # We want to walk everything in sys.path that starts with
+        # either sys.prefix or sys.exec_prefix.
+        if not libpath.startswith((sys.prefix, sys.exec_prefix)):
+            continue
+        if 'site-packages' in libpath:
+            continue
+        for top, dirs, files in os.walk(libpath):
+            # Prune in place so os.walk() never descends into
+            # third-party code.
+            dirs[:] = [d for d in dirs if 'site-packages' not in d]
+            for name in files:
+                if not name.endswith(('.py', '.so')):
+                    continue
+                if name == '__init__.py':
+                    # A package is imported under its directory name.
+                    # The walk root itself is a sys.path entry, not a
+                    # package.
+                    rel_dir = os.path.relpath(top, libpath)
+                    if rel_dir != os.curdir:
+                        yield rel_dir.replace(os.sep, '.')
+                    continue
+                # relpath() copes with a sys.path entry that ends in a
+                # separator, which plain slicing by len(libpath) does not.
+                rel_path = os.path.relpath(os.path.join(top, name), libpath)
+                # dotted_name_of_path() is only guaranteed to understand
+                # '/' as a separator.
+                yield dotted_name_of_path(rel_path.replace(os.sep, '/'))
+
+# Computed once when the module is loaded; the checks below consult it to
+# tell stdlib imports apart from project-local ones.
+stdlib_modules = set(list_stdlib_modules())
+
+def imported_modules(source):
+    """Given the source of a file as a string, yield the names
+    imported by that file.
+
+    'import X' yields 'X' as written and 'from X import Y' yields
+    'X.Y'. An explicit relative import without a module part
+    ('from . import Y') yields just 'Y'. Every import statement in
+    the parsed tree is visited, not only those at module level.
+
+    >>> list(imported_modules(
+    ...     'import foo ; from baz import bar; import foo.qux'))
+    ['foo', 'baz.bar', 'foo.qux']
+    >>> list(imported_modules('from . import foo'))
+    ['foo']
+    """
+    for node in ast.walk(ast.parse(source)):
+        if isinstance(node, ast.Import):
+            for n in node.names:
+                yield n.name
+        elif isinstance(node, ast.ImportFrom):
+            # node.module is None for 'from . import name'; there is
+            # no module part to prepend in that case.
+            if node.module:
+                prefix = node.module + '.'
+            else:
+                prefix = ''
+            for n in node.names:
+                yield prefix + n.name
+
+def verify_stdlib_on_own_line(source):
+    """Check that no import statement mixes stdlib and local modules.
+
+    Given some python source, yield one message for every
+    'import a, b, ...' statement that names both modules found in
+    stdlib_modules and modules that are not.
+
+    Keeping the two kinds in separate statements works around an
+    annoying lib2to3 bug in relative import rewrites:
+    http://bugs.python.org/issue19510.
+
+    >>> list(verify_stdlib_on_own_line('import sys, foo'))
+    ['mixed stdlib and relative imports:\\n foo, sys']
+    >>> list(verify_stdlib_on_own_line('import sys, os'))
+    []
+    >>> list(verify_stdlib_on_own_line('import foo, bar'))
+    []
+    """
+    tree = ast.parse(source)
+    for stmt in ast.walk(tree):
+        if not isinstance(stmt, ast.Import):
+            continue
+        # A set, so a name repeated within one statement counts once.
+        names = set(alias.name for alias in stmt.names)
+        std_count = sum(1 for name in names if name in stdlib_modules)
+        if std_count == 0 or std_count == len(names):
+            continue
+        yield ('mixed stdlib and relative imports:\n %s' %
+               ', '.join(sorted(names)))
+
+class CircularImport(Exception):
+    """Raised by check_one_mod() when it finds an import cycle.
+
+    The single argument (args[0]) is the cycle: a list of module names
+    that starts and ends with the same module.
+    """
+
+
+def cyclekey(names):
+    """Return an order-independent key for a cycle.
+
+    The key is the tuple of distinct names in sorted order, so two
+    cycles over the same modules get the same key wherever they start.
+    """
+    unique = list(set(names))
+    unique.sort()
+    return tuple(unique)
+
+def check_one_mod(mod, imports, path=None, ignore=None):
+    """Walk the import graph depth-first from mod, raising on a new cycle.
+
+    mod is a dotted module name and imports maps dotted module names
+    to the names each one imports. An imported name that is not in
+    stdlib_modules is taken to be a sibling of the importing module
+    and is qualified with that module's package prefix before it is
+    followed.
+
+    path is the chain of modules visited so far; outside callers leave
+    it unset. ignore is a container of cyclekey() values for cycles
+    that are skipped instead of raised.
+
+    Raises CircularImport, whose argument is the cycle as a list of
+    module names starting and ending with the same module.
+    """
+    if path is None:
+        path = []
+    if ignore is None:
+        ignore = []
+    # Build a new list so the caller's path is left untouched when this
+    # branch of the search returns.
+    path = path + [mod]
+    # A top-level module has no package, so its siblings are top-level
+    # names too.
+    if '.' in mod:
+        prefix = mod.rsplit('.', 1)[0] + '.'
+    else:
+        prefix = ''
+    for i in sorted(imports.get(mod, [])):
+        if i not in stdlib_modules:
+            i = prefix + i
+        if i in path:
+            firstspot = path.index(i)
+            cycle = path[firstspot:] + [i]
+            if cyclekey(cycle) not in ignore:
+                raise CircularImport(cycle)
+            continue
+        check_one_mod(i, imports, path=path, ignore=ignore)
+
+
+def rotatecycle(cycle):
+    """Rotate a cycle so its lexicographically smallest module comes first.
+
+    cycle is a list of module names whose first and last entries are
+    the same module; the result has the same shape.
+
+    >>> rotatecycle(['foo', 'bar', 'foo'])
+    ['bar', 'foo', 'bar']
+    """
+    lowest = min(cycle)
+    idx = cycle.index(lowest)
+    # Drop the duplicated closing entry, rotate, then close the loop again.
+    return cycle[idx:-1] + cycle[:idx] + [lowest]
+
+def find_cycles(imports):
+    """Find cycles in an already-loaded import graph.
+
+    imports maps each dotted module name to the names it imports.
+    Returns a list holding one 'a -> b -> a' string per recorded cycle.
+    Cycles are told apart by their set of members, and each string
+    starts at the lexicographically smallest member.
+
+    The search from one starting module stops at the first cycle that
+    is not already recorded, so each starting module adds at most one
+    cycle to the result.
+
+    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
+    ...            'top.bar': ['baz', 'sys'],
+    ...            'top.baz': ['foo'],
+    ...            'top.qux': ['foo']}
+    >>> print('\\n'.join(sorted(find_cycles(imports))))
+    top.bar -> top.baz -> top.foo -> top.bar
+    top.foo -> top.qux -> top.foo
+    """
+    cycles = {}
+    for mod in sorted(imports):
+        try:
+            check_one_mod(mod, imports, ignore=cycles)
+        except CircularImport as e:
+            cycle = e.args[0]
+            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
+    return list(cycles.values())
+
+def _cycle_sortkey(c):
+    """Sort key for cycles: shorter values first, ties broken by value."""
+    length = len(c)
+    return (length, c)
+
+def main(argv):
+    """Check the source files named in argv[1:] for import problems.
+
+    Prints one line for every import statement that mixes stdlib and
+    local modules, and one line per reported import cycle. A cycle is
+    not reported when its first module already started a reported
+    cycle.
+
+    Returns the process exit status: 0 when nothing was reported, 1 on
+    a usage error or when any problem was found.
+    """
+    if len(argv) < 2:
+        prog = argv[0] if argv else 'import-checker.py'
+        print('Usage: %s file [file] [file] ...' % prog)
+        return 1
+    used_imports = {}
+    any_errors = False
+    for source_path in argv[1:]:
+        # The with block closes the file even when reading fails.
+        with open(source_path) as f:
+            src = f.read()
+        modname = dotted_name_of_path(source_path)
+        used_imports[modname] = sorted(imported_modules(src))
+        for error in verify_stdlib_on_own_line(src):
+            any_errors = True
+            print('%s %s' % (source_path, error))
+    firstmods = set()
+    for c in sorted(find_cycles(used_imports), key=_cycle_sortkey):
+        first = c.split()[0]
+        # As a rough cut, ignore any cycle that starts with the
+        # same module as some other cycle. Otherwise we see lots
+        # of cycles that are effectively duplicates.
+        if first in firstmods:
+            continue
+        print('Import cycle: %s' % c)
+        firstmods.add(first)
+        any_errors = True
+    # Non-zero means failure, matching the usage-error path above.
+    return int(any_errors)
+
+# Script entry point: the value returned by main() becomes the exit status.
+if __name__ == '__main__':
+    sys.exit(int(main(sys.argv)))