import-checker: make imported_modules yield absolute dotted_name_of_path
authorFUJIWARA Katsunori <foozy@lares.dti.ne.jp>
Mon, 18 May 2015 02:52:55 +0900
changeset 25174 86298718b01c
parent 25173 7358b5d9991e
child 25175 10e6c4b7121b
import-checker: make imported_modules yield absolute dotted_name_of_path This patch makes `imported_modules()` always yield absolute `dotted_name_of_path()`-ed name by strict detection with `fromlocal()`. This change improves circular detection in some points: - locally defined modules, of which name collides against one of standard library, can be examined correctly For example, circular import related to `commands` is overlooked before this patch. - names not useful for circular detection are ignored Names below are also yielded before this patch: - module names of standard library (= not locally defined one) - non-module names (e.g. `node.nullid` of `from node import nullid`) These redundant names decrease performance of circular detection. For example, with files at 1ef96a3b8b89, average loops per file in `checkmod()` is reduced from 165 to 109. - `__init__` can be handled correctly in `checkmod()` For example, current implementation has problems below: - `from xxx import yyy` doesn't recognize `xxx.__init__` as imported - `xxx.__init__` imported via `import xxx` is treated as `xxx`, and circular detection is aborted, because `key` of such module name is not `xxx` but `xxx.__init__` - it is easy to enhance for `from . import xxx` style or so (in the future) Module name detection in `imported_modules()` can use information in `ast.ImportFrom` fully. It is assumed that all locally defined modules are correctly specified to `import-checker.py` at once. Strictly speaking, when `from foo.bar.baz import module1` imports `foo.bar.baz.module1` module, current `imported_modules()` yields only `foo.bar.baz.__init__`, even though also `foo.__init__` and `foo.bar.__init__` should be yielded to detect circular import exactly. But this limitation is reasonable one for improvement in this patch, because current `__init__` files in Mercurial seems to be implemented carefully.
contrib/import-checker.py
tests/test-module-imports.t
--- a/contrib/import-checker.py	Mon May 18 02:50:22 2015 +0900
+++ b/contrib/import-checker.py	Mon May 18 02:52:55 2015 +0900
@@ -170,38 +170,94 @@
 
 stdlib_modules = set(list_stdlib_modules())
 
-def imported_modules(source, ignore_nested=False):
+def imported_modules(source, modulename, localmods, ignore_nested=False):
     """Given the source of a file as a string, yield the names
     imported by that file.
 
     Args:
       source: The python source to examine as a string.
+      modulename: of specified python source (may have `__init__`)
+      localmods: dict of locally defined module names (may have `__init__`)
       ignore_nested: If true, import statements that do not start in
                      column zero will be ignored.
 
     Returns:
-      A list of module names imported by the given source.
+      A list of absolute module names imported by the given source.
 
+    >>> modulename = 'foo.xxx'
+    >>> localmods = {'foo.__init__': True,
+    ...              'foo.foo1': True, 'foo.foo2': True,
+    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
+    ...              'baz.__init__': True, 'baz.baz1': True }
+    >>> # standard library (= not locally defined ones)
+    >>> sorted(imported_modules(
+    ...        'from stdlib1 import foo, bar; import stdlib2',
+    ...        modulename, localmods))
+    []
+    >>> # relative importing
     >>> sorted(imported_modules(
-    ...         'import foo ; from baz import bar; import foo.qux'))
-    ['baz.bar', 'foo', 'foo.qux']
+    ...        'import foo1; from bar import bar1',
+    ...        modulename, localmods))
+    ['foo.bar.__init__', 'foo.bar.bar1', 'foo.foo1']
+    >>> sorted(imported_modules(
+    ...        'from bar.bar1 import name1, name2, name3',
+    ...        modulename, localmods))
+    ['foo.bar.bar1']
+    >>> # absolute importing
+    >>> sorted(imported_modules(
+    ...        'from baz import baz1, name1',
+    ...        modulename, localmods))
+    ['baz.__init__', 'baz.baz1']
+    >>> # mixed importing, even though it shouldn't be recommended
+    >>> sorted(imported_modules(
+    ...        'import stdlib, foo1, baz',
+    ...        modulename, localmods))
+    ['baz.__init__', 'foo.foo1']
+    >>> # ignore_nested
     >>> sorted(imported_modules(
     ... '''import foo
     ... def wat():
     ...     import bar
-    ... ''', ignore_nested=True))
-    ['foo']
+    ... ''', modulename, localmods))
+    ['foo.__init__', 'foo.bar.__init__']
+    >>> sorted(imported_modules(
+    ... '''import foo
+    ... def wat():
+    ...     import bar
+    ... ''', modulename, localmods, ignore_nested=True))
+    ['foo.__init__']
     """
+    fromlocal = fromlocalfunc(modulename, localmods)
     for node in ast.walk(ast.parse(source)):
         if ignore_nested and getattr(node, 'col_offset', 0) > 0:
             continue
         if isinstance(node, ast.Import):
             for n in node.names:
-                yield n.name
+                found = fromlocal(n.name)
+                if not found:
+                    # this should import standard library
+                    continue
+                yield found[1]
         elif isinstance(node, ast.ImportFrom):
-            prefix = node.module + '.'
+            found = fromlocal(node.module)
+            if not found:
+                # this should import standard library
+                continue
+
+            absname, dottedpath, hassubmod = found
+            yield dottedpath
+            if not hassubmod:
+                # examination of "node.names" should be redundant
+                # e.g.: from mercurial.node import nullid, nullrev
+                continue
+
+            prefix = absname + '.'
             for n in node.names:
-                yield prefix + n.name
+                found = fromlocal(prefix + n.name)
+                if not found:
+                    # this should be a function or a property of "node.module"
+                    continue
+                yield found[1]
 
 def verify_stdlib_on_own_line(source):
     """Given some python source, verify that stdlib imports are done
@@ -297,7 +353,7 @@
         f = open(source_path)
         src = f.read()
         used_imports[modname] = sorted(
-            imported_modules(src, ignore_nested=True))
+            imported_modules(src, modname, localmods, ignore_nested=True))
         for error in verify_stdlib_on_own_line(src):
             any_errors = True
             print source_path, error
--- a/tests/test-module-imports.t	Mon May 18 02:50:22 2015 +0900
+++ b/tests/test-module-imports.t	Mon May 18 02:52:55 2015 +0900
@@ -37,3 +37,4 @@
      stdlib:    formatter
      relative:  config, error, scmutil, util
   Import cycle: mercurial.cmdutil -> mercurial.context -> mercurial.subrepo -> mercurial.cmdutil
+  Import cycle: mercurial.commands -> mercurial.commandserver -> mercurial.dispatch -> mercurial.commands