comparison contrib/import-checker.py @ 25703:1a6a117d0b95

import-checker: establish modern import convention We introduce a new convention for declaring imports and enforce it via the import checker script. The new convention is only active when absolute imports are used, which is currently nowhere. Keying off "from __future__ import absolute_import" to engage the new import convention seems like the easiest solution. It is also beneficial for Mercurial to use this mode because it means less work and ambiguity for the importer and potentially better performance due to fewer stat() system calls because the importer won't look for modules in relative paths unless explicitly asked. Once all files are converted to use absolute import, we can refactor this code to again only have a single import convention and we can require use of absolute import in the style checker. The rules for the new convention are documented in the docstring of the added function. Tests have been added to test-module-imports.t. Some tests are sensitive to newlines and source column position, which makes docstring testing difficult and/or impossible.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sun, 28 Jun 2015 12:46:34 -0700
parents ab2c5163900e
children cd1daab5d036
comparison
equal deleted inserted replaced
25702:ab2c5163900e 25703:1a6a117d0b95
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules() 5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically 6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv. 7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer 8 import BaseHTTPServer
9 import zlib 9 import zlib
10
11 # Whitelist of modules that symbols can be directly imported from.
12 allowsymbolimports = (
13 '__future__',
14 'mercurial.i18n',
15 'mercurial.node',
16 )
17
18 # Modules that must be aliased because they are commonly confused with
19 # common variables and can create aliasing and readability issues.
20 requirealias = {
21 'ui': 'uimod',
22 }
23
24 def usingabsolute(root):
25 """Whether absolute imports are being used."""
26 if sys.version_info[0] >= 3:
27 return True
28
29 for node in ast.walk(root):
30 if isinstance(node, ast.ImportFrom):
31 if node.module == '__future__':
32 for n in node.names:
33 if n.name == 'absolute_import':
34 return True
35
36 return False
10 37
11 def dotted_name_of_path(path, trimpure=False): 38 def dotted_name_of_path(path, trimpure=False):
12 """Given a relative path to a source file, return its dotted module name. 39 """Given a relative path to a source file, return its dotted module name.
13 40
14 >>> dotted_name_of_path('mercurial/error.py') 41 >>> dotted_name_of_path('mercurial/error.py')
271 # this should be a function or a property of "node.module" 298 # this should be a function or a property of "node.module"
272 continue 299 continue
273 yield found[1] 300 yield found[1]
274 301
275 def verify_import_convention(module, source): 302 def verify_import_convention(module, source):
276 """Verify imports match our established coding convention.""" 303 """Verify imports match our established coding convention.
304
305 We have 2 conventions: legacy and modern. The modern convention is in
306 effect when using absolute imports.
307
308 The legacy convention only looks for mixed imports. The modern convention
309 is much more thorough.
310 """
277 root = ast.parse(source) 311 root = ast.parse(source)
278 312 absolute = usingabsolute(root)
279 return verify_stdlib_on_own_line(root) 313
314 if absolute:
315 return verify_modern_convention(module, root)
316 else:
317 return verify_stdlib_on_own_line(root)
318
319 def verify_modern_convention(module, root):
320 """Verify a file conforms to the modern import convention rules.
321
322 The rules of the modern convention are:
323
324 * Ordering is stdlib followed by local imports. Each group is lexically
325 sorted.
326 * Importing multiple modules via "import X, Y" is not allowed: use
327 separate import statements.
328 * Importing multiple modules via "from X import ..." is allowed if using
329 parenthesis and one entry per line.
330 * Only 1 relative import statement per import level ("from .", "from ..")
331 is allowed.
332 * Relative imports from higher levels must occur before lower levels. e.g.
333 "from .." must be before "from .".
334 * Imports from peer packages should use relative import (e.g. do not
335 "import mercurial.foo" from a "mercurial.*" module).
336 * Symbols can only be imported from specific modules (see
337 `allowsymbolimports`). For other modules, first import the module then
338 assign the symbol to a module-level variable. In addition, these imports
339 must be performed before other relative imports. This rule only
340 applies to import statements outside of any blocks.
341 * Relative imports from the standard library are not allowed.
342 * Certain modules must be aliased to alternate names to avoid aliasing
343 and readability problems. See `requirealias`.
344 """
345 topmodule = module.split('.')[0]
346
347 # Whether a local/non-stdlib import has been performed.
348 seenlocal = False
349 # Whether a relative, non-symbol import has been seen.
350 seennonsymbolrelative = False
351 # The last name to be imported (for sorting).
352 lastname = None
353 # Relative import levels encountered so far.
354 seenlevels = set()
355
356 for node in ast.walk(root):
357 if isinstance(node, ast.Import):
358 # Disallow "import foo, bar" and require separate imports
359 # for each module.
360 if len(node.names) > 1:
361 yield 'multiple imported names: %s' % ', '.join(
362 n.name for n in node.names)
363
364 name = node.names[0].name
365 asname = node.names[0].asname
366
367 # Ignore sorting rules on imports inside blocks.
368 if node.col_offset == 0:
369 if lastname and name < lastname:
370 yield 'imports not lexically sorted: %s < %s' % (
371 name, lastname)
372
373 lastname = name
374
375 # stdlib imports should be before local imports.
376 stdlib = name in stdlib_modules
377 if stdlib and seenlocal and node.col_offset == 0:
378 yield 'stdlib import follows local import: %s' % name
379
380 if not stdlib:
381 seenlocal = True
382
383 # Import of sibling modules should use relative imports.
384 topname = name.split('.')[0]
385 if topname == topmodule:
386 yield 'import should be relative: %s' % name
387
388 if name in requirealias and asname != requirealias[name]:
389 yield '%s module must be "as" aliased to %s' % (
390 name, requirealias[name])
391
392 elif isinstance(node, ast.ImportFrom):
393 # Resolve the full imported module name.
394 if node.level > 0:
395 fullname = '.'.join(module.split('.')[:-node.level])
396 if node.module:
397 fullname += '.%s' % node.module
398 else:
399 assert node.module
400 fullname = node.module
401
402 topname = fullname.split('.')[0]
403 if topname == topmodule:
404 yield 'import should be relative: %s' % fullname
405
406 # __future__ is special since it needs to come first and use
407 # symbol import.
408 if fullname != '__future__':
409 if not fullname or fullname in stdlib_modules:
410 yield 'relative import of stdlib module'
411 else:
412 seenlocal = True
413
414 # Direct symbol import is only allowed from certain modules and
415 # must occur before non-symbol imports.
416 if node.module and node.col_offset == 0:
417 if fullname not in allowsymbolimports:
418 yield 'direct symbol import from %s' % fullname
419
420 if seennonsymbolrelative:
421 yield ('symbol import follows non-symbol import: %s' %
422 fullname)
423
424 if not node.module:
425 assert node.level
426 seennonsymbolrelative = True
427
428 # Only allow 1 group per level.
429 if node.level in seenlevels and node.col_offset == 0:
430 yield 'multiple "from %s import" statements' % (
431 '.' * node.level)
432
433 # Higher-level groups come before lower-level groups.
434 if any(node.level > l for l in seenlevels):
435 yield 'higher-level import should come first: %s' % (
436 fullname)
437
438 seenlevels.add(node.level)
439
440 # Entries in "from .X import ( ... )" lists must be lexically
441 # sorted.
442 lastentryname = None
443
444 for n in node.names:
445 if lastentryname and n.name < lastentryname:
446 yield 'imports from %s not lexically sorted: %s < %s' % (
447 fullname, n.name, lastentryname)
448
449 lastentryname = n.name
450
451 if n.name in requirealias and n.asname != requirealias[n.name]:
452 yield '%s from %s must be "as" aliased to %s' % (
453 n.name, fullname, requirealias[n.name])
280 454
281 def verify_stdlib_on_own_line(root): 455 def verify_stdlib_on_own_line(root):
282 """Given some python source, verify that stdlib imports are done 456 """Given some python source, verify that stdlib imports are done
283 in separate statements from relative local module imports. 457 in separate statements from relative local module imports.
284 458