changeset 33896:bcf53149ebce

contrib: make import checker always think in terms of bytes The doctests now do some regrettable things, but they'll always work since we're dealing with ASCII module names etc.
author Augie Fackler <raf@durin42.com>
date Tue, 22 Aug 2017 12:58:28 -0400
parents 998fad4b3072
children c90014d5cdc3
files contrib/import-checker.py
diffstat 1 files changed, 24 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/import-checker.py	Tue Aug 22 10:58:29 2017 -0400
+++ b/contrib/import-checker.py	Tue Aug 22 12:58:28 2017 -0400
@@ -616,22 +616,26 @@
 def embedded(f, modname, src):
     """Extract embedded python code
 
+    >>> def _forcestr(thing):
+    ...     if not isinstance(thing, str):
+    ...         return thing.decode('ascii')
+    ...     return thing
     >>> def test(fn, lines):
-    ...     for s, m, f, l in embedded(fn, "example", lines):
-    ...         print("%s %s %s" % (m, f, l))
-    ...         print(repr(s))
+    ...     for s, m, f, l in embedded(fn, b"example", lines):
+    ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
+    ...         print(repr(_forcestr(s)))
     >>> lines = [
-    ...   'comment',
-    ...   '  >>> from __future__ import print_function',
-    ...   "  >>> ' multiline",
-    ...   "  ... string'",
-    ...   '  ',
-    ...   'comment',
-    ...   '  $ cat > foo.py <<EOF',
-    ...   '  > from __future__ import print_function',
-    ...   '  > EOF',
+    ...   b'comment',
+    ...   b'  >>> from __future__ import print_function',
+    ...   b"  >>> ' multiline",
+    ...   b"  ... string'",
+    ...   b'  ',
+    ...   b'comment',
+    ...   b'  $ cat > foo.py <<EOF',
+    ...   b'  > from __future__ import print_function',
+    ...   b'  > EOF',
     ... ]
-    >>> test("example.t", lines)
+    >>> test(b"example.t", lines)
     example[2] doctest.py 2
     "from __future__ import print_function\\n' multiline\\nstring'\\n"
     example[7] foo.py 7
@@ -653,16 +657,16 @@
             if not inlinepython:
                 # We've just entered a Python block.
                 inlinepython = n
-                t = 'doctest.py'
+                t = b'doctest.py'
             script.append(l[prefix:])
             continue
         if l.startswith(b'  ... '): # python inlines
             script.append(l[prefix:])
             continue
-        cat = re.search(r"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
+        cat = re.search(br"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l)
         if cat:
             if inlinepython:
-                yield ''.join(script), ("%s[%d]" %
+                yield b''.join(script), (b"%s[%d]" %
                        (modname, inlinepython)), t, inlinepython
                 script = []
                 inlinepython = 0
@@ -671,7 +675,7 @@
             continue
         if shpython and l.startswith(b'  > '): # sh continuation
             if l == b'  > EOF\n':
-                yield ''.join(script), ("%s[%d]" %
+                yield b''.join(script), (b"%s[%d]" %
                        (modname, shpython)), t, shpython
                 script = []
                 shpython = 0
@@ -679,7 +683,7 @@
                 script.append(l[4:])
             continue
         if inlinepython and l == b'  \n':
-            yield ''.join(script), ("%s[%d]" %
+            yield b''.join(script), (b"%s[%d]" %
                    (modname, inlinepython)), t, inlinepython
             script = []
             inlinepython = 0
@@ -697,11 +701,11 @@
     """
     py = False
     if not f.endswith('.t'):
-        with open(f) as src:
+        with open(f, 'rb') as src:
             yield src.read(), modname, f, 0
             py = True
     if py or f.endswith('.t'):
-        with open(f) as src:
+        with open(f, 'rb') as src:
             for script, modname, t, line in embedded(f, modname, src):
                 yield script, modname, t, line