--- a/mercurial/__init__.py Fri Jul 15 23:54:56 2016 +0900
+++ b/mercurial/__init__.py Mon Jul 04 11:18:03 2016 -0700
@@ -121,9 +121,238 @@
sys.modules[name] = mod
return mod
+# On Python 3, Mercurial modules are loaded through a custom module loader
+# that transforms source code between source file reading and compilation.
+# This is done by registering a custom finder that changes the spec for
+# Mercurial modules to use a custom loader.
+if sys.version_info[0] >= 3:
+ from . import pure
+ import importlib
+ import io
+ import token
+ import tokenize
+
+ class hgpathentryfinder(importlib.abc.MetaPathFinder):
+ """A sys.meta_path finder that uses a custom module loader."""
+ def find_spec(self, fullname, path, target=None):
+ # Only handle Mercurial-related modules.
+ if not fullname.startswith(('mercurial.', 'hgext.', 'hgext3rd.')):
+ return None
+
+            # This assumes Mercurial's C extensions can't be loaded on
+            # Python 3 (yet), so dual C/pure modules are redirected to their
+            # pure Python implementations.
+ if fullname in _dualmodules:
+ stem = fullname.split('.')[-1]
+ fullname = 'mercurial.pure.%s' % stem
+ target = pure
+ assert len(path) == 1
+ path = [os.path.join(path[0], 'pure')]
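+                # For example (assuming 'parsers' is listed in
+                # _dualmodules), a request for 'mercurial.parsers' is
+                # redirected here to 'mercurial.pure.parsers'; the spec name
+                # is restored to 'mercurial.parsers' further below.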
+
+ # Try to find the module using other registered finders.
+ spec = None
+ for finder in sys.meta_path:
+ if finder == self:
+ continue
+
+ spec = finder.find_spec(fullname, path, target=target)
+ if spec:
+ break
+
+ # This is a Mercurial-related module but we couldn't find it
+ # using the previously-registered finders. This likely means
+ # the module doesn't exist.
+ if not spec:
+ return None
+
+ if fullname.startswith('mercurial.pure.'):
+ spec.name = spec.name.replace('.pure.', '.')
+
+ # TODO need to support loaders from alternate specs, like zip
+ # loaders.
+ spec.loader = hgloader(spec.name, spec.origin)
+ return spec
+
+ def replacetokens(tokens):
+ """Transform a stream of tokens from raw to Python 3.
+
+ It is called by the custom module loading machinery to rewrite
+ source/tokens between source decoding and compilation.
+
+ Returns a generator of possibly rewritten tokens.
+
+        The input token list may be mutated as part of processing. However,
+        those mutations do not necessarily match the output token stream.
+
+ REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
+ OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
+ """
+ for i, t in enumerate(tokens):
+ # Convert most string literals to byte literals. String literals
+ # in Python 2 are bytes. String literals in Python 3 are unicode.
+ # Most strings in Mercurial are bytes and unicode strings are rare.
+ # Rather than rewrite all string literals to use ``b''`` to indicate
+ # byte strings, we apply this token transformer to insert the ``b``
+ # prefix nearly everywhere.
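+            # Rough sketch of the effect (not exhaustive): a plain 'foo' or
+            # "foo" literal is emitted as b'foo' / b"foo", while literals
+            # that already carry a prefix (such as u'foo' or r'foo') and
+            # triple-quoted docstrings pass through unchanged (see below).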
+ if t.type == token.STRING:
+ s = t.string
+
+ # Preserve docstrings as string literals. This is inconsistent
+ # with regular unprefixed strings. However, the
+ # "from __future__" parsing (which allows a module docstring to
+ # exist before it) doesn't properly handle the docstring if it
+ # is b''' prefixed, leading to a SyntaxError. We leave all
+ # docstrings as unprefixed to avoid this. This means Mercurial
+ # components touching docstrings need to handle unicode,
+ # unfortunately.
+ if s[0:3] in ("'''", '"""'):
+ yield t
+ continue
+
+                # If the first character isn't a quote, it is likely a string
+                # prefixing character (such as 'b', 'u', or 'r'). Ignore it.
+ if s[0] not in ("'", '"'):
+ yield t
+ continue
+
+ # String literal. Prefix to make a b'' string.
+ yield tokenize.TokenInfo(t.type, 'b%s' % s, t.start, t.end,
+ t.line)
+ continue
+
+ try:
+ nexttoken = tokens[i + 1]
+ except IndexError:
+ nexttoken = None
+
+ try:
+ prevtoken = tokens[i - 1]
+ except IndexError:
+ prevtoken = None
+
+ # This looks like a function call.
+ if (t.type == token.NAME and nexttoken and
+ nexttoken.type == token.OP and nexttoken.string == '('):
+ fn = t.string
+
+                # *attr() builtins don't accept byte strings as their 2nd
+                # argument. Rewrite the token to include the unicode literal
+                # prefix so the string transformer above doesn't add the
+                # byte prefix.
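+                # For example, getattr(obj, 'foo') effectively becomes
+                # getattr(obj, u'foo') in the rewritten output (an
+                # illustrative sketch of the rewrite below).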
+ if fn in ('getattr', 'setattr', 'hasattr', 'safehasattr'):
+ try:
+ # (NAME, 'getattr')
+ # (OP, '(')
+ # (NAME, 'foo')
+ # (OP, ',')
+ # (NAME|STRING, foo)
+ st = tokens[i + 4]
+ if (st.type == token.STRING and
+ st.string[0] in ("'", '"')):
+ rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
+ st.start, st.end, st.line)
+ tokens[i + 4] = rt
+ except IndexError:
+ pass
+
+                # .encode() and .decode() on str/bytes/unicode don't accept
+                # byte strings as the encoding argument on Python 3. Rewrite
+                # the token to include the unicode literal prefix so the
+                # string transformer above doesn't add the byte prefix.
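+                # For example, in s.decode('latin-1') the 'latin-1' literal
+                # is emitted as u'latin-1' rather than b'latin-1' (an
+                # illustrative sketch of the rewrite below).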
+ if (fn in ('encode', 'decode') and
+ prevtoken.type == token.OP and prevtoken.string == '.'):
+ # (OP, '.')
+ # (NAME, 'encode')
+ # (OP, '(')
+ # (STRING, 'utf-8')
+ # (OP, ')')
+ try:
+ st = tokens[i + 2]
+ if (st.type == token.STRING and
+ st.string[0] in ("'", '"')):
+ rt = tokenize.TokenInfo(st.type, 'u%s' % st.string,
+ st.start, st.end, st.line)
+ tokens[i + 2] = rt
+ except IndexError:
+ pass
+
+ # Emit unmodified token.
+ yield t
+
+ # Header to add to bytecode files. This MUST be changed when
+    # ``replacetokens`` or any mechanism that changes semantics of module
+ # loading is changed. Otherwise cached bytecode may get loaded without
+ # the new transformation mechanisms applied.
+ BYTECODEHEADER = b'HG\x00\x01'
+
+ class hgloader(importlib.machinery.SourceFileLoader):
+ """Custom module loader that transforms source code.
+
+ When the source code is converted to a code object, we transform
+ certain patterns to be Python 3 compatible. This allows us to write code
+ that is natively Python 2 and compatible with Python 3 without
+ making the code excessively ugly.
+
+ We do this by transforming the token stream between parse and compile.
+
+ Implementing transformations invalidates caching assumptions made
+ by the built-in importer. The built-in importer stores a header on
+ saved bytecode files indicating the Python/bytecode version. If the
+ version changes, the cached bytecode is ignored. The Mercurial
+ transformations could change at any time. This means we need to check
+ that cached bytecode was generated with the current transformation
+ code or there could be a mismatch between cached bytecode and what
+ would be generated from this class.
+
+ We supplement the bytecode caching layer by wrapping ``get_data``
+ and ``set_data``. These functions are called when the
+ ``SourceFileLoader`` retrieves and saves bytecode cache files,
+ respectively. We simply add an additional header on the file. As
+ long as the version in this file is changed when semantics change,
+ cached bytecode should be invalidated when transformations change.
+
+ The added header has the form ``HG<VERSION>``. That is a literal
+ ``HG`` with 2 binary bytes indicating the transformation version.
+ """
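+        # Sketch of the resulting cache file layout, assuming the current
+        # BYTECODEHEADER value:
+        #
+        #   b'HG\x00\x01' + <bytecode file contents as written by CPython>
+        #
+        # get_data() strips this extra header before returning cached data;
+        # set_data() prepends it when writing.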
+ def get_data(self, path):
+ data = super(hgloader, self).get_data(path)
+
+ if not path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
+ return data
+
+ # There should be a header indicating the Mercurial transformation
+ # version. If it doesn't exist or doesn't match the current version,
+ # we raise an OSError because that is what
+ # ``SourceFileLoader.get_code()`` expects when loading bytecode
+ # paths to indicate the cached file is "bad."
+ if data[0:2] != b'HG':
+ raise OSError('no hg header')
+ if data[0:4] != BYTECODEHEADER:
+ raise OSError('hg header version mismatch')
+
+ return data[4:]
+
+ def set_data(self, path, data, *args, **kwargs):
+ if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
+ data = BYTECODEHEADER + data
+
+ return super(hgloader, self).set_data(path, data, *args, **kwargs)
+
+ def source_to_code(self, data, path):
+ """Perform token transformation before compilation."""
+ buf = io.BytesIO(data)
+ tokens = tokenize.tokenize(buf.readline)
+ data = tokenize.untokenize(replacetokens(list(tokens)))
+ # Python's built-in importer strips frames from exceptions raised
+ # for this code. Unfortunately, that mechanism isn't extensible
+ # and our frame will be blamed for the import failure. There
+ # are extremely hacky ways to do frame stripping. We haven't
+ # implemented them because they are very ugly.
+ return super(hgloader, self).source_to_code(data, path)
+
# We automagically register our custom importer as a side-effect of loading.
# This is necessary to ensure that any entry points are able to import
# mercurial.* modules without having to perform this registration themselves.
-if not any(isinstance(x, hgimporter) for x in sys.meta_path):
+if sys.version_info[0] >= 3:
+ _importercls = hgpathentryfinder
+else:
+ _importercls = hgimporter
+if not any(isinstance(x, _importercls) for x in sys.meta_path):
# meta_path is used before any implicit finders and before sys.path.
- sys.meta_path.insert(0, hgimporter())
+ sys.meta_path.insert(0, _importercls())