highlight: fix encoding issues to enable Py3 compatibility
This commit fixes various encoding issues with the `highlight` extension
to enable compatibility with Python 3. Python's `.encode()` and `.decode()`
require the target encoding to be passed as a `str`, so the value of
`mercurial.encoding.encoding` must be converted before being passed to
them. Pygments also assumes the `str` type for values it works with,
so we must perform conversions before and after receiving values from its
APIs.
After applying this patch, `test-highlight.t` passes under Python 3. We
add it to `python3-whitelist` as well.
Tested with Pygments 2.4.2.
Differential Revision: https://phab.mercurial-scm.org/D6832
--- a/contrib/python3-whitelist Tue Sep 10 12:32:07 2019 -0400
+++ b/contrib/python3-whitelist Mon Sep 09 14:26:43 2019 -0400
@@ -296,6 +296,7 @@
test-hgwebdir-paths.py
test-hgwebdir.t
test-hgwebdirsym.t
+test-highlight.t
test-histedit-arguments.t
test-histedit-base.t
test-histedit-bookmark-motion.t
--- a/hgext/highlight/__init__.py Tue Sep 10 12:32:07 2019 -0400
+++ b/hgext/highlight/__init__.py Mon Sep 09 14:26:43 2019 -0400
@@ -36,6 +36,7 @@
from mercurial import (
extensions,
+ pycompat,
)
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
@@ -79,11 +80,12 @@
def generate_css(web):
pg_style = web.config('web', 'pygments_style', 'colorful')
- fmter = highlight.HtmlFormatter(style=pg_style)
+ fmter = highlight.HtmlFormatter(style=pycompat.sysstr(pg_style))
web.res.headers['Content-Type'] = 'text/css'
+ style_defs = fmter.get_style_defs(pycompat.sysstr(''))
web.res.setbodybytes(''.join([
'/* pygments_style = %s */\n\n' % pg_style,
- fmter.get_style_defs(''),
+ pycompat.bytestr(style_defs),
]))
return web.res.sendresponse()
--- a/hgext/highlight/highlight.py Tue Sep 10 12:32:07 2019 -0400
+++ b/hgext/highlight/highlight.py Mon Sep 09 14:26:43 2019 -0400
@@ -15,6 +15,7 @@
from mercurial import (
encoding,
+ pycompat,
)
from mercurial.utils import (
@@ -61,11 +62,12 @@
# Pygments is best used with Unicode strings:
# <http://pygments.org/docs/unicode/>
- text = text.decode(encoding.encoding, 'replace')
+ text = text.decode(pycompat.sysstr(encoding.encoding), 'replace')
# To get multi-line strings right, we can't format line-by-line
try:
- lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
+ path = pycompat.sysstr(fctx.path())
+ lexer = guess_lexer_for_filename(path, text[:1024],
stripnl=False)
except (ClassNotFound, ValueError):
# guess_lexer will return a lexer if *any* lexer matches. There is
@@ -84,10 +86,10 @@
if isinstance(lexer, TextLexer):
return
- formatter = HtmlFormatter(nowrap=True, style=style)
+ formatter = HtmlFormatter(nowrap=True, style=pycompat.sysstr(style))
colorized = highlight(text, lexer, formatter)
- coloriter = (s.encode(encoding.encoding, 'replace')
+ coloriter = (s.encode(pycompat.sysstr(encoding.encoding), 'replace')
for s in colorized.splitlines())
tmpl._filters['colorize'] = lambda x: next(coloriter)