hgext/highlight/highlight.py
author Augie Fackler <augie@google.com>
Sat, 30 Sep 2017 07:45:41 -0400
changeset 34381 4735c1caf6b3
parent 32908 661025fd3e1c
child 35329 169d66db5920
permissions -rw-r--r--
registrar: remove superfluous pass statements
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8251
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     1
# highlight.py - highlight extension implementation file
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     2
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     3
#  Copyright 2007-2009 Adam Hupp <adam@hupp.org> and others
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     4
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9424
diff changeset
     6
# GNU General Public License version 2 or any later version.
6938
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     7
#
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     8
# The original module was split in an interface and an implementation
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     9
# file to defer pygments loading and speedup extension setup.
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    10
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    11
from __future__ import absolute_import
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    12
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    13
from mercurial import demandimport
10394
4612cded5176 fix coding style (reported by pylint)
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 10263
diff changeset
    14
demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__'])
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    15
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    16
from mercurial import (
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    17
    encoding,
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    18
    util,
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    19
)
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    20
32908
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    21
with demandimport.deactivated():
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    22
    import pygments
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    23
    import pygments.formatters
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    24
    import pygments.lexers
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    25
    import pygments.util
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    26
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    27
highlight = pygments.highlight
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    28
ClassNotFound = pygments.util.ClassNotFound
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    29
guess_lexer = pygments.lexers.guess_lexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    30
guess_lexer_for_filename = pygments.lexers.guess_lexer_for_filename
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    31
TextLexer = pygments.lexers.TextLexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    32
HtmlFormatter = pygments.formatters.HtmlFormatter
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    33
6485
938319418d8c highlight: Generate pygments style sheet dynamically
Isaac Jurado <diptongo@gmail.com>
parents: 6394
diff changeset
    34
SYNTAX_CSS = ('\n<link rel="stylesheet" href="{url}highlightcss" '
5533
6cf7d7fe7d3d highlight: clean up coding style a little
Bryan O'Sullivan <bos@serpentine.com>
parents: 5532
diff changeset
    35
              'type="text/css" />')
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    36
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    37
def pygmentize(field, fctx, style, tmpl, guessfilenameonly=False):
6394
55bc0a035e1f highlight: some small cleanups
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents: 6393
diff changeset
    38
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    39
    # append a <link ...> for the syntax highlighting css to the header
10959
d1f4657f55e4 highlight: fix to work with caching templater
Matt Mackall <mpm@selenic.com>
parents: 10394
diff changeset
    40
    old_header = tmpl.load('header')
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    41
    if SYNTAX_CSS not in old_header:
27637
b502138f5faa cleanup: remove superfluous space after space after equals (python)
timeless <timeless@mozdev.org>
parents: 26680
diff changeset
    42
        new_header = old_header + SYNTAX_CSS
5616
88ca3e0fb6e5 highlight: adapt to hgweb_mode refactoring
Christian Ebert <blacktrash@gmx.net>
parents: 5533
diff changeset
    43
        tmpl.cache['header'] = new_header
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    44
6194
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    45
    text = fctx.data()
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    46
    if util.binary(text):
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    47
        return
fe54e7501de1 highlight: bail out if file is binary
Brendan Cully <brendan@kublai.com>
parents: 6193
diff changeset
    48
23613
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    49
    # unicode.splitlines() also splits on \x0c/\x1c/\x1d/\x1e; strip them
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    50
    for c in "\x0c\x1c\x1d\x1e":
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    51
        if c in text:
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    52
            text = text.replace(c, '')
7b8ff3fd11d3 highlight: ignore Unicode's extra linebreaks (issue4291)
Matt Mackall <mpm@selenic.com>
parents: 19169
diff changeset
    53
9424
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    54
    # Pygments is best used with Unicode strings:
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    55
    # <http://pygments.org/docs/unicode/>
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    56
    text = text.decode(encoding.encoding, 'replace')
7120
db7557359636 highlight: convert text to local before passing to pygmentize (issue1341)
Christian Ebert <blacktrash@gmx.net>
parents: 6938
diff changeset
    57
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    58
    # To get multi-line strings right, we can't format line-by-line
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    59
    try:
19169
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    60
        lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    61
                                         stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    62
    except (ClassNotFound, ValueError):
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    63
        # guess_lexer will return a lexer if *any* lexer matches. There is
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    64
        # no way to specify a minimum match score. This can give a high rate of
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    65
        # false positives on files with an unknown filename pattern.
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    66
        if guessfilenameonly:
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    67
            return
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    68
6198
358cc9cf54db highlight: guess by text when path name is ambiguous
Brendan Cully <brendan@kublai.com>
parents: 6197
diff changeset
    69
        try:
19169
bcdfb6078b9f highlight: fix page layout with empty first and last lines
Alexander Plavin <me@aplavin.ru>
parents: 18054
diff changeset
    70
            lexer = guess_lexer(text[:1024], stripnl=False)
6494
c30849d4c8ba highlight: backward compatibility with pygments 0.5.1
Benoit Allard <benoit@aeteurope.nl>
parents: 6212
diff changeset
    71
        except (ClassNotFound, ValueError):
25899
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    72
            # Don't highlight unknown files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    73
            return
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    74
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    75
    # Don't highlight text files
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    76
    if isinstance(lexer, TextLexer):
c35ee1bbbbdc highlight: exit early on textual and unknown files (issue3005)
Anton Shestakov <av6@dwimlabs.net>
parents: 25867
diff changeset
    77
        return
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    78
25867
a74e9806d17d highlight: produce correct markup when there's a blank line just before EOF
Anton Shestakov <av6@dwimlabs.net>
parents: 23613
diff changeset
    79
    formatter = HtmlFormatter(nowrap=True, style=style)
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    80
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    81
    colorized = highlight(text, lexer, formatter)
9424
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    82
    coloriter = (s.encode(encoding.encoding, 'replace')
799373ff2554 highlight: fixes garbled text in non-UTF-8 environment
Yuya Nishihara <yuya@tcha.org>
parents: 8360
diff changeset
    83
                 for s in colorized.splitlines())
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    84
29216
ead25aa27a43 py3: convert to next() function
timeless <timeless@mozdev.org>
parents: 27637
diff changeset
    85
    tmpl.filters['colorize'] = lambda x: next(coloriter)
6193
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    86
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    87
    oldl = tmpl.cache[field]
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    88
    newl = oldl.replace('line|escape', 'line|colorize')
2344da8eb9b4 highlight: support annotate, and reduce layering violations.
Brendan Cully <brendan@kublai.com>
parents: 5991
diff changeset
    89
    tmpl.cache[field] = newl