hgext/highlight/highlight.py
author Pulkit Goyal <pulkit@yandex-team.ru>
Mon, 27 Aug 2018 14:08:18 +0300
changeset 39358 57b2a02420cd
parent 38383 23dc901cdf13
child 42923 a7abc6081bc5
permissions -rw-r--r--
tests: add flat manifest case in test-narrow-widen-non-ellipsis.t We had this test with tree manifest only because the tests were broken and we were fixing them, and maintaining two different cases of broken tests was a bit hard. Now that things work fine, let's add the flat manifest case. The test shows that things work fine in both cases. Differential Revision: https://phab.mercurial-scm.org/D4385
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8251
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     1
# highlight.py - highlight extension implementation file
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     2
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     3
#  Copyright 2007-2009 Adam Hupp <adam@hupp.org> and others
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     4
#
7fc30044b514 highlight: add copyright and license header
Martin Geisler <mg@lazybytes.net>
parents: 7948
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9424
diff changeset
     6
# GNU General Public License version 2 or any later version.
6938
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     7
#
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     8
# The original module was split in an interface and an implementation
ce94b3236ea4 highlight: split code to improve startup times
Patrick Mezard <pmezard@gmail.com>
parents: 6666
diff changeset
     9
# file to defer pygments loading and speedup extension setup.
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    10
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    11
from __future__ import absolute_import
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    12
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    13
from mercurial import demandimport
37843
670eb4fa1b86 demandimport: make module ignores a set (API)
Gregory Szorc <gregory.szorc@gmail.com>
parents: 37084
diff changeset
    14
demandimport.IGNORES.update(['pkgutil', 'pkg_resources', '__main__'])
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    15
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    16
from mercurial import (
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    17
    encoding,
37084
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    18
)
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    19
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    20
from mercurial.utils import (
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 35329
diff changeset
    21
    stringutil,
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    22
)
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    23
32908
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    24
with demandimport.deactivated():
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    25
    import pygments
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    26
    import pygments.formatters
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    27
    import pygments.lexers
35329
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    28
    import pygments.plugin
32908
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    29
    import pygments.util
661025fd3e1c highlight: put pygments import inside demandimport.deactivated
Augie Fackler <raf@durin42.com>
parents: 29485
diff changeset
    30
35329
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    31
    for unused in pygments.plugin.find_plugin_lexers():
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    32
        pass
169d66db5920 highlight: eagerly discover plugin lexers while demandimport is off
Augie Fackler <augie@google.com>
parents: 32908
diff changeset
    33
29485
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    34
highlight = pygments.highlight
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    35
ClassNotFound = pygments.util.ClassNotFound
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    36
guess_lexer = pygments.lexers.guess_lexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    37
guess_lexer_for_filename = pygments.lexers.guess_lexer_for_filename
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    38
TextLexer = pygments.lexers.TextLexer
6a98f9408a50 py3: make files use absolute_import and print_function
Pulkit Goyal <7895pulkit@gmail.com>
parents: 29216
diff changeset
    39
HtmlFormatter = pygments.formatters.HtmlFormatter
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    40
6485
938319418d8c highlight: Generate pygments style sheet dynamically
Isaac Jurado <diptongo@gmail.com>
parents: 6394
diff changeset
    41
SYNTAX_CSS = ('\n<link rel="stylesheet" href="{url}highlightcss" '
5533
6cf7d7fe7d3d highlight: clean up coding style a little
Bryan O'Sullivan <bos@serpentine.com>
parents: 5532
diff changeset
    42
              'type="text/css" />')
5532
40a06e39f010 extension for synax highlighting in the hgweb file revision view
Adam Hupp <adam@hupp.org>
parents:
diff changeset
    43
26680
7a3f6490ef97 highlight: add option to prevent content-only based fallback
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25899
diff changeset
    44
def pygmentize(field, fctx, style, tmpl, guessfilenameonly=False):
    """Make the ``field`` template of ``tmpl`` emit Pygments-colorized lines.

    The function works purely by side effect on ``tmpl``:

    * the cached ``header`` template gets ``SYNTAX_CSS`` appended (once),
    * a ``colorize`` filter is registered that hands out one highlighted
      line per call,
    * ``line|escape`` in the cached ``field`` template is rewritten to
      ``line|colorize``.

    field: key into ``tmpl.cache`` naming the template to rewrite.
    fctx: file context; only ``fctx.data()`` and ``fctx.path()`` are used.
    style: passed through to ``HtmlFormatter(style=...)``.
    tmpl: templater exposing ``load()``, ``cache`` and ``_filters``.
    guessfilenameonly: when true, give up if no lexer matches the file
        name instead of falling back to guessing from the content.

    Always returns None. Apart from the header change nothing is touched
    when the file is binary, when no lexer can be determined, or when the
    chosen lexer is the plain ``TextLexer``.
    """

    # append a <link ...> for the syntax highlighting css to the header
    tmpl.load('header')
    old_header = tmpl.cache['header']
    if SYNTAX_CSS not in old_header:
        tmpl.cache['header'] = old_header + SYNTAX_CSS

    text = fctx.data()
    if stringutil.binary(text):
        return

    # unicode.splitlines() treats more characters as line boundaries than
    # str.splitlines() does; drop the single-byte ones before decoding
    # (issue4291)
    for c in "\x0c\x1c\x1d\x1e":
        if c in text:
            text = text.replace(c, '')

    # Pygments is best used with Unicode strings:
    # <http://pygments.org/docs/unicode/>
    text = text.decode(encoding.encoding, 'replace')

    # To get multi-line strings right, we can't format line-by-line
    try:
        lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
                                         stripnl=False)
    except (ClassNotFound, ValueError):
        # guess_lexer will return a lexer if *any* lexer matches. There is
        # no way to specify a minimum match score. This can give a high rate of
        # false positives on files with an unknown filename pattern.
        if guessfilenameonly:
            return

        try:
            lexer = guess_lexer(text[:1024], stripnl=False)
        except (ClassNotFound, ValueError):
            # Don't highlight unknown files
            return

    # Don't highlight text files
    if isinstance(lexer, TextLexer):
        return

    formatter = HtmlFormatter(nowrap=True, style=style)

    colorized = highlight(text, lexer, formatter)

    # Cut the markup on "\n" only. unicode.splitlines() would also break on
    # boundaries that are still present after decoding (e.g. U+0085, U+2028,
    # U+2029), producing more pieces than the file has lines and shifting
    # every following line's markup.
    # NOTE(review): this relies on the formatter terminating lines with
    # "\n" only -- confirm against the supported Pygments versions.
    pieces = colorized.split(u'\n')
    if not pieces[-1]:
        # a trailing newline yields one empty tail piece; splitlines()
        # never reported it, so neither do we
        pieces.pop()
    coloriter = (s.encode(encoding.encoding, 'replace') for s in pieces)

    # each template line consumes the next highlighted line; the filter's
    # own argument is ignored
    tmpl._filters['colorize'] = lambda x: next(coloriter)

    oldl = tmpl.cache[field]
    newl = oldl.replace('line|escape', 'line|colorize')
    tmpl.cache[field] = newl