view hgext/highlight/__init__.py @ 37711:65a23cc8e75b

cborutil: implement support for streaming encoding, bytestring decoding The vendored cbor2 package is... a bit disappointing. On the encoding side, it insists that you pass it something with a write() to send data to. That means if you want to emit data to a generator, you have to construct an e.g. io.BytesIO(), write() to it, then get the data back out. There can be non-trivial overhead involved. The encoder also doesn't support indefinite types - bytestrings, arrays, and maps that don't have a known length. Again, this is really unfortunate because it requires you to buffer the entire source and destination in memory to encode large things. On the decoding side, it supports reading indefinite length types. But it buffers them completely before returning. More sadness. This commit implements "streaming" encoders for various CBOR types. Encoding emits a generator of hunks. So you can efficiently stream encoded data elsewhere. It also implements support for emitting indefinite length bytestrings, arrays, and maps. On the decoding side, we only implement support for decoding an indefinite length bytestring from a file object. It will emit a generator of raw chunks from the source. I didn't want to reinvent so many wheels. But profiling the wire protocol revealed that the overhead of constructing io.BytesIO() instances to temporarily hold results has a non-trivial overhead. We're talking >15% of execution time for operations like "transfer the fulltexts of all files in a revision." So I can justify this effort. Fortunately, CBOR is a relatively straightforward format. And we have a reference implementation in the repo we can test against. Differential Revision: https://phab.mercurial-scm.org/D3303
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 14 Apr 2018 16:36:15 -0700
parents 3e74d3cc500f
children 2570dca0f21c
line wrap: on
line source

# highlight - syntax highlighting in hgweb, based on Pygments
#
#  Copyright 2008, 2009 Patrick Mezard <pmezard@gmail.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# The original module was split in an interface and an implementation
# file to defer pygments loading and speedup extension setup.

"""syntax highlighting for hgweb (requires Pygments)

It depends on the Pygments syntax highlighting library:
http://pygments.org/

There are the following configuration options::

  [web]
  pygments_style = <style> (default: colorful)
  highlightfiles = <fileset> (default: size('<5M'))
  highlightonlymatchfilename = <bool> (default False)

``highlightonlymatchfilename`` will only highlight files if their type could
be identified by their filename. When this is not enabled (the default),
Pygments will try very hard to identify the file type from content and any
match (even matches with a low confidence score) will be used.
"""

from __future__ import absolute_import

from . import highlight
from mercurial.hgweb import (
    webcommands,
    webutil,
)

from mercurial import (
    extensions,
    fileset,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

def pygmentize(web, field, fctx, tmpl):
    style = web.config('web', 'pygments_style', 'colorful')
    expr = web.config('web', 'highlightfiles', "size('<5M')")
    filenameonly = web.configbool('web', 'highlightonlymatchfilename', False)

    ctx = fctx.changectx()
    tree = fileset.parse(expr)
    mctx = fileset.matchctx(ctx, subset=[fctx.path()], status=None)
    if fctx.path() in fileset.getset(mctx, tree):
        highlight.pygmentize(field, fctx, style, tmpl,
                guessfilenameonly=filenameonly)

def filerevision_highlight(orig, web, fctx):
    mt = web.res.headers['Content-Type']
    # only pygmentize for mimetype containing 'html' so we both match
    # 'text/html' and possibly 'application/xhtml+xml' in the future
    # so that we don't have to touch the extension when the mimetype
    # for a template changes; also hgweb optimizes the case that a
    # raw file is sent using rawfile() and doesn't call us, so we
    # can't clash with the file's content-type here in case we
    # pygmentize a html file
    if 'html' in mt:
        pygmentize(web, 'fileline', fctx, web.tmpl)

    return orig(web, fctx)

def annotate_highlight(orig, web):
    mt = web.res.headers['Content-Type']
    if 'html' in mt:
        fctx = webutil.filectx(web.repo, web.req)
        pygmentize(web, 'annotateline', fctx, web.tmpl)

    return orig(web)

def generate_css(web):
    pg_style = web.config('web', 'pygments_style', 'colorful')
    fmter = highlight.HtmlFormatter(style=pg_style)
    web.res.headers['Content-Type'] = 'text/css'
    web.res.setbodybytes(''.join([
        '/* pygments_style = %s */\n\n' % pg_style,
        fmter.get_style_defs(''),
    ]))
    return web.res.sendresponse()

def extsetup():
    # monkeypatch in the new version
    extensions.wrapfunction(webcommands, '_filerevision',
                            filerevision_highlight)
    extensions.wrapfunction(webcommands, 'annotate', annotate_highlight)
    webcommands.highlightcss = generate_css
    webcommands.__all__.append('highlightcss')