# blob: 600a7cf08762ec2c1fc352e81d2cfa2b27a92f2f [file] [log] [blame]
# -*- coding: utf-8 -*-
"""
sphinx.highlighting
~~~~~~~~~~~~~~~~~~~
Highlight code blocks using Pygments.
:copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import sys
import re
import textwrap
try:
import parser
except ImportError:
# parser is not available on Jython
parser = None
from sphinx.util.pycompat import htmlescape
from sphinx.util.texescape import tex_hl_escape_map_new
from sphinx.ext import doctest
# Pygments is optional.  If it (or the Sphinx style module that is imported
# together with it) cannot be imported, ``pygments`` and ``lexers`` are set to
# None and the formatter names are bound to None, so that the rest of the
# module can test ``pygments`` and degrade to unhighlighted output.
try:
    import pygments
    from pygments import highlight
    from pygments.lexers import (PythonLexer, PythonConsoleLexer, CLexer,
                                 TextLexer, RstLexer)
    from pygments.lexers import get_lexer_by_name, guess_lexer
    from pygments.formatters import HtmlFormatter, LatexFormatter
    from pygments.filters import ErrorToken
    from pygments.styles import get_style_by_name
    from pygments.util import ClassNotFound
    from sphinx.pygments_styles import SphinxStyle, NoneStyle
except ImportError:
    pygments = None
    lexers = None
    HtmlFormatter = LatexFormatter = None
else:
    # Registry of ready-made lexer instances, keyed by language name.
    lexers = {
        'none': TextLexer(),
        'python': PythonLexer(),
        'pycon': PythonConsoleLexer(),
        'pycon3': PythonConsoleLexer(python3=True),
        'rest': RstLexer(),
        'c': CLexer(),
    }
    # Every predefined lexer gets the 'raiseonerror' filter attached.
    for _lexer in lexers.values():
        _lexer.add_filter('raiseonerror')
# Translation table for ``unicode.translate``: maps the code points of the
# backslash and of both curly braces to the LaTeX macros that stand in for
# them inside a Verbatim environment.
escape_hl_chars = dict(
    (ord(char), macro) for char, macro in [
        (u'\\', u'\\PYGZbs{}'),
        (u'{', u'\\PYGZob{}'),
        (u'}', u'\\PYGZcb{}'),
    ]
)
# Used if Pygments is not available: LaTeX definitions of the \PYGZbs,
# \PYGZob and \PYGZcb macros (backslash, opening brace, closing brace) that
# escape_hl_chars substitutes into unhighlighted code.  Returned by
# PygmentsBridge.get_stylesheet() for LaTeX output in that case.
_LATEX_STYLES = r'''
\newcommand\PYGZbs{\char`\\}
\newcommand\PYGZob{\char`\{}
\newcommand\PYGZcb{\char`\}}
'''
# Used if Pygments is available: appended to the style definitions that the
# Pygments formatter generates for LaTeX output.
# Use the textcomp quote to get a true single quote.
_LATEX_ADD_STYLES = r'''
\renewcommand\PYGZsq{\textquotesingle}
'''
# Exception types that PygmentsBridge.try_parse() treats as "this snippet is
# not parseable Python".
if sys.version_info >= (2, 5):
    parsing_exceptions = (SyntaxError, UnicodeEncodeError)
else:
    # Python <= 2.4 raises MemoryError when parsing an
    # invalid encoding cookie
    parsing_exceptions = (SyntaxError, UnicodeEncodeError, MemoryError)
class PygmentsBridge(object):
    """Render code blocks as HTML or LaTeX, highlighted through Pygments.

    When the module-level ``pygments`` name is falsy (its import failed), the
    bridge emits escaped but unhighlighted output instead.
    """

    # Set these attributes if you want to have different Pygments formatters
    # than the default ones.
    html_formatter = HtmlFormatter
    latex_formatter = LatexFormatter

    def __init__(self, dest='html', stylename='sphinx',
                 trim_doctest_flags=False):
        """Select the Pygments style and the formatter class for *dest*.

        :param dest: ``'html'`` selects ``html_formatter``; any other value
            selects ``latex_formatter`` (with command prefix ``'PYG'``).
        :param stylename: ``None`` or ``'sphinx'`` for ``SphinxStyle``,
            ``'none'`` for ``NoneStyle``, a dotted ``module.attribute`` path
            that is imported, or otherwise a name handed to
            ``get_style_by_name``.
        :param trim_doctest_flags: if true, ``highlight_block`` removes
            matches of ``doctest.blankline_re`` and ``doctest.doctestopt_re``
            from sources lexed with a ``PythonConsoleLexer``.
        """
        self.dest = dest
        if not pygments:
            # without Pygments there is nothing else to configure
            return

        if stylename in (None, 'sphinx'):
            style = SphinxStyle
        elif stylename == 'none':
            style = NoneStyle
        elif '.' in stylename:
            # dotted path: import the module, then fetch the style from it
            modname, attrname = stylename.rsplit('.', 1)
            style = getattr(__import__(modname, None, None, ['__name__']),
                            attrname)
        else:
            style = get_style_by_name(stylename)

        formatter_args = {'style': style}
        if dest == 'html':
            formatter = self.html_formatter
        else:
            formatter = self.latex_formatter
            formatter_args['commandprefix'] = 'PYG'

        self.trim_doctest_flags = trim_doctest_flags
        self.formatter_args = formatter_args
        self.formatter = formatter

    def get_formatter(self, **kwargs):
        """Instantiate the configured formatter class.

        Keyword arguments are forwarded to the formatter; entries of
        ``self.formatter_args`` override keyword arguments of the same name.
        """
        options = dict(kwargs)
        options.update(self.formatter_args)
        return self.formatter(**options)

    def unhighlighted(self, source):
        """Return *source* escaped for the output format, without highlighting.

        HTML output is wrapped in ``<pre>``; any other destination gets a
        LaTeX ``Verbatim`` environment.
        """
        if self.dest == 'html':
            return '<pre>' + htmlescape(source) + '</pre>\n'

        # first, escape highlighting characters like Pygments does
        escaped = source.translate(escape_hl_chars)
        # then, escape all characters nonrepresentable in LaTeX
        escaped = escaped.translate(tex_hl_escape_map_new)
        header = '\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n'
        return header + escaped + '\\end{Verbatim}\n'

    def try_parse(self, src):
        """Return whether *src* is accepted by ``parser.suite`` as Python.

        The snippet is normalised first (trailing newline, dedent, ``...``
        placeholders replaced).  Returns ``True`` without parsing when the
        ``parser`` module is not available, and ``False`` when parsing raises
        one of ``parsing_exceptions``.
        """
        # Make sure it ends in a newline
        code = src + '\n'

        # Ignore consistent indentation.
        if code.lstrip('\n').startswith(' '):
            code = textwrap.dedent(code)

        # Replace "..." by a mark which is also a valid python expression
        # (Note, the highlighter gets the original source, this is only done
        # to allow "..." in code and still highlight it as Python code.)
        mark = "__highlighting__ellipsis__"
        code = code.replace("...", mark)

        # lines beginning with "..." are probably placeholders for suite:
        # turn whatever follows the mark on such a line into a comment
        placeholder_pattern = r"(?m)^(\s*)" + mark + "(.)"
        commented_out = r"\1" + mark + r"# \2"
        code = re.sub(placeholder_pattern, commented_out, code)

        version = sys.version_info
        # if we're using 2.5, use the with statement
        if version >= (2, 5):
            code = 'from __future__ import with_statement\n' + code

        if version < (3, 0) and isinstance(code, unicode):
            # Non-ASCII chars will only occur in string literals
            # and comments.  If we wanted to give them to the parser
            # correctly, we'd have to find out the correct source
            # encoding.  Since it may not even be given in a snippet,
            # just replace all non-ASCII characters.
            code = code.encode('ascii', 'replace')

        if (3, 0) <= version < (3, 2):
            # Python 3.1 can't process '\r' as linesep.
            # `parser.suite("print('hello')\r\n")` cause error.
            code = code.replace('\r\n', '\n')

        if parser is None:
            return True

        try:
            parser.suite(code)
        except parsing_exceptions:
            return False
        return True

    def highlight_block(self, source, lang, warn=None, force=False, **kwargs):
        """Highlight *source* as language *lang* for the configured output.

        :param source: the code; decoded with ``decode()`` if it is not
            already ``unicode``.
        :param lang: language name; ``'py'``/``'python'``,
            ``'python3'``/``'py3'`` and ``'guess'`` get special treatment,
            other names are looked up in ``lexers`` or via
            ``get_lexer_by_name``.
        :param warn: optional callable receiving a message when the lexer
            name is unknown; without it ``ClassNotFound`` propagates.
        :param force: for Python code, skip the ``try_parse`` check and use
            the Python lexer regardless.
        :param kwargs: passed on to ``get_formatter``.
        :returns: the formatter output; for non-HTML destinations it is
            additionally translated with ``tex_hl_escape_map_new``.
        """
        if not isinstance(source, unicode):
            source = source.decode()
        if not pygments:
            return self.unhighlighted(source)

        # find out which lexer to use
        if lang in ('py', 'python'):
            if source.startswith('>>>'):
                # interactive session
                lexer = lexers['pycon']
            elif force or self.try_parse(source):
                # forced, or it really parses as Python
                lexer = lexers['python']
            else:
                lexer = lexers['none']
        elif lang in ('python3', 'py3') and source.startswith('>>>'):
            # for py3, recognize interactive sessions, but do not try parsing...
            lexer = lexers['pycon3']
        elif lang == 'guess':
            try:
                lexer = guess_lexer(source)
            except Exception:
                lexer = lexers['none']
        elif lang in lexers:
            lexer = lexers[lang]
        else:
            try:
                lexer = get_lexer_by_name(lang)
            except ClassNotFound:
                if not warn:
                    raise
                warn('Pygments lexer name %r is not known' % lang)
                lexer = lexers['none']
            else:
                # remember the new lexer for later calls
                lexers[lang] = lexer
                lexer.add_filter('raiseonerror')

        # trim doctest options if wanted
        if isinstance(lexer, PythonConsoleLexer) and self.trim_doctest_flags:
            for pattern in (doctest.blankline_re, doctest.doctestopt_re):
                source = pattern.sub('', source)

        # highlight via Pygments
        formatter = self.get_formatter(**kwargs)
        try:
            hlsource = highlight(source, lexer, formatter)
        except ErrorToken:
            # this is most probably not the selected language,
            # so let it pass unhighlighted
            hlsource = highlight(source, lexers['none'], formatter)

        if self.dest == 'html':
            return hlsource
        if not isinstance(hlsource, unicode):  # Py2 / Pygments < 1.6
            hlsource = hlsource.decode()
        return hlsource.translate(tex_hl_escape_map_new)

    def get_stylesheet(self):
        """Return the style definitions that go with the highlighted output.

        With Pygments these come from the formatter (CSS scoped to
        ``.highlight`` for HTML, otherwise the formatter's definitions plus
        ``_LATEX_ADD_STYLES``).  Without Pygments, ``_LATEX_STYLES`` is
        returned for the ``'latex'`` destination and an empty string
        otherwise.
        """
        if pygments:
            formatter = self.get_formatter()
            if self.dest == 'html':
                return formatter.get_style_defs('.highlight')
            return formatter.get_style_defs() + _LATEX_ADD_STYLES

        if self.dest == 'latex':
            return _LATEX_STYLES
        # no HTML styles needed
        return ''