sphinx/highlighting.py - external/bitbucket.org/birkenfeld/sphinx - Git at Google

 # -*- coding: utf-8 -*-
 """
     sphinx.highlighting
     ~~~~~~~~~~~~~~~~~~~

     Highlight code blocks using Pygments.

     :copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """

 import sys
 import re
 import textwrap

 try:
     import parser
 except ImportError:
     # parser is not available on Jython
     parser = None

 from sphinx.util.pycompat import htmlescape
 from sphinx.util.texescape import tex_hl_escape_map_new
 from sphinx.ext import doctest

 # Pygments is optional: if anything below cannot be imported, ``pygments``
 # and ``lexers`` are set to None and the formatter names are bound to None.
 try:
     import pygments
     from pygments import highlight
     from pygments.lexers import (PythonLexer, PythonConsoleLexer, CLexer,
                                  TextLexer, RstLexer)
     from pygments.lexers import get_lexer_by_name, guess_lexer
     from pygments.formatters import HtmlFormatter, LatexFormatter
     from pygments.filters import ErrorToken
     from pygments.styles import get_style_by_name
     from pygments.util import ClassNotFound
     from sphinx.pygments_styles import SphinxStyle, NoneStyle
 except ImportError:
     # Sentinel values; the rest of the module tests ``if not pygments``.
     HtmlFormatter = LatexFormatter = None
     pygments = lexers = None
 else:
     # Pre-built lexer instances keyed by the language name used to look
     # them up.
     lexers = {
         'none': TextLexer(),
         'python': PythonLexer(),
         'pycon': PythonConsoleLexer(),
         'pycon3': PythonConsoleLexer(python3=True),
         'rest': RstLexer(),
         'c': CLexer(),
     }
     # Attach the 'raiseonerror' filter to each of the pre-built lexers.
     for _lexer in lexers.values():
         _lexer.add_filter('raiseonerror')


 # Translation table (code point -> replacement) for the characters that have
 # to be escaped before text is placed into a Verbatim environment.
 escape_hl_chars = {
     ord(u'\\'): u'\\PYGZbs{}',
     ord(u'{'): u'\\PYGZob{}',
     ord(u'}'): u'\\PYGZcb{}',
 }

 # LaTeX definitions returned as the stylesheet when Pygments is missing
 _LATEX_STYLES = r'''
 \newcommand\PYGZbs{\char`\\}
 \newcommand\PYGZob{\char`\{}
 \newcommand\PYGZcb{\char`\}}
 '''

 # extra LaTeX definitions appended to the Pygments-generated stylesheet;
 # the textcomp quote gives a true single quote
 _LATEX_ADD_STYLES = r'''
 \renewcommand\PYGZsq{\textquotesingle}
 '''

 # Exception types that mean "this snippet could not be parsed as Python".
 parsing_exceptions = (SyntaxError, UnicodeEncodeError)
 if sys.version_info < (2, 5):
     # Python <= 2.4 signals an invalid encoding cookie with MemoryError
     parsing_exceptions = parsing_exceptions + (MemoryError,)


 class PygmentsBridge(object):
     """Highlight source code for HTML or LaTeX output using Pygments.

     If the module-level ``pygments`` name is falsy (the import failed), the
     bridge produces escaped, unhighlighted output instead.
     """

     # Formatter classes per destination.  Override these attributes to use
     # other Pygments formatters than the default ones.
     html_formatter = HtmlFormatter
     latex_formatter = LatexFormatter

     def __init__(self, dest='html', stylename='sphinx',
                  trim_doctest_flags=False):
         """Pick the Pygments style and the formatter class for *dest*.

         :param dest: ``'html'`` selects ``html_formatter``; any other value
             selects ``latex_formatter`` with the ``PYG`` command prefix.
         :param stylename: ``None``/``'sphinx'``, ``'none'``, a dotted
             ``module.attribute`` path, or a name for ``get_style_by_name``.
         :param trim_doctest_flags: stored for use by ``highlight_block``.
         """
         self.dest = dest
         if not pygments:
             # nothing else to configure without Pygments
             return

         if stylename is None or stylename == 'sphinx':
             chosen_style = SphinxStyle
         elif stylename == 'none':
             chosen_style = NoneStyle
         elif '.' not in stylename:
             chosen_style = get_style_by_name(stylename)
         else:
             # dotted path: import the module, then fetch the style from it
             modname, attrname = stylename.rsplit('.', 1)
             style_module = __import__(modname, None, None, ['__name__'])
             chosen_style = getattr(style_module, attrname)

         self.trim_doctest_flags = trim_doctest_flags
         formatter_args = {'style': chosen_style}
         wants_html = dest == 'html'
         if not wants_html:
             formatter_args['commandprefix'] = 'PYG'
         self.formatter_args = formatter_args
         if wants_html:
             self.formatter = self.html_formatter
         else:
             self.formatter = self.latex_formatter

     def get_formatter(self, **kwargs):
         """Instantiate the formatter class with *kwargs*.

         Entries of ``self.formatter_args`` take precedence over *kwargs*.
         """
         options = dict(kwargs)
         options.update(self.formatter_args)
         return self.formatter(**options)

     def unhighlighted(self, source):
         """Return *source* escaped for the destination, without highlighting."""
         if self.dest == 'html':
             escaped = htmlescape(source)
             return '<pre>' + escaped + '</pre>\n'

         # escape the highlighting characters the way Pygments does, then
         # everything that cannot be represented in LaTeX
         escaped = source.translate(escape_hl_chars)
         escaped = escaped.translate(tex_hl_escape_map_new)
         opening = '\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n'
         closing = '\\end{Verbatim}\n'
         return opening + escaped + closing

     def try_parse(self, src):
         """Return True if *src* parses as Python, False otherwise.

         Always returns True when the ``parser`` module is unavailable.
         """
         version = sys.version_info

         # the snippet has to end with a newline
         src += '\n'

         # strip indentation shared by all lines
         if src.lstrip('\n').startswith(' '):
             src = textwrap.dedent(src)

         # "..." becomes an identifier so that it is a valid expression; this
         # only affects the parse attempt, not the text that gets highlighted
         mark = "__highlighting__ellipsis__"
         src = src.replace("...", mark)

         # a line starting with "..." is probably a placeholder for a suite:
         # turn whatever follows the mark on that line into a comment
         placeholder_pattern = r"(?m)^(\s*)" + mark + "(.)"
         placeholder_repl = r"\1" + mark + r"# \2"
         src = re.sub(placeholder_pattern, placeholder_repl, src)

         # enable the with statement where the interpreter knows about it
         if version >= (2, 5):
             src = 'from __future__ import with_statement\n' + src

         if version < (3, 0) and isinstance(src, unicode):
             # the source encoding of a snippet is unknown, and non-ASCII
             # characters only occur in string literals and comments, so
             # they are simply replaced
             src = src.encode('ascii', 'replace')

         # Python 3.0/3.1 cannot process '\r' as a line separator
         if (3, 0) <= version < (3, 2) and '\r\n' in src:
             src = src.replace('\r\n', '\n')

         if parser is None:
             return True

         try:
             parser.suite(src)
         except parsing_exceptions:
             return False
         return True

     def highlight_block(self, source, lang, warn=None, force=False, **kwargs):
         """Return *source* highlighted as *lang* for the destination.

         :param source: the code; non-unicode input is decoded first.
         :param lang: language name; ``'guess'`` lets Pygments guess.
         :param warn: optional callable that receives a message when *lang*
             is unknown to Pygments; without it ``ClassNotFound`` propagates.
         :param force: for ``'py'``/``'python'``, skip the trial parse and
             always use the Python lexer.
         :param kwargs: forwarded to ``get_formatter``.
         """
         if not isinstance(source, unicode):
             source = source.decode()
         if not pygments:
             return self.unhighlighted(source)

         is_session = source.startswith('>>>')

         # choose the lexer
         if lang in ('py', 'python'):
             if is_session:
                 # interactive session
                 lexer = lexers['pycon']
             elif force or self.try_parse(source):
                 lexer = lexers['python']
             else:
                 # does not parse as Python
                 lexer = lexers['none']
         elif is_session and lang in ('python3', 'py3'):
             # interactive sessions are recognized for py3, but no trial
             # parse is done
             lexer = lexers['pycon3']
         elif lang == 'guess':
             try:
                 lexer = guess_lexer(source)
             except Exception:
                 lexer = lexers['none']
         elif lang in lexers:
             lexer = lexers[lang]
         else:
             try:
                 lexer = get_lexer_by_name(lang)
             except ClassNotFound:
                 if not warn:
                     raise
                 warn('Pygments lexer name %r is not known' % lang)
                 lexer = lexers['none']
             else:
                 # remember the new lexer for later calls
                 lexers[lang] = lexer
                 lexer.add_filter('raiseonerror')

         # drop doctest options if requested
         if isinstance(lexer, PythonConsoleLexer) and self.trim_doctest_flags:
             for pattern in (doctest.blankline_re, doctest.doctestopt_re):
                 source = pattern.sub('', source)

         # run Pygments
         formatter = self.get_formatter(**kwargs)
         try:
             result = highlight(source, lexer, formatter)
         except ErrorToken:
             # most likely not the selected language: emit it unhighlighted
             result = highlight(source, lexers['none'], formatter)

         if self.dest == 'html':
             return result
         if not isinstance(result, unicode):  # Py2 / Pygments < 1.6
             result = result.decode()
         return result.translate(tex_hl_escape_map_new)

     def get_stylesheet(self):
         """Return the style definitions needed by the highlighted output."""
         if not pygments:
             # HTML needs no styles; LaTeX needs the escape macros
             return _LATEX_STYLES if self.dest == 'latex' else ''

         formatter = self.get_formatter()
         if self.dest == 'html':
             return formatter.get_style_defs('.highlight')
         return formatter.get_style_defs() + _LATEX_ADD_STYLES
	# -*- coding: utf-8 -*-
	"""
	sphinx.highlighting
	~~~~~~~~~~~~~~~~~~~

	Highlight code blocks using Pygments.

	:copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
	:license: BSD, see LICENSE for details.
	"""

	import sys
	import re
	import textwrap

	# The ``parser`` module is optional; fall back to None when it is missing.
	try:
	    import parser
	except ImportError:
	    # parser is not available on Jython
	    parser = None

	from sphinx.util.pycompat import htmlescape
	from sphinx.util.texescape import tex_hl_escape_map_new
	from sphinx.ext import doctest

	# Pygments is optional: if anything below cannot be imported, ``pygments``
	# and ``lexers`` are set to None and the formatter names are bound to None.
	try:
	    import pygments
	    from pygments import highlight
	    from pygments.lexers import PythonLexer, PythonConsoleLexer, CLexer, \
	         TextLexer, RstLexer
	    from pygments.lexers import get_lexer_by_name, guess_lexer
	    from pygments.formatters import HtmlFormatter, LatexFormatter
	    from pygments.filters import ErrorToken
	    from pygments.styles import get_style_by_name
	    from pygments.util import ClassNotFound
	    from sphinx.pygments_styles import SphinxStyle, NoneStyle
	except ImportError:
	    pygments = None
	    lexers = None
	    HtmlFormatter = LatexFormatter = None
	else:

	    # Pre-built lexer instances keyed by language name.
	    lexers = dict(
	        none = TextLexer(),
	        python = PythonLexer(),
	        pycon = PythonConsoleLexer(),
	        pycon3 = PythonConsoleLexer(python3=True),
	        rest = RstLexer(),
	        c = CLexer(),
	    )
	    # Attach the 'raiseonerror' filter to each of the pre-built lexers.
	    for _lexer in lexers.values():
	        _lexer.add_filter('raiseonerror')


	# Translation table (code point -> replacement) for the characters that have
	# to be escaped before text is placed into a Verbatim environment.
	escape_hl_chars = {ord(u'\\'): u'\\PYGZbs{}',
	                   ord(u'{'): u'\\PYGZob{}',
	                   ord(u'}'): u'\\PYGZcb{}'}

	# used if Pygments is not available
	_LATEX_STYLES = r'''
	\newcommand\PYGZbs{\char`\\}
	\newcommand\PYGZob{\char`\{}
	\newcommand\PYGZcb{\char`\}}
	'''

	# used if Pygments is available
	# use textcomp quote to get a true single quote
	_LATEX_ADD_STYLES = r'''
	\renewcommand\PYGZsq{\textquotesingle}
	'''

	# Exception types that mean "this snippet could not be parsed as Python".
	parsing_exceptions = (SyntaxError, UnicodeEncodeError)
	if sys.version_info < (2, 5):
	    # Python <= 2.4 raises MemoryError when parsing an
	    # invalid encoding cookie
	    parsing_exceptions += MemoryError,


	class PygmentsBridge(object):
	    """Highlight source code for HTML or LaTeX output using Pygments.

	    If the module-level ``pygments`` name is falsy (the import failed), the
	    bridge produces escaped, unhighlighted output instead.
	    """

	    # Set these attributes if you want to have different Pygments formatters
	    # than the default ones.
	    html_formatter = HtmlFormatter
	    latex_formatter = LatexFormatter

	    def __init__(self, dest='html', stylename='sphinx',
	                 trim_doctest_flags=False):
	        """Pick the Pygments style and the formatter class for *dest*.

	        :param dest: ``'html'`` selects ``html_formatter``; any other value
	            selects ``latex_formatter`` with the ``PYG`` command prefix.
	        :param stylename: ``None``/``'sphinx'``, ``'none'``, a dotted
	            ``module.attribute`` path, or a name for ``get_style_by_name``.
	        :param trim_doctest_flags: stored for use by ``highlight_block``.
	        """
	        self.dest = dest
	        if not pygments:
	            # nothing else to configure without Pygments
	            return
	        if stylename is None or stylename == 'sphinx':
	            style = SphinxStyle
	        elif stylename == 'none':
	            style = NoneStyle
	        elif '.' in stylename:
	            # dotted path: import the module, then fetch the style from it
	            module, stylename = stylename.rsplit('.', 1)
	            style = getattr(__import__(module, None, None, ['__name__']),
	                            stylename)
	        else:
	            style = get_style_by_name(stylename)
	        self.trim_doctest_flags = trim_doctest_flags
	        self.formatter_args = {'style' : style}
	        if dest == 'html':
	            self.formatter = self.html_formatter
	        else:
	            self.formatter = self.latex_formatter
	            self.formatter_args['commandprefix'] = 'PYG'

	    def get_formatter(self, **kwargs):
	        """Instantiate the formatter class with *kwargs*.

	        Entries of ``self.formatter_args`` take precedence over *kwargs*.
	        """
	        kwargs.update(self.formatter_args)
	        return self.formatter(**kwargs)

	    def unhighlighted(self, source):
	        """Return *source* escaped for the destination, without highlighting."""
	        if self.dest == 'html':
	            return '<pre>' + htmlescape(source) + '</pre>\n'
	        else:
	            # first, escape highlighting characters like Pygments does
	            source = source.translate(escape_hl_chars)
	            # then, escape all characters nonrepresentable in LaTeX
	            source = source.translate(tex_hl_escape_map_new)
	            return '\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n' + \
	                   source + '\\end{Verbatim}\n'

	    def try_parse(self, src):
	        """Return True if *src* parses as Python, False otherwise.

	        Always returns True when the ``parser`` module is unavailable.
	        """
	        # Make sure it ends in a newline
	        src += '\n'

	        # Ignore consistent indentation.
	        if src.lstrip('\n').startswith(' '):
	            src = textwrap.dedent(src)

	        # Replace "..." by a mark which is also a valid python expression
	        # (Note, the highlighter gets the original source, this is only done
	        #  to allow "..." in code and still highlight it as Python code.)
	        mark = "__highlighting__ellipsis__"
	        src = src.replace("...", mark)

	        # lines beginning with "..." are probably placeholders for suite
	        src = re.sub(r"(?m)^(\s*)" + mark + "(.)", r"\1"+ mark + r"# \2", src)

	        # if we're using 2.5, use the with statement
	        if sys.version_info >= (2, 5):
	            src = 'from __future__ import with_statement\n' + src

	        if sys.version_info < (3, 0) and isinstance(src, unicode):
	            # Non-ASCII chars will only occur in string literals
	            # and comments.  If we wanted to give them to the parser
	            # correctly, we'd have to find out the correct source
	            # encoding.  Since it may not even be given in a snippet,
	            # just replace all non-ASCII characters.
	            src = src.encode('ascii', 'replace')

	        if (3, 0) <= sys.version_info < (3, 2):
	            # Python 3.1 can't process '\r' as linesep.
	            # `parser.suite("print('hello')\r\n")` cause error.
	            if '\r\n' in src:
	                src = src.replace('\r\n', '\n')

	        if parser is None:
	            return True

	        try:
	            parser.suite(src)
	        except parsing_exceptions:
	            return False
	        else:
	            return True

	    def highlight_block(self, source, lang, warn=None, force=False, **kwargs):
	        """Return *source* highlighted as *lang* for the destination.

	        :param source: the code; non-unicode input is decoded first.
	        :param lang: language name; ``'guess'`` lets Pygments guess.
	        :param warn: optional callable that receives a message when *lang*
	            is unknown to Pygments; without it ``ClassNotFound`` propagates.
	        :param force: for ``'py'``/``'python'``, skip the trial parse and
	            always use the Python lexer.
	        :param kwargs: forwarded to ``get_formatter``.
	        """
	        if not isinstance(source, unicode):
	            source = source.decode()
	        if not pygments:
	            return self.unhighlighted(source)

	        # find out which lexer to use
	        if lang in ('py', 'python'):
	            if source.startswith('>>>'):
	                # interactive session
	                lexer = lexers['pycon']
	            elif not force:
	                # maybe Python -- try parsing it
	                if self.try_parse(source):
	                    lexer = lexers['python']
	                else:
	                    lexer = lexers['none']
	            else:
	                lexer = lexers['python']
	        elif lang in ('python3', 'py3') and source.startswith('>>>'):
	            # for py3, recognize interactive sessions, but do not try parsing...
	            lexer = lexers['pycon3']
	        elif lang == 'guess':
	            try:
	                lexer = guess_lexer(source)
	            except Exception:
	                lexer = lexers['none']
	        else:
	            if lang in lexers:
	                lexer = lexers[lang]
	            else:
	                try:
	                    # remember the new lexer for later calls
	                    lexer = lexers[lang] = get_lexer_by_name(lang)
	                except ClassNotFound:
	                    if warn:
	                        warn('Pygments lexer name %r is not known' % lang)
	                        lexer = lexers['none']
	                    else:
	                        raise
	                else:
	                    lexer.add_filter('raiseonerror')

	        # trim doctest options if wanted
	        if isinstance(lexer, PythonConsoleLexer) and self.trim_doctest_flags:
	            source = doctest.blankline_re.sub('', source)
	            source = doctest.doctestopt_re.sub('', source)

	        # highlight via Pygments
	        formatter = self.get_formatter(**kwargs)
	        try:
	            hlsource = highlight(source, lexer, formatter)
	        except ErrorToken:
	            # this is most probably not the selected language,
	            # so let it pass unhighlighted
	            hlsource = highlight(source, lexers['none'], formatter)
	        if self.dest == 'html':
	            return hlsource
	        else:
	            if not isinstance(hlsource, unicode):  # Py2 / Pygments < 1.6
	                hlsource = hlsource.decode()
	            return hlsource.translate(tex_hl_escape_map_new)

	    def get_stylesheet(self):
	        """Return the style definitions needed by the highlighted output."""
	        if not pygments:
	            if self.dest == 'latex':
	                return _LATEX_STYLES
	            # no HTML styles needed
	            return ''
	        formatter = self.get_formatter()
	        if self.dest == 'html':
	            return formatter.get_style_defs('.highlight')
	        else:
	            return formatter.get_style_defs() + _LATEX_ADD_STYLES