blob: 84a82322a786870941a8c8f9e882f7253ced964d [file] [log] [blame]
# -*- coding: utf-8 -*-
"""
sphinx.web.markup
~~~~~~~~~~~~~~~~~
Awfully simple markup used in comments. Syntax:
`this is some <code>`
like <tt> in HTML
``this is like ` just that it can contain backticks``
like <tt> in HTML
*emphasized*
translates to <em>
**strong**
translates to <strong>
!!!very important message!!!
use this to mark important or dangerous things.
Translates to <em class="important">
[[http://www.google.com/]]
Simple link with the link target as caption. If the
URL is relative the provided callback is called to get
the full URL.
[[http://www.google.com/ go to google]]
Link with "go to google" as caption.
<code>preformatted code that could be Python code</code>
Python code (most of the time), otherwise preformatted.
<quote>cite someone</quote>
Like <blockquote> in HTML.
:copyright: 2007 by Armin Ronacher.
:license: Python license.
"""
import cgi
import re
from urlparse import urlparse
from ..highlighting import highlight_block
# Markup delimiters keyed by token name, as ``(start, end)`` pairs.  An end
# of ``None`` marks a standalone token without a closing delimiter; its
# start is then already a regular expression and is used verbatim instead
# of being escaped.
inline_formatting = {
    'escaped_code': ('``', '``'),
    'code': ('`', '`'),
    'strong': ('**', '**'),
    'emphasized': ('*', '*'),
    'important': ('!!!', '!!!'),
    'link': ('[[', ']]'),
    'quote': ('<quote>', '</quote>'),
    'code_block': ('<code>', '</code>'),
    'paragraph': (r'\n{2,}', None),
    'newline': (r'\\$', None)
}

# HTML emitted for the ``<name>_begin`` / ``<name>_end`` tokens of the
# formattings that may contain further markup.
simple_formattings = {
    'strong_begin': '<strong>',
    'strong_end': '</strong>',
    'emphasized_begin': '<em>',
    'emphasized_end': '</em>',
    'important_begin': '<em class="important">',
    'important_end': '</em>',
    'quote_begin': '<blockquote>',
    'quote_end': '</blockquote>'
}

# Formattings whose content is taken verbatim up to the end delimiter; no
# markup is recognized inside them.
raw_formatting = set(['link', 'code', 'escaped_code', 'code_block'])

# One alternation with a named group per formatting.  Alternatives are
# ordered by descending length of the start delimiter so that e.g. '``' is
# tried before '`' and '**' before '*'.
formatting_start_re = re.compile('|'.join(
    '(?P<%s>%s)' % (name, re.escape(start) if end is not None else start)
    for name, (start, end)
    in sorted(inline_formatting.items(), key=lambda item: -len(item[1][0]))
), re.S | re.M)

# Compiled end delimiter for every formatting that has one.
formatting_end_res = dict(
    (name, re.compile(re.escape(end)))
    for name, (start, end) in inline_formatting.items()
    if end is not None
)

# Names of the standalone formattings (no end delimiter).
without_end_tag = set(
    name for name, (start, end) in inline_formatting.items() if end is None
)
class StreamProcessor(object):
    """Iterator over ``(token, data)`` pairs that allows pushing items back.

    `stream` is the underlying iterator.  Items handed to :meth:`push` are
    returned (most recently pushed first) before the underlying stream is
    consulted again.
    """

    def __init__(self, stream):
        self._pushed = []
        self._stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next pair; raise `StopIteration` when exhausted."""
        if self._pushed:
            return self._pushed.pop()
        # The builtin works with both the ``next`` and ``__next__``
        # spellings of the iterator protocol.
        return next(self._stream)

    # Python 2 spelling of the iterator protocol; existing callers use it.
    next = __next__

    def push(self, token, data):
        """Put a ``(token, data)`` pair back so it is returned next."""
        self._pushed.append((token, data))

    def get_data(self, drop_needle=False):
        """Join and return the data of all directly following 'text' tokens.

        Reading stops at the first token that is not ``'text'`` or when the
        stream is exhausted.  The stopping token is pushed back onto the
        stream unless `drop_needle` is true, in which case it is discarded.
        """
        result = []
        for token, data in self:
            if token != 'text':
                if not drop_needle:
                    self.push(token, data)
                break
            result.append(data)
        return ''.join(result)
class MarkupParser(object):
    """Convert the comment markup handled by this module to HTML.

    `make_rel_url` is called with the target of every link that has no URL
    scheme; its return value is used as the ``href`` of that link.
    """

    def __init__(self, make_rel_url):
        self.make_rel_url = make_rel_url

    def tokenize(self, text):
        """Yield ``(token, data)`` pairs for `text`.

        The following kinds of pairs are produced:

        * ``('text', str)`` for plain text,
        * ``(name, None)`` for formattings without an end delimiter,
        * ``(name, str)`` for raw formattings, with the verbatim content,
        * ``(name + '_begin', None)`` / ``(name + '_end', None)`` around
          the content of all other formattings.

        Formattings still open at the end of the text are closed
        automatically.  A raw formatting without an end delimiter swallows
        the remainder of the text.
        """
        # normalize all line endings to '\n'
        text = '\n'.join(text.splitlines())
        pos = 0
        end = len(text)
        stack = []          # names of the currently open formattings
        text_buffer = []    # plain characters not yet emitted

        while pos < end:
            # Only the innermost open formatting can be closed here.
            if stack:
                m = formatting_end_res[stack[-1]].match(text, pos)
                if m is not None:
                    if text_buffer:
                        yield 'text', ''.join(text_buffer)
                        del text_buffer[:]
                    yield stack.pop() + '_end', None
                    pos = m.end()
                    continue

            m = formatting_start_re.match(text, pos)
            if m is not None:
                if text_buffer:
                    yield 'text', ''.join(text_buffer)
                    del text_buffer[:]
                # Every alternative of the start regex is a single named
                # group, so the last matched group names the formatting.
                key = m.lastgroup
                if key in without_end_tag:
                    yield key, None
                elif key in raw_formatting:
                    m2 = formatting_end_res[key].search(text, m.end())
                    if m2 is None:
                        # Unterminated: the rest of the text is the content.
                        yield key, text[m.end():]
                        break
                    yield key, text[m.end():m2.start()]
                    # continue after the end delimiter
                    m = m2
                else:
                    yield key + '_begin', None
                    stack.append(key)
                pos = m.end()
                continue

            text_buffer.append(text[pos])
            pos += 1

        yield 'text', ''.join(text_buffer)
        for token in reversed(stack):
            yield token + '_end', None

    def _link_to_html(self, data):
        """Return the ``<a>`` element for the raw content of a link.

        `data` is either just the target, or the target and a caption
        separated by the first space.
        """
        if ' ' in data:
            href, caption = data.split(' ', 1)
        else:
            href = caption = data
        protocol = urlparse(href)[0]
        nofollow = True
        if not protocol:
            href = self.make_rel_url(href)
            nofollow = False
        else:
            # Strip the scheme repeatedly so that a nested
            # 'javascript:javascript:...' cannot survive the removal.
            while protocol == 'javascript':
                href = href[len('javascript:'):]
                protocol = urlparse(href)[0]
        # The href ends up inside a double-quoted attribute, therefore
        # quotes have to be escaped as well.
        return '<a href="%s"%s>%s</a>' % (
            cgi.escape(href, True),
            ' rel="nofollow"' if nofollow else '',
            cgi.escape(caption))

    def stream_to_html(self, text):
        """Yield the HTML for `text` chunk by chunk.

        A chunk is either a complete ``<p>...</p>`` paragraph or whatever
        `highlight_block` returned for a code block.  Empty paragraphs are
        omitted.
        """
        stream = StreamProcessor(self.tokenize(text))
        paragraph = []      # HTML fragments of the paragraph being built
        result = []         # finished paragraphs (lists) and code blocks

        def new_paragraph():
            result.append(paragraph[:])
            del paragraph[:]

        for token, data in stream:
            if token in simple_formattings:
                paragraph.append(simple_formattings[token])
            elif token in ('text', 'escaped_code', 'code'):
                if data:
                    data = cgi.escape(data)
                    if token in ('escaped_code', 'code'):
                        data = '<tt>%s</tt>' % data
                    paragraph.append(data)
            elif token == 'link':
                paragraph.append(self._link_to_html(data))
            elif token == 'code_block':
                # Finish the running paragraph first so that text written
                # before the code block is also rendered before it.
                new_paragraph()
                result.append(highlight_block(data, 'python'))
            elif token == 'paragraph':
                new_paragraph()
            elif token == 'newline':
                paragraph.append('<br>')

        if paragraph:
            result.append(paragraph)
        for item in result:
            if isinstance(item, list):
                if item:
                    yield '<p>%s</p>' % ''.join(item)
            else:
                yield item

    def to_html(self, text):
        """Return the HTML for `text` as a single string."""
        return ''.join(self.stream_to_html(text))
def markup(text, make_rel_url=lambda x: './' + x):
    """Render `text` to HTML with a `MarkupParser` and return the result.

    `make_rel_url` is handed to the parser unchanged; the default prefixes
    its argument with ``'./'``.
    """
    parser = MarkupParser(make_rel_url)
    return parser.to_html(text)