| # -*- coding: utf-8 -*- |
| # markdown is released under the BSD license |
| # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later) |
| # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) |
| # Copyright 2004 Manfred Stienstra (the original version) |
| # |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # * Neither the name of the <organization> nor the |
| # names of its contributors may be used to endorse or promote products |
| # derived from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY |
| # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT |
| # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| # POSSIBILITY OF SUCH DAMAGE. |
| |
| |
| from __future__ import unicode_literals |
| import re |
| import sys |
| |
| |
| """ |
| Python 3 Stuff |
| ============================================================================= |
| """ |
| PY3 = sys.version_info[0] == 3 |
| |
| if PY3: |
| string_type = str |
| text_type = str |
| int2str = chr |
| else: |
| string_type = basestring |
| text_type = unicode |
| int2str = unichr |
| |
| |
| """ |
| Constants you might want to modify |
| ----------------------------------------------------------------------------- |
| """ |
| |
| BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul" |
| "|script|noscript|form|fieldset|iframe|math" |
| "|hr|hr/|style|li|dt|dd|thead|tbody" |
| "|tr|th|td|section|footer|header|group|figure" |
| "|figcaption|aside|article|canvas|output" |
| "|progress|video)$", re.IGNORECASE) |
| # Placeholders |
| STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder |
| ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder |
| INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" |
| INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX |
| INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})') |
| AMP_SUBSTITUTE = STX+"amp"+ETX |
| |
| """ |
| Constants you probably do not need to change |
| ----------------------------------------------------------------------------- |
| """ |
| |
| RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'), |
| # Hebrew (0590-05FF), Arabic (0600-06FF), |
| # Syriac (0700-074F), Arabic supplement (0750-077F), |
| # Thaana (0780-07BF), Nko (07C0-07FF). |
| ('\u2D30', '\u2D7F'), # Tifinagh |
| ) |
| |
| # Extensions should use "markdown.util.etree" instead of "etree" (or do `from |
| # markdown.util import etree`). Do not import it by yourself. |
| |
| try: # Is the C implemenation of ElementTree available? |
| import xml.etree.cElementTree as etree |
| from xml.etree.ElementTree import Comment |
| # Serializers (including ours) test with non-c Comment |
| etree.test_comment = Comment |
| if etree.VERSION < "1.0.5": |
| raise RuntimeError("cElementTree version 1.0.5 or higher is required.") |
| except (ImportError, RuntimeError): |
| # Use the Python implementation of ElementTree? |
| import xml.etree.ElementTree as etree |
| if etree.VERSION < "1.1": |
| raise RuntimeError("ElementTree version 1.1 or higher is required") |
| |
| |
| """ |
| AUXILIARY GLOBAL FUNCTIONS |
| ============================================================================= |
| """ |
| |
| |
| def isBlockLevel(tag): |
| """Check if the tag is a block level HTML tag.""" |
| if isinstance(tag, string_type): |
| return BLOCK_LEVEL_ELEMENTS.match(tag) |
| # Some ElementTree tags are not strings, so return False. |
| return False |
| |
| """ |
| MISC AUXILIARY CLASSES |
| ============================================================================= |
| """ |
| |
| class AtomicString(text_type): |
| """A string which should not be further processed.""" |
| pass |
| |
| |
| class Processor(object): |
| def __init__(self, markdown_instance=None): |
| if markdown_instance: |
| self.markdown = markdown_instance |
| |
| |
| class HtmlStash(object): |
| """ |
| This class is used for stashing HTML objects that we extract |
| in the beginning and replace with place-holders. |
| """ |
| |
| def __init__ (self): |
| """ Create a HtmlStash. """ |
| self.html_counter = 0 # for counting inline html segments |
| self.rawHtmlBlocks=[] |
| |
| def store(self, html, safe=False): |
| """ |
| Saves an HTML segment for later reinsertion. Returns a |
| placeholder string that needs to be inserted into the |
| document. |
| |
| Keyword arguments: |
| |
| * html: an html segment |
| * safe: label an html segment as safe for safemode |
| |
| Returns : a placeholder string |
| |
| """ |
| self.rawHtmlBlocks.append((html, safe)) |
| placeholder = self.get_placeholder(self.html_counter) |
| self.html_counter += 1 |
| return placeholder |
| |
| def reset(self): |
| self.html_counter = 0 |
| self.rawHtmlBlocks = [] |
| |
| def get_placeholder(self, key): |
| return "%swzxhzdk:%d%s" % (STX, key, ETX) |
| |