third_party/Python-Markdown/markdown/serializers.py - chromium/src - Git at Google

 # markdown/serializers.py
 #
 # Add x/html serialization to ElementTree
 # Taken from ElementTree 1.3 preview with slight modifications
 #
 # Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
 #
 # fredrik@pythonware.com
 # http://www.pythonware.com
 #
 # --------------------------------------------------------------------
 # The ElementTree toolkit is
 #
 # Copyright (c) 1999-2007 by Fredrik Lundh
 #
 # By obtaining, using, and/or copying this software and/or its
 # associated documentation, you agree that you have read, understood,
 # and will comply with the following terms and conditions:
 #
 # Permission to use, copy, modify, and distribute this software and
 # its associated documentation for any purpose and without fee is
 # hereby granted, provided that the above copyright notice appears in
 # all copies, and that both that copyright notice and this permission
 # notice appear in supporting documentation, and that the name of
 # Secret Labs AB or the author not be used in advertising or publicity
 # pertaining to distribution of the software without specific, written
 # prior permission.
 #
 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 # OF THIS SOFTWARE.
 # --------------------------------------------------------------------


 from __future__ import absolute_import
 from __future__ import unicode_literals
 from . import util
 # Module-level aliases for the ElementTree names used by the serializer.
 ElementTree = util.etree.ElementTree
 QName = util.etree.QName
 # When util.etree exposes a ``test_comment`` attribute it is used as the
 # comment factory; otherwise the regular ``Comment`` is used.
 if not hasattr(util.etree, 'test_comment'):  # pragma: no cover
     Comment = util.etree.Comment
 else:  # pragma: no cover
     Comment = util.etree.test_comment
 ProcessingInstruction = util.etree.ProcessingInstruction
 PI = util.etree.PI

 # Public API of this module.
 __all__ = ['to_html_string', 'to_xhtml_string']

 # Names of HTML elements that never have content.  The serializer writes
 # them without a closing tag (and as "<tag />" in XHTML output).
 # NOTE: every entry needs its own comma -- adjacent string literals are
 # silently concatenated, which previously produced a single bogus
 # "metaparam" entry instead of "meta" and "param".
 HTML_EMPTY = set([
     "area", "base", "basefont", "br", "col", "frame", "hr",
     "img", "input", "isindex", "link", "meta", "param",
 ])

 # Maps namespace URIs to the prefix used when serializing names from
 # that namespace.
 _namespace_map = {
     # "well-known" namespace prefixes
     "http://www.w3.org/XML/1998/namespace": "xml",
     "http://www.w3.org/1999/xhtml": "html",
     "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
     "http://schemas.xmlsoap.org/wsdl/": "wsdl",
     # xml schema
     "http://www.w3.org/2001/XMLSchema": "xs",
     "http://www.w3.org/2001/XMLSchema-instance": "xsi",
     # Dublin Core
     "http://purl.org/dc/elements/1.1/": "dc",
 }


 def _raise_serialization_error(text):  # pragma: no cover
     """Raise ``TypeError`` reporting that *text* cannot be serialized.

     The message contains the ``repr`` of *text* and the name of its type.
     """
     type_name = type(text).__name__
     message = "cannot serialize %r (type %s)" % (text, type_name)
     raise TypeError(message)


 def _encode(text, encoding):
     """Encode *text* using *encoding*.

     Characters the encoding cannot represent are replaced with XML
     character references.  A ``TypeError`` or ``AttributeError`` raised
     by the encode call is reported through ``_raise_serialization_error``.
     """
     try:
         encoded = text.encode(encoding, "xmlcharrefreplace")
     except (TypeError, AttributeError):  # pragma: no cover
         _raise_serialization_error(text)
     else:
         return encoded


 def _escape_cdata(text):
     """Escape ``&``, ``<`` and ``>`` in character data.

     Returns the escaped text.  A ``TypeError`` or ``AttributeError``
     raised while processing *text* is reported through
     ``_raise_serialization_error``.
     """
     # "&" has to be handled first so the entities produced for the other
     # characters are not escaped a second time.
     substitutions = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
     try:
         for raw, entity in substitutions:
             # Skip the replace() call when there is nothing to replace;
             # that is by far the most common case for short strings.
             if raw in text:
                 text = text.replace(raw, entity)
         return text
     except (TypeError, AttributeError):  # pragma: no cover
         _raise_serialization_error(text)


 def _escape_attrib(text):
     """Escape an attribute value.

     Replaces ``&``, ``<``, ``>`` and ``"`` with entities and newlines
     with the ``&#10;`` character reference.  A ``TypeError`` or
     ``AttributeError`` raised while processing *text* is reported through
     ``_raise_serialization_error``.
     """
     # "&" has to be handled first so later entities are not re-escaped.
     substitutions = (
         ("&", "&amp;"),
         ("<", "&lt;"),
         (">", "&gt;"),
         ("\"", "&quot;"),
         ("\n", "&#10;"),
     )
     try:
         for raw, entity in substitutions:
             if raw in text:
                 text = text.replace(raw, entity)
         return text
     except (TypeError, AttributeError):  # pragma: no cover
         _raise_serialization_error(text)


 def _escape_attrib_html(text):
     """Escape an attribute value for (X)HTML output.

     Replaces ``&``, ``<``, ``>`` and ``"`` with entities; newlines are
     left untouched.  A ``TypeError`` or ``AttributeError`` raised while
     processing *text* is reported through ``_raise_serialization_error``.
     """
     # "&" has to be handled first so later entities are not re-escaped.
     substitutions = (
         ("&", "&amp;"),
         ("<", "&lt;"),
         (">", "&gt;"),
         ("\"", "&quot;"),
     )
     try:
         for raw, entity in substitutions:
             if raw in text:
                 text = text.replace(raw, entity)
         return text
     except (TypeError, AttributeError):  # pragma: no cover
         _raise_serialization_error(text)


 def _serialize_html(write, elem, qnames, namespaces, format):
     """Serialize *elem* and its subtree as HTML or XHTML markup.

     write: callable invoked with each output fragment, in order.
     elem: element to serialize; its ``tail`` text is written as well.
     qnames: mapping from tag/attribute names to their serialized form.
     namespaces: mapping of namespace URI to prefix to declare on this
         element's start tag, or a false value for none.  Children are
         always serialized with ``None``.
     format: ``"html"`` or ``"xhtml"``.  With ``"xhtml"`` the elements
         listed in ``HTML_EMPTY`` are written as ``<tag />``; with
         ``"html"`` an attribute whose serialized name equals its value
         is written in the minimized (boolean) form.
     """
     tag = elem.tag
     text = elem.text
     if tag is Comment:
         write("<!--%s-->" % _escape_cdata(text))
     elif tag is ProcessingInstruction:
         write("<?%s?>" % _escape_cdata(text))
     else:
         tag = qnames[tag]
         if tag is None:
             # Element without a tag: only its content is written.
             if text:
                 write(_escape_cdata(text))
             for e in elem:
                 _serialize_html(write, e, qnames, None, format)
         else:
             write("<" + tag)
             items = elem.items()
             if items or namespaces:
                 items = sorted(items)  # lexical order
                 for k, v in items:
                     if isinstance(k, QName):
                         k = k.text
                     if isinstance(v, QName):
                         v = qnames[v.text]
                     else:
                         v = _escape_attrib_html(v)
                     if qnames[k] == v and format == 'html':
                         # handle boolean attributes
                         write(" %s" % v)
                     else:
                         write(" %s=\"%s\"" % (qnames[k], v))
                 if namespaces:
                     # dict.items() is a view without .sort() on Python 3,
                     # so build the sorted list with sorted() instead.
                     ns_items = sorted(
                         namespaces.items(), key=lambda x: x[1]
                     )  # sort on prefix
                     for v, k in ns_items:
                         if k:
                             k = ":" + k
                         write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
             if format == "xhtml" and tag.lower() in HTML_EMPTY:
                 write(" />")
             else:
                 write(">")
                 if text:
                     # script/style content is written verbatim.
                     if tag.lower() in ("script", "style"):
                         write(text)
                     else:
                         write(_escape_cdata(text))
                 for e in elem:
                     _serialize_html(write, e, qnames, None, format)
                 if tag.lower() not in HTML_EMPTY:
                     write("</" + tag + ">")
     if elem.tail:
         write(_escape_cdata(elem.tail))


 def _write_html(root,
                 encoding=None,
                 default_namespace=None,
                 format="html"):
     """Serialize the tree rooted at *root* and return the markup.

     root: root element of the tree; must not be ``None``.
     encoding: when ``None`` the markup is returned as a text string,
         otherwise it is passed through ``_encode`` with this encoding.
     default_namespace: forwarded to ``_namespaces``.
     format: forwarded to ``_serialize_html`` (``"html"`` or ``"xhtml"``).
     """
     assert root is not None
     data = []
     write = data.append
     qnames, namespaces = _namespaces(root, default_namespace)
     _serialize_html(write, root, qnames, namespaces, format)
     markup = "".join(data)
     if encoding is None:
         return markup
     # _encode() needs the target encoding; calling it with the text alone
     # raised TypeError for every non-None encoding.
     return _encode(markup, encoding)


 # --------------------------------------------------------------------
 # serialization support

 def _namespaces(elem, default_namespace=None):
     """Identify the qualified names and namespaces used in a tree.

     Walks *elem* and all of its descendants and registers every tag,
     every attribute key, and every ``QName`` attribute value or text.

     Returns a ``(qnames, namespaces)`` tuple.  *qnames* maps each name
     (including ``None``) to its serialized ``prefix:local`` form and
     *namespaces* maps namespace URIs to the prefix chosen for them.

     Raises ``ValueError`` when *default_namespace* is given and a
     non-qualified name is found; tags of an unsupported type are
     reported through ``_raise_serialization_error``.
     """
     # maps qnames to *encoded* prefix:local names
     qnames = {None: None}

     # maps uri:s to prefixes
     namespaces = {}
     if default_namespace:
         namespaces[default_namespace] = ""

     def add_qname(qname):
         # calculate serialized qname representation
         try:
             if qname[:1] == "{":
                 uri, tag = qname[1:].split("}", 1)
                 prefix = namespaces.get(uri)
                 if prefix is None:
                     prefix = _namespace_map.get(uri)
                     if prefix is None:
                         prefix = "ns%d" % len(namespaces)
                     # the "xml" prefix is never recorded for declaration
                     if prefix != "xml":
                         namespaces[uri] = prefix
                 if prefix:
                     qnames[qname] = "%s:%s" % (prefix, tag)
                 else:
                     qnames[qname] = tag  # default element
             else:
                 if default_namespace:
                     raise ValueError(
                         "cannot use non-qualified names with "
                         "default_namespace option"
                         )
                 qnames[qname] = qname
         except TypeError:  # pragma: no cover
             _raise_serialization_error(qname)

     # populate qname and namespaces table
     try:
         iterate = elem.iter
     except AttributeError:
         iterate = elem.getiterator  # cET compatibility
     for node in iterate():
         tag = node.tag
         if isinstance(tag, QName):
             # The membership test is nested on purpose: combining it with
             # the isinstance() check let an already-registered QName tag
             # fall through to the error branch below.
             if tag.text not in qnames:
                 add_qname(tag.text)
         elif isinstance(tag, util.string_type):
             if tag not in qnames:
                 add_qname(tag)
         elif tag is not None and tag is not Comment and tag is not PI:
             _raise_serialization_error(tag)
         for key, value in node.items():
             if isinstance(key, QName):
                 key = key.text
             if key not in qnames:
                 add_qname(key)
             if isinstance(value, QName) and value.text not in qnames:
                 add_qname(value.text)
         text = node.text
         if isinstance(text, QName) and text.text not in qnames:
             add_qname(text.text)
     return qnames, namespaces


 def to_html_string(element):
     """Serialize *element* and its subtree to a string of HTML markup."""
     root = ElementTree(element).getroot()
     return _write_html(root, format="html")


 def to_xhtml_string(element):
     """Serialize *element* and its subtree to a string of XHTML markup."""
     root = ElementTree(element).getroot()
     return _write_html(root, format="xhtml")
	# markdown/serializers.py
	#
	# Add x/html serialization to ElementTree
	# Taken from ElementTree 1.3 preview with slight modifications
	#
	# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
	#
	# fredrik@pythonware.com
	# http://www.pythonware.com
	#
	# --------------------------------------------------------------------
	# The ElementTree toolkit is
	#
	# Copyright (c) 1999-2007 by Fredrik Lundh
	#
	# By obtaining, using, and/or copying this software and/or its
	# associated documentation, you agree that you have read, understood,
	# and will comply with the following terms and conditions:
	#
	# Permission to use, copy, modify, and distribute this software and
	# its associated documentation for any purpose and without fee is
	# hereby granted, provided that the above copyright notice appears in
	# all copies, and that both that copyright notice and this permission
	# notice appear in supporting documentation, and that the name of
	# Secret Labs AB or the author not be used in advertising or publicity
	# pertaining to distribution of the software without specific, written
	# prior permission.
	#
	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
	# OF THIS SOFTWARE.
	# --------------------------------------------------------------------


	from __future__ import absolute_import
	from __future__ import unicode_literals
	from . import util
	# Module-level aliases for the ElementTree names used by the serializer.
	ElementTree = util.etree.ElementTree
	QName = util.etree.QName
	# When util.etree exposes a ``test_comment`` attribute it is used as the
	# comment factory; otherwise the regular ``Comment`` is used.
	if hasattr(util.etree, 'test_comment'):  # pragma: no cover
	    Comment = util.etree.test_comment
	else:  # pragma: no cover
	    Comment = util.etree.Comment
	PI = util.etree.PI
	ProcessingInstruction = util.etree.ProcessingInstruction

	# Public API of this module.
	__all__ = ['to_html_string', 'to_xhtml_string']

	# Names of HTML elements that never have content.  The serializer writes
	# them without a closing tag (and as "<tag />" in XHTML output).
	# NOTE: every entry needs its own comma -- adjacent string literals are
	# silently concatenated, which previously produced a single bogus
	# "metaparam" entry instead of "meta" and "param".
	HTML_EMPTY = set([
	    "area", "base", "basefont", "br", "col", "frame", "hr",
	    "img", "input", "isindex", "link", "meta", "param",
	])

	# Maps namespace URIs to the prefix used when serializing names from
	# that namespace.
	_namespace_map = {
	# "well-known" namespace prefixes
	"http://www.w3.org/XML/1998/namespace": "xml",
	"http://www.w3.org/1999/xhtml": "html",
	"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
	"http://schemas.xmlsoap.org/wsdl/": "wsdl",
	# xml schema
	"http://www.w3.org/2001/XMLSchema": "xs",
	"http://www.w3.org/2001/XMLSchema-instance": "xsi",
	# Dublin Core
	"http://purl.org/dc/elements/1.1/": "dc",
	}


	def _raise_serialization_error(text):  # pragma: no cover
	    """Raise ``TypeError`` reporting that *text* cannot be serialized.

	    The message contains the ``repr`` of *text* and the name of its type.
	    """
	    raise TypeError(
	        "cannot serialize %r (type %s)" % (text, type(text).__name__)
	    )


	def _encode(text, encoding):
	    """Encode *text* using *encoding*.

	    Characters the encoding cannot represent are replaced with XML
	    character references.  A ``TypeError`` or ``AttributeError`` raised
	    by the encode call is reported through ``_raise_serialization_error``.
	    """
	    try:
	        return text.encode(encoding, "xmlcharrefreplace")
	    except (TypeError, AttributeError):  # pragma: no cover
	        _raise_serialization_error(text)


	def _escape_cdata(text):
	    """Escape ``&``, ``<`` and ``>`` in character data.

	    Returns the escaped text.  A ``TypeError`` or ``AttributeError``
	    raised while processing *text* is reported through
	    ``_raise_serialization_error``.
	    """
	    # The replacement targets must be the entity forms; replacing a
	    # character with itself escapes nothing.  "&" is handled first so
	    # the entities produced afterwards are not escaped a second time.
	    substitutions = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
	    try:
	        for raw, entity in substitutions:
	            # Skip the replace() call when there is nothing to replace.
	            if raw in text:
	                text = text.replace(raw, entity)
	        return text
	    except (TypeError, AttributeError):  # pragma: no cover
	        _raise_serialization_error(text)


	def _escape_attrib(text):
	    """Escape an attribute value.

	    Replaces ``&``, ``<``, ``>`` and ``"`` with entities and newlines
	    with the ``&#10;`` character reference.  A ``TypeError`` or
	    ``AttributeError`` raised while processing *text* is reported through
	    ``_raise_serialization_error``.
	    """
	    # The replacement targets must be the entity forms.  "&" is handled
	    # first so later entities are not re-escaped.
	    substitutions = (
	        ("&", "&amp;"),
	        ("<", "&lt;"),
	        (">", "&gt;"),
	        ("\"", "&quot;"),
	        ("\n", "&#10;"),
	    )
	    try:
	        for raw, entity in substitutions:
	            if raw in text:
	                text = text.replace(raw, entity)
	        return text
	    except (TypeError, AttributeError):  # pragma: no cover
	        _raise_serialization_error(text)


	def _escape_attrib_html(text):
	    """Escape an attribute value for (X)HTML output.

	    Replaces ``&``, ``<``, ``>`` and ``"`` with entities; newlines are
	    left untouched.  A ``TypeError`` or ``AttributeError`` raised while
	    processing *text* is reported through ``_raise_serialization_error``.
	    """
	    # The replacement targets must be the entity forms.  "&" is handled
	    # first so later entities are not re-escaped.
	    substitutions = (
	        ("&", "&amp;"),
	        ("<", "&lt;"),
	        (">", "&gt;"),
	        ("\"", "&quot;"),
	    )
	    try:
	        for raw, entity in substitutions:
	            if raw in text:
	                text = text.replace(raw, entity)
	        return text
	    except (TypeError, AttributeError):  # pragma: no cover
	        _raise_serialization_error(text)


	def _serialize_html(write, elem, qnames, namespaces, format):
	    """Serialize *elem* and its subtree as HTML or XHTML markup.

	    write: callable invoked with each output fragment, in order.
	    elem: element to serialize; its ``tail`` text is written as well.
	    qnames: mapping from tag/attribute names to their serialized form.
	    namespaces: mapping of namespace URI to prefix to declare on this
	        element's start tag, or a false value for none.  Children are
	        always serialized with ``None``.
	    format: ``"html"`` or ``"xhtml"``.  With ``"xhtml"`` the elements
	        listed in ``HTML_EMPTY`` are written as ``<tag />``; with
	        ``"html"`` an attribute whose serialized name equals its value
	        is written in the minimized (boolean) form.
	    """
	    tag = elem.tag
	    text = elem.text
	    if tag is Comment:
	        write("<!--%s-->" % _escape_cdata(text))
	    elif tag is ProcessingInstruction:
	        write("<?%s?>" % _escape_cdata(text))
	    else:
	        tag = qnames[tag]
	        if tag is None:
	            # Element without a tag: only its content is written.
	            if text:
	                write(_escape_cdata(text))
	            for e in elem:
	                _serialize_html(write, e, qnames, None, format)
	        else:
	            write("<" + tag)
	            items = elem.items()
	            if items or namespaces:
	                items = sorted(items)  # lexical order
	                for k, v in items:
	                    if isinstance(k, QName):
	                        k = k.text
	                    if isinstance(v, QName):
	                        v = qnames[v.text]
	                    else:
	                        v = _escape_attrib_html(v)
	                    if qnames[k] == v and format == 'html':
	                        # handle boolean attributes
	                        write(" %s" % v)
	                    else:
	                        write(" %s=\"%s\"" % (qnames[k], v))
	                if namespaces:
	                    # dict.items() is a view without .sort() on Python 3,
	                    # so build the sorted list with sorted() instead.
	                    ns_items = sorted(
	                        namespaces.items(), key=lambda x: x[1]
	                    )  # sort on prefix
	                    for v, k in ns_items:
	                        if k:
	                            k = ":" + k
	                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
	            if format == "xhtml" and tag.lower() in HTML_EMPTY:
	                write(" />")
	            else:
	                write(">")
	                if text:
	                    # script/style content is written verbatim.
	                    if tag.lower() in ("script", "style"):
	                        write(text)
	                    else:
	                        write(_escape_cdata(text))
	                for e in elem:
	                    _serialize_html(write, e, qnames, None, format)
	                if tag.lower() not in HTML_EMPTY:
	                    write("</" + tag + ">")
	    if elem.tail:
	        write(_escape_cdata(elem.tail))


	def _write_html(root,
	                encoding=None,
	                default_namespace=None,
	                format="html"):
	    """Serialize the tree rooted at *root* and return the markup.

	    root: root element of the tree; must not be ``None``.
	    encoding: when ``None`` the markup is returned as a text string,
	        otherwise it is passed through ``_encode`` with this encoding.
	    default_namespace: forwarded to ``_namespaces``.
	    format: forwarded to ``_serialize_html`` (``"html"`` or ``"xhtml"``).
	    """
	    assert root is not None
	    data = []
	    write = data.append
	    qnames, namespaces = _namespaces(root, default_namespace)
	    _serialize_html(write, root, qnames, namespaces, format)
	    markup = "".join(data)
	    if encoding is None:
	        return markup
	    # _encode() needs the target encoding; calling it with the text alone
	    # raised TypeError for every non-None encoding.
	    return _encode(markup, encoding)


	# --------------------------------------------------------------------
	# serialization support

	def _namespaces(elem, default_namespace=None):
	    """Identify the qualified names and namespaces used in a tree.

	    Walks *elem* and all of its descendants and registers every tag,
	    every attribute key, and every ``QName`` attribute value or text.

	    Returns a ``(qnames, namespaces)`` tuple.  *qnames* maps each name
	    (including ``None``) to its serialized ``prefix:local`` form and
	    *namespaces* maps namespace URIs to the prefix chosen for them.

	    Raises ``ValueError`` when *default_namespace* is given and a
	    non-qualified name is found; tags of an unsupported type are
	    reported through ``_raise_serialization_error``.
	    """
	    # maps qnames to encoded prefix:local names
	    qnames = {None: None}

	    # maps uri:s to prefixes
	    namespaces = {}
	    if default_namespace:
	        namespaces[default_namespace] = ""

	    def add_qname(qname):
	        # calculate serialized qname representation
	        try:
	            if qname[:1] == "{":
	                uri, tag = qname[1:].split("}", 1)
	                prefix = namespaces.get(uri)
	                if prefix is None:
	                    prefix = _namespace_map.get(uri)
	                    if prefix is None:
	                        prefix = "ns%d" % len(namespaces)
	                    # the "xml" prefix is never recorded for declaration
	                    if prefix != "xml":
	                        namespaces[uri] = prefix
	                if prefix:
	                    qnames[qname] = "%s:%s" % (prefix, tag)
	                else:
	                    qnames[qname] = tag  # default element
	            else:
	                if default_namespace:
	                    raise ValueError(
	                        "cannot use non-qualified names with "
	                        "default_namespace option"
	                        )
	                qnames[qname] = qname
	        except TypeError:  # pragma: no cover
	            _raise_serialization_error(qname)

	    # populate qname and namespaces table
	    try:
	        iterate = elem.iter
	    except AttributeError:
	        iterate = elem.getiterator  # cET compatibility
	    for node in iterate():
	        tag = node.tag
	        if isinstance(tag, QName):
	            # The membership test is nested on purpose: combining it with
	            # the isinstance() check let an already-registered QName tag
	            # fall through to the error branch below.
	            if tag.text not in qnames:
	                add_qname(tag.text)
	        elif isinstance(tag, util.string_type):
	            if tag not in qnames:
	                add_qname(tag)
	        elif tag is not None and tag is not Comment and tag is not PI:
	            _raise_serialization_error(tag)
	        for key, value in node.items():
	            if isinstance(key, QName):
	                key = key.text
	            if key not in qnames:
	                add_qname(key)
	            if isinstance(value, QName) and value.text not in qnames:
	                add_qname(value.text)
	        text = node.text
	        if isinstance(text, QName) and text.text not in qnames:
	            add_qname(text.text)
	    return qnames, namespaces


	def to_html_string(element):
	    """Serialize *element* and its subtree to a string of HTML markup."""
	    return _write_html(ElementTree(element).getroot(), format="html")


	def to_xhtml_string(element):
	    """Serialize *element* and its subtree to a string of XHTML markup."""
	    return _write_html(ElementTree(element).getroot(), format="xhtml")