third_party/Python-Markdown/markdown/preprocessors.py - chromium/src.git - Git at Google

 """
 Python Markdown

 A Python implementation of John Gruber's Markdown.

 Documentation: https://python-markdown.github.io/
 GitHub: https://github.com/Python-Markdown/markdown/
 PyPI: https://pypi.org/project/Markdown/

 Started by Manfred Stienstra (http://www.dwerg.net/).
 Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
 Currently maintained by Waylan Limberg (https://github.com/waylan),
 Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

 Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
 Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
 Copyright 2004 Manfred Stienstra (the original version)

 License: BSD (see LICENSE.md for details).

 PRE-PROCESSORS
 =============================================================================

 Preprocessors work on source text before we start doing anything too
 complicated.
 """

 from . import util
 from .htmlparser import HTMLExtractor
 import re


 def build_preprocessors(md, **kwargs):
     """
     Create the registry holding Markdown's default preprocessors.

     A `NormalizeWhitespace` instance is registered as 'normalize_whitespace'
     with priority 30 and an `HtmlBlockPreprocessor` instance as 'html_block'
     with priority 20; both are constructed with `md`. Extra keyword
     arguments are accepted but not used here.
     """
     registry = util.Registry()
     # (class, registered name, priority), in registration order.
     defaults = (
         (NormalizeWhitespace, 'normalize_whitespace', 30),
         (HtmlBlockPreprocessor, 'html_block', 20),
     )
     for processor_cls, name, priority in defaults:
         registry.register(processor_cls(md), name, priority)
     return registry


 class Preprocessor(util.Processor):
     """
     Base class for processors that work on the document's lines.

     Preprocessors run once the text has been broken into lines. The "run"
     method of a preprocessor is handed the list of lines, may change it in
     place or build a replacement, and gives back whichever list should be
     used from then on.

     Every preprocessor must extend markdown.Preprocessor.

     """
     def run(self, lines):
         """
         Transform the document; subclasses are expected to override this.

         `lines` is the document as a list of strings split by newlines. An
         override returns the list of lines, modified or not. This base
         implementation does nothing and so returns None.

         """
         pass  # pragma: no cover


 class NormalizeWhitespace(Preprocessor):
     """ Bring whitespace into a uniform shape so later parsing is consistent. """

     def run(self, lines):
         """
         Return the document's lines with whitespace normalized.

         The lines are joined with newlines, then: every `util.STX` and
         `util.ETX` occurrence is removed, "\\r\\n" and lone "\\r" become
         "\\n", two newlines are appended, tabs are expanded using
         `self.md.tab_length`, and lines made up only of spaces (when preceded
         by a newline) are emptied. The result is split on newlines again.
         """
         text = '\n'.join(lines)
         for marker in (util.STX, util.ETX):
             text = text.replace(marker, "")
         # Unify line endings: CRLF first so it is not turned into two newlines.
         text = text.replace("\r\n", "\n")
         text = text.replace("\r", "\n")
         text += "\n\n"
         text = text.expandtabs(self.md.tab_length)
         # The lookbehind keeps the preceding newline; only the spaces are dropped.
         text = re.sub(r'(?<=\n) +\n', '\n', text)
         return text.split('\n')


 class HtmlBlockPreprocessor(Preprocessor):
     """Pull html blocks out of the text, stashing them to be restored later."""

     def run(self, lines):
         """
         Pass the document through an `HTMLExtractor` and return what remains.

         The lines are joined with newlines and fed to an extractor built for
         `self.md`; after the extractor is closed, its `cleandoc` pieces are
         concatenated and split on newlines again.
         """
         document = '\n'.join(lines)
         extractor = HTMLExtractor(self.md)
         extractor.feed(document)
         extractor.close()
         cleaned = ''.join(extractor.cleandoc)
         return cleaned.split('\n')
	"""
	Python Markdown

	A Python implementation of John Gruber's Markdown.

	Documentation: https://python-markdown.github.io/
	GitHub: https://github.com/Python-Markdown/markdown/
	PyPI: https://pypi.org/project/Markdown/

	Started by Manfred Stienstra (http://www.dwerg.net/).
	Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
	Currently maintained by Waylan Limberg (https://github.com/waylan),
	Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

	Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
	Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
	Copyright 2004 Manfred Stienstra (the original version)

	License: BSD (see LICENSE.md for details).

	PRE-PROCESSORS
	=============================================================================

	Preprocessors work on source text before we start doing anything too
	complicated.
	"""

	from . import util
	from .htmlparser import HTMLExtractor
	import re


	def build_preprocessors(md, **kwargs):
	    """
	    Assemble the registry of preprocessors Markdown uses by default.

	    Registers `NormalizeWhitespace(md)` under 'normalize_whitespace' with
	    priority 30, then `HtmlBlockPreprocessor(md)` under 'html_block' with
	    priority 20. Extra keyword arguments are accepted but not used here.
	    """
	    registry = util.Registry()
	    # (class, registered name, priority), in registration order.
	    defaults = (
	        (NormalizeWhitespace, 'normalize_whitespace', 30),
	        (HtmlBlockPreprocessor, 'html_block', 20),
	    )
	    for processor_cls, name, priority in defaults:
	        registry.register(processor_cls(md), name, priority)
	    return registry


	class Preprocessor(util.Processor):
	    """
	    Base class for processors that work on the document's lines.

	    Preprocessors run once the text has been broken into lines. The "run"
	    method of a preprocessor is handed the list of lines, may change it in
	    place or build a replacement, and gives back whichever list should be
	    used from then on.

	    Every preprocessor must extend markdown.Preprocessor.

	    """
	    def run(self, lines):
	        """
	        Transform the document; subclasses are expected to override this.

	        `lines` is the document as a list of strings split by newlines. An
	        override returns the list of lines, modified or not. This base
	        implementation does nothing and so returns None.

	        """
	        pass  # pragma: no cover


	class NormalizeWhitespace(Preprocessor):
	    """ Bring whitespace into a uniform shape so later parsing is consistent. """

	    def run(self, lines):
	        """
	        Return the document's lines with whitespace normalized.

	        The lines are joined with newlines, then: every `util.STX` and
	        `util.ETX` occurrence is removed, "\\r\\n" and lone "\\r" become
	        "\\n", two newlines are appended, tabs are expanded using
	        `self.md.tab_length`, and lines made up only of spaces (when
	        preceded by a newline) are emptied. The result is split on newlines
	        again.
	        """
	        text = '\n'.join(lines)
	        for marker in (util.STX, util.ETX):
	            text = text.replace(marker, "")
	        # Unify line endings: CRLF first so it is not turned into two newlines.
	        text = text.replace("\r\n", "\n")
	        text = text.replace("\r", "\n")
	        text += "\n\n"
	        text = text.expandtabs(self.md.tab_length)
	        # The lookbehind keeps the preceding newline; only the spaces are dropped.
	        text = re.sub(r'(?<=\n) +\n', '\n', text)
	        return text.split('\n')


	class HtmlBlockPreprocessor(Preprocessor):
	    """Pull html blocks out of the text, stashing them to be restored later."""

	    def run(self, lines):
	        """
	        Pass the document through an `HTMLExtractor` and return what remains.

	        The lines are joined with newlines and fed to an extractor built for
	        `self.md`; after the extractor is closed, its `cleandoc` pieces are
	        concatenated and split on newlines again.
	        """
	        document = '\n'.join(lines)
	        extractor = HTMLExtractor(self.md)
	        extractor.feed(document)
	        extractor.close()
	        cleaned = ''.join(extractor.cleandoc)
	        return cleaned.split('\n')