| ''' |
| Abbreviation Extension for Python-Markdown |
| ========================================== |
| |
| This extension adds abbreviation handling to Python-Markdown. |
| |
| See <https://Python-Markdown.github.io/extensions/abbreviations> |
| for documentation. |
| |
| Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and |
| [Seemant Kulleen](http://www.kulleen.org/) |
| |
| All changes Copyright 2008-2014 The Python Markdown Project |
| |
| License: [BSD](https://opensource.org/licenses/bsd-license.php) |
| |
| ''' |
| |
| from . import Extension |
| from ..blockprocessors import BlockProcessor |
| from ..inlinepatterns import InlineProcessor |
| from ..util import AtomicString |
| import re |
| import xml.etree.ElementTree as etree |
| |
| |
class AbbrExtension(Extension):
    """ Python-Markdown extension that enables abbreviation definitions. """

    def extendMarkdown(self, md):
        """
        Register the abbreviation block processor on `md`'s block parser.

        The processor is registered under the name 'abbr' with priority 16.
        """
        block_parser = md.parser
        processor = AbbrPreprocessor(block_parser)
        block_parser.blockprocessors.register(processor, 'abbr', 16)
| |
| |
class AbbrPreprocessor(BlockProcessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    # Matches one definition of the form `*[ABBR]: title`. The title may
    # start on the line following the colon.
    RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

    def test(self, parent, block):
        """ Accept every block; `run` decides whether it holds a definition. """
        return True

    def run(self, parent, blocks):
        '''
        Find and remove the first abbreviation definition in the next block.

        The definition is registered as a new `AbbrInlineProcessor` on the
        markdown instance under the name 'abbr-<abbreviation>'. Any text
        surrounding the definition is pushed back onto `blocks` as separate
        blocks so that it is processed afterwards.

        Returns True when a definition was consumed. Otherwise the block is
        restored to the front of `blocks` and False is returned.

        '''
        block = blocks.pop(0)
        m = self.RE.search(block)
        abbr = m.group('abbr').strip() if m else ''
        if not abbr:
            # Either there is no definition, or the abbreviation is empty.
            # An empty abbreviation would compile to a pattern that matches
            # the empty string at every word boundary, so it is not
            # registered. Restore the block untouched.
            blocks.insert(0, block)
            return False

        title = m.group('title').strip()
        self.parser.md.inlinePatterns.register(
            AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
        )
        before = block[:m.start()]
        after = block[m.end():]
        # Insert the trailing text first so that the leading text ends up
        # ahead of it at the front of the list.
        if after.strip():
            # Add any content after match back to blocks as separate block
            blocks.insert(0, after.lstrip('\n'))
        if before.strip():
            # Add any content before match back to blocks as separate block
            blocks.insert(0, before.rstrip('\n'))
        return True

    def _generate_pattern(self, text):
        r'''
        Given a string, returns a regex pattern to match that string.

        'HTML' -> r'(?P<abbr>\bHTML\b)'

        Note: the text is passed through `re.escape` so that every character
        is matched literally, whatever it is. This includes characters such
        as `^` and `\`, which cannot simply be wrapped in a `[...]` class.

        '''
        return r'(?P<abbr>\b%s\b)' % re.escape(text)
| |
| |
class AbbrInlineProcessor(InlineProcessor):
    """ Inline processor that wraps one abbreviation in an `<abbr>` element. """

    def __init__(self, pattern, title):
        """ Store `title` and hand `pattern` to the base class. """
        super().__init__(pattern)
        self.title = title

    def handleMatch(self, m, data):
        """
        Build the `<abbr>` element for match `m`.

        Returns the element together with the start and end offsets of the
        whole match. The element's text is the matched 'abbr' group wrapped
        in `AtomicString`, and its `title` attribute is the stored title.
        """
        start, end = m.span(0)
        element = etree.Element('abbr', {'title': self.title})
        element.text = AtomicString(m.group('abbr'))
        return element, start, end
| |
| |
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AbbrExtension`, forwarding all keyword arguments to it. """
    extension = AbbrExtension(**kwargs)
    return extension