| """Support for documenting Python's grammar.""" |
| |
| from __future__ import annotations |
| |
| import re |
| from typing import TYPE_CHECKING |
| |
| from docutils import nodes |
| from docutils.parsers.rst import directives |
| from sphinx import addnodes |
| from sphinx.domains.std import token_xrefs |
| from sphinx.util.docutils import SphinxDirective |
| from sphinx.util.nodes import make_id |
| |
| if TYPE_CHECKING: |
| from collections.abc import Iterable, Iterator, Sequence |
| from typing import Any, Final |
| |
| from docutils.nodes import Node |
| from sphinx.application import Sphinx |
| from sphinx.util.typing import ExtensionMetadata |
| |
| |
class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
    """Inline node for a string literal in a grammar snippet.

    All constructor arguments are forwarded unchanged to ``nodes.inline``;
    afterwards the ``sx`` class is guaranteed to be present (exactly once)
    in the node's ``classes`` attribute.
    """

    def __init__(
        self,
        rawsource: str = '',
        text: str = '',
        *children: Node,
        **attributes: Any,
    ) -> None:
        super().__init__(rawsource, text, *children, **attributes)
        # Use the Pygments highlight class for `Literal.String.Other`.
        # Only add it when missing: ``classes`` may already contain it if
        # it was passed in via ``attributes`` -- which is what docutils'
        # ``Element.copy()`` does when it re-creates a node from the
        # original's attributes -- and an unconditional append would then
        # duplicate the class on every copy.
        if 'sx' not in self['classes']:
            self['classes'].append('sx')
| |
| |
class GrammarSnippetBase(SphinxDirective):
    """Shared machinery for GrammarSnippetDirective & CompatProductionList.

    Subclasses do their own option/argument parsing and then pass an
    options mapping (which must contain ``'group'``) together with the
    grammar lines to ``make_grammar_snippet``.
    """

    # Option/argument declarations are left to the concrete subclasses.

    # Tokenizer applied to one grammar line at a time.  Every alternative is
    # a named group; the group name becomes the "kind" of the parsed part.
    grammar_re: Final = re.compile(
        r"""
            (?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
            (?=:) # ... followed by a colon
        |
            (?P<rule_ref>`[^\s`]+`) # identifier in backquotes
        |
            (?P<single_quoted>'[^']*') # string in 'quotes'
        |
            (?P<double_quoted>"[^"]*") # string in "quotes"
        """,
        re.VERBOSE,
    )

    def make_grammar_snippet(
        self, options: dict[str, Any], content: Sequence[str]
    ) -> list[addnodes.productionlist]:
        """Build a productionlist node from options & content.

        ``options['group']`` names the production group; ``content`` holds
        the grammar lines.  The result is a one-element list holding the
        ``productionlist`` node, which contains one ``production`` child
        per rule found in ``content``.
        """
        group = options['group']
        where = self.get_location()
        productions = [
            self.make_production(
                raw,
                parts,
                group_name=group,
                location=where,
            )
            for raw, parts in self.production_definitions(content)
        ]

        snippet = addnodes.productionlist(
            '',
            *productions,
            support_smartquotes=False,
            classes=['highlight'],
        )
        self.set_source_info(snippet)
        return [snippet]

    def production_definitions(
        self, lines: Iterable[str], /
    ) -> Iterator[tuple[str, list[tuple[str, str]]]]:
        """Yield ``(rawsource, parts)`` pairs, one per production.

        ``rawsource`` is the production's lines joined with newlines.
        ``parts`` is a list of ``(kind, text)`` tuples, where ``kind`` is
        either the name of the ``grammar_re`` group that matched or
        ``'text'`` for everything in between (including the newline that
        ends each line).
        """
        raw_lines: list[str] = []
        parts: list[tuple[str, str]] = []
        for line in lines:
            # A line whose first character is not whitespace starts a new
            # rule: flush whatever was collected so far and start over.
            starts_new_rule = not line[:1].isspace()
            if starts_new_rule:
                rawsource = '\n'.join(raw_lines)
                raw_lines.clear()
                if parts:
                    yield rawsource, parts
                    parts = []

            # Keep the line verbatim for the raw source
            raw_lines.append(line)

            # Split the line into recognised tokens and the text between
            cursor = 0
            for match in self.grammar_re.finditer(line):
                start, end = match.span()
                # Plain text between the previous token and this one
                if start > cursor:
                    parts.append(('text', line[cursor:start]))
                cursor = end

                # Exactly one named group is expected to have matched;
                # the unpacking below fails loudly otherwise.
                matched_groups = [
                    (kind, text)
                    for kind, text in match.groupdict().items()
                    if text is not None
                ]
                (token,) = matched_groups
                parts.append(token)
            # Remainder of the line, plus the line break
            parts.append(('text', line[cursor:] + '\n'))

        # Flush the last production, if any
        if parts:
            yield '\n'.join(raw_lines), parts

    def make_production(
        self,
        rawsource: str,
        production_defs: list[tuple[str, str]],
        *,
        group_name: str,
        location: str,
    ) -> addnodes.production:
        """Create a production node from a list of ``(kind, text)`` parts.

        Rule names become link targets, rule references become token
        cross-references in ``group_name``, quoted strings become
        ``snippet_string_node`` children and plain text becomes text nodes.
        Raises ``ValueError`` for any other kind.
        """
        production_node = addnodes.production(rawsource)
        for kind, text in production_defs:
            if kind == 'rule_name':
                production_node += self.make_name_target(
                    name=text,
                    production_group=group_name,
                    location=location,
                )
            elif kind == 'rule_ref':
                production_node += token_xrefs(text, group_name)
            elif kind in ('single_quoted', 'double_quoted'):
                production_node += snippet_string_node('', text)
            elif kind == 'text':
                production_node += nodes.Text(text)
            else:
                raise ValueError(f'unhandled match: {kind!r}')
        return production_node

    def make_name_target(
        self,
        *,
        name: str,
        production_group: str,
        location: str,
    ) -> addnodes.literal_strong:
        """Make a link target for the given production.

        Returns the node showing ``name``; it is given an ID, noted as an
        implicit target of the document, and recorded as a ``token``
        object in the standard domain.
        """

        # Cargo-culted magic to make the returned node a link target
        # similar to Sphinx `production`.
        # This needs to be the same as what Sphinx does
        # to avoid breaking existing links.

        document = self.state.document
        target = addnodes.literal_strong(name, name)
        target_id = make_id(
            self.env, document, f'grammar-token-{production_group}', name
        )
        target['ids'].append(target_id)
        document.note_implicit_target(target, target)

        # Objects are qualified with the group name unless it is empty.
        if production_group:
            object_name = f'{production_group}:{name}'
        else:
            object_name = name
        self.env.domains.standard_domain.note_object(
            'token', object_name, target_id, location=location
        )
        return target
| |
| |
class GrammarSnippetDirective(GrammarSnippetBase):
    """Transform a grammar-snippet directive to a Sphinx productionlist node

    That is, turn something like:

        .. grammar-snippet:: file
           :group: python-grammar

           file: (NEWLINE | statement)*

    into something similar to Sphinx productionlist, but better suited
    for our needs:
    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
    - Show the listing almost as is, with no auto-alignment.
      The only special character is the backtick, which marks tokens.

    Unlike Sphinx's productionlist, this directive supports options.
    The "group" must be given as a named option.
    The content must be preceded by a blank line (like with most ReST
    directives).
    """

    has_content = True
    option_spec = {
        # `unchanged_required` only rejects an *empty* value; a completely
        # missing option is checked in `run`.
        'group': directives.unchanged_required,
    }

    # We currently ignore arguments.
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True

    def run(self) -> list[addnodes.productionlist]:
        """Build the grammar snippet from the options and content.

        Raises a docutils directive error when the mandatory ``:group:``
        option was not given, instead of letting the lookup of
        ``options['group']`` in the base class fail with a ``KeyError``.
        """
        if 'group' not in self.options:
            raise self.error(
                f'The "{self.name}" directive requires the ":group:" option.'
            )
        return self.make_grammar_snippet(self.options, self.content)
| |
| |
class CompatProductionList(GrammarSnippetBase):
    """Create grammar snippets from reST productionlist syntax

    This is intended to be a transitional directive, used while we switch
    from productionlist to grammar-snippet.
    It makes existing docs that use the ReST syntax look like grammar-snippet,
    as much as possible.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {}

    def run(self) -> list[addnodes.productionlist]:
        """Convert the productionlist argument into a grammar snippet.

        The "content" of a productionlist is actually the first and only
        argument.  If its first line contains no colon, that line is the
        group name and the remaining lines are rules; otherwise there is
        no group (it is passed on as ``''``) and every line is a rule.

        Raises a docutils directive error when a rule line has no colon.
        """
        lines = self.arguments[0].splitlines()
        # The group name is optional in productionlist syntax: a first
        # line that already contains a colon is a rule, not a group name.
        if lines and ':' not in lines[0]:
            group = lines[0].strip()
            rule_lines = lines[1:]
        else:
            group = ''
            rule_lines = lines
        options = {'group': group}

        # Every rule line must contain a colon; the output is aligned on it.
        colon_columns = []
        for line in rule_lines:
            column = line.find(':')
            if column < 0:
                raise self.error(
                    f'Each rule line of "{self.name}" must contain a colon; '
                    f'got {line!r}.'
                )
            colon_columns.append(column)
        # `default` keeps a rule-less list from failing in max().
        align_column = max(colon_columns, default=-1) + 1

        content = []
        for line in rule_lines:
            rule_name, _colon, text = line.partition(':')
            rule_name = rule_name.strip()
            # Continuation lines have nothing before the colon; they are
            # padded with spaces only, so they stay part of the same rule.
            name_part = f'{rule_name}:' if rule_name else ''
            content.append(f'{name_part:<{align_column}}{text}')
        return self.make_grammar_snippet(options, content)
| |
| |
def setup(app: Sphinx) -> ExtensionMetadata:
    """Register this extension's directives with the Sphinx application.

    Adds the ``grammar-snippet`` directive and registers
    ``CompatProductionList`` as ``productionlist`` in the ``std`` domain
    (with ``override=True``).  Returns the extension metadata, which
    declares the extension safe for parallel reading and writing.
    """
    app.add_directive('grammar-snippet', GrammarSnippetDirective)
    app.add_directive_to_domain(
        'std',
        'productionlist',
        CompatProductionList,
        override=True,
    )
    metadata: ExtensionMetadata = {
        'version': '1.0',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata