# Doc/tools/extensions/grammar_snippet.py - external/github.com/python/cpython - Git at Google

 """Support for documenting Python's grammar."""

 from __future__ import annotations

 import re
 from typing import TYPE_CHECKING

 from docutils import nodes
 from docutils.parsers.rst import directives
 from sphinx import addnodes
 from sphinx.domains.std import token_xrefs
 from sphinx.util.docutils import SphinxDirective
 from sphinx.util.nodes import make_id

 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Sequence
     from typing import Any, Final

     from docutils.nodes import Node
     from sphinx.application import Sphinx
     from sphinx.util.typing import ExtensionMetadata


 class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
     """Inline node wrapping a quoted string literal of a grammar snippet.

     Construction takes the same arguments as ``nodes.inline``; the only
     addition is that the ``sx`` class is appended to the node's classes.
     """

     def __init__(
         self,
         rawsource: str = '',
         text: str = '',
         *children: Node,
         **attributes: Any,
     ) -> None:
         super().__init__(rawsource, text, *children, **attributes)
         # `sx` is the Pygments highlight class for `Literal.String.Other`,
         # so the literal is styled like a string in highlighted code.
         css_classes = self['classes']
         css_classes.append('sx')


 class GrammarSnippetBase(SphinxDirective):
     """Shared machinery for GrammarSnippetDirective & CompatProductionList.

     Subclasses own the option/argument handling; they hand the resolved
     options and the snippet's lines to `make_grammar_snippet`.
     """

     # Tokenizer for a single line of grammar text.  Every hit matches
     # exactly one of the named groups; `make_production` later uses the
     # group name to decide which kind of node to build.
     grammar_re: Final = re.compile(
         r"""
             (?P<rule_name>^[a-zA-Z0-9_]+)     # identifier at start of line
             (?=:)                             # ... followed by a colon
         |
             (?P<rule_ref>`[^\s`]+`)           # identifier in backquotes
         |
             (?P<single_quoted>'[^']*')        # string in 'quotes'
         |
             (?P<double_quoted>"[^"]*")        # string in "quotes"
         """,
         re.VERBOSE,
     )

     def make_grammar_snippet(
         self, options: dict[str, Any], content: Sequence[str]
     ) -> list[addnodes.productionlist]:
         """Build the ``productionlist`` node for one snippet.

         *options* must contain the ``'group'`` key (a ``KeyError`` is
         raised otherwise); *content* holds the snippet's lines.

         Returns a one-element list holding a ``productionlist`` node with
         one ``production`` child per rule found in *content*.
         """
         group = options['group']
         location = self.get_location()
         productions = [
             self.make_production(
                 rawsource,
                 parts,
                 group_name=group,
                 location=location,
             )
             for rawsource, parts in self.production_definitions(content)
         ]

         snippet = addnodes.productionlist(
             '',
             *productions,
             support_smartquotes=False,
             classes=['highlight'],
         )
         self.set_source_info(snippet)
         return [snippet]

     def production_definitions(
         self, lines: Iterable[str], /
     ) -> Iterator[tuple[str, list[tuple[str, str]]]]:
         """Split *lines* into productions and tokenize each of them.

         A line whose first character is not whitespace (an empty line
         included) opens a new production; any other line continues the
         current one.

         Yields ``(rawsource, parts)`` pairs.  ``rawsource`` is the
         production's lines joined with newlines.  ``parts`` is a list of
         ``(kind, text)`` tuples, where ``kind`` is either a `grammar_re`
         group name or ``'text'`` for everything the regex did not match.
         Every input line ends with a ``'text'`` part that carries the
         trailing newline.
         """
         raw_lines: list[str] = []
         parts: list[tuple[str, str]] = []
         for line in lines:
             # Text in column 1 means a new rule begins here: flush what
             # has been collected so far before starting over.
             starts_rule = not line[:1].isspace()
             if starts_rule:
                 rawsource = '\n'.join(raw_lines)
                 raw_lines.clear()
                 if parts:
                     yield rawsource, parts
                     # Rebind rather than clear: the consumer keeps the
                     # list that was just yielded.
                     parts = []

             raw_lines.append(line)

             # Walk the regex hits, emitting the gaps between them as text.
             cursor = 0
             for match in self.grammar_re.finditer(line):
                 start, end = match.span()
                 if start > cursor:
                     parts.append(('text', line[cursor:start]))
                 cursor = end

                 # Only the alternative that matched has a non-None group,
                 # so exactly one entry is expected; the unpacking raises
                 # ValueError if that ever stops being true.
                 matched = [
                     (group, value)
                     for group, value in match.groupdict().items()
                     if value is not None
                 ]
                 (part,) = matched
                 parts.append(part)
             parts.append(('text', line[cursor:] + '\n'))

         # Whatever is left over forms the last production.
         if parts:
             yield '\n'.join(raw_lines), parts

     def make_production(
         self,
         rawsource: str,
         production_defs: list[tuple[str, str]],
         *,
         group_name: str,
         location: str,
     ) -> addnodes.production:
         """Assemble a ``production`` node from tokenized parts.

         *production_defs* is a list of ``(kind, text)`` tuples as yielded
         by `production_definitions`.  Each kind maps to one child:

         - ``'rule_name'``: a link target from `make_name_target`
         - ``'rule_ref'``: the result of Sphinx's ``token_xrefs``
         - ``'single_quoted'`` / ``'double_quoted'``: a `snippet_string_node`
         - ``'text'``: a plain ``nodes.Text``

         Raises ``ValueError`` for any other kind.
         """
         production_node = addnodes.production(rawsource)
         for re_group_name, value in production_defs:
             if re_group_name == 'rule_name':
                 piece = self.make_name_target(
                     name=value,
                     production_group=group_name,
                     location=location,
                 )
             elif re_group_name == 'rule_ref':
                 piece = token_xrefs(value, group_name)
             elif re_group_name in ('single_quoted', 'double_quoted'):
                 piece = snippet_string_node('', value)
             elif re_group_name == 'text':
                 piece = nodes.Text(value)
             else:
                 raise ValueError(f'unhandled match: {re_group_name!r}')
             production_node += piece
         return production_node

     def make_name_target(
         self,
         *,
         name: str,
         production_group: str,
         location: str,
     ) -> addnodes.literal_strong:
         """Create the node for a rule name and register it as a link target.

         The node gets an id built from the ``grammar-token-<group>`` prefix
         and *name*, is noted as an implicit target of the document, and is
         recorded in the standard domain as a ``token`` object named
         ``<group>:<name>`` (or just ``<name>`` when the group is empty).
         """
         # This mirrors what Sphinx does for its own `production` nodes.
         # The ids and object names have to stay the same as Sphinx's,
         # otherwise existing links would break.
         target = addnodes.literal_strong(name, name)
         target_id = make_id(
             self.env,
             self.state.document,
             f'grammar-token-{production_group}',
             name,
         )
         target['ids'].append(target_id)
         self.state.document.note_implicit_target(target, target)

         if production_group:
             qualified_name = f'{production_group}:{name}'
         else:
             qualified_name = name
         self.env.domains.standard_domain.note_object(
             'token', qualified_name, target_id, location=location
         )
         return target


 class GrammarSnippetDirective(GrammarSnippetBase):
     """Render a ``grammar-snippet`` directive as a productionlist node.

     That is, turn something like:

         .. grammar-snippet:: file
            :group: python-grammar

            file: (NEWLINE | statement)*

     into something similar to Sphinx productionlist, but better suited
     for our needs:
     - Instead of `::=`, use a colon, as in `Grammar/python.gram`
     - Show the listing almost as is, with no auto-alignment.
       The only special character is the backtick, which marks tokens
       (quoted strings are merely highlighted).

     Unlike Sphinx's productionlist, this directive supports options.
     The "group" must be given as a named option; leaving it out is
     reported as a directive error.
     The content must be preceded by a blank line (like with most ReST
     directives).
     """

     has_content = True
     option_spec = {
         'group': directives.unchanged_required,
     }

     # We currently ignore arguments.
     required_arguments = 0
     optional_arguments = 1
     final_argument_whitespace = True

     def run(self) -> list[addnodes.productionlist]:
         """Build the snippet node from the directive's options and content.

         Raises a docutils directive error when ``:group:`` is missing.
         """
         # `unchanged_required` only validates an option that is present,
         # so a missing `:group:` has to be caught here; otherwise it would
         # surface as a bare KeyError further down.
         if 'group' not in self.options:
             raise self.error(
                 f'Error in "{self.name}" directive: '
                 'the ":group:" option is required.'
             )
         return self.make_grammar_snippet(self.options, self.content)


 class CompatProductionList(GrammarSnippetBase):
     """Create grammar snippets from reST productionlist syntax

     This is intended to be a transitional directive, used while we switch
     from productionlist to grammar-snippet.
     It makes existing docs that use the ReST syntax look like grammar-snippet,
     as much as possible.
     """

     has_content = False
     required_arguments = 1
     optional_arguments = 0
     final_argument_whitespace = True
     option_spec = {}

     def run(self) -> list[addnodes.productionlist]:
         """Convert the productionlist argument into a grammar snippet.

         The first argument line is taken as the group name.  Each
         following line is split at its first colon and re-emitted as
         ``name:`` padded to a common column, followed by the rest of the
         line.

         A list with no production lines yields an empty snippet.  A
         production line without a colon raises a docutils directive error.
         """
         # The "content" of a productionlist is actually the first and only
         # argument. The first line is the group; the rest is the content lines.
         lines = self.arguments[0].splitlines()
         group = lines[0].strip()
         options = {'group': group}
         rule_lines = lines[1:]

         # Every production line needs a colon; we align on it.
         colon_columns = []
         for line in rule_lines:
             column = line.find(':')
             if column < 0:
                 raise self.error(
                     f'Error in "{self.name}" directive: '
                     f'missing ":" in production line {line!r}.'
                 )
             colon_columns.append(column)
         # `default` keeps max() from failing when there are no rule lines.
         align_column = max(colon_columns, default=-1) + 1

         content = []
         for line in rule_lines:
             rule_name, _colon, text = line.partition(':')
             rule_name = rule_name.strip()
             # Continuation lines have nothing before the colon; they get
             # padding only, so their text lines up with the first line.
             name_part = f'{rule_name}:' if rule_name else ''
             content.append(f'{name_part:<{align_column}}{text}')
         return self.make_grammar_snippet(options, content)


 def setup(app: Sphinx) -> ExtensionMetadata:
     """Register this extension's directives with *app*.

     Adds the ``grammar-snippet`` directive and overrides the standard
     domain's ``productionlist`` directive with `CompatProductionList`.
     Returns the extension metadata: its version and the flags declaring
     it safe for parallel reading and writing.
     """
     app.add_directive('grammar-snippet', GrammarSnippetDirective)
     app.add_directive_to_domain(
         'std',
         'productionlist',
         CompatProductionList,
         override=True,
     )
     metadata: ExtensionMetadata = {
         'version': '1.0',
         'parallel_read_safe': True,
         'parallel_write_safe': True,
     }
     return metadata
	"""Support for documenting Python's grammar."""

	from __future__ import annotations

	import re
	from typing import TYPE_CHECKING

	from docutils import nodes
	from docutils.parsers.rst import directives
	from sphinx import addnodes
	from sphinx.domains.std import token_xrefs
	from sphinx.util.docutils import SphinxDirective
	from sphinx.util.nodes import make_id

	if TYPE_CHECKING:
	from collections.abc import Iterable, Iterator, Sequence
	from typing import Any, Final

	from docutils.nodes import Node
	from sphinx.application import Sphinx
	from sphinx.util.typing import ExtensionMetadata


	class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
	    """Inline node wrapping a quoted string literal of a grammar snippet.

	    Construction takes the same arguments as ``nodes.inline``; the only
	    addition is that the ``sx`` class is appended to the node's classes.
	    """

	    def __init__(
	        self,
	        rawsource: str = '',
	        text: str = '',
	        *children: Node,
	        **attributes: Any,
	    ) -> None:
	        # Children must be unpacked with `*` and attributes with `**`;
	        # otherwise the tuple itself would be passed as a single child
	        # and the attribute names as further children.
	        super().__init__(rawsource, text, *children, **attributes)
	        # Use the Pygments highlight class for `Literal.String.Other`
	        self['classes'].append('sx')


	class GrammarSnippetBase(SphinxDirective):
	"""Common functionality for GrammarSnippetDirective & CompatProductionList."""

	# The option/argument handling is left to the individual classes.

	grammar_re: Final = re.compile(
	r"""
	(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
	(?=:) # ... followed by a colon
	\|
	(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
	\|
	(?P<single_quoted>'[^']*') # string in 'quotes'
	\|
	(?P<double_quoted>"[^"]*") # string in "quotes"
	""",
	re.VERBOSE,
	)

	def make_grammar_snippet(
	self, options: dict[str, Any], content: Sequence[str]
	) -> list[addnodes.productionlist]:
	"""Create a literal block from options & content."""

	group_name = options['group']
	node_location = self.get_location()
	production_nodes = []
	for rawsource, production_defs in self.production_definitions(content):
	production = self.make_production(
	rawsource,
	production_defs,
	group_name=group_name,
	location=node_location,
	)
	production_nodes.append(production)

	node = addnodes.productionlist(
	'',
	*production_nodes,
	support_smartquotes=False,
	classes=['highlight'],
	)
	self.set_source_info(node)
	return [node]

	def production_definitions(
	self, lines: Iterable[str], /
	) -> Iterator[tuple[str, list[tuple[str, str]]]]:
	"""Yield pairs of rawsource and production content dicts."""
	production_lines: list[str] = []
	production_content: list[tuple[str, str]] = []
	for line in lines:
	# If this line is the start of a new rule (text in the column 1),
	# emit the current production and start a new one.
	if not line[:1].isspace():
	rawsource = '\n'.join(production_lines)
	production_lines.clear()
	if production_content:
	yield rawsource, production_content
	production_content = []

	# Append the current line for the raw source
	production_lines.append(line)

	# Parse the line into constituent parts
	last_pos = 0
	for match in self.grammar_re.finditer(line):
	# Handle text between matches
	if match.start() > last_pos:
	unmatched_text = line[last_pos : match.start()]
	production_content.append(('text', unmatched_text))
	last_pos = match.end()

	# Handle matches.
	# After filtering None (non-matches), exactly one groupdict()
	# entry should remain.
	[(re_group_name, content)] = (
	(re_group_name, content)
	for re_group_name, content in match.groupdict().items()
	if content is not None
	)
	production_content.append((re_group_name, content))
	production_content.append(('text', line[last_pos:] + '\n'))

	# Emit the final production
	if production_content:
	rawsource = '\n'.join(production_lines)
	yield rawsource, production_content

	def make_production(
	self,
	rawsource: str,
	production_defs: list[tuple[str, str]],
	*,
	group_name: str,
	location: str,
	) -> addnodes.production:
	"""Create a production node from a list of parts."""
	production_node = addnodes.production(rawsource)
	for re_group_name, content in production_defs:
	match re_group_name:
	case 'rule_name':
	production_node += self.make_name_target(
	name=content,
	production_group=group_name,
	location=location,
	)
	case 'rule_ref':
	production_node += token_xrefs(content, group_name)
	case 'single_quoted' \| 'double_quoted':
	production_node += snippet_string_node('', content)
	case 'text':
	production_node += nodes.Text(content)
	case _:
	raise ValueError(f'unhandled match: {re_group_name!r}')
	return production_node

	def make_name_target(
	self,
	*,
	name: str,
	production_group: str,
	location: str,
	) -> addnodes.literal_strong:
	"""Make a link target for the given production."""

	# Cargo-culted magic to make `name_node` a link target
	# similar to Sphinx `production`.
	# This needs to be the same as what Sphinx does
	# to avoid breaking existing links.

	name_node = addnodes.literal_strong(name, name)
	prefix = f'grammar-token-{production_group}'
	node_id = make_id(self.env, self.state.document, prefix, name)
	name_node['ids'].append(node_id)
	self.state.document.note_implicit_target(name_node, name_node)
	obj_name = f'{production_group}:{name}' if production_group else name
	std = self.env.domains.standard_domain
	std.note_object('token', obj_name, node_id, location=location)
	return name_node


	class GrammarSnippetDirective(GrammarSnippetBase):
	    """Render a ``grammar-snippet`` directive as a productionlist node.

	    That is, turn something like:

	        .. grammar-snippet:: file
	           :group: python-grammar

	           file: (NEWLINE | statement)*

	    into something similar to Sphinx productionlist, but better suited
	    for our needs:
	    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
	    - Show the listing almost as is, with no auto-alignment.
	      The only special character is the backtick, which marks tokens
	      (quoted strings are merely highlighted).

	    Unlike Sphinx's productionlist, this directive supports options.
	    The "group" must be given as a named option; leaving it out is
	    reported as a directive error.
	    The content must be preceded by a blank line (like with most ReST
	    directives).
	    """

	    has_content = True
	    option_spec = {
	        'group': directives.unchanged_required,
	    }

	    # We currently ignore arguments.
	    required_arguments = 0
	    optional_arguments = 1
	    final_argument_whitespace = True

	    def run(self) -> list[addnodes.productionlist]:
	        """Build the snippet node from the directive's options and content.

	        Raises a docutils directive error when ``:group:`` is missing.
	        """
	        # `unchanged_required` only validates an option that is present,
	        # so a missing `:group:` has to be caught here; otherwise it would
	        # surface as a bare KeyError further down.
	        if 'group' not in self.options:
	            raise self.error(
	                f'Error in "{self.name}" directive: '
	                'the ":group:" option is required.'
	            )
	        return self.make_grammar_snippet(self.options, self.content)


	class CompatProductionList(GrammarSnippetBase):
	    """Create grammar snippets from reST productionlist syntax

	    This is intended to be a transitional directive, used while we switch
	    from productionlist to grammar-snippet.
	    It makes existing docs that use the ReST syntax look like grammar-snippet,
	    as much as possible.
	    """

	    has_content = False
	    required_arguments = 1
	    optional_arguments = 0
	    final_argument_whitespace = True
	    option_spec = {}

	    def run(self) -> list[addnodes.productionlist]:
	        """Convert the productionlist argument into a grammar snippet.

	        The first argument line is taken as the group name.  Each
	        following line is split at its first colon and re-emitted as
	        ``name:`` padded to a common column, followed by the rest of the
	        line.

	        A list with no production lines yields an empty snippet.  A
	        production line without a colon raises a docutils directive error.
	        """
	        # The "content" of a productionlist is actually the first and only
	        # argument. The first line is the group; the rest is the content lines.
	        lines = self.arguments[0].splitlines()
	        group = lines[0].strip()
	        options = {'group': group}
	        rule_lines = lines[1:]

	        # Every production line needs a colon; we align on it.
	        colon_columns = []
	        for line in rule_lines:
	            column = line.find(':')
	            if column < 0:
	                raise self.error(
	                    f'Error in "{self.name}" directive: '
	                    f'missing ":" in production line {line!r}.'
	                )
	            colon_columns.append(column)
	        # `default` keeps max() from failing when there are no rule lines.
	        align_column = max(colon_columns, default=-1) + 1

	        content = []
	        for line in rule_lines:
	            rule_name, _colon, text = line.partition(':')
	            rule_name = rule_name.strip()
	            # Continuation lines have nothing before the colon; they get
	            # padding only, so their text lines up with the first line.
	            name_part = f'{rule_name}:' if rule_name else ''
	            content.append(f'{name_part:<{align_column}}{text}')
	        return self.make_grammar_snippet(options, content)


	def setup(app: Sphinx) -> ExtensionMetadata:
	    """Register this extension's directives with *app*.

	    Adds the ``grammar-snippet`` directive and overrides the standard
	    domain's ``productionlist`` directive with `CompatProductionList`.
	    Returns the extension metadata: its version and the flags declaring
	    it safe for parallel reading and writing.
	    """
	    app.add_directive('grammar-snippet', GrammarSnippetDirective)
	    app.add_directive_to_domain(
	        'std', 'productionlist', CompatProductionList, override=True
	    )
	    return {
	        'version': '1.0',
	        'parallel_read_safe': True,
	        'parallel_write_safe': True,
	    }