| from typing import Any |
| |
| from pegen import grammar |
| from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule |
| |
| |
class ValidationError(Exception):
    """Raised when a grammar rule fails one of the validators in this module."""
| |
| |
class GrammarValidator(GrammarVisitor):
    """Base class for grammar checks that are run one rule at a time.

    Subclasses provide ``visit_*`` methods; while a rule is being visited
    its name is available to them as ``self.rulename``.
    """

    def __init__(self, grammar: grammar.Grammar) -> None:
        """Remember the grammar under validation; no rule is active yet."""
        self.grammar = grammar
        # Name of the rule currently being visited, or None between rules.
        self.rulename: str | None = None

    def validate_rule(self, rulename: str, node: Rule) -> None:
        """Visit *node* with ``self.rulename`` set to *rulename*.

        The name is cleared again once the visit returns normally.  If the
        visit raises, the exception propagates and the name stays set.
        """
        self.rulename = rulename  # expose the rule name to visit_* methods
        self.visit(node)
        self.rulename = None
| |
| |
class SubRuleValidator(GrammarValidator):
    """Reject rules in which an earlier alternative shadows a later one.

    Alternatives are compared through their string form: a later
    alternative whose text starts with the text of an earlier one is
    reported as one that will never be visited.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Check every alternative of *node* against each one after it."""
        alternatives = node.alts
        # ``position`` is the index of the first alternative that follows
        # ``earlier``, so the slice yields exactly the later alternatives.
        for position, earlier in enumerate(alternatives, start=1):
            for later in alternatives[position:]:
                self.check_intersection(earlier, later)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise ValidationError if *second_alt*'s text starts with *first_alt*'s.

        Note that this is a plain textual prefix test on ``str()`` of the
        two alternatives, not a structural comparison of their items.
        """
        later_text = str(second_alt)
        earlier_text = str(first_alt)
        if not later_text.startswith(earlier_text):
            return
        raise ValidationError(
            f"In {self.rulename} there is an alternative that will "
            f"never be visited:\n{second_alt}"
        )
| |
| |
class RaiseRuleValidator(GrammarValidator):
    """Reject RAISE_SYNTAX_ERROR actions outside rules named ``invalid...``."""

    def visit_Alt(self, node: Alt) -> None:
        """Raise ValidationError if *node*'s action mentions RAISE_SYNTAX_ERROR.

        Alternatives of rules whose name starts with ``invalid`` are exempt.
        """
        current_rule = self.rulename
        if current_rule is not None and current_rule.startswith('invalid'):
            # raising is allowed in invalid rules
            return

        action = node.action
        if not action or 'RAISE_SYNTAX_ERROR' not in action:
            # No action, or an action that does not raise: nothing to report.
            return

        raise ValidationError(
            f"In {self.rulename!r} there is an alternative that contains "
            "RAISE_SYNTAX_ERROR; this is only allowed in invalid_ rules"
        )
| |
| |
class CutValidator(GrammarValidator):
    """Fail if Cut is not directly in a rule.

    For simplicity, we currently document that a Cut affects alternatives
    of the *rule* it is in.
    However, the implementation makes cuts local to enclosing Rhs
    (e.g. parenthesized list of choices).
    Additionally, in academic papers about PEG, repeats and optional items
    are "desugared" to choices with an empty alternative, and thus contain
    a Cut's effect.

    Please update documentation and tests when adding this cut,
    then get rid of this validator.

    See gh-143054.
    """

    def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
        """Visit *node*, extending the ancestry chain with *node* itself.

        *parents* holds the nodes on the path from the rule down to (but not
        including) *node*; the chain passed on therefore ends with *node*.
        """
        super().visit(node, parents=(*parents, node))

    def visit_Cut(
        self, node: grammar.Cut, parents: tuple[Any, ...] = ()
    ) -> None:
        """Raise ValidationError unless the cut sits directly in a rule.

        *parents* is the chain built by ``visit`` and ends with the cut
        itself.  Only the exact chain Rule -> Rhs -> Alt -> NamedItem -> Cut
        is accepted; anything deeper (or otherwise different) is rejected.
        """
        # Compare by class name so the whole path can be checked at once.
        parent_types = [type(p).__name__ for p in parents]
        if parent_types != ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
            raise ValidationError(
                f"Rule {self.rulename!r} contains cut that's not on the "
                "top level. "
                "The intended semantics of such cases need "
                "to be clarified; see the CutValidator docstring."
                f"\nThe cut is inside: {parent_types}"
            )
| |
def validate_grammar(the_grammar: grammar.Grammar) -> None:
    """Run every direct GrammarValidator subclass over each rule.

    One validator instance is created per subclass and is reused for all
    rules in ``the_grammar.rules``.  Any exception raised by a validator
    propagates to the caller; the remaining checks are not run.
    """
    # __subclasses__() lists direct subclasses only.
    for checker_type in GrammarValidator.__subclasses__():
        checker = checker_type(the_grammar)
        for name, rule_node in the_grammar.rules.items():
            checker.validate_rule(name, rule_node)