| lazy import re as _re |
| |
| |
| def is_valid_name(name): |
| """Test whether a string is a valid element or attribute name.""" |
| # https://www.w3.org/TR/xml/#NT-Name |
| return _re.fullmatch( |
| # NameStartChar |
| '[' |
| ':A-Z_a-z' |
| '\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF' |
| '\u200C\u200D' |
| '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' |
| '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' |
| ']' |
| # NameChar |
| '[' |
| r'\-.0-9:A-Z_a-z' |
| '\xB7' |
| '\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF' |
| '\u200C\u200D\u203F\u2040' |
| '\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' |
| '\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF' |
| ']*+', |
| name) is not None |
| |
# https://www.w3.org/TR/xml/#charsets
# Regex character class matching any single code point that is not
# allowed by the XML 1.0 Char production.
_ILLEGAL_XML_CHAR = (
    '['
    '\x00-\x08\x0B\x0C\x0E-\x1F'  # C0 controls except TAB, CR and LF
    '\uD800-\uDFFF'  # the surrogate blocks
    '\uFFFE\uFFFF'  # special Unicode characters
    ']'
)


def is_valid_text(data):
    """Test whether a string is a sequence of legal XML 1.0 characters.

    Return True if no character of *data* matches _ILLEGAL_XML_CHAR,
    False as soon as one does.  The empty string is valid.
    """
    return _re.search(_ILLEGAL_XML_CHAR, data) is None