| #!/usr/bin/env python3 |
| # Copyright 2017 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Utilities for generating IDN test cases. |
| |
Either use the command-line interface (see --help) or call make_case directly
from a Python shell (see the make_case documentation).
| """ |
| |
| |
| import argparse |
| import codecs |
| import doctest |
| import sys |
| |
| |
def str_to_c_string(string):
    """Converts a Python bytes to a C++ string literal.

    The literal is built byte by byte rather than derived from repr(), so
    that quotes and backslashes inside the data are always escaped and the
    result is a single well-formed C++ literal expression.

    Args:
        string: The bytes (or other bytes-like object) to convert. A str is
            also accepted and is encoded as UTF-8 first.

    Returns:
        A str holding the double-quoted C++ literal.

    >>> str_to_c_string(b'abc\\x8c')
    '"abc\\\\x8c"'
    >>> str_to_c_string(b'\\x8cab')
    '"\\\\x8c""ab"'
    """
    if isinstance(string, str):
        string = string.encode('utf-8')

    simple_escapes = {
        0x22: '\\"',
        0x5c: '\\\\',
        0x09: '\\t',
        0x0a: '\\n',
        0x0d: '\\r',
    }
    hex_digits = b'0123456789abcdefABCDEF'

    pieces = ['"']
    after_hex_escape = False
    for byte in bytes(string):
        # C++ hex escapes consume every following hex digit, so a literal
        # hex digit right after "\xNN" would be swallowed into the escape.
        # Closing and reopening the literal ("\x8c""ab") prevents that;
        # adjacent literals are concatenated by the compiler.
        if after_hex_escape and byte in hex_digits:
            pieces.append('""')
        after_hex_escape = False

        if byte in simple_escapes:
            pieces.append(simple_escapes[byte])
        elif 0x20 <= byte < 0x7f:
            pieces.append(chr(byte))
        else:
            pieces.append('\\x%02x' % byte)
            after_hex_escape = True
    pieces.append('"')
    return ''.join(pieces)
| |
| |
def unicode_to_c_ustring(string):
    """Converts a Python unicode string to a C++ u16-string literal.

    Non-ASCII characters are written as universal character names (\\uXXXX,
    or \\UXXXXXXXX above the Basic Multilingual Plane). Double quotes,
    backslashes and ASCII control characters are escaped so that the result
    is always a single well-formed literal.

    Args:
        string: The str to convert.

    Returns:
        A str holding the C++ literal, including the u prefix and the
        surrounding double quotes.

    >>> unicode_to_c_ustring(u'b\\u00fccher.de')
    'u"b\\\\u00fccher.de"'
    """
    simple_escapes = {
        '"': '\\"',
        '\\': '\\\\',
        '\t': '\\t',
        '\n': '\\n',
        '\r': '\\r',
    }

    pieces = ['u"']
    for char in string:
        code_point = ord(char)
        if code_point > 0xffff:
            pieces.append('\\U%08x' % code_point)
        elif code_point > 0x7f:
            pieces.append('\\u%04x' % code_point)
        elif char in simple_escapes:
            pieces.append(simple_escapes[char])
        elif code_point < 0x20 or code_point == 0x7f:
            # Octal escapes are at most three digits long, so a following
            # digit can never be absorbed into the escape.
            pieces.append('\\%03o' % code_point)
        else:
            pieces.append(char)
    pieces.append('"')
    return ''.join(pieces)
| |
| |
def make_case(unicode_domain, unicode_allowed=True, case_name=None):
    """Prints a C++ test case row for an IDN domain test.

    The row is meant for the idn_cases array of IDNTestCase structs in
    components/url_formatter/url_formatter_unittest.cc.

    Args:
        unicode_domain: The domain as a Unicode string (NOT IDNA-encoded).
        unicode_allowed: True if the test should expect the domain to be
            shown in Unicode form (kSafe); False if it should expect the
            IDNA/Punycode ASCII form (kUnsafe).
        case_name: Optional label, printed as a comment line before the row.

    The domain is converted to its IDNA form automatically:

    >>> make_case(u'\\u5317\\u4eac\\u5927\\u5b78.cn', True, 'Hanzi (Chinese)')
     // Hanzi (Chinese)
     {"xn--1lq90ic7f1rc.cn", u"\\u5317\\u4eac\\u5927\\u5b78.cn", kSafe},
    >>> make_case(u'b\\u00fccher.de', True)
     {"xn--bcher-kva.de", u"b\\u00fccher.de", kSafe},

    The Unicode form that gets printed is the normalized one, as required by
    the IDNA algorithm. Here U+210F is normalized to U+0127, so the row is
    the same as the one generated for u'\\u0127ello':

    >>> make_case(u'\\u210fello', True)
     {"xn--ello-4xa", u"\\u0127ello", kSafe},
    """
    # Encoding to IDNA and decoding back yields the normalized Unicode form.
    punycode = codecs.encode(unicode_domain, 'idna')
    normalized = codecs.decode(punycode, 'idna')

    if case_name:
        print(' // %s' % case_name)

    ascii_literal = str_to_c_string(punycode)
    unicode_literal = unicode_to_c_ustring(normalized)
    if unicode_allowed:
        expectation = 'kSafe'
    else:
        expectation = 'kUnsafe'
    print(' {%s, %s, %s},' % (ascii_literal, unicode_literal, expectation))
| |
| |
def main(args=None):
    """Command-line entry point: prints an IDN test case or runs doctests.

    Args:
        args: List of command-line arguments, excluding the program name.
            Defaults to sys.argv[1:].

    Returns:
        With --test, 0 if every doctest passed and 1 otherwise, so that a
        failing run is visible in the process exit status. Otherwise None.

    Raises:
        SystemExit: Raised by argparse when the arguments are invalid, when
            DOMAIN is missing, or when DOMAIN looks like a URL.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(description='Generate an IDN test case.')
    # DOMAIN is optional at the argparse level so that --test can be used on
    # its own; its presence is checked explicitly further down.
    parser.add_argument('domain',
                        metavar='DOMAIN',
                        nargs='?',
                        help='the Unicode domain (not encoded)')
    parser.add_argument('--name',
                        metavar='NAME',
                        help='the name of the test case')
    parser.add_argument('--no-unicode',
                        action='store_false',
                        dest='unicode_allowed',
                        default=True,
                        help='expect the domain to be Punycoded')
    parser.add_argument('--test',
                        action='store_true',
                        dest='run_tests',
                        help='run unit tests')

    args = parser.parse_args(args)

    if args.run_tests:
        # testmod() returns (failed, attempted); turn failures into a
        # non-zero exit status instead of discarding the result.
        failed, _ = doctest.testmod()
        return 1 if failed else 0

    if not args.domain:
        parser.error('Required argument: DOMAIN')

    if '://' in args.domain:
        parser.error('A URL must not be passed as the domain argument')

    make_case(args.domain,
              unicode_allowed=args.unicode_allowed,
              case_name=args.name)
| |
| |
# Script entry point: run the command-line interface and pass main()'s return
# value to sys.exit() (a None return exits with status 0).
if __name__ == '__main__':
    sys.exit(main())