| >>> from mechanize._rfc3986 import urlsplit, urljoin, remove_dot_segments |
| |
| Some common cases |
| |
| >>> urlsplit("http://example.com/spam/eggs/spam.html?apples=pears&a=b#foo") |
| ('http', 'example.com', '/spam/eggs/spam.html', 'apples=pears&a=b', 'foo') |
| >>> urlsplit("http://example.com/spam.html#foo") |
| ('http', 'example.com', '/spam.html', None, 'foo') |
| >>> urlsplit("ftp://example.com/foo.gif") |
| ('ftp', 'example.com', '/foo.gif', None, None) |
| >>> urlsplit('ftp://joe:password@example.com:port') |
| ('ftp', 'joe:password@example.com:port', '', None, None) |
| >>> urlsplit("mailto:jjl@pobox.com") |
| ('mailto', None, 'jjl@pobox.com', None, None) |
| |
| The five path productions (RFC 3986 section 3.3) |
| |
| path-abempty: |
| |
| >>> urlsplit("http://www.example.com") |
| ('http', 'www.example.com', '', None, None) |
| >>> urlsplit("http://www.example.com/foo") |
| ('http', 'www.example.com', '/foo', None, None) |
| |
| path-absolute: |
| |
| >>> urlsplit("a:/") |
| ('a', None, '/', None, None) |
| >>> urlsplit("a:/b:/c/") |
| ('a', None, '/b:/c/', None, None) |
| |
| path-noscheme: |
| |
| >>> urlsplit("a:b/:c/") |
| ('a', None, 'b/:c/', None, None) |
| |
| path-rootless: |
| |
| >>> urlsplit("a:b:/c/") |
| ('a', None, 'b:/c/', None, None) |
| |
| path-empty: |
| |
| >>> urlsplit("quack:") |
| ('quack', None, '', None, None) |
| |
| Removing dot segments (RFC 3986 section 5.2.4) |
| |
| >>> remove_dot_segments("/a/b/c/./../../g") |
| '/a/g' |
| >>> remove_dot_segments("mid/content=5/../6") |
| 'mid/6' |
| >>> remove_dot_segments("/b/c/.") |
| '/b/c/' |
| >>> remove_dot_segments("/b/c/./.") |
| '/b/c/' |
| >>> remove_dot_segments(".") |
| '' |
| >>> remove_dot_segments("/.") |
| '/' |
| >>> remove_dot_segments("./") |
| '' |
| >>> remove_dot_segments("/..") |
| '/' |
| >>> remove_dot_segments("/../") |
| '/' |
| |
| |
| Examples from RFC 3986 section 5.4 |
| |
| Normal Examples |
| |
| >>> base = "http://a/b/c/d;p?q" |
| >>> def join(uri): return urljoin(base, uri) |
| >>> join("g:h") |
| 'g:h' |
| >>> join("g") |
| 'http://a/b/c/g' |
| >>> join("./g") |
| 'http://a/b/c/g' |
| >>> join("g/") |
| 'http://a/b/c/g/' |
| >>> join("/g") |
| 'http://a/g' |
| >>> join("//g") |
| 'http://g' |
| >>> join("?y") |
| 'http://a/b/c/d;p?y' |
| >>> join("g?y") |
| 'http://a/b/c/g?y' |
| >>> join("#s") |
| 'http://a/b/c/d;p?q#s' |
| >>> join("g#s") |
| 'http://a/b/c/g#s' |
| >>> join("g?y#s") |
| 'http://a/b/c/g?y#s' |
| >>> join(";x") |
| 'http://a/b/c/;x' |
| >>> join("g;x") |
| 'http://a/b/c/g;x' |
| >>> join("g;x?y#s") |
| 'http://a/b/c/g;x?y#s' |
| >>> join("") |
| 'http://a/b/c/d;p?q' |
| >>> join(".") |
| 'http://a/b/c/' |
| >>> join("./") |
| 'http://a/b/c/' |
| >>> join("..") |
| 'http://a/b/' |
| >>> join("../") |
| 'http://a/b/' |
| >>> join("../g") |
| 'http://a/b/g' |
| >>> join("../..") |
| 'http://a/' |
| >>> join("../../") |
| 'http://a/' |
| >>> join("../../g") |
| 'http://a/g' |
| |
| Abnormal Examples |
| |
| >>> join("../../../g") |
| 'http://a/g' |
| >>> join("../../../../g") |
| 'http://a/g' |
| >>> join("/./g") |
| 'http://a/g' |
| >>> join("/../g") |
| 'http://a/g' |
| >>> join("g.") |
| 'http://a/b/c/g.' |
| >>> join(".g") |
| 'http://a/b/c/.g' |
| >>> join("g..") |
| 'http://a/b/c/g..' |
| >>> join("..g") |
| 'http://a/b/c/..g' |
| >>> join("./../g") |
| 'http://a/b/g' |
| >>> join("./g/.") |
| 'http://a/b/c/g/' |
| >>> join("g/./h") |
| 'http://a/b/c/g/h' |
| >>> join("g/../h") |
| 'http://a/b/c/h' |
| >>> join("g;x=1/./y") |
| 'http://a/b/c/g;x=1/y' |
| >>> join("g;x=1/../y") |
| 'http://a/b/c/y' |
| >>> join("g?y/./x") |
| 'http://a/b/c/g?y/./x' |
| >>> join("g?y/../x") |
| 'http://a/b/c/g?y/../x' |
| >>> join("g#s/./x") |
| 'http://a/b/c/g#s/./x' |
| >>> join("g#s/../x") |
| 'http://a/b/c/g#s/../x' |
| >>> join("http:g") |
| 'http://a/b/c/g' |
| |
| |
| Additional urljoin tests, not taken from the RFC: |
| |
| >>> join("/..") |
| 'http://a/' |
| >>> join("/../") |
| 'http://a/' |