Doctests for mechanize._rfc3986: urlsplit, urljoin and remove_dot_segments.

>>> from mechanize._rfc3986 import urlsplit, urljoin, remove_dot_segments

Some common cases:

>>> urlsplit("http://example.com/spam/eggs/spam.html?apples=pears&a=b#foo")
('http', 'example.com', '/spam/eggs/spam.html', 'apples=pears&a=b', 'foo')
>>> urlsplit("http://example.com/spam.html#foo")
('http', 'example.com', '/spam.html', None, 'foo')
>>> urlsplit("ftp://example.com/foo.gif")
('ftp', 'example.com', '/foo.gif', None, None)
>>> urlsplit('ftp://joe:password@example.com:port')
('ftp', 'joe:password@example.com:port', '', None, None)
>>> urlsplit("mailto:jjl@pobox.com")
('mailto', None, 'jjl@pobox.com', None, None)

The five path productions (RFC 3986, section 3.3):

path-abempty (empty, or begins with "/"):

>>> urlsplit("http://www.example.com")
('http', 'www.example.com', '', None, None)
>>> urlsplit("http://www.example.com/foo")
('http', 'www.example.com', '/foo', None, None)

path-absolute (begins with "/" but not "//"):

>>> urlsplit("a:/")
('a', None, '/', None, None)
>>> urlsplit("a:/b:/c/")
('a', None, '/b:/c/', None, None)

path-noscheme (first segment contains no colon):

>>> urlsplit("a:b/:c/")
('a', None, 'b/:c/', None, None)

path-rootless (begins with a segment, no leading "/"):

>>> urlsplit("a:b:/c/")
('a', None, 'b:/c/', None, None)

path-empty (zero characters):

>>> urlsplit("quack:")
('quack', None, '', None, None)
>>> remove_dot_segments("/a/b/c/./../../g")
'/a/g'
>>> remove_dot_segments("mid/content=5/../6")
'mid/6'
>>> remove_dot_segments("/b/c/.")
'/b/c/'
>>> remove_dot_segments("/b/c/./.")
'/b/c/'
>>> remove_dot_segments(".")
''
>>> remove_dot_segments("/.")
'/'
>>> remove_dot_segments("./")
''
>>> remove_dot_segments("/..")
'/'
>>> remove_dot_segments("/../")
'/'

Examples from RFC 3986, section 5.4 (reference resolution):

Normal examples (section 5.4.1):

>>> base = "http://a/b/c/d;p?q"
>>> def join(uri): return urljoin(base, uri)
>>> join("g:h")
'g:h'
>>> join("g")
'http://a/b/c/g'
>>> join("./g")
'http://a/b/c/g'
>>> join("g/")
'http://a/b/c/g/'
>>> join("/g")
'http://a/g'
>>> join("//g")
'http://g'
>>> join("?y")
'http://a/b/c/d;p?y'
>>> join("g?y")
'http://a/b/c/g?y'
>>> join("#s")
'http://a/b/c/d;p?q#s'
>>> join("g#s")
'http://a/b/c/g#s'
>>> join("g?y#s")
'http://a/b/c/g?y#s'
>>> join(";x")
'http://a/b/c/;x'
>>> join("g;x")
'http://a/b/c/g;x'
>>> join("g;x?y#s")
'http://a/b/c/g;x?y#s'
>>> join("")
'http://a/b/c/d;p?q'
>>> join(".")
'http://a/b/c/'
>>> join("./")
'http://a/b/c/'
>>> join("..")
'http://a/b/'
>>> join("../")
'http://a/b/'
>>> join("../g")
'http://a/b/g'
>>> join("../..")
'http://a/'
>>> join("../../")
'http://a/'
>>> join("../../g")
'http://a/g'

Abnormal examples (section 5.4.2):

>>> join("../../../g")
'http://a/g'
>>> join("../../../../g")
'http://a/g'
>>> join("/./g")
'http://a/g'
>>> join("/../g")
'http://a/g'
>>> join("g.")
'http://a/b/c/g.'
>>> join(".g")
'http://a/b/c/.g'
>>> join("g..")
'http://a/b/c/g..'
>>> join("..g")
'http://a/b/c/..g'
>>> join("./../g")
'http://a/b/g'
>>> join("./g/.")
'http://a/b/c/g/'
>>> join("g/./h")
'http://a/b/c/g/h'
>>> join("g/../h")
'http://a/b/c/h'
>>> join("g;x=1/./y")
'http://a/b/c/g;x=1/y'
>>> join("g;x=1/../y")
'http://a/b/c/y'
>>> join("g?y/./x")
'http://a/b/c/g?y/./x'
>>> join("g?y/../x")
'http://a/b/c/g?y/../x'
>>> join("g#s/./x")
'http://a/b/c/g#s/./x'
>>> join("g#s/../x")
'http://a/b/c/g#s/../x'
>>> join("http:g")
'http://a/b/c/g'

Additional urljoin tests, not taken from the RFC:

>>> join("/..")
'http://a/'
>>> join("/../")
'http://a/'