| #!/usr/bin/env python |
| |
| from unittest import TestCase |
| import StringIO, re, UserDict |
| |
| import ClientCookie |
| |
| import mechanize |
| |
class MockMethod:
    """Callable stand-in for one named handler method.

    Calling an instance forwards to ``handle`` with the method name and the
    configured action placed in front of the caller's positional arguments,
    and returns whatever ``handle`` returns.
    """

    def __init__(self, meth_name, action, handle):
        # meth_name: name reported to ``handle`` (e.g. "http_open").
        # action:    opaque value passed through to ``handle`` unchanged.
        # handle:    callable invoked as handle(meth_name, action, *args).
        self.meth_name = meth_name
        self.handle = handle
        self.action = action

    def __call__(self, *args):
        # Argument unpacking replaces the deprecated apply() builtin.
        return self.handle(self.meth_name, self.action, *args)
| |
class MockHeaders(UserDict.UserDict):
    """Dictionary-backed fake of a response header collection.

    Header names are matched case-insensitively.  Because the backing store
    is a plain mapping, each stored name yields at most one entry.
    """

    def getallmatchingheaders(self, name):
        """Return "name: value" strings for the headers called ``name``."""
        wanted = name.lower()
        return ["%s: %s" % (k, v) for k, v in self.data.items()
                if k.lower() == wanted]

    def getheaders(self, name):
        """Return the values of the headers called ``name`` (maybe empty)."""
        wanted = name.lower()
        return [v for k, v in self.data.items() if k.lower() == wanted]
| |
class MockResponse:
    """Minimal in-memory response: a URL, a readable body and headers.

    url:  value returned by geturl(); callers may overwrite ``self.url``.
    data: body text; None means an empty body.
    info: mapping of header name to value; None means no headers.
    """

    def __init__(self, url="http://example.com/", data=None, info=None):
        # StringIO.StringIO() coerces a non-string argument with str(), so a
        # None body would otherwise read back as the literal text "None".
        if data is None:
            data = ""
        if info is None:
            info = {}
        self.url = url
        self._f = StringIO.StringIO(data)
        self._info = MockHeaders(info)

    def info(self):
        """Return the MockHeaders object built from ``info``."""
        return self._info

    def geturl(self):
        """Return the current value of ``self.url``."""
        return self.url

    def read(self, size=-1):
        """Read up to ``size`` characters of the body (all of it if -1)."""
        return self._f.read(size)

    def seek(self, whence):
        """Rewind the body; only position 0 is supported."""
        assert whence == 0
        self._f.seek(0)
| |
class MockHandler:
    """Configurable fake handler that records every call made to it.

    ``methods`` is a sequence of (name, action) pairs.  For each pair a
    MockMethod called ``name`` is installed on the handler: names ending in
    "_open" are routed to handle(), every other name to process().  Each
    call is appended to ``parent.calls`` as (handler, name, args, kwds).
    """

    processor_order = 500
    handler_order = -1

    def __init__(self, methods):
        self._define_methods(methods)

    def _define_methods(self, methods):
        for name, action in methods:
            if name.endswith("_open"):
                target = self.handle
            else:
                target = self.process
            # Install on the instance rather than on the class: a class
            # attribute would be shared by every other MockHandler (and by
            # handlers created in later tests), which would then appear to
            # implement methods they were never configured with.
            setattr(self, name, MockMethod(name, action, target))

    def handle(self, fn_name, response, *args, **kwds):
        """Record the call and return the response for an "*_open" method.

        A configured (truthy) ``response`` is rewound and reused; otherwise
        a fresh MockResponse is created.  Either way its ``url`` is set from
        the request, which is expected as the first positional argument.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if response:
            r = response
            r.seek(0)
        else:
            r = MockResponse()
        req = args[0]
        r.url = req.get_full_url()
        return r

    def process(self, fn_name, action, *args, **kwds):
        """Record the call and pass the request or response straight through.

        "*_request" methods return their first argument; all other methods
        return their second argument.
        """
        self.parent.calls.append((self, fn_name, args, kwds))
        if fn_name.endswith("_request"):
            return args[0]
        return args[1]

    def close(self):
        pass

    def add_parent(self, parent):
        # Attaching to a parent also resets that parent's call log.
        self.parent = parent
        self.parent.calls = []

    def __cmp__(self, other):
        if hasattr(other, "handler_order"):
            return cmp(self.handler_order, other.handler_order)
        # No handler_order, leave in original order.  Yuck.
        return -1
| |
class TestBrowser(mechanize.Browser):
    """Browser subclass used by the tests in this module.

    Overrides the ``default_*`` class attributes so that the scheme and
    "other" lists are empty and the feature list contains only "_seek".
    """

    default_schemes = []
    default_others = []
    default_features = ["_seek"]
| |
| |
class BrowserTests(TestCase):
    """Tests for mechanize.Browser driven by canned MockHandler responses."""

    def test_referer(self):
        # Referer is set only on requests built by .click()/.click_link(),
        # never by .open(); it is omitted for non-http(s) pages or targets
        # and when leaving an https page for an http URL.
        b = TestBrowser()
        url = "http://www.example.com/"
        r = MockResponse(url,
"""<html>
<head><title>Title</title></head>
<body>
<form name="form1">
<input type="hidden" name="foo" value="bar"></input>
<input type="submit"></input>
</form>
<a href="http://example.com/foo/bar.html" name="apples"></a>
<a href="https://example.com/spam/eggs.html" name="secure"></a>
<a href="blah://example.com/" name="pears"></a>
</body>
</html>
""", {"content-type": "text/html"})
        b.add_handler(MockHandler([("http_open", r)]))

        # Referer not added by .open()...
        req = ClientCookie.Request(url)
        b.open(req)
        self.assert_(req.get_header("Referer") is None)
        # ...even if we're visiting a document
        b.open(req)
        self.assert_(req.get_header("Referer") is None)

        # Referer added by .click_link() and .click()
        b.select_form("form1")
        req2 = b.click()
        self.assertEqual(req2.get_header("Referer"), url)
        b.open(req2)
        req3 = b.click_link(name="apples")
        self.assertEqual(req3.get_header("Referer"), url + "?foo=bar")

        # Referer is still added when going from an http to an https URL...
        b.add_handler(MockHandler([("https_open", r)]))
        b.open(req3)
        req4 = b.click_link(name="secure")
        self.assertEqual(req4.get_header("Referer"),
                         "http://example.com/foo/bar.html")
        # ...but not when going from https to http URL
        b.open(req4)
        req5 = b.click_link(name="apples")
        self.assert_(not req5.has_header("Referer"))

        # Referer not added for non-http, non-https requests
        b.add_handler(MockHandler([("blah_open", r)]))
        req6 = b.click_link(name="pears")
        self.assert_(not req6.has_header("Referer"))
        # Referer not added when going from non-http, non-https URL
        b.open(req6)
        req7 = b.click_link(name="apples")
        self.assert_(not req7.has_header("Referer"))

        # XXX Referer added for redirect

    def test_encoding(self):
        import mimetools
        import urllib

        # Always take the first encoding: when several Content-Type headers
        # are present, the charset of the first one must win.
        # NOTE(review): presumably later headers come from HTTP-EQUIV
        # processing -- confirm against Browser._encoding.
        b = mechanize.Browser()
        default = b.default_encoding
        cases = [
            ("", default),

            ("Foo: Bar\r\n\r\n", default),

            ("Content-Type: text/html; charset=UTF-8\r\n\r\n",
             "UTF-8"),

            # The second header used to read "text/html: charset=..." (a
            # colon typo), so it never carried a parseable charset.
            ("Content-Type: text/html; charset=UTF-8\r\n"
             "Content-Type: text/html; charset=KOI8-R\r\n\r\n",
             "UTF-8"),
        ]
        for s, ct in cases:
            msg = mimetools.Message(StringIO.StringIO(s))
            r = urllib.addinfourl(StringIO.StringIO(""), msg,
                                  "http://www.example.com/")
            self.assertEqual(b._encoding(r), ct)

    def test_history(self):
        # .back() walks the history of opened responses and raises
        # BrowserStateError when there is nothing left to go back to.
        b = TestBrowser()
        b.add_handler(MockHandler([("http_open", None)]))
        self.assertRaises(mechanize.BrowserStateError, b.back)
        r1 = b.open("http://example.com/")
        self.assertRaises(mechanize.BrowserStateError, b.back)
        b.open("http://example.com/foo")
        self.assert_(b.back() is r1)
        r3 = b.open("http://example.com/bar")
        b.open("http://example.com/spam")
        self.assert_(b.back() is r3)
        self.assert_(b.back() is r1)
        self.assertRaises(mechanize.BrowserStateError, b.back)
        # reloading does a real HTTP fetch rather than using history cache
        r5 = b.reload()
        self.assert_(r5 is not r1)
        # .geturl() gets fed through to b.response
        self.assertEqual(b.geturl(), "http://example.com/")
        # can go back n times
        b.open("http://example.com/spam")
        b.open("http://example.com/spam")
        self.assert_(b.back(2) is r5)
        self.assertRaises(mechanize.BrowserStateError, b.back, 2)

    def test_empty(self):
        url = "http://example.com/"

        # Response that is not HTML: every HTML-dependent call must raise
        # BrowserStateError.
        b = TestBrowser()
        b.add_handler(MockHandler([("http_open", MockResponse(url, "", {}))]))
        b.open(url)
        self.assert_(not b.viewing_html())
        for method in (b.links, b.forms, b.title, b.select_form):
            self.assertRaises(mechanize.BrowserStateError, method)
        self.assertRaises(mechanize.BrowserStateError, b.select_form,
                          name="blah")
        self.assertRaises(mechanize.BrowserStateError, b.find_link,
                          name="blah")

        # HTML response containing neither forms nor links.
        b = TestBrowser()
        r = MockResponse(url,
"""<html>
<head><title>Title</title></head>
<body>
</body>
</html>
""", {"content-type": "text/html"})
        b.add_handler(MockHandler([("http_open", r)]))
        b.open(url)
        self.assertEqual(b.title(), "Title")
        self.assertEqual(list(b.links()), [])
        self.assertEqual(list(b.forms()), [])
        self.assertRaises(ValueError, b.select_form)
        self.assertRaises(mechanize.FormNotFoundError, b.select_form,
                          name="blah")
        self.assertRaises(mechanize.FormNotFoundError, b.select_form,
                          predicate=lambda x: True)
        self.assertRaises(mechanize.LinkNotFoundError, b.find_link,
                          name="blah")
        self.assertRaises(mechanize.LinkNotFoundError, b.find_link,
                          predicate=lambda x: True)

    def test_forms(self):
        url = "http://example.com"

        b = TestBrowser()
        r = MockResponse(url,
"""<html>
<head><title>Title</title></head>
<body>
<form name="form1">
<input type="text"></input>
<input type="checkbox" name="cheeses" value="cheddar"></input>
<input type="checkbox" name="cheeses" value="edam"></input>
<input type="submit" name="one"></input>
</form>
<a href="http://example.com/foo/bar.html" name="apples">
<form name="form2">
<input type="submit" name="two">
</form>
</body>
</html>
""", {"content-type": "text/html"})
        b.add_handler(MockHandler([("http_open", r)]))
        b.open(url)

        # list() matches test_empty and works whether .forms() returns a
        # list or some other iterable.
        forms = list(b.forms())
        self.assertEqual(len(forms), 2)
        self.assertEqual([f.name for f in forms], ["form1", "form2"])

        self.assertRaises(mechanize.FormNotFoundError, b.select_form, "foo")

        # no form is set yet
        self.assertRaises(AttributeError, getattr, b, "possible_items")
        b.select_form("form1")
        # now unknown methods are fed through to selected ClientForm.HTMLForm
        self.assertEqual(b.possible_items("cheeses"), ["cheddar", "edam"])
        b["cheeses"] = ["cheddar", "edam"]
        self.assertEqual(b.click_pairs(), [
            ("cheeses", "cheddar"), ("cheeses", "edam"), ("one", "")])

        b.select_form(nr=1)
        self.assertEqual(b.name, "form2")
        self.assertEqual(b.click_pairs(), [("two", "")])

    def test_links(self):
        url = "http://example.com/"

        b = TestBrowser()
        r = MockResponse(url,
"""<html>
<head><title>Title</title></head>
<body>
<a href="http://example.com/foo/bar.html" name="apples"></a>
<a name="pears"></a>
<a href="spam" name="pears"></a>
<area href="blah" name="foo"></area>
<form name="form2">
<input type="submit" name="two">
</form>
<frame name="name" href="href" src="src"></frame>
<iframe name="name2" href="href" src="src"></iframe>
<a name="name3" href="one">yada yada</a>
<a name="pears" href="two" weird="stuff">rhubarb</a>
<a></a>
<a/>
<iframe src="foo"/>
</body>
</html>
""", {"content-type": "text/html"})
        b.add_handler(MockHandler([("http_open", r)]))
        b.open(url)

        Link = mechanize.Link
        exp_links = [
            # base_url, url, text, tag, attrs
            Link(url, "http://example.com/foo/bar.html", "", "a",
                 [("href", "http://example.com/foo/bar.html"),
                  ("name", "apples")]),
            Link(url, "spam", "", "a", [("href", "spam"), ("name", "pears")]),
            Link(url, "blah", None, "area",
                 [("href", "blah"), ("name", "foo")]),
            Link(url, "src", None, "frame",
                 [("name", "name"), ("href", "href"), ("src", "src")]),
            Link(url, "src", None, "iframe",
                 [("name", "name2"), ("href", "href"), ("src", "src")]),
            Link(url, "one", "yada yada", "a",
                 [("name", "name3"), ("href", "one")]),
            Link(url, "two", "rhubarb", "a",
                 [("name", "pears"), ("href", "two"), ("weird", "stuff")]),
            Link(url, "foo", None, "iframe",
                 [("src", "foo")]),
        ]
        # list() matches test_empty and works whether .links() returns a
        # list or some other iterable.
        links = list(b.links())
        self.assertEqual(len(links), len(exp_links))
        for got, expect in zip(links, exp_links):
            self.assertEqual(got, expect)

        def has_weird_stuff(link):
            return dict(link.attrs).get("weird") == "stuff"

        # nr
        found = b.find_link()
        self.assertEqual(found.url, "http://example.com/foo/bar.html")
        found = b.find_link(nr=1)
        self.assertEqual(found.url, "spam")
        # text
        found = b.find_link(text="yada yada")
        self.assertEqual(found.url, "one")
        self.assertRaises(mechanize.LinkNotFoundError,
                          b.find_link, text="da ya")
        found = b.find_link(text_regex=re.compile("da ya"))
        self.assertEqual(found.url, "one")
        # name
        found = b.find_link(name="name3")
        self.assertEqual(found.url, "one")
        found = b.find_link(name_regex=re.compile("oo"))
        self.assertEqual(found.url, "blah")
        # url
        found = b.find_link(url="spam")
        self.assertEqual(found.url, "spam")
        found = b.find_link(url_regex=re.compile("pam"))
        self.assertEqual(found.url, "spam")
        # tag
        found = b.find_link(tag="area")
        self.assertEqual(found.url, "blah")
        # predicate
        found = b.find_link(predicate=has_weird_stuff)
        self.assertEqual(found.url, "two")
        # combinations
        found = b.find_link(name="pears", nr=1)
        self.assertEqual(found.text, "rhubarb")
        found = b.find_link(url="src", nr=0, name="name2")
        self.assertEqual(found.tag, "iframe")
        self.assertEqual(found.url, "src")
        self.assertRaises(mechanize.LinkNotFoundError, b.find_link,
                          url="src", nr=1, name="name2")
        found = b.find_link(tag="a", predicate=has_weird_stuff)
        self.assertEqual(found.url, "two")

        # .links()
        self.assertEqual(list(b.links(url="src")), [
            Link(url, url='src', text=None, tag='frame',
                 attrs=[('name', 'name'), ('href', 'href'), ('src', 'src')]),
            Link(url, url='src', text=None, tag='iframe',
                 attrs=[('name', 'name2'), ('href', 'href'), ('src', 'src')]),
            ])

    def test_base_uri(self):
        # Link.absolute_url is resolved against <base href> when the page
        # has one, and against the response URL otherwise.
        url = "http://example.com/"

        cases = [
            (
"""<base href="http://www.python.org/foo/">
<a href="bar/baz.html"></a>
<a href="/bar/baz.html"></a>
<a href="http://example.com/bar/baz.html"></a>
""",
             [
                 "http://www.python.org/foo/bar/baz.html",
                 "http://www.python.org/bar/baz.html",
                 "http://example.com/bar/baz.html",
             ]),
            (
"""<a href="bar/baz.html"></a>
<a href="/bar/baz.html"></a>
<a href="http://example.com/bar/baz.html"></a>
""",
             [
                 "http://example.com/bar/baz.html",
                 "http://example.com/bar/baz.html",
                 "http://example.com/bar/baz.html",
             ]),
        ]
        for html, urls in cases:
            b = TestBrowser()
            r = MockResponse(url, html, {"content-type": "text/html"})
            b.add_handler(MockHandler([("http_open", r)]))
            b.open(url)
            self.assertEqual([link.absolute_url for link in b.links()], urls)
| |
| |
class UserAgentTests(TestCase):
    """Tests for scheme and handler management in mechanize.UserAgent."""

    def _assert_calls(self, ua, exp_calls):
        # Each entry of ua.calls is (handler, name, args, kwds); the handler
        # object is dropped before comparing with the expectation.
        self.assertEqual(len(ua.calls), len(exp_calls))
        for got, expect in zip(ua.calls, exp_calls):
            self.assertEqual(expect, got[1:])

    def test_set_handled_schemes(self):
        # UserAgent looks handlers up in handler_classes and calls the
        # entry to obtain a handler; making the fakes callable and having
        # them return themselves lets ready-made instances be registered.
        class MockHandlerClass(MockHandler):
            def __call__(self):
                return self

        # Each fake gets its own subclass, so the two never share
        # class-level state.
        class BlahHandlerClass(MockHandlerClass):
            pass

        class BlahProcessorClass(MockHandlerClass):
            pass

        BlahHandler = BlahHandlerClass([("blah_open", None)])
        BlahProcessor = BlahProcessorClass([("blah_request", None)])

        class TestUserAgent(mechanize.UserAgent):
            default_others = []
            default_features = []
            handler_classes = mechanize.UserAgent.handler_classes.copy()
            handler_classes.update(
                {"blah": BlahHandler, "_blah": BlahProcessor})

        ua = TestUserAgent()

        self.assertEqual(len(ua.handlers), 5)
        ua.set_handled_schemes(["http", "https"])
        self.assertEqual(len(ua.handlers), 2)
        self.assertRaises(ValueError,
                          ua.set_handled_schemes, ["blah", "non-existent"])
        self.assertRaises(ValueError,
                          ua.set_handled_schemes, ["blah", "_blah"])
        ua.set_handled_schemes(["blah"])

        # Only the scheme handler is installed: a single blah_open call.
        req = ClientCookie.Request("blah://example.com/")
        ua.open(req)
        self._assert_calls(ua, [("blah_open", (req,), {})])

        # With the processor enabled, blah_request runs before blah_open.
        ua.calls = []
        req = ClientCookie.Request("blah://example.com/")
        ua._set_handler("_blah", True)
        ua.open(req)
        self._assert_calls(ua, [
            ("blah_request", (req,), {}),
            ("blah_open", (req,), {})])
        # Enabling an already-enabled handler again must not raise.
        ua._set_handler("_blah", True)
| |
# Run every TestCase defined in this module when executed as a script.
if __name__ == "__main__":
    import unittest

    unittest.main()