Fall back to the next declared or default encoding if an unknown one is declared
Closes #30
diff --git a/mechanize/_html.py b/mechanize/_html.py
index 5c49a56..1a4e2c0 100644
--- a/mechanize/_html.py
+++ b/mechanize/_html.py
@@ -8,6 +8,7 @@
"""
+import codecs
import copy
import htmlentitydefs
import re
@@ -53,9 +54,16 @@
for ct in response.info().getheaders("content-type"):
for k, v in split_header_words([ct])[0]:
if k == "charset":
- return v
+ encoding = v
+ try:
+ codecs.lookup(v)
+ except LookupError:
+ continue
+ else:
+ return encoding
return self._default_encoding
+
class ResponseTypeFinder:
def __init__(self, allow_xhtml):
self._allow_xhtml = allow_xhtml
diff --git a/test/test_html.py b/test/test_html.py
index cda4be7..a4258b1 100644
--- a/test/test_html.py
+++ b/test/test_html.py
@@ -118,6 +118,34 @@
self.assertEqual(got, expect)
+class EncodingFinderTests(TestCase):
+
+ def make_response(self, encodings):
+ return mechanize._response.test_response(
+ headers=[("Content-type", "text/html; charset=\"%s\"" % encoding)
+ for encoding in encodings])
+
+ def test_known_encoding(self):
+ encoding_finder = mechanize._html.EncodingFinder("default")
+ response = self.make_response(["utf-8"])
+ self.assertEqual(encoding_finder.encoding(response), "utf-8")
+
+ def test_unknown_encoding(self):
+ encoding_finder = mechanize._html.EncodingFinder("default")
+ response = self.make_response(["bogus"])
+ self.assertEqual(encoding_finder.encoding(response), "default")
+
+ def test_precedence(self):
+ encoding_finder = mechanize._html.EncodingFinder("default")
+ response = self.make_response(["latin-1", "utf-8"])
+ self.assertEqual(encoding_finder.encoding(response), "latin-1")
+
+ def test_fallback(self):
+ encoding_finder = mechanize._html.EncodingFinder("default")
+ response = self.make_response(["bogus", "utf-8"])
+ self.assertEqual(encoding_finder.encoding(response), "utf-8")
+
+
if __name__ == "__main__":
import unittest
unittest.main()