blob: ce98e3242b0a2004e313110e01e16f1b646523b8 [file] [log] [blame]
#pylint: skip-file
"""
Extract client information from http user agent
The module does not try to detect all capabilities of browser in current form (it can easily be extended though).
Aim is
* fast
* very easy to extend
* reliable enough for practical purposes
* and assist python web apps to detect clients.
"""
import sys
import re
class DetectorsHub(dict):
    """Registry mapping each info type to the list of detectors for that type.

    Iterating over a hub yields the info types in detection order (see
    ``__iter__``), not the dict keys in insertion order.
    """

    # Default detection order. Each instance works on its own copy.
    _known_types = ['os', 'dist', 'flavor', 'browser']

    def __init__(self, *args, **kw):
        dict.__init__(self, *args, **kw)
        # Copy the class-level list so register() never changes the order
        # seen by other hubs.
        self._known_types = list(self._known_types)
        for typ in self._known_types:
            self.setdefault(typ, [])
        self.registerDetectors()

    def register(self, detector):
        """Add *detector* to the list kept for its ``info_type``.

        An info type that is not known yet is inserted into the iteration
        order at index ``detector.order``.
        """
        if detector.info_type not in self._known_types:
            self[detector.info_type] = [detector]
            self._known_types.insert(detector.order, detector.info_type)
        else:
            self[detector.info_type].append(detector)

    def reorderByPrefs(self, detectors, prefs):
        """Return *detectors* ordered according to the names in *prefs*.

        * ``prefs is None``  -> ``[]`` (no detector should be tried)
        * empty ``prefs``    -> *detectors* itself, unchanged
        * otherwise          -> a new list; detectors named in *prefs* come
          first, in the order given there, and the remaining ones keep their
          relative order (``sorted`` is stable).

        *prefs* is never modified.
        """
        if prefs is None:
            return []
        if not prefs:
            return detectors
        rank = {}
        for position, name in enumerate(prefs):
            # First occurrence wins, like list.index().
            rank.setdefault(name, position)
        unranked = len(prefs)
        return sorted(detectors, key=lambda d: rank.get(d.name, unranked))

    def __iter__(self):
        return iter(self._known_types)

    def registerDetectors(self):
        """Instantiate every DetectorBase subclass found in the module
        globals and register the instances whose ``can_register`` is true."""
        for candidate in list(globals().values()):
            if DetectorBase in getattr(candidate, '__mro__', ()):
                detector = candidate()
                if detector.can_register:
                    self.register(detector)
class DetectorBase(object):
    """Base class of all detectors.

    A detector looks for the ``look_for`` string in a user agent and, when it
    is present, stores ``dict(name=..., version=...)`` in the result under
    its ``info_type``.
    """
    name = ""  # "to perform match in DetectorsHub object"
    info_type = "override me"
    result_key = "override me"
    order = 10  # 0 is highest
    look_for = "string to look for"
    can_register = False
    prefs = dict()  # dict(info_type = [name1, name2], ..)
    version_splitters = ["/", " "]
    _suggested_detectors = None

    def __init__(self):
        if not self.name:
            self.name = self.__class__.__name__
        # Only the class's own namespace is consulted: a class that does not
        # set can_register itself is registrable, whatever its bases say.
        self.can_register = self.__class__.__dict__.get('can_register', True)

    def detect(self, agent, result):
        """Record this detector's findings for *agent* in *result*.

        -> True when the agent matched, otherwise None
        """
        if self.checkWords(agent):
            result[self.info_type] = dict(name=self.name)
            version = self.getVersion(agent)
            if version:
                result[self.info_type]['version'] = version
            return True

    def checkWords(self, agent):
        """-> True when ``look_for`` occurs in *agent*, otherwise None"""
        if self.look_for in agent:
            return True

    def getVersion(self, agent):
        """Return the text between ``look_for + version_splitters[0]`` and the
        next ``version_splitters[1]``.

        -> version string, or None when that marker is not in *agent*
        """
        marker = self.look_for + self.version_splitters[0]
        if marker not in agent:
            # Without this guard the split below would hand back the first
            # token of the whole agent (e.g. "Mozilla/5.0") as the version.
            return None
        return agent.split(marker)[-1].split(self.version_splitters[1])[0].strip()
class OS(DetectorBase):
    """Category base for operating-system detectors (never registered itself)."""
    can_register = False
    info_type = "os"
    version_splitters = [";", " "]
class Dist(DetectorBase):
    """Category base for distribution detectors (never registered itself)."""
    can_register = False
    info_type = "dist"
class Flavor(DetectorBase):
    """Category base for OS-flavor detectors (never registered itself)."""
    can_register = False
    info_type = "flavor"
class Browser(DetectorBase):
    """Category base for browser detectors (never registered itself)."""
    can_register = False
    info_type = "browser"
class Android(OS):
    """OS detector for agents containing 'Android'."""
    look_for = 'Android'

    def getVersion(self, agent):
        # -> the digits/dots following "Android ", or None
        found = re.search(r"Android ([\d.]+)", agent, re.IGNORECASE)
        return found.group(1) if found else None
class Macintosh(OS):
    """OS detector for agents containing 'Macintosh'.

    NOTE: a second ``class Macintosh`` further down this file rebinds the
    name, so this definition is not the one left in the module namespace.
    """
    look_for = 'Macintosh'
    prefs = {'dist': None}

    def getVersion(self, agent):
        # No version is extracted at this level.
        return None
class Firefox(Browser):
    """Browser detector matching the 'Firefox' token."""
    look_for = 'Firefox'
class Konqueror(Browser):
    """Browser detector matching 'Konqueror'; the version ends at ';'."""
    version_splitters = ["/", ";"]
    look_for = 'Konqueror'
class Opera(Browser):
    """Browser detector matching the 'Opera' token."""
    look_for = 'Opera'
class MSIE(Browser):
    """Browser detector matching 'MSIE', reported under its full product name."""
    name = 'Microsoft Internet Explorer'
    look_for = 'MSIE'
    # The version sits between "MSIE " and the next ";".
    version_splitters = [" ", ";"]
class Galeon(Browser):
    """Browser detector matching the 'Galeon' token."""
    look_for = 'Galeon'
class Safari(Browser):
    """Browser detector for Safari.

    Agents that also mention Chrome or OmniWeb are rejected, because those
    carry the word 'Safari' as well.
    """
    look_for = "Safari"

    def checkWords(self, agent):
        # -> True on a match, False when an excluded word is present,
        #    None when 'Safari' is absent altogether
        unless_list = ["Chrome", "OmniWeb"]
        if self.look_for in agent:
            for word in unless_list:
                if word in agent:
                    return False
            return True

    def getVersion(self, agent):
        # -> version string /None
        if "Version/" in agent:
            return agent.split('Version/')[-1].split(' ')[0].strip()
        if "Safari/" in agent:
            # Agents without a "Version/" token: use the number attached to
            # the Safari token instead of the first word of the agent.
            return agent.split('Safari/')[-1].split(' ')[0].strip()
        if "Safari " in agent:
            return agent.split('Safari ')[-1].split(' ')[0].strip()  # Mobile Safari
        return None
class Linux(OS):
    """OS detector for 'Linux' agents that do not also mention Android."""
    look_for = 'Linux'
    prefs = dict(browser=["Firefox"], dist=["Ubuntu"], flavor=None)

    def getVersion(self, agent):
        # No version is extracted for Linux.
        return None

    def checkWords(self, agent):
        # -> True on a match, False when an excluded word is present,
        #    None when 'Linux' is absent
        if self.look_for not in agent:
            return None
        for excluded in ["Android"]:
            if excluded in agent:
                return False
        return True
class Macintosh(OS):
    """OS detector for agents containing 'Macintosh'."""
    look_for = 'Macintosh'
    prefs = {'dist': None, 'flavor': ['MacOS']}

    def getVersion(self, agent):
        # No version at OS level; the MacOS flavor detector extracts one.
        return None
class MacOS(Flavor):
    """Flavor detector for agents containing 'Mac OS'."""
    look_for = 'Mac OS'
    prefs = dict(browser=['Firefox', 'Opera', "Microsoft Internet Explorer"])

    def getVersion(self, agent):
        # Take what follows the last "Mac ", cut it at the first ';' or ')',
        # and turn underscores into dots (10_6_6 -> 10.6.6).
        tail = agent.split('Mac ')[-1].strip()
        head = re.split(r"[;)]", tail, 1)[0]
        return head.replace('_', '.')
class Windows(OS):
    """OS detector for agents containing 'Windows'."""
    look_for = 'Windows'
    # 'dist' was previously misspelled as 'dict', so the "no distribution"
    # preference was filed under a key nothing looks up.
    prefs = dict(browser=["Microsoft Internet Explorer", 'Firefox'], dist=None, flavor=None)

    def getVersion(self, agent):
        # -> first "<digits>.<digits>" after the word Windows, or None
        match = re.search(r"Windows.*?(\d+\.\d+)[^\d]*", agent, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1)
        return None
class Ubuntu(Dist):
    """Distribution detector matching the 'Ubuntu' token."""
    look_for = 'Ubuntu'
    prefs = {'browser': ['Firefox']}
    version_splitters = ["/", " "]
class Debian(Dist):
    """Distribution detector matching the 'Debian' token."""
    look_for = 'Debian'
    prefs = {'browser': ['Firefox']}
    version_splitters = ["/", " "]
class Chrome(Browser):
    """Browser detector matching the 'Chrome' token."""
    version_splitters = ["/", " "]
    look_for = 'Chrome'
class ChromeOS(OS):
    """OS detector for agents containing 'CrOS'."""
    look_for = 'CrOS'

    def getVersion(self, agent):
        # -> the digits/dots captured after "CrOS", or None
        found = re.search(r"CrOS.*?\s([\d.]+).*?[;)]", agent, re.DOTALL | re.IGNORECASE)
        return found.group(1) if found else None
# Module-wide registry. Constructing it calls registerDetectors(), so it must
# come after the detector classes above.
detectorshub = DetectorsHub()
def detect(agent):
    """
    Run the registered detectors against *agent* and collect their findings.

    NOTE: a second ``def detect`` further down this file rebinds the name, so
    this version is not the one callers get.

    -> dict keyed by info type
    """
    result = dict()
    prefs = dict ()
    _suggested_detectors = []
    for info_type in detectorshub:
        if not _suggested_detectors:
            detectors = detectorshub[info_type]
            # Preferences come from the detector that matched last.
            _d_prefs = prefs.get(info_type, [])
            detectors = detectorshub.reorderByPrefs(detectors, _d_prefs)
            # "detector" exists as a local only once the inner loop below has
            # run at least one iteration.
            if "detector" in locals():
                detector._suggested_detectors = detectors
        else:
            detectors = _suggested_detectors
        for detector in detectors:
            #print "detector name: ", detector.name
            if detector.detect(agent, result):
                prefs = detector.prefs
                _suggested_detectors = detector._suggested_detectors
                break
    return result
class Result(dict):
    """Dict whose missing keys read as an empty string.

    The lookup does not store anything, so ``key in result`` stays False.
    """

    def __missing__(self, k):
        return str()
def detect(agent):
    """
    Run the registered detectors against *agent*, one info type at a time.

    -> Result keyed by info type; each value is the dict stored by the
       detector that matched
    """
    result = Result()
    _suggested_detectors = []
    for info_type in detectorshub:
        # A non-empty suggestion list left by an earlier match is used in
        # place of the hub's own list for this info type.
        detectors = _suggested_detectors or detectorshub[info_type]
        for detector in detectors:
            if detector.detect(agent, result):
                if detector.prefs and not detector._suggested_detectors:
                    _suggested_detectors = detectorshub.reorderByPrefs(detectors, detector.prefs.get(info_type))
                    # The computed order is also stored on the detector.
                    detector._suggested_detectors = _suggested_detectors
                    # NOTE(review): the nesting of this break was reconstructed
                    # from a copy of the file without indentation - confirm it
                    # belongs inside the prefs branch.
                    break
    return result
def simple_detect(agent):
    """
    Condense the detection result for *agent* into two display strings.

    -> (os, browser) # tuple of strings
    """
    result = detect(agent)

    # OS label: flavor, dist and os names, in that order, for the parts found.
    os_list = [result[key]['name'] for key in ('flavor', 'dist', 'os') if key in result]
    os = " ".join(os_list) or "Unknown OS"

    # OS version: the first non-empty version among flavor, dist and os.
    os_version = ""
    for key in ('flavor', 'dist', 'os'):
        entry = result[key]
        candidate = entry and entry.get('version')
        if candidate:
            os_version = candidate
            break

    browser = 'Unknown Browser'
    browser_version = ""
    if 'browser' in result:
        found = result['browser']
        browser = found['name'] or 'Unknown Browser'
        browser_version = found.get('version') or ""

    if browser_version:
        browser = " ".join((browser, browser_version))
    if os_version:
        os = " ".join((os, os_version))
    return os, browser
def test():
    """Run simple_detect over a few sample agents (ten passes) and print each
    result followed by the number of agents analysed and the time taken."""
    import datetime
    agents = [
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-GB; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24,gzip(gfe)",
        "Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19",
    ]
    runs = agents * 10
    then = datetime.datetime.now()
    for agent in runs:
        # Single-argument print(...) gives the same output whether print is a
        # statement or a function.
        print(agent)
        print("*  %s" % (simple_detect(agent),))
    now = datetime.datetime.now()
    # Report how many agents were actually processed, not the sample size.
    print("%d analysed in  %s" % (len(runs), now - then))
# Run the sample-agent demo when the file is executed as a script.
if __name__ == '__main__':
    test()