| # Copyright (c) 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """ |
| A http client with support for https connections with certificate verification. |
| |
| The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3 |
| and the code is from Lib/ssl.py in python3: |
| http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py |
| |
| One use case is to download Chromium DEPS file in a secure way: |
| https://src.chromium.org/chrome/trunk/src/DEPS |
| |
| Notice: python 2.7 or newer is required. |
| """ |
| |
| import cookielib |
| import httplib |
| import os |
| import re |
| import socket |
| import ssl |
| import time |
| import urllib |
| import urllib2 |
| |
| import http_client |
| |
| |
| _SCRIPT_DIR = os.path.dirname(__file__) |
| _TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem') |
| |
| |
| class CertificateError(ValueError): |
| pass |
| |
| |
| def _DNSNameMatch(dn, hostname, max_wildcards=1): |
| """Matching according to RFC 6125, section 6.4.3 |
| |
| http://tools.ietf.org/html/rfc6125#section-6.4.3 |
| """ |
| pats = [] |
| if not dn: |
| return False |
| |
| parts = dn.split(r'.') |
| leftmost = parts[0] |
| remainder = parts[1:] |
| |
| wildcards = leftmost.count('*') |
| if wildcards > max_wildcards: |
| # Issue #17980: avoid denials of service by refusing more |
| # than one wildcard per fragment. A survery of established |
| # policy among SSL implementations showed it to be a |
| # reasonable choice. |
| raise CertificateError( |
| 'too many wildcards in certificate DNS name: ' + repr(dn)) |
| |
| # speed up common case w/o wildcards |
| if not wildcards: |
| return dn.lower() == hostname.lower() |
| |
| # RFC 6125, section 6.4.3, subitem 1. |
| # The client SHOULD NOT attempt to match a presented identifier in which |
| # the wildcard character comprises a label other than the left-most label. |
| if leftmost == '*': |
| # When '*' is a fragment by itself, it matches a non-empty dotless |
| # fragment. |
| pats.append('[^.]+') |
| elif leftmost.startswith('xn--') or hostname.startswith('xn--'): |
| # RFC 6125, section 6.4.3, subitem 3. |
| # The client SHOULD NOT attempt to match a presented identifier |
| # where the wildcard character is embedded within an A-label or |
| # U-label of an internationalized domain name. |
| pats.append(re.escape(leftmost)) |
| else: |
| # Otherwise, '*' matches any dotless string, e.g. www* |
| pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) |
| |
| # add the remaining fragments, ignore any wildcards |
| for frag in remainder: |
| pats.append(re.escape(frag)) |
| |
| pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) |
| return pat.match(hostname) |
| |
| |
| def _MatchHostname(cert, hostname): |
| """Verify that *cert* (in decoded format as returned by |
| SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 |
| rules are followed, but IP addresses are not accepted for *hostname*. |
| |
| CertificateError is raised on failure. On success, the function |
| returns nothing. |
| """ |
| if not cert: |
| raise ValueError('empty or no certificate, match_hostname needs a ' |
| 'SSL socket or SSL context with either ' |
| 'CERT_OPTIONAL or CERT_REQUIRED') |
| dnsnames = [] |
| san = cert.get('subjectAltName', ()) |
| for key, value in san: |
| if key == 'DNS': |
| if _DNSNameMatch(value, hostname): |
| return |
| dnsnames.append(value) |
| if not dnsnames: |
| # The subject is only checked when there is no dNSName entry |
| # in subjectAltName |
| for sub in cert.get('subject', ()): |
| for key, value in sub: |
| # XXX according to RFC 2818, the most specific Common Name |
| # must be used. |
| if key == 'commonName': |
| if _DNSNameMatch(value, hostname): |
| return |
| dnsnames.append(value) |
| if len(dnsnames) > 1: |
| raise CertificateError('hostname %r doesn\'t match either of %s' |
| % (hostname, ', '.join(map(repr, dnsnames)))) |
| elif len(dnsnames) == 1: |
| raise CertificateError('hostname %r doesn\'t match %r' |
| % (hostname, dnsnames[0])) |
| else: |
| raise CertificateError('no appropriate commonName or ' |
| 'subjectAltName fields were found') |
| |
| |
| class HTTPSConnection(httplib.HTTPSConnection): |
| |
| def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs): |
| self.root_certs = root_certs |
| httplib.HTTPSConnection.__init__(self, host, **kwargs) |
| |
| def connect(self): |
| # Overrides for certificate verification. |
| args = [(self.host, self.port), self.timeout,] |
| if self.source_address: |
| args.append(self.source_address) |
| sock = socket.create_connection(*args) |
| |
| if self._tunnel_host: |
| self.sock = sock |
| self._tunnel() |
| |
| # Wrap the socket for verification with the root certs. |
| kwargs = {} |
| if self.root_certs is not None: |
| kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs) |
| self.sock = ssl.wrap_socket(sock, **kwargs) |
| |
| # Check hostname. |
| try: |
| _MatchHostname(self.sock.getpeercert(), self.host) |
| except CertificateError: |
| self.sock.shutdown(socket.SHUT_RDWR) |
| self.sock.close() |
| raise |
| |
| |
| class HTTPSHandler(urllib2.HTTPSHandler): |
| |
| def __init__(self, root_certs=_TRUSTED_ROOT_CERTS): |
| urllib2.HTTPSHandler.__init__(self) |
| self.root_certs = root_certs |
| |
| def https_open(self, req): |
| # Pass a reference to the function below so that verification against |
| # trusted root certs could be injected. |
| return self.do_open(self.GetConnection, req) |
| |
| def GetConnection(self, host, **kwargs): |
| params = dict(root_certs=self.root_certs) |
| params.update(kwargs) |
| return HTTPSConnection(host, **params) |
| |
| |
| def _SendRequest(url, timeout=None): |
| """Send request to the given https url, and return the server response. |
| |
| Args: |
| url: The https url to send request to. |
| |
| Returns: |
| An integer: http code of the response. |
| A string: content of the response. |
| |
| Raises: |
| CertificateError: Certificate verification fails. |
| """ |
| if not url: |
| return None, None |
| |
| handlers = [] |
| if url.startswith('https://'): |
| # HTTPSHandler has to go first, because we don't want to send secure cookies |
| # to a man in the middle. |
| handlers.append(HTTPSHandler()) |
| |
| |
| cookie_file = os.environ.get('COOKIE_FILE') |
| if cookie_file and os.path.exists(cookie_file): |
| handlers.append( |
| urllib2.HTTPCookieProcessor(cookielib.MozillaCookieJar(cookie_file))) |
| |
| url_opener = urllib2.build_opener(*handlers) |
| |
| status_code = None |
| content = None |
| |
| try: |
| response = url_opener.open(url, timeout=timeout) |
| |
| status_code = response.code |
| content = response.read() |
| except urllib2.HTTPError as e: |
| status_code = e.code |
| content = None |
| except (ssl.SSLError, httplib.BadStatusLine, IOError): |
| status_code = -1 |
| content = None |
| |
| return status_code, content |
| |
| |
| class HttpClientLocal(http_client.HttpClient): |
| """This http client is used locally in a workstation, GCE VMs, etc.""" |
| |
| @staticmethod |
| def Get(url, params={}, timeout=120, retries=5, retry_interval=0.5, |
| retry_if_not=None): |
| if params: |
| url = '%s?%s' % (url, urllib.urlencode(params)) |
| |
| count = 0 |
| while True: |
| count += 1 |
| |
| status_code, content = _SendRequest(url, timeout=timeout) |
| if status_code == 200: |
| return status_code, content |
| if retry_if_not and status_code == retry_if_not: |
| return status_code, content |
| |
| if count < retries: |
| time.sleep(retry_interval) |
| else: |
| return status_code, content |
| |
| # Should never be reached. |
| return status_code, content |