blob: 15f7983b7722afe2a43a781a2eb5b77d81826610 [file] [log] [blame]
#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Download a file from a URL to a file on disk.
This module supports username and password with basic authentication.
"""
import base64
import os
import os.path
import sys
import urllib2
import download_utils
def _CreateDirectory(path):
    """Make the directory `path`, including any missing parent directories.

    Any failure reported by os.makedirs (for example because the directory
    already exists) is swallowed rather than propagated.

    Args:
      path: Directory path to create.

    Returns:
      True if os.makedirs succeeded, False if it raised OSError.
    """
    try:
        os.makedirs(path)
    except OSError:  # os.error is an alias of OSError.
        created = False
    else:
        created = True
    return created
def HttpDownload(url, target, username=None, password=None, verbose=True,
                 logger=None):
    """Download a file from a remote server, retrying on failure.

    Installs a process-wide urllib2 opener that sends an 'Accept: */*'
    header and, when a username is given, HTTP basic-auth credentials. If
    the http_proxy environment variable is set, both http and https requests
    are routed through that proxy. The parent directory of `target` is
    created if possible, then up to 10 download attempts are made. When the
    response carries a Content-Length header, an attempt only counts as
    successful if the size of the written file matches it.

    Args:
      url: A URL to download from.
      target: Filename to write download to.
      username: Optional username for download.
      password: Optional password for download (ignored if no username).
      verbose: Passed through to download_utils.WriteDataFromStream.
      logger: Function to log events to; called with a single string.
          Defaults to sys.stdout.write.

    Raises:
      urllib2.HTTPError: Immediately on a 404 response, or after the last
          attempt if the most recent failure was another HTTP error.
      urllib2.URLError: After the last attempt if the most recent failure
          was a URL error or a Content-Length mismatch.
      socket.error: After the last attempt if the most recent failure was a
          socket-level error (including socket.timeout).
    """
    # Local import so this block does not depend on a new file-level import.
    import socket

    # Log to stdout by default.
    if logger is None:
        logger = sys.stdout.write

    headers = [('Accept', '*/*')]
    if username:
        credentials = username
        if password:
            credentials = username + ':' + password
        headers.append(
            ('Authorization', 'Basic ' + base64.b64encode(credentials)))

    proxy = os.environ.get('http_proxy')
    if proxy:
        proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
        opener = urllib2.build_opener(proxy_handler)
    else:
        opener = urllib2.build_opener()
    opener.addheaders = headers
    # Installed globally so the urllib2.urlopen call below goes through it.
    urllib2.install_opener(opener)

    # Best effort: the return value is deliberately ignored (the directory
    # may already exist, or target may have no directory component).
    _CreateDirectory(os.path.split(target)[0])

    # The most recent failure, kept so that giving up raises a meaningful
    # exception instead of relying on a bare `raise` outside an except
    # clause, which fails when the last attempt ended in a size mismatch.
    last_error = None

    # Retry up to 10 times (appengine logger is flaky).
    for attempt in range(10):
        if attempt:
            logger('Download failed on %s, retrying... (%d)\n' %
                   (url, attempt))
        try:
            # 30 second timeout to ensure we fail and retry on stalled
            # connections.
            src = urllib2.urlopen(url, timeout=30)
            try:
                download_utils.WriteDataFromStream(
                    target, src, chunk_size=2**20, verbose=verbose)
                content_len = src.headers.get('Content-Length')
                if content_len:
                    content_len = int(content_len)
                    file_size = os.path.getsize(target)
                    if content_len != file_size:
                        mismatch = (
                            'Filesize:%d does not match Content-Length:%d' %
                            (file_size, content_len))
                        logger(mismatch + '\n')
                        last_error = urllib2.URLError(mismatch)
                        continue
            finally:
                src.close()
            # Download completed and, if checkable, the size matched.
            return
        except urllib2.HTTPError as e:
            # Must precede URLError: HTTPError is a subclass of it.
            if e.code == 404:
                # A missing resource will not appear on retry.
                logger('Resource does not exist.\n')
                raise
            logger('Failed to open.\n')
            last_error = e
        except (urllib2.URLError, socket.error) as e:
            # Errors raised while reading the body (e.g. socket.timeout on a
            # stalled connection) are socket errors, not URLError, so both
            # are caught to keep the retry-on-stall behaviour intended above.
            logger('Failed mid stream.\n')
            last_error = e

    logger('Download failed on %s, giving up.\n' % url)
    raise last_error