pynacl/download_utils.py - native_client/src/native_client - Git at Google

 #!/usr/bin/python
 # Copyright (c) 2012 The Native Client Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """A library to assist automatically downloading files.

 This library is used by scripts that download tarballs, zipfiles, etc. as part
 of the build process.
 """

 import hashlib
 import os.path
 import re
 import sys
 import urllib2

 import http_download

 SOURCE_STAMP = 'SOURCE_URL'
 HASH_STAMP = 'SOURCE_SHA1'

 class HashError(Exception):
   """Raised when a downloaded file's SHA1 differs from the expected one.

   Attributes:
     download_url: URL the data was fetched from.
     expected_hash: hash the caller required.
     actual_hash: hash computed from the downloaded data.
   """

   def __init__(self, download_url, expected_hash, actual_hash):
     # Forward the fields to Exception so that `args` is populated.  Without
     # this the exception cannot be unpickled (the constructor would be called
     # with no arguments) and repr() shows no detail.
     super(HashError, self).__init__(download_url, expected_hash, actual_hash)
     self.download_url = download_url
     self.expected_hash = expected_hash
     self.actual_hash = actual_hash

   def __str__(self):
     return 'Got hash "%s" but expected hash "%s" for "%s"' % (
         self.actual_hash, self.expected_hash, self.download_url)

 def EnsureFileCanBeWritten(filename):
   """Create the parent directory of |filename| if it does not exist yet.

   Arguments:
     filename: path of the file that is about to be written.
   """
   directory = os.path.dirname(filename)
   # A bare filename has an empty dirname (i.e. the current directory);
   # os.makedirs('') would raise, and there is nothing to create anyway.
   if not directory or os.path.exists(directory):
     return
   try:
     os.makedirs(directory)
   except OSError:
     # Something else may have created the directory between the check above
     # and the makedirs call; only propagate the error if it is still missing.
     if not os.path.isdir(directory):
       raise


 def WriteData(filename, data):
   """Write |data| to |filename| in binary mode.

   The parent directory is created first if needed.

   Arguments:
     filename: path of the file to (over)write.
     data: the bytes to store in the file.
   """
   EnsureFileCanBeWritten(filename)
   # The with-statement closes the file even when write() raises.
   with open(filename, 'wb') as f:
     f.write(data)


 def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
   """Copy everything readable from |stream| into |filename|.

   Arguments:
     filename: path of the file to (over)write, opened in binary mode.
     stream: object with a read(size) method; copying stops at the first
         empty read.
     chunk_size: number of bytes requested from the stream per read.
     verbose: if true, print a '.' to stdout per chunk and a final newline.
   """
   EnsureFileCanBeWritten(filename)
   out_file = open(filename, 'wb')
   try:
     chunk = stream.read(chunk_size)
     while len(chunk) != 0:
       out_file.write(chunk)
       if verbose:
         # Progress marker: one dot per chunk written.
         sys.stdout.write('.')
         sys.stdout.flush()
       chunk = stream.read(chunk_size)
   finally:
     # Terminate the progress line before releasing the file, even on error.
     if verbose:
       sys.stdout.write('\n')
     out_file.close()


 def DoesStampMatch(stampfile, expected, index):
   """Compare one line of a stamp file against an expected value.

   Arguments:
     stampfile: path of the stamp file to read.
     expected: the string the selected line must equal.
     index: index of the line (split on newlines) to compare.
   Returns:
     'already up-to-date.' if the selected line equals |expected|,
     'manual override.' if it does not but the stamp starts with 'manual',
     False otherwise (including when the stamp file cannot be read).
   """
   try:
     # The with-statement closes the file even if read() fails.
     with open(stampfile, 'r') as f:
       stamp = f.read()
   except IOError:
     return False

   lines = stamp.split('\n')
   # An index outside the stamp's line range cannot match; treat it as a
   # mismatch instead of raising IndexError so that the manual-override check
   # below still runs.
   if -len(lines) <= index < len(lines) and lines[index] == expected:
     return 'already up-to-date.'
   if stamp.startswith('manual'):
     return 'manual override.'
   return False


 def WriteStamp(stampfile, data):
   """Write |data| to the stamp file |stampfile| in text mode.

   The parent directory is created first if needed.

   Arguments:
     stampfile: path of the stamp file to (over)write.
     data: the string to store in the stamp.
   """
   EnsureFileCanBeWritten(stampfile)
   # The with-statement closes the file even when write() raises.
   with open(stampfile, 'w') as f:
     f.write(data)


 def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
   """Check whether a stamp file matches and is recent enough.

   Arguments:
     path: directory containing the stamp file.
     stamp_name: file name of the stamp inside |path|.
     stamp_contents: value the stamp line must equal.
     min_time: if set, a stamp whose mtime is not newer than this is stale.
     index: index of the stamp line to compare.
   Returns:
     'manual override.' for a manually created stamp (regardless of age),
     False if the stamp is stale or cannot be stat'ed,
     otherwise whatever DoesStampMatch reported.
   """
   stamp_path = os.path.join(path, stamp_name)
   result = DoesStampMatch(stamp_path, stamp_contents, index)

   # A manually downloaded/created toolchain is left untouched, and without a
   # minimum time there is no age check to perform.
   if result == 'manual override.' or not min_time:
     return result

   # Reject stamps that are not newer than the minimum last-modified time.
   try:
     modified = os.stat(stamp_path).st_mtime
   except OSError:
     return False
   if modified <= min_time:
     return False
   return result


 def WriteSourceStamp(path, url):
   """Record |url| in the SOURCE_STAMP file inside directory |path|."""
   WriteStamp(os.path.join(path, SOURCE_STAMP), url)


 def WriteHashStamp(path, hash_val):
   """Record |hash_val| in the HASH_STAMP file inside directory |path|."""
   WriteStamp(os.path.join(path, HASH_STAMP), hash_val)


 def _HashFileHandle(fh):
   """sha1 of a file like object.

   Arguments:
     fh: file handle like object to hash.
   Returns:
     sha1 as a string.
   """
   hasher = hashlib.sha1()
   try:
     while True:
       data = fh.read(4096)
       if not data:
         break
       hasher.update(data)
   finally:
     fh.close()
   return hasher.hexdigest()


 def HashFile(filename):
   """Compute the SHA1 of a file on disk.

   Arguments:
     filename: path of the file to hash; it is read in binary mode.
   Returns:
     The SHA1 digest as a hexadecimal string.
   """
   # _HashFileHandle takes ownership of the handle and closes it.
   return _HashFileHandle(open(filename, 'rb'))


 def HashUrlByDownloading(url):
   """Compute the SHA1 of the data served at a URL by downloading it.

   Arguments:
     url: URL to download from.
   Returns:
     The SHA1 digest of the data at the URL, as a hexadecimal string.
   Raises:
     Whatever urllib2.urlopen raises; a note naming the URL is written to
     stderr first.
   """
   try:
     fh = urllib2.urlopen(url)
   except Exception:
     # Only genuine errors are reported as fetch failures; KeyboardInterrupt
     # and SystemExit propagate without the misleading message.
     sys.stderr.write('Failed fetching URL: %s\n' % url)
     raise
   return _HashFileHandle(fh)


 # Attempts to get the SHA1 hash of a file given a URL by looking for
 # an adjacent file with a ".sha1hash" suffix.  This saves having to
 # download a large tarball just to get its hash.  Otherwise, we fall
 # back to downloading the main file.
 def HashUrl(url):
   """Return the SHA1 of the data at |url|.

   Arguments:
     url: URL of the file whose hash is wanted.
   Returns:
     The SHA1 as a 40-character hexadecimal string, taken from the adjacent
     '<url>.sha1hash' file, or computed by downloading |url| if that file
     yields HTTP 404.
   Raises:
     urllib2.HTTPError: for HTTP errors other than 404 on the hash file.
     AssertionError: if the hash file does not contain a SHA1 hex digest.
   """
   hash_url = '%s.sha1hash' % url
   try:
     fh = urllib2.urlopen(hash_url)
     try:
       # A valid hash file is 40 hex digits plus an optional newline, so 100
       # bytes is more than enough.
       data = fh.read(100)
     finally:
       # Release the connection even if the read fails.
       fh.close()
   except urllib2.HTTPError as exn:
     if exn.code == 404:
       return HashUrlByDownloading(url)
     raise
   else:
     if not re.match('[0-9a-f]{40}\n?$', data):
       raise AssertionError('Bad SHA1 hash file: %r' % data)
     return data.strip()


 def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
             hash_val=None, keep=False, verbose=False, stamp_index=0):
   """Synchronize a destination file with a URL.

   If neither the URL stamp nor the hash stamp in |stamp_dir| is current, the
   file is (re-)downloaded, unless a file already at |filename| has the
   expected hash.

   Arguments:
     url: the URL to compare against the stamp and to download from.
     filename: the file to create on download (required despite the default).
     stamp_dir: directory holding the SOURCE_URL / SOURCE_SHA1 stamp files;
         if None, no stamp check is done.
     min_time: if set, stamps whose mtime is not newer than this are stale.
     hash_val: if set, the expected SHA1 which the file must match.
     keep: if True, the stamps only count when |filename| already exists.
     verbose: prints out status as it runs.
     stamp_index: line index within the hash stamp file to check.
   Returns:
     False if the stamps show that nothing needs to be done.
     True if the file was downloaded, or if an existing file already matches
     |hash_val|.
   Raises:
     HashError: if the downloaded file's hash does not match |hash_val|.
   """

   assert url and filename

   # If we are not keeping the tarball, or we already have it, we can
   # skip downloading it for this reason. If we are keeping it,
   # it must exist.
   if keep:
     tarball_ok = os.path.isfile(filename)
   else:
     tarball_ok = True

   # If we don't need the tarball and the stamp_file matches the url, then
   # we must be up to date.  If the URL differs but the recorded hash matches
   # the one we'll insist the tarball has, then that's good enough too.
   # TODO(mcgrathr): Download the .sha1sum file first to compare with
   # the cached hash, in case --file-hash options weren't used.
   #
   # Note: print is called with a single parenthesized argument so that the
   # output is identical whether print is a statement or a function.
   if tarball_ok and stamp_dir is not None:
     if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
       if verbose:
         print('%s is already up to date.' % filename)
       return False
     if (hash_val is not None and
         StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
       if verbose:
         print('%s is identical to the up to date file.' % filename)
       return False

   # A file that is already on disk with the right hash needs no download.
   if (os.path.isfile(filename)
       and hash_val is not None
       and hash_val == HashFile(filename)):
     return True

   if verbose:
     print('Updating %s\n\tfrom %s.' % (filename, url))
   EnsureFileCanBeWritten(filename)
   http_download.HttpDownload(url, filename)

   # Verify the freshly downloaded file against the expected hash.
   if hash_val:
     tar_hash = HashFile(filename)
     if hash_val != tar_hash:
       raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                       download_url=url)

   return True
	#!/usr/bin/python
	# Copyright (c) 2012 The Native Client Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""A library to assist automatically downloading files.

	This library is used by scripts that download tarballs, zipfiles, etc. as part
	of the build process.
	"""

	import hashlib
	import os.path
	import re
	import sys
	import urllib2

	import http_download

	SOURCE_STAMP = 'SOURCE_URL'
	HASH_STAMP = 'SOURCE_SHA1'

	class HashError(Exception):
	  """Raised when a downloaded file's SHA1 differs from the expected one.

	  Attributes:
	    download_url: URL the data was fetched from.
	    expected_hash: hash the caller required.
	    actual_hash: hash computed from the downloaded data.
	  """

	  def __init__(self, download_url, expected_hash, actual_hash):
	    # Forward the fields to Exception so that `args` is populated.  Without
	    # this the exception cannot be unpickled (the constructor would be called
	    # with no arguments) and repr() shows no detail.
	    super(HashError, self).__init__(download_url, expected_hash, actual_hash)
	    self.download_url = download_url
	    self.expected_hash = expected_hash
	    self.actual_hash = actual_hash

	  def __str__(self):
	    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
	        self.actual_hash, self.expected_hash, self.download_url)

	def EnsureFileCanBeWritten(filename):
	  """Create the parent directory of |filename| if it does not exist yet.

	  Arguments:
	    filename: path of the file that is about to be written.
	  """
	  directory = os.path.dirname(filename)
	  # A bare filename has an empty dirname (i.e. the current directory);
	  # os.makedirs('') would raise, and there is nothing to create anyway.
	  if not directory or os.path.exists(directory):
	    return
	  try:
	    os.makedirs(directory)
	  except OSError:
	    # Something else may have created the directory between the check above
	    # and the makedirs call; only propagate the error if it is still missing.
	    if not os.path.isdir(directory):
	      raise


	def WriteData(filename, data):
	  """Write |data| to |filename| in binary mode.

	  The parent directory is created first if needed.

	  Arguments:
	    filename: path of the file to (over)write.
	    data: the bytes to store in the file.
	  """
	  EnsureFileCanBeWritten(filename)
	  # The with-statement closes the file even when write() raises.
	  with open(filename, 'wb') as f:
	    f.write(data)


	def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
	  """Copy everything readable from |stream| into |filename|.

	  Arguments:
	    filename: path of the file to (over)write, opened in binary mode.
	    stream: object with a read(size) method; copying stops at the first
	        empty read.
	    chunk_size: number of bytes requested from the stream per read.
	    verbose: if true, print a '.' to stdout per chunk and a final newline.
	  """
	  EnsureFileCanBeWritten(filename)
	  dst = open(filename, 'wb')
	  try:
	    while True:
	      data = stream.read(chunk_size)
	      if len(data) == 0:
	        break
	      dst.write(data)
	      if verbose:
	        # Indicate that we're still writing.
	        sys.stdout.write('.')
	        sys.stdout.flush()
	  finally:
	    # Terminate the progress line before releasing the file, even on error.
	    if verbose:
	      sys.stdout.write('\n')
	    dst.close()


	def DoesStampMatch(stampfile, expected, index):
	  """Compare one line of a stamp file against an expected value.

	  Arguments:
	    stampfile: path of the stamp file to read.
	    expected: the string the selected line must equal.
	    index: index of the line (split on newlines) to compare.
	  Returns:
	    'already up-to-date.' if the selected line equals |expected|,
	    'manual override.' if it does not but the stamp starts with 'manual',
	    False otherwise (including when the stamp file cannot be read).
	  """
	  try:
	    # The with-statement closes the file even if read() fails.
	    with open(stampfile, 'r') as f:
	      stamp = f.read()
	  except IOError:
	    return False

	  lines = stamp.split('\n')
	  # An index outside the stamp's line range cannot match; treat it as a
	  # mismatch instead of raising IndexError so that the manual-override check
	  # below still runs.
	  if -len(lines) <= index < len(lines) and lines[index] == expected:
	    return 'already up-to-date.'
	  if stamp.startswith('manual'):
	    return 'manual override.'
	  return False


	def WriteStamp(stampfile, data):
	  """Write |data| to the stamp file |stampfile| in text mode.

	  The parent directory is created first if needed.

	  Arguments:
	    stampfile: path of the stamp file to (over)write.
	    data: the string to store in the stamp.
	  """
	  EnsureFileCanBeWritten(stampfile)
	  # The with-statement closes the file even when write() raises.
	  with open(stampfile, 'w') as f:
	    f.write(data)


	def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
	  """Check whether a stamp file matches and is recent enough.

	  Arguments:
	    path: directory containing the stamp file.
	    stamp_name: file name of the stamp inside |path|.
	    stamp_contents: value the stamp line must equal.
	    min_time: if set, a stamp whose mtime is not newer than this is stale.
	    index: index of the stamp line to compare.
	  Returns:
	    'manual override.' for a manually created stamp (regardless of age),
	    False if the stamp is stale or cannot be stat'ed,
	    otherwise whatever DoesStampMatch reported.
	  """
	  stampfile = os.path.join(path, stamp_name)

	  stampmatch = DoesStampMatch(stampfile, stamp_contents, index)

	  # If toolchain was downloaded and/or created manually then keep it untouched
	  if stampmatch == 'manual override.':
	    return stampmatch

	  # Check if the stampfile is older than the minimum last mod time
	  if min_time:
	    try:
	      stamp_time = os.stat(stampfile).st_mtime
	      if stamp_time <= min_time:
	        return False
	    except OSError:
	      return False

	  return stampmatch


	def WriteSourceStamp(path, url):
	  """Record |url| in the SOURCE_STAMP file inside directory |path|."""
	  stampfile = os.path.join(path, SOURCE_STAMP)
	  WriteStamp(stampfile, url)


	def WriteHashStamp(path, hash_val):
	  """Record |hash_val| in the HASH_STAMP file inside directory |path|."""
	  hash_stampfile = os.path.join(path, HASH_STAMP)
	  WriteStamp(hash_stampfile, hash_val)


	def _HashFileHandle(fh):
	"""sha1 of a file like object.

	Arguments:
	fh: file handle like object to hash.
	Returns:
	sha1 as a string.
	"""
	hasher = hashlib.sha1()
	try:
	while True:
	data = fh.read(4096)
	if not data:
	break
	hasher.update(data)
	finally:
	fh.close()
	return hasher.hexdigest()


	def HashFile(filename):
	  """Compute the SHA1 of a file on disk.

	  Arguments:
	    filename: path of the file to hash; it is read in binary mode.
	  Returns:
	    The SHA1 digest as a hexadecimal string.
	  """
	  # _HashFileHandle takes ownership of the handle and closes it.
	  fh = open(filename, 'rb')
	  return _HashFileHandle(fh)


	def HashUrlByDownloading(url):
	  """Compute the SHA1 of the data served at a URL by downloading it.

	  Arguments:
	    url: URL to download from.
	  Returns:
	    The SHA1 digest of the data at the URL, as a hexadecimal string.
	  Raises:
	    Whatever urllib2.urlopen raises; a note naming the URL is written to
	    stderr first.
	  """
	  try:
	    fh = urllib2.urlopen(url)
	  except Exception:
	    # Only genuine errors are reported as fetch failures; KeyboardInterrupt
	    # and SystemExit propagate without the misleading message.
	    sys.stderr.write('Failed fetching URL: %s\n' % url)
	    raise
	  return _HashFileHandle(fh)


	# Attempts to get the SHA1 hash of a file given a URL by looking for
	# an adjacent file with a ".sha1hash" suffix. This saves having to
	# download a large tarball just to get its hash. Otherwise, we fall
	# back to downloading the main file.
	def HashUrl(url):
	  """Return the SHA1 of the data at |url|.

	  Arguments:
	    url: URL of the file whose hash is wanted.
	  Returns:
	    The SHA1 as a 40-character hexadecimal string, taken from the adjacent
	    '<url>.sha1hash' file, or computed by downloading |url| if that file
	    yields HTTP 404.
	  Raises:
	    urllib2.HTTPError: for HTTP errors other than 404 on the hash file.
	    AssertionError: if the hash file does not contain a SHA1 hex digest.
	  """
	  hash_url = '%s.sha1hash' % url
	  try:
	    fh = urllib2.urlopen(hash_url)
	    try:
	      # A valid hash file is 40 hex digits plus an optional newline, so 100
	      # bytes is more than enough.
	      data = fh.read(100)
	    finally:
	      # Release the connection even if the read fails.
	      fh.close()
	  except urllib2.HTTPError as exn:
	    if exn.code == 404:
	      return HashUrlByDownloading(url)
	    raise
	  else:
	    if not re.match('[0-9a-f]{40}\n?$', data):
	      raise AssertionError('Bad SHA1 hash file: %r' % data)
	    return data.strip()


	def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
	            hash_val=None, keep=False, verbose=False, stamp_index=0):
	  """Synchronize a destination file with a URL.

	  If neither the URL stamp nor the hash stamp in |stamp_dir| is current, the
	  file is (re-)downloaded, unless a file already at |filename| has the
	  expected hash.

	  Arguments:
	    url: the URL to compare against the stamp and to download from.
	    filename: the file to create on download (required despite the default).
	    stamp_dir: directory holding the SOURCE_URL / SOURCE_SHA1 stamp files;
	        if None, no stamp check is done.
	    min_time: if set, stamps whose mtime is not newer than this are stale.
	    hash_val: if set, the expected SHA1 which the file must match.
	    keep: if True, the stamps only count when |filename| already exists.
	    verbose: prints out status as it runs.
	    stamp_index: line index within the hash stamp file to check.
	  Returns:
	    False if the stamps show that nothing needs to be done.
	    True if the file was downloaded, or if an existing file already matches
	    |hash_val|.
	  Raises:
	    HashError: if the downloaded file's hash does not match |hash_val|.
	  """

	  assert url and filename

	  # If we are not keeping the tarball, or we already have it, we can
	  # skip downloading it for this reason. If we are keeping it,
	  # it must exist.
	  if keep:
	    tarball_ok = os.path.isfile(filename)
	  else:
	    tarball_ok = True

	  # If we don't need the tarball and the stamp_file matches the url, then
	  # we must be up to date. If the URL differs but the recorded hash matches
	  # the one we'll insist the tarball has, then that's good enough too.
	  # TODO(mcgrathr): Download the .sha1sum file first to compare with
	  # the cached hash, in case --file-hash options weren't used.
	  #
	  # Note: print is called with a single parenthesized argument so that the
	  # output is identical whether print is a statement or a function.
	  if tarball_ok and stamp_dir is not None:
	    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
	      if verbose:
	        print('%s is already up to date.' % filename)
	      return False
	    if (hash_val is not None and
	        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
	      if verbose:
	        print('%s is identical to the up to date file.' % filename)
	      return False

	  # A file that is already on disk with the right hash needs no download.
	  if (os.path.isfile(filename)
	      and hash_val is not None
	      and hash_val == HashFile(filename)):
	    return True

	  if verbose:
	    print('Updating %s\n\tfrom %s.' % (filename, url))
	  EnsureFileCanBeWritten(filename)
	  http_download.HttpDownload(url, filename)

	  # Verify the freshly downloaded file against the expected hash.
	  if hash_val:
	    tar_hash = HashFile(filename)
	    if hash_val != tar_hash:
	      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
	                      download_url=url)

	  return True