blob: 19f3e33df5cf02fe6b57c0e1a3271717511267be [file] [log] [blame]
#!/usr/bin/env python
# Copyright (c) 2014 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Scan online binary packages to produce new package index.
This script is intended to be run periodically and the
results checked into source control. This script depends on
gsutil being installed.
"""
import collections
import hashlib
import optparse
import os
import subprocess
import sys
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(os.path.dirname(SCRIPT_DIR), 'lib'))
import naclports
import naclports.package
import naclports.package_index
def Log(msg):
  """Write a single status line to stderr."""
  sys.stderr.write('%s\n' % msg)
def FormatSize(num_bytes):
  """Create a human readable string from a byte count.

  Args:
    num_bytes: Size in bytes (int or float).

  Returns:
    Human readable string such as '3.5 MB'.
  """
  size = float(num_bytes)
  for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']:
    if size < 1024.0:
      return "%3.1f %s" % (size, unit)
    size /= 1024.0
  # Sizes of a petabyte or more previously fell off the end of the loop
  # and returned None, which would break callers that interpolate the
  # result into a log message.
  return "%3.1f PB" % size
# Record describing one remote file from the Google Storage listing:
# name (path with the 'gs://' prefix stripped), size in bytes, full
# download url, and the etag (checksum) reported by gsutil.
FileInfo = collections.namedtuple('FileInfo', ['name', 'size', 'url', 'etag'])
def ParseGsUtilLs(output):
  """Parse the output of gsutil -le.

  gsutil -le outputs one file per line with the following format:
  <size_in_bytes> 2014-07-01T00:21:05Z gs://bucket/file.txt etag=<sha1>

  Returns:
    List of FileInfo objects.
  """
  infos = []
  for line in output.splitlines():
    # The listing ends with a TOTAL summary line; it is not a file.
    if line.startswith("TOTAL"):
      continue
    size, _date, gs_name, etag_field = line.split()
    etag = etag_field.split('=', 1)[1]
    name = gs_name[len('gs://'):]
    if not name:
      continue
    infos.append(FileInfo(name, int(size), naclports.GS_URL + name, etag))
  return infos
def CheckHash(filename, md5sum):
  """Return True if filename has the given md5sum, False otherwise.

  Args:
    filename: Path of the file to check.
    md5sum: Expected md5 hex digest.

  Returns:
    True when the file's md5 digest matches md5sum.
  """
  # Open in binary mode: the files being checked are binary packages,
  # and hashing text-mode contents would give wrong results on Windows
  # (newline translation) and fail outright on Python 3.
  with open(filename, 'rb') as f:
    file_md5sum = hashlib.md5(f.read()).hexdigest()
  return md5sum == file_md5sum
def DownloadFiles(files, check_hashes=True):
  """Download one or more files to the local disk.

  Args:
    files: List of FileInfo objects to download.
    check_hashes: When False assume local files have the correct
      hash; otherwise always check that the hashes match the ones in
      the FileInfo objects.

  Returns:
    List of (filename, url) tuples.
  """
  download_dir = os.path.join(naclports.package.PACKAGES_ROOT, 'prebuilt')
  if not os.path.exists(download_dir):
    os.makedirs(download_dir)

  filenames = []
  pending = []
  for info in files:
    local_name = os.path.join(download_dir, os.path.basename(info.url))
    filenames.append((local_name, info.url))
    if os.path.exists(local_name):
      # Skip files already present with the expected hash (or any
      # existing file when hash checking is disabled).
      if not check_hashes or CheckHash(local_name, info.etag):
        Log('Up-to-date: %s' % info.name)
        continue
    pending.append(FileInfo(local_name, info.size, info.url, info.etag))

  if not pending:
    Log('All files up-to-date')
    return filenames

  total_size = sum(info.size for info in pending)
  Log('Need to download %d/%d files [%s]' % (len(pending), len(files),
                                             FormatSize(total_size)))
  for info in pending:
    naclports.DownloadFile(info.name, info.url)
    if check_hashes and not CheckHash(info.name, info.etag):
      raise naclports.Error('Checksum failed: %s' % info.name)
  return filenames
def main(args):
  """Scan the online package archive and regenerate the package index.

  Args:
    args: Command line arguments (excluding the program name).

  Returns:
    0 on success, 1 on gsutil failure.
  """
  # Fixed '%proc' -> '%prog': optparse only substitutes the literal
  # token '%prog' with the program name.
  usage = 'Usage: %prog [options] <revision>'
  parser = optparse.OptionParser(description=__doc__, usage=usage)
  parser.add_option('-v', '--verbose', action='store_true',
                    help='Output extra information.')
  parser.add_option('-l', '--cache-listing', action='store_true',
                    help='Cached output of gsutil -le (for testing).')
  parser.add_option('--skip-md5', action='store_true',
                    help='Assume on-disk files are up-to-date (for testing).')
  options, args = parser.parse_args(args)
  if options.verbose:
    naclports.verbose = True
  if len(args) != 1:
    parser.error('Expected exactly one argument. See --help.')

  ports_revision = args[0]
  sdk_version = naclports.GetSDKVersion()
  Log('Scanning packages built for pepper_%s at revision %s' %
      (sdk_version, ports_revision))
  base_path = '%s/builds/pepper_%s/%s/packages' % (naclports.GS_BUCKET,
                                                   sdk_version,
                                                   ports_revision)
  gs_url = 'gs://' + base_path

  listing_file = os.path.join(naclports.NACLPORTS_ROOT, 'lib', 'listing.txt')
  if options.cache_listing and os.path.exists(listing_file):
    Log('Using pre-cached gs listing: %s' % listing_file)
    with open(listing_file) as f:
      listing = f.read()
  else:
    try:
      listing = subprocess.check_output(['gsutil', 'ls', '-le', gs_url])
    except subprocess.CalledProcessError as e:
      # The original code constructed naclports.Error(e) without raising
      # or logging it, so the gsutil failure was silently discarded.
      # Report it before returning the error status.
      Log('gsutil failed: %s' % e)
      return 1

  all_files = ParseGsUtilLs(listing)
  if options.cache_listing and not os.path.exists(listing_file):
    with open(listing_file, 'w') as f:
      f.write(listing)

  Log('Found %d packages [%s]' % (len(all_files),
                                  FormatSize(sum(f.size for f in all_files))))
  binaries = DownloadFiles(all_files, not options.skip_md5)
  index_file = os.path.join(naclports.NACLPORTS_ROOT, 'lib', 'prebuilt.txt')
  Log('Generating %s' % index_file)
  naclports.package_index.WriteIndex(index_file, binaries)
  Log('Done')
  return 0
# Script entry point: pass command-line args (minus the program name)
# to main() and propagate its return code as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))