#!/usr/bin/env python
# Copyright (c) 2014 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Scan online binary packages to produce new package index.

This script is intended to be run periodically and the
results checked into source control.
"""
from __future__ import print_function
import argparse
import collections
import hashlib
import os
import subprocess
import sys
# Directory containing this script, as an absolute path.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Make the 'lib' directory that sits next to the script's parent directory
# importable; this has to happen before the naclports imports below.
_LIB_DIR = os.path.join(os.path.dirname(SCRIPT_DIR), 'lib')
sys.path.append(_LIB_DIR)
import naclports
import naclports.package
import naclports.package_index
from naclports.util import Log, LogVerbose
def FormatSize(num_bytes):
    """Create a human readable string from a byte count.

    The count is divided by 1024 until it drops below 1024 or the largest
    supported unit (TB) is reached, then rendered with one decimal place.

    Args:
        num_bytes: Size in bytes (int or float).

    Returns:
        A string such as '512.0 bytes' or '1.5 MB'. Sizes of 1024 TB or more
        are reported in TB instead of falling out of the loop and returning
        None.
    """
    units = ('bytes', 'KB', 'MB', 'GB', 'TB')
    value = float(num_bytes)
    for unit in units[:-1]:
        if value < 1024.0:
            return "%3.1f %s" % (value, unit)
        value /= 1024.0
    # No larger unit is available: report whatever remains in the last one.
    return "%3.1f %s" % (value, units[-1])
# Record describing one listed file: its name, size, url and etag.
FileInfo = collections.namedtuple('FileInfo', 'name size url etag')
def ParseGsUtilLs(output):
    """Parse the output of gsutil -le.

    gsutil -le outputs one file per line with the following format:

    <size_in_bytes> 2014-07-01T00:21:05Z gs://bucket/file.txt etag=<sha1>

    Lines beginning with "TOTAL" (the summary line) and blank lines are
    ignored.

    Args:
        output: The complete text printed by gsutil.

    Returns:
        List of FileInfo objects.

    Raises:
        ValueError: If a non-skipped line does not consist of exactly four
            whitespace-separated fields.
    """
    result = []
    for line in output.splitlines():
        # The summary line and empty lines carry no file entry; skipping them
        # keeps the four-field unpacking below from failing on them.
        if not line.strip() or line.startswith("TOTAL"):
            continue
        size, _, filename, etag = line.split()
        # The last field has the form "etag=<value>"; keep only the value.
        etag = etag.split('=', 1)[1]
        # Drop the "gs://" scheme so the remainder can be appended to GS_URL.
        filename = filename[len('gs://'):]
        url = naclports.GS_URL + filename
        if filename:
            result.append(FileInfo(filename, int(size), url, etag))
    return result
def CheckHash(filename, md5sum):
    """Return True if filename has the given md5sum, False otherwise.

    Args:
        filename: Path of the file to hash.
        md5sum: Expected MD5 digest as a hexadecimal string.

    Returns:
        True when the hex MD5 digest of the file contents equals md5sum.
    """
    digest = hashlib.md5()
    # Hash the raw bytes: text mode could translate newlines (or fail to
    # decode) and produce a digest that does not match the stored one.
    with open(filename, 'rb') as f:
        # Read in chunks so large archives are not loaded into memory at once.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return md5sum == digest.hexdigest()
def DownloadFiles(files, check_hashes=True):
    """Download one or more files to the local disk.

    Files are stored in naclports.package_index.PREBUILT_ROOT under the
    basename of their url. A file that already exists locally is skipped when
    check_hashes is False or when its hash matches the expected etag.

    Args:
        files: List of FileInfo objects to download.
        check_hashes: When False assume local files have the correct
            hash, otherwise always check that the hashes match the ones in
            the FileInfo objects.

    Returns:
        List of (filename, url) tuples, one per entry in files, where filename
        is the local path of the file.

    Raises:
        naclports.Error: If check_hashes is set and a freshly downloaded file
            does not match its expected hash.
    """
    files_to_download = []
    filenames = []
    download_dir = naclports.package_index.PREBUILT_ROOT
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    for file_info in files:
        basename = os.path.basename(file_info.url)
        fullname = os.path.join(download_dir, basename)
        filenames.append((fullname, file_info.url))
        if os.path.exists(fullname):
            if not check_hashes or CheckHash(fullname, file_info.etag):
                # NOTE(review): logged value assumed to be the entry's name
                # - confirm.
                Log('Up-to-date: %s' % file_info.name)
                continue
        # Re-pack the entry with the local path as its name so the download
        # loop below knows where to write the file.
        files_to_download.append(FileInfo(fullname, file_info.size,
                                          file_info.url, file_info.etag))

    if not files_to_download:
        Log('All files up-to-date')
        return filenames

    total_size = sum(f.size for f in files_to_download)
    Log('Need to download %d/%d files [%s]' % (len(files_to_download),
                                               len(files),
                                               FormatSize(total_size)))

    for file_info in files_to_download:
        # file_info.name is the local destination path here (see above).
        naclports.DownloadFile(file_info.name, file_info.url)
        if check_hashes and not CheckHash(file_info.name, file_info.etag):
            raise naclports.Error('Checksum failed: %s' % file_info.name)

    return filenames
def main(args):
    """Scan the bucket for prebuilt packages and regenerate the package index.

    Obtains a gsutil listing of the packages built for the current SDK
    version at the requested revision (or reuses a cached listing when
    --cache-listing is given and lib/listing.txt exists), downloads the listed
    files and writes lib/prebuilt.txt.

    Args:
        args: Command line arguments, without the program name.

    Returns:
        0 on success, 1 if the gsutil listing command fails.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('revision', metavar='REVISION',
                        help='naclports revision to to scan for.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Output extra information.')
    parser.add_argument('-l', '--cache-listing', action='store_true',
                        help='Cached output of gsutil -le (for testing).')
    parser.add_argument('--skip-md5', action='store_true',
                        help='Assume on-disk files are up-to-date '
                             '(for testing).')
    args = parser.parse_args(args)
    if args.verbose:
        # NOTE(review): assumed API for enabling LogVerbose output - confirm.
        naclports.SetVerbose(True)

    sdk_version = naclports.util.GetSDKVersion()
    Log('Scanning packages built for pepper_%s at revsion %s' %
        (sdk_version, args.revision))
    base_path = '%s/builds/pepper_%s/%s/packages' % (
        naclports.GS_BUCKET, sdk_version, args.revision)
    gs_url = 'gs://' + base_path
    # gsutil is launched through sys.executable below, so the Python entry
    # script is needed rather than a shell wrapper.
    # NOTE(review): script name assumed to be 'gsutil.py' - confirm.
    gsutil = naclports.util.FindInPath('gsutil.py')
    listing_file = os.path.join(naclports.NACLPORTS_ROOT, 'lib',
                                'listing.txt')

    if args.cache_listing and os.path.exists(listing_file):
        Log('Using pre-cached gs listing: %s' % listing_file)
        with open(listing_file) as f:
            listing = f.read()
    else:
        Log('Searching for packages at: %s' % gs_url)
        cmd = [sys.executable, gsutil, 'ls', '-le', gs_url]
        LogVerbose('Running: %s' % str(cmd))
        try:
            # universal_newlines makes the output text on Python 3 as well,
            # which is what ParseGsUtilLs and the cache file below expect.
            listing = subprocess.check_output(cmd, universal_newlines=True)
        except subprocess.CalledProcessError as e:
            # Report the failure instead of exiting silently.
            Log('gsutil failed: %s' % e)
            return 1

    all_files = ParseGsUtilLs(listing)
    # Save a fresh listing so that later --cache-listing runs can reuse it.
    if args.cache_listing and not os.path.exists(listing_file):
        with open(listing_file, 'w') as f:
            f.write(listing)

    Log('Found %d packages [%s]' % (len(all_files),
                                    FormatSize(sum(f.size
                                                   for f in all_files))))
    binaries = DownloadFiles(all_files, not args.skip_md5)
    index_file = os.path.join(naclports.NACLPORTS_ROOT, 'lib', 'prebuilt.txt')
    Log('Generating %s' % index_file)
    naclports.package_index.WriteIndex(index_file, binaries)
    return 0
if __name__ == '__main__':