| #!/usr/bin/python |
| # Copyright (c) 2008-2010 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Generate index.html files for a Google Storage for Developers directory. |
| |
| Google Storage for Developers provides only a raw set of objects. |
| For some buckets we would like to be able to support browsing of the directory |
| tree. This utility will generate the needed index and upload/update it. |
| """ |
| |
| import optparse |
| import posixpath |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import threading |
| |
| |
# Basename of the index object; it is joined onto each directory path when
# uploading and filtered out of object listings.
GENERATED_INDEX = '_index.html'
# Number of worker threads created by GenerateIndexes.
NUM_THREADS = 100
| |
| |
def PathToLink(path):
  """Turn a gs:// path into the matching browsable https URL.

  Arguments:
    path: a path string using the gs:// scheme
  Returns:
    The path with each 'gs://' occurrence swapped for the storage URL prefix.
  """
  pieces = path.split('gs://')
  return 'https://sandbox.google.com/storage/'.join(pieces)
| |
| |
def FixupSize(sz):
  """Render a byte count as a short human readable string.

  Units are decimal (powers of 1000) and scaled values keep one decimal
  digit, truncated rather than rounded.

  Arguments:
    sz: a size in bytes, as a string (anything int() accepts)
  Returns:
    The plain number below 1000, otherwise a value suffixed with K, M or G.
  """
  byte_count = int(sz)
  if byte_count < 1000:
    return str(byte_count)
  # (exclusive upper bound, size of a tenth of the unit, suffix)
  scales = (
      (1000000, 100, 'K'),
      (1000000000, 100000, 'M'),
  )
  for upper_bound, tenth_of_unit, suffix in scales:
    if byte_count < upper_bound:
      return str(byte_count // tenth_of_unit / 10.0) + suffix
  return str(byte_count // 100000000 / 10.0) + 'G'
| |
| |
def GetPathInfo(path, options):
  """Collect size, date, md5 for a given gsd path.

  Runs 'gsutil ls -L' on the path and extracts fields from its output.

  Arguments:
    path: gs:// path of the object to inspect
    options: parsed command line options; options.gsutil is the gsutil
        executable to run
  Returns:
    A dict with keys 'size' (formatted by FixupSize), 'md5' and 'date'.
  Raises:
    subprocess.CalledProcessError: gsutil exited with a non-zero status.
    ValueError: an expected field was not found in the gsutil output.
  """
  # Check current state.
  cmd = [options.gsutil, 'ls', '-L', path]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  p_stdout, _ = p.communicate()
  # Raise explicitly: an assert would be stripped when running with -O.
  if p.returncode != 0:
    raise subprocess.CalledProcessError(p.returncode, cmd)

  def Extract(pattern):
    """Return group 1 of pattern in the listing, or fail with context."""
    match = re.search(pattern, p_stdout)
    if match is None:
      raise ValueError('pattern %r not found in gsutil output for %s' %
                       (pattern, path))
    return match.group(1)

  # Extract interesting fields.
  fields = {}
  fields['size'] = FixupSize(Extract('\tObject size:\t([0-9]+)\n'))
  fields['md5'] = Extract('\tMD5:\t([^\n]+)\n')
  fields['date'] = Extract('\tLast mod:\t([^\n]+)\n')
  return fields
| |
| |
def _BuildIndexHtml(path, children, directories, options):
  """Render the HTML index page for path as a single string."""
  blank_cells = ['<td> </td>'] * 3
  # NOTE(review): path and object names are interpolated without HTML
  # escaping.
  parts = [
      '<html>',
      '<head>',
      '<title>Index of %s</title>' % path,
      '</head>',
      '<body>',
      '<h1>Index of %s</h1>' % path,
      '<table>',
      '<tr>',
      '<th align="left">Name</th>',
      '<th align="left">Last modified</th>',
      '<th align="left">Size</th>',
      '<th align="left">MD5</th>',
      '</tr>',
      '<tr><th colspan="4"><hr></th></tr>',
  ]
  # dirname of a bucket root such as gs://bucket is 'gs:', so the root gets
  # no parent link.
  parent = posixpath.dirname(path)
  if parent != 'gs:':
    parts.append('<tr>')
    parts.append('<td><a href="%s">Parent Directory</a></td>' %
                 PathToLink(posixpath.join(parent, GENERATED_INDEX)))
    parts.extend(blank_cells)
    parts.append('</tr>')
  for child in children:
    parts.append('<tr>')
    if child in directories:
      # Sub-directories link to their own generated index.
      parts.append('<td><a href="%s">%s</a></td>' % (
          PathToLink(posixpath.join(child, GENERATED_INDEX)),
          posixpath.basename(child)))
      parts.extend(blank_cells)
    else:
      fields = GetPathInfo(child, options)
      parts.append('<td><a href="%s">%s</a></td>' % (
          PathToLink(child), posixpath.basename(child)))
      parts.append('<td>%s</td>' % fields['date'])
      parts.append('<td><b>%s</b></td>' % fields['size'])
      parts.append('<td>%s</td>' % fields['md5'])
    parts.append('</tr>')
  parts.extend([
      '<tr><th colspan="4"><hr></th></tr>',
      '</table>',
      '</body>',
      '</html>',
  ])
  return ''.join(parts)


def GenerateIndex(path, children, directories, options):
  """Generate index for a given path as needed.

  Builds the index page, compares it with the index currently stored and
  uploads it (optionally setting its acl) when it differs, cannot be read,
  or options.force is set.

  Arguments:
    path: gs:// path of the directory being indexed
    children: paths (objects and directories) directly inside path
    directories: collection of paths that are directories
    options: parsed command line options (gsutil, force, acl are read)
  Raises:
    subprocess.CalledProcessError: the gsutil upload or setacl command
        exited with a non-zero status.
  """
  index = _BuildIndexHtml(path, children, directories, options)
  index_path = posixpath.join(path, GENERATED_INDEX)
  # Check current state.
  cmd = [options.gsutil, 'cat', index_path]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  p_stdout, _ = p.communicate()
  # Done if it's already right (and the cat worked).
  if p.returncode == 0 and p_stdout == index and not options.force:
    print('%s -- skipping, up to date' % path)
    return
  # Write to a file; close it in all cases so the temp file is removed.
  f = tempfile.NamedTemporaryFile(suffix='.html')
  try:
    f.write(index)
    f.flush()
    # Upload index. check_call raises instead of relying on an assert,
    # which would be stripped when running with -O.
    subprocess.check_call([options.gsutil, 'cp', f.name, index_path])
  finally:
    f.close()
  # Optionally update acl.
  if options.acl:
    subprocess.check_call([options.gsutil, 'setacl', options.acl, index_path])
  print('%s -- updated index' % path)
| |
| |
def IndexWorker(index_list, mutex, directories, objects, options):
  """Worker loop: take directories off a shared list and index each one.

  Arguments:
    index_list: shared list of directory paths still to be indexed; entries
        are removed from the front while holding mutex
    mutex: lock guarding index_list
    directories: collection of paths that are directories
    objects: every known path (objects and directories)
    options: parsed command line options, passed on to GenerateIndex
  """
  while True:
    # Claim the next directory, or stop once the shared list is drained.
    with mutex:
      if len(index_list) == 0:
        return
      directory = index_list.pop(0)
    # Only paths whose immediate parent is this directory belong in its index.
    children = []
    for candidate in objects:
      if posixpath.dirname(candidate) == directory:
        children.append(candidate)
    GenerateIndex(directory, children, directories, options)
| |
| |
def GenerateIndexes(path, options):
  """Generate all relevant indexes for a given gsd path.

  Lists the objects under path, derives the set of directories from their
  names and runs NUM_THREADS IndexWorker threads over those directories.

  Arguments:
    path: gs:// base path to index
    options: parsed command line options; options.gsutil is the executable
        to run and options.path, when set, restricts the work to directories
        that are string prefixes of it
  Returns:
    0 once every worker thread has finished.
  Raises:
    subprocess.CalledProcessError: the gsutil listing exited non-zero.
  """
  # Get a list of objects under this prefix.
  cmd = [options.gsutil, 'ls', posixpath.join(path, '*')]
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
  p_stdout, _ = p.communicate()
  # Raise explicitly: an assert would be stripped when running with -O and
  # a failed listing would then be processed as if it were complete.
  if p.returncode != 0:
    raise subprocess.CalledProcessError(p.returncode, cmd)
  objects = str(p_stdout).splitlines()
  # Previously generated indexes are not listed as content.
  objects = [o for o in objects if posixpath.basename(o) != GENERATED_INDEX]
  # Find common prefixes: every ancestor of an object, up to and including
  # path itself, is a directory.
  directories = set()
  for o in objects:
    part = posixpath.dirname(o)
    while part.startswith(path):
      directories.add(part)
      part = posixpath.dirname(part)
  # Directories are also children of their own parent directory.
  objects += list(directories)
  # Generate index for each directory.
  index_list = [i for i in directories
                if not options.path or options.path.startswith(i)]
  # Spawn workers.
  mutex = threading.Lock()
  workers = [threading.Thread(target=IndexWorker,
                              args=(index_list, mutex,
                                    directories, objects, options))
             for _ in range(0, NUM_THREADS)]
  # Start threads.
  for w in workers:
    w.start()
  # Wait for them to finish.
  for w in workers:
    w.join()
  return 0
| |
| |
def main(argv):
  """Parse the command line and generate indexes for the given base path.

  Arguments:
    argv: full argument vector, program name first
  Returns:
    1 after printing help when the arguments are not exactly one gs:// path,
    otherwise the result of GenerateIndexes.
  """
  parser = optparse.OptionParser(usage='usage: %prog [options] gs://base-dir')
  parser.add_option(
      '-p', '--path',
      dest='path',
      help='only update indexes on a given path')
  parser.add_option(
      '-a',
      dest='acl',
      help='acl to set on indexes')
  parser.add_option(
      '-f', '--force',
      dest='force',
      action='store_true',
      default=False,
      help='upload all indexes even on match')
  parser.add_option(
      '', '--gsutil',
      dest='gsutil',
      default='gsutil',
      help='path to gsutil')
  options, args = parser.parse_args(argv)
  # args[0] is the program name, so a valid call has exactly two entries.
  if len(args) == 2 and args[1].startswith('gs://'):
    return GenerateIndexes(args[1], options)
  parser.print_help()
  return 1
| |
| |
# When run as a script, use main's return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv))