Adding a script to generate _index.html files directly on Google Storage for Developers. While less ideal than using a live-generated index, this allows the sandbox.google.com/storage interface to be used to authenticate ACL-controlled access to particular paths/buckets. BUG=None TEST=None Review URL: http://codereview.chromium.org/2838033 git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/gsd_generate_index@51169 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/gsd_generate_index.py b/gsd_generate_index.py new file mode 100755 index 0000000..ff4bd89 --- /dev/null +++ b/gsd_generate_index.py
#!/usr/bin/python
# Copyright (c) 2008-2010 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generate index.html files for a Google Storage for Developers directory.

Google Storage for Developers provides only a raw set of objects.
For some buckets we would like to be able to support browsing of the directory
tree. This utility will generate the needed index and upload/update it.
"""

import optparse
import posixpath
import re
import subprocess
import sys
import tempfile


GENERATED_INDEX = '_index.html'


def PathToLink(path):
    """Rewrite a gs:// path as a sandbox.google.com/storage https URL."""
    return path.replace('gs://', 'https://sandbox.google.com/storage/')


def FixupSize(sz):
    """Convert a size string in bytes to human readable form.

    Units are powers of 1000 and the value is truncated (not rounded) to one
    decimal place.

    Arguments:
      sz: a size string in bytes
    Returns:
      A human readable size in bytes/K/M/G.
    """
    size = int(sz)
    if size < 1000:
        return str(size)
    # '//' keeps the integer truncation explicit regardless of how '/'
    # behaves for ints in the running interpreter.
    if size < 1000000:
        return str((size // 100) / 10.0) + 'K'
    if size < 1000000000:
        return str((size // 100000) / 10.0) + 'M'
    return str((size // 100000000) / 10.0) + 'G'


def _EscapeHtml(text):
    """Escape &, <, > and " so text can be embedded in HTML safely."""
    # '&' must be replaced first so the entities added below are not
    # themselves re-escaped.
    return (text.replace('&', '&amp;')
            .replace('<', '&lt;')
            .replace('>', '&gt;')
            .replace('"', '&quot;'))


def _CaptureOutput(cmd):
    """Run cmd and return everything it wrote to stdout, as text."""
    # universal_newlines=True makes communicate() return text rather than
    # bytes, so the regex and equality checks below compare like with like.
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
    p_stdout, _ = p.communicate()
    return p_stdout


def _RequireField(pattern, listing, path):
    """Return group 1 of pattern in listing, or raise a descriptive error.

    Raises:
      ValueError: if pattern does not occur in listing.
    """
    match = re.search(pattern, listing)
    if match is None:
        raise ValueError('unexpected "gsutil ls -l" output for %s: no match '
                         'for %r' % (path, pattern))
    return match.group(1)


def GetPathInfo(path, options):
    """Collect size, date, md5 for a given gsd path.

    Arguments:
      path: gs:// path of a single object.
      options: parsed options; options.gsutil is the gsutil executable to run.
    Returns:
      A dict with the keys 'size' (human readable), 'md5' and 'date'.
    Raises:
      ValueError: if the 'gsutil ls -l' output lacks one of the fields.
    """
    # Check current state.
    listing = _CaptureOutput([options.gsutil, 'ls', '-l', path])
    # Extract interesting fields.
    return {
        'size': FixupSize(
            _RequireField('\tObject size:\t([0-9]+)\n', listing, path)),
        'md5': _RequireField('\tMD5:\t([^\n]+)\n', listing, path),
        'date': _RequireField('\tLast mod:\t([^\n]+)\n', listing, path),
    }


def _RenderIndex(path, children, directories, options):
    """Build the HTML text of the index page for path."""
    title = _EscapeHtml(path)
    rows = [
        '<html>',
        '<head>',
        '<title>Index of %s</title>' % title,
        '</head>',
        '<body>',
        '<h1>Index of %s</h1>' % title,
        '<table>',
        '<tr>',
        '<th align="left">Name</th>',
        '<th align="left">Last modified</th>',
        '<th align="left">Size</th>',
        '<th align="left">MD5</th>',
        '</tr>',
        '<tr><th colspan="4"><hr></th></tr>',
    ]
    parent = posixpath.dirname(path)
    # dirname('gs://bucket') is 'gs:', i.e. path is already a bucket root and
    # has no parent index to link to.
    if parent != 'gs:':
        rows.append('<tr>')
        rows.append('<td><a href="%s">Parent Directory</a></td>' % _EscapeHtml(
            PathToLink(posixpath.join(parent, GENERATED_INDEX))))
        rows.append('<td> </td>')
        rows.append('<td> </td>')
        rows.append('<td> </td>')
        rows.append('</tr>')
    for child in children:
        name = _EscapeHtml(posixpath.basename(child))
        rows.append('<tr>')
        if child in directories:
            # Directories link to their own generated index and have no
            # date/size/md5 columns.
            rows.append('<td><a href="%s">%s</a></td>' % (
                _EscapeHtml(
                    PathToLink(posixpath.join(child, GENERATED_INDEX))),
                name))
            rows.append('<td> </td>')
            rows.append('<td> </td>')
            rows.append('<td> </td>')
        else:
            fields = GetPathInfo(child, options)
            rows.append('<td><a href="%s">%s</a></td>' % (
                _EscapeHtml(PathToLink(child)), name))
            rows.append('<td>%s</td>' % _EscapeHtml(fields['date']))
            rows.append('<td><b>%s</b></td>' % _EscapeHtml(fields['size']))
            rows.append('<td>%s</td>' % _EscapeHtml(fields['md5']))
        rows.append('</tr>')
    rows.append('<tr><th colspan="4"><hr></th></tr>')
    rows.append('</table>')
    rows.append('</body>')
    rows.append('</html>')
    return ''.join(rows)


def GenerateIndex(path, children, directories, options):
    """Generate index for a given path as needed.

    The index is uploaded only when it differs from the one already stored,
    or when options.force is set.

    Arguments:
      path: gs:// path of the directory being indexed.
      children: paths (objects and directories) listed in this index.
      directories: collection of all paths that are directories.
      options: parsed options (gsutil, acl, force).
    """
    # Generate index content.
    index = _RenderIndex(path, children, directories, options)
    index_path = posixpath.join(path, GENERATED_INDEX)
    # Check current state.
    current = _CaptureOutput([options.gsutil, 'cat', index_path])
    # Done if it's already right.
    if current == index and not options.force:
        print('%s -- skipping, up to date' % path)
        return
    # Write to a file; text mode so a str can be written directly.
    f = tempfile.NamedTemporaryFile(mode='w', suffix='.html')
    try:
        f.write(index)
        # gsutil reads the file by name, so the data must be on disk first.
        f.flush()
        # Upload index.
        cmd = [options.gsutil, 'cp']
        if options.acl:
            cmd += ['-a', options.acl]
        cmd += [f.name, index_path]
        p = subprocess.Popen(cmd)
        p.communicate()
    finally:
        # Closing the temporary file also deletes it.
        f.close()
    if p.returncode != 0:
        # Do not claim success when the upload command failed.
        sys.stderr.write('%s -- FAILED to update index (gsutil exit code %d)\n'
                         % (path, p.returncode))
        return
    print('%s -- updated index' % path)


def _IsOnPath(directory, target):
    """Return True if directory is target itself or one of its ancestors."""
    # Compare on a '/' boundary so 'gs://b/foo' is not taken for an ancestor
    # of 'gs://b/foobar'.
    return target == directory or target.startswith(directory + '/')


def GenerateIndexes(path, options):
    """Generate all relevant indexes for a given gsd path.

    Arguments:
      path: gs:// base directory to index.
      options: parsed options; options.path, when set, restricts the update
        to that path and its ancestors.
    Returns:
      0, for use as the process exit code.
    """
    # Get a list of objects under this prefix.
    listing = _CaptureOutput([options.gsutil, 'ls', posixpath.join(path, '*')])
    objects = [o for o in listing.splitlines()
               if posixpath.basename(o) != GENERATED_INDEX]
    # Find common prefixes.
    directories = set()
    for o in objects:
        part = posixpath.dirname(o)
        # Once a directory is recorded, its ancestors were recorded with it,
        # so the walk can stop there; this also guarantees termination.
        while part.startswith(path) and part not in directories:
            directories.add(part)
            part = posixpath.dirname(part)
    # Sorted so the generated index does not depend on set iteration order
    # and the up-to-date comparison in GenerateIndex stays stable.
    ordered_directories = sorted(directories)
    objects += ordered_directories
    target = options.path.rstrip('/') if options.path else None
    # Generate index for each directory.
    for d in ordered_directories:
        # Skip directories not on the target path if any.
        if target and not _IsOnPath(d, target):
            continue
        # Find just this directory's children.
        children = [o for o in objects if posixpath.dirname(o) == d]
        # Generate this directory's index if needed.
        GenerateIndex(d, children, directories, options)
    return 0


def main(argv):
    """Parse the command line and generate indexes.

    Arguments:
      argv: full argument vector, including the program name at index 0.
    Returns:
      Process exit code: 1 on a usage error, otherwise the result of
      GenerateIndexes.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] gs://base-dir')
    parser.add_option('-p', '--path', dest='path',
                      help='only update indexes on a given path')
    parser.add_option('-a', dest='acl', help='acl to set on indexes')
    parser.add_option('-f', '--force', action='store_true', default=False,
                      dest='force', help='upload all indexes even on match')
    parser.add_option('--gsutil', default='gsutil',
                      dest='gsutil', help='path to gsutil')
    options, args = parser.parse_args(argv)
    # args[0] is the program name, args[1] the single gs:// base directory.
    if len(args) != 2 or not args[1].startswith('gs://'):
        parser.print_help()
        return 1
    return GenerateIndexes(args[1], options)


if __name__ == '__main__':
    sys.exit(main(sys.argv))