Adding a script to generate _index.html files directly on Google Storage for
Developers. While less ideal than using a live generated index, this allows the
sandbox.google.com/storage interface to be used to authenticate ACL controlled
access to particular paths/buckets.

BUG=None
TEST=None

Review URL: http://codereview.chromium.org/2838033

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/gsd_generate_index@51169 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/gsd_generate_index.py b/gsd_generate_index.py
new file mode 100755
index 0000000..ff4bd89
--- /dev/null
+++ b/gsd_generate_index.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+# Copyright (c) 2008-2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Generate index.html files for a Google Storage for Developers directory.
+
+Google Storage for Developers provides only a raw set of objects.
+For some buckets we would like to be able to support browsing of the directory
+tree. This utility will generate the needed index and upload/update it.
+"""
+
+import optparse
+import posixpath
+import re
+import subprocess
+import sys
+import tempfile
+
+
+GENERATED_INDEX = '_index.html'  # Basename of every generated index object.
+
+
+def PathToLink(path):  # Swap each 'gs://' for the sandbox storage https URL.
+  return 'https://sandbox.google.com/storage/'.join(path.split('gs://'))
+
+
+def FixupSize(sz):
+  """Render a byte count, given as a string, in a compact readable form.
+
+  Arguments:
+    sz: a size string in bytes
+  Returns:
+    The plain number below 1000, otherwise the value in K/M/G (powers of
+    1000) cut to one decimal place.
+  """
+  num_bytes = int(sz)
+  if num_bytes < 1000:
+    return str(num_bytes)
+  # Each entry: exclusive upper bound, divisor giving tenths of a unit, suffix.
+  scales = ((1000000, 100, 'K'), (1000000000, 100000, 'M'))
+  for limit, tenth, suffix in scales:
+    if num_bytes < limit:
+      return str(int(num_bytes / tenth) / 10.0) + suffix
+  return str(int(num_bytes / 100000000) / 10.0) + 'G'
+
+
+def GetPathInfo(path, options):
+  """Run `gsutil ls -l` on a gsd path and return its size, md5 and date.
+
+  The result is a dict with keys 'size' (via FixupSize), 'md5' and 'date'.
+  An AttributeError is raised if the output lacks one of the expected lines.
+  """
+  listing = subprocess.Popen([options.gsutil, 'ls', '-l', path],
+                             stdout=subprocess.PIPE).communicate()[0]
+  # Each value is captured from a tab-delimited "label: value" output line.
+  size = re.search('\tObject size:\t([0-9]+)\n', listing).group(1)
+  md5 = re.search('\tMD5:\t([^\n]+)\n', listing).group(1)
+  date = re.search('\tLast mod:\t([^\n]+)\n', listing).group(1)
+  return {'size': FixupSize(size), 'md5': md5, 'date': date}
+
+
+def GenerateIndex(path, children, directories, options):
+  """Generate the index for one directory and upload it if it changed.
+
+  Paths and names are HTML-escaped so that object names containing &, <, >
+  or " cannot break the generated markup.
+
+  Arguments:
+    path: gs:// path of the directory to index
+    children: gs:// paths to list, in the order they should appear
+    directories: children found in this collection link to their own index;
+        all other children are looked up with GetPathInfo
+    options: parsed options; gsutil, force and acl are read here
+  """
+  import cgi  # Function-scope import; nothing else in the file needs it.
+  def Escape(text):
+    return cgi.escape(text, True)  # True: double quotes are escaped too.
+
+  row = '<tr><td><a href="%s">%s</a></td>%s</tr>'
+  blank = '<td> </td>' * 3
+  rule = '<tr><th colspan="4"><hr></th></tr>'
+  # Collect the pieces and join once rather than growing a string with +=.
+  parts = ['<html><head><title>Index of %s</title></head>' % Escape(path),
+           '<body><h1>Index of %s</h1><table><tr>' % Escape(path)]
+  parts += ['<th align="left">%s</th>' % heading
+            for heading in ('Name', 'Last modified', 'Size', 'MD5')]
+  parts += ['</tr>', rule]
+  parent = posixpath.dirname(path)
+  # dirname() of a bucket root (gs://bucket) is 'gs:'; it has no parent link.
+  if parent != 'gs:':
+    href = PathToLink(posixpath.join(parent, GENERATED_INDEX))
+    parts.append(row % (Escape(href), 'Parent Directory', blank))
+  for child in children:
+    if child in directories:
+      href = PathToLink(posixpath.join(child, GENERATED_INDEX))
+      cells = blank
+    else:
+      fields = GetPathInfo(child, options)
+      href = PathToLink(child)
+      cells = '<td>%s</td><td><b>%s</b></td><td>%s</td>' % (
+          fields['date'], fields['size'], fields['md5'])
+    name = Escape(posixpath.basename(child))
+    parts.append(row % (Escape(href), name, cells))
+  parts += [rule, '</table></body></html>']
+  index = ''.join(parts)
+  index_path = posixpath.join(path, GENERATED_INDEX)
+  cmd = [options.gsutil, 'cat', index_path]
+  p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+  p_stdout, _ = p.communicate()
+  # Done if it's already right.
+  if p_stdout == index and not options.force:
+    print '%s -- skipping, up to date' % path
+    return
+  # Write to a file; close (and thereby delete) it even if the upload raises.
+  f = tempfile.NamedTemporaryFile(suffix='.html')
+  try:
+    f.write(index)
+    f.flush()
+    cmd = [options.gsutil, 'cp']
+    if options.acl:
+      cmd += ['-a', options.acl]
+    cmd += [f.name, index_path]
+    status = subprocess.call(cmd)
+  finally:
+    f.close()
+  # Only report success when gsutil exited cleanly.
+  if status != 0:
+    print >>sys.stderr, '%s -- FAILED to upload index' % path
+    return
+  print '%s -- updated index' % path
+
+
+def GenerateIndexes(path, options):
+  """Generate all relevant indexes under a gsd path; always returns 0.
+
+  Trailing slashes on path are ignored. When options.path is set, only that
+  directory and its ancestors are indexed, compared at '/' boundaries."""
+  while path.endswith('/') and not path.endswith('://'):
+    path = path[:-1]
+  # Get a list of objects under this prefix.
+  cmd = [options.gsutil, 'ls', posixpath.join(path, '*')]
+  p_stdout, _ = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()
+  objects = [o for o in str(p_stdout).splitlines()
+             if posixpath.basename(o) != GENERATED_INDEX]
+  directories = set()  # Every directory from each object up to path.
+  for o in objects:
+    part = posixpath.dirname(o)
+    while part.startswith(path):
+      directories.add(part)
+      part = posixpath.dirname(part)
+  objects += sorted(directories)  # Sorted so the index order is stable.
+  for d in sorted(directories):
+    # The added '/' keeps gs://b/foo from matching a target gs://b/foobar.
+    if options.path and not (options.path + '/').startswith(d + '/'):
+      continue
+    children = [o for o in objects if posixpath.dirname(o) == d]
+    GenerateIndex(d, children, directories, options)
+  return 0
+
+
+def main(argv):
+  """Index the gs:// directory named in argv; returns the exit status."""
+  parser = optparse.OptionParser(usage='usage: %prog [options] gs://base-dir')
+  add = parser.add_option
+  add('-p', '--path', dest='path', help='only update indexes on a given path')
+  add('-a', dest='acl', help='acl to set on indexes')
+  add('-f', '--force', dest='force', action='store_true', default=False,
+      help='upload all indexes even on match')
+  add('', '--gsutil', dest='gsutil', default='gsutil', help='path to gsutil')
+  options, positional = parser.parse_args(argv)
+  if len(positional) == 2 and positional[1].startswith('gs://'):
+    return GenerateIndexes(positional[1], options)
+  parser.print_help()
+  return 1
+
+
+if __name__ == '__main__':  # Only when run as a script, not when imported.
+  sys.exit(main(sys.argv))