Adding a retry and logging utility + appengine backend.
This will be used to monitor and diagnose gsutil failures for nacl + chromeos.


Review URL:

git-svn-id: svn:// 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/ b/
new file mode 100755
index 0000000..48f19d2
--- /dev/null
+++ b/
@@ -0,0 +1,94 @@
+# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Wrapper that does auto-retry and stats logging for command invocation.
+
+Various command line tools in use (e.g. gsutil, curl) fail spuriously.
+This wrapper logs stats to an AppEngine-based service to help track
+down the cause of failures, and adds retry logic.
+"""
+import optparse
+import os
+import subprocess
+import sys
+import time
+import urllib
+import uuid
+def LogCommand(options, command_id,
+               attempt, cmd, returncode, stdout, stderr, runtime):
+  """Log a command invocation and result to a central location.
+  Arguments:
+    options: parsed options
+    command_id: unique id for this command (shared by all retries)
+    attempt: which try numbered from 0
+    cmd: command run
+    returncode: return code from running command
+    stdout: text of stdout
+    stderr: text of stderr
+    runtime: command runtime in seconds
+  """
+  uname = os.uname()
+  params = urllib.urlencode({
+      'attempt': str(attempt),
+      'cwd': os.getcwd(),
+      'command_id': command_id,
+      'command': cmd,
+      'returncode': str(returncode),
+      'stdout': stdout[:400],
+      'stderr': stderr[:400],
+      'runtime': str(runtime),
+      'retries': str(options.retries),
+      'uname_sysname': uname[0],
+      'uname_nodename': uname[1],
+      'uname_release': uname[2],
+      'uname_version': uname[3],
+      'uname_machine': uname[4],
+  })
+  f = urllib.urlopen(options.logurl, params)
+  f.close()
+def main(argv):
+  parser = optparse.OptionParser()
+  parser.add_option('-r', '--retries', dest='retries',
+                    type='int', default=10,
+                    help='number of times to retry on failure')
+  parser.add_option('-u', '--logurl', dest='logurl',
+                    default='',
+                    help='URL to log invocations/failures to')
+  (options, args) = parser.parse_args(args=argv[1:])
+  command_id = uuid.uuid1()
+  cmd = ' '.join(args)
+  for r in range(options.retries):
+    tm = time.time()
+    p = subprocess.Popen(cmd, shell=True,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    (p_stdout, p_stderr) = p.communicate()
+    sys.stdout.write(p_stdout)
+    sys.stderr.write(p_stderr)
+    runtime = time.time() - tm
+    LogCommand(options, command_id, r, cmd,
+               p.returncode, p_stdout, p_stderr, runtime)
+    if p.returncode == 0:
+      return 0
+    print 'Command %s failed with retcode %d, try %d.' % (
+        ' '.join(args), p.returncode, r + 1)
+  print 'Command %s failed %d retries, giving up.' % (
+      ' '.join(args), options.retries)
+  return p.returncode
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))