blob: 64736918ca66b66e523a359a85d27635082f2cef [file] [log] [blame]
#!/usr/bin/python
#
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wrapper that does auto-retry and stats logging for command invocation.
Various command line tools in use: gsutil, curl have spurious failure.
This wrapper will track stats to an AppEngine based service to
help track down the cause of failures, as well as add retry logic.
"""
import optparse
import os
import platform
import socket
import subprocess
import sys
import threading
import time
import urllib
import uuid
LOG_TIMEOUT = 10
def LogCommand(options, command_id,
attempt, cmd, returncode, stdout, stderr, runtime):
"""Log a command invocation and result to a central location.
Arguments:
options: parsed options
command_id: unique id for this command (shared by all retries)
attempt: which try numbered from 0
cmd: command run
returncode: return code from running command
stdout: text of stdout
stderr: text of stderr
runtime: command runtime in seconds
Returns:
True/False indicating if the current result should be accepted without
further retry.
"""
uname = platform.uname()
params = urllib.urlencode({
'attempt': str(attempt),
'cwd': os.getcwd(),
'command_id': command_id,
'command': cmd,
'returncode': str(returncode),
'stdout': stdout[:400],
'stderr': stderr[:400],
'runtime': str(runtime),
'retries': str(options.retries),
'uname_sysname': uname[0],
'uname_nodename': uname[1],
'uname_release': uname[2],
'uname_version': uname[3],
'uname_machine': uname[4],
'uname_processor': uname[5],
})
f = urllib.urlopen(options.logurl, params)
ret = f.read()
f.close()
try:
return int(ret) != 0
except ValueError:
return 0
def RunWithTimeout(timeout, func, *args, **kwargs):
wrapper = { 'result': None }
def CallFunc():
wrapper['result'] = func(*args, **kwargs)
th = threading.Thread(target=CallFunc)
th.start()
th.join(timeout)
return wrapper['result']
def main(argv):
parser = optparse.OptionParser()
parser.add_option('-r', '--retries', dest='retries',
type='int', default=10,
help='number of times to retry on failure')
parser.add_option('-u', '--logurl', dest='logurl',
default='https://command-wrapper.appspot.com/log',
help='URL to log invocations/failures to')
(options, args) = parser.parse_args(args=argv[1:])
# Limit tcp connnection timeouts to 10 seconds.
socket.setdefaulttimeout(10)
command_id = uuid.uuid1()
cmd = ' '.join(args)
# Log that we're even starting.
RunWithTimeout(LOG_TIMEOUT, LogCommand,
options, command_id, -1, cmd, -1, '', '', 0)
# Try up to a certain number of times.
for r in range(options.retries):
tm = time.time()
p = subprocess.Popen(cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
(p_stdout, p_stderr) = p.communicate()
sys.stdout.write(p_stdout)
sys.stderr.write(p_stderr)
runtime = time.time() - tm
accept = RunWithTimeout(LOG_TIMEOUT, LogCommand,
options, command_id, r, cmd,
p.returncode, p_stdout, p_stderr, runtime)
if accept:
return p.returncode
if p.returncode == 0:
return 0
print 'Command %s failed with retcode %d, try %d.' % (
' '.join(args), p.returncode, r + 1)
print 'Command %s failed %d retries, giving up.' % (
' '.join(args), options.retries)
return p.returncode
if __name__ == '__main__':
sys.exit(main(sys.argv))