blob: 6cf0c14bb6f11da8c9ce915cfa670aacda69b836 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The LUCI Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finds jobs with old entries (>1d) in ActiveInvocations list.
Usage:
prpc login
./detect_stuck_active_invs.py luci-scheduler-dev.appspot.com
Requires the caller to be in 'administrators' group.
"""
import json
import subprocess
import sys
import time
def prpc(host, method, body):
p = subprocess.Popen(
['prpc', 'call', host, method],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
out, _ = p.communicate(json.dumps(body))
if p.returncode:
raise Exception('Call to %s failed' % method)
return json.loads(out)
def check_job(host, job_ref):
print 'Checking %s/%s' % (job_ref['project'], job_ref['job'])
state = prpc(host, 'internal.admin.Admin.GetDebugJobState', job_ref)
active_invs = state.get('activeInvocations', [])
if not active_invs:
print ' No active invocations'
return []
stuck = []
for inv_id in active_invs:
print ' ...checking %s' % inv_id
inv = prpc(host, 'scheduler.Scheduler.GetInvocation', {
'jobRef': job_ref,
'invocationId': inv_id,
})
started = time.time() - int(inv['startedTs']) / 1000000.0
if started > 24 * 3600:
print ' it is stuck!'
stuck.append((job_ref, inv_id))
return stuck
def main():
if len(sys.argv) != 2:
print >> sys.stderr, 'Usage: %s <host>' % sys.argv[0]
return 1
host = sys.argv[1]
stuck = []
for job in prpc(host, 'scheduler.Scheduler.GetJobs', {})['jobs']:
stuck.extend(check_job(host, job['jobRef']))
if not stuck:
print 'No invocations are stuck'
return
print
print 'All stuck invocations: '
for job_ref, inv_id in stuck:
print '%s/%s %s' % (job_ref['project'], job_ref['job'], inv_id)
return 0
if __name__ == '__main__':
sys.exit(main())