infra_libs/ts_mon/config.py - infra/infra/packages/infra_libs - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 from __future__ import print_function
 import json
 import logging
 import os
 import socket
 import sys
 import re

 import requests

 from infra_libs.ts_mon.common import interface
 from infra_libs.ts_mon.common import metric_store
 from infra_libs.ts_mon.common import monitors
 from infra_libs.ts_mon.common import standard_metrics
 from infra_libs.ts_mon.common import targets


 def load_machine_config(filename):
   """Read the machine-wide ts_mon configuration from a JSON file.

   Args:
     filename (str): path of the JSON configuration file.

   Returns:
     The decoded JSON content, or an empty dict when nothing exists at
     |filename|.

   Raises:
     Exception: whatever open() or json.load() raised for a file that exists
       but cannot be read or parsed. The failure is logged before re-raising.
   """
   if os.path.exists(filename):
     try:
       with open(filename) as config_file:
         parsed = json.load(config_file)
     except Exception:
       logging.error('Configuration file couldn\'t be read: %s', filename)
       raise
     return parsed

   # A missing file is not an error: the caller gets an empty configuration.
   logging.info('Configuration file does not exist, ignoring: %s', filename)
   return {}


 def _default_region(fqdn):
   """Pick a default region name for this machine.

   The GCE metadata server is queried first (1 second timeout). If it answers
   with an OK status, the last slash-separated component of the response body
   is used. Otherwise the region falls back to the second dot-separated
   component of |fqdn|.

   Args:
     fqdn (str): fully qualified domain name of this machine.

   Returns:
     The region name, or '' when |fqdn| has no second component.
   """
   zone_url = 'http://metadata.google.internal/computeMetadata/v1/instance/zone'
   try:
     response = requests.get(
         zone_url, headers={'Metadata-Flavor': 'Google'}, timeout=1.0)
   except requests.exceptions.RequestException:
     # Request failed: fall through to the hostname-based guess.
     pass
   else:
     if response.status_code == requests.codes.ok:
       # The zone is the last slash-separated component.
       return response.text.rsplit('/', 1)[-1]

   labels = fqdn.split('.')
   if len(labels) < 2:
     return ''
   return labels[1]  # [chrome|golo]


 def _default_network(host):
   try:
     # Regular expression that matches the vast majority of our host names.
     # Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'.
     return re.match(r'^([\w-]*?-[acm]|master)(\d+)a?$', host).group(2)  # N
   except AttributeError:
     return ''


 def add_argparse_options(parser):
   """Register the ts_mon command line flags on an argument parser.

   All flags are added to a 'Timeseries Monitoring Options' argument group.
   The defaults of the hostname, region and network flags are derived from
   this machine's fully qualified domain name; working out the region default
   may issue an HTTP request (see _default_region).

   Args:
     parser (argparse.ArgumentParser): the parser for the main process.
   """
   # The machine-wide config file lives in a platform specific location.
   if sys.platform != 'win32':  # pragma: no cover
     config_path = '/etc/chrome-infra/ts-mon.json'
   else:  # pragma: no cover
     config_path = 'C:\\chrome-infra\\ts-mon.json'

   group = parser.add_argument_group('Timeseries Monitoring Options')
   add_flag = group.add_argument

   # Flags describing where and how metrics are sent.
   add_flag(
       '--ts-mon-config-file',
       help=('path to a JSON config file that contains suitable values for '
             '"endpoint" and "credentials" for this machine. This config file is '
             'intended to be shared by all processes on the machine, as the '
             'values depend on the machine\'s position in the network, IP '
             'whitelisting and deployment of credentials. (default: %(default)s)'),
       default=config_path)
   add_flag(
       '--ts-mon-endpoint',
       help=('url (file:// or https://) to post monitoring metrics to. If set, '
             'overrides the value in --ts-mon-config-file'))
   add_flag(
       '--ts-mon-credentials',
       help=('path to a pkcs8 json credential file. If set, overrides the value '
             'in --ts-mon-config-file'))
   add_flag(
       '--ts-mon-ca-certs',
       help=('path to file containing root CA certificates for SSL server '
             'certificate validation. If not set, a CA cert file bundled with '
             'httplib2 is used.'))
   add_flag(
       '--ts-mon-flush',
       default='auto',
       choices=('manual', 'auto'),
       help='metric push behavior: manual (only send when flush() is called), '
            'or auto (send automatically every --ts-mon-flush-interval-secs '
            'seconds). (default: %(default)s)')
   add_flag(
       '--ts-mon-flush-interval-secs',
       default=60,
       type=int,
       help='automatically push metrics on this interval if '
            '--ts-mon-flush=auto.')
   add_flag(
       '--ts-mon-autogen-hostname',
       action='store_true',
       help='Indicate that the hostname is autogenerated. '
            'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
            'or any other hosts with dynamically generated names.')

   add_flag(
       '--ts-mon-target-type',
       default='device',
       choices=('device', 'task'),
       help=('the type of target that is being monitored ("device" or "task").'
             ' (default: %(default)s)'))

   # Defaults derived from the machine's name.
   full_name = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
   short_name = full_name.split('.')[0]  # foo-[a|m]N
   default_region = _default_region(full_name)
   default_network = _default_network(short_name)

   # Flags used when the target type is 'device'.
   add_flag(
       '--ts-mon-device-hostname',
       help='name of this device, (default: %(default)s)',
       default=short_name)
   add_flag(
       '--ts-mon-device-region',
       help='name of the region this devices lives in. (default: %(default)s)',
       default=default_region)
   add_flag(
       '--ts-mon-device-role',
       help='Role of the device. (default: %(default)s)',
       default='default')
   add_flag(
       '--ts-mon-device-network',
       help=('name of the network this device is connected to. '
             '(default: %(default)s)'),
       default=default_network)

   # Flags used when the target type is 'task'.
   add_flag(
       '--ts-mon-task-service-name',
       help='name of the service being monitored')
   add_flag(
       '--ts-mon-task-job-name',
       help='name of this job instance of the task')
   add_flag(
       '--ts-mon-task-region',
       help=('name of the region in which this task is running '
             '(default: %(default)s)'),
       default=default_region)
   add_flag(
       '--ts-mon-task-hostname',
       help=('name of the host on which this task is running '
             '(default: %(default)s)'),
       default=short_name)
   add_flag(
       '--ts-mon-task-number',
       default=0,
       type=int,
       help=('number (e.g. for replication) of this instance of this task '
             '(default: %(default)s)'))

   add_flag(
       '--ts-mon-metric-name-prefix',
       help='metric name prefix for all metrics (default: %(default)s)',
       default='/chrome/infra/')

   add_flag(
       '--ts-mon-use-new-proto',
       action='store_true',
       default=True,
       help='deprecated and ignored')


 def process_argparse_options(args):
   """Configure the global ts_mon state from parsed command line flags.

   Loads the machine config file, installs the default target and the global
   monitor on interface.state, starts a background flush thread when
   --ts-mon-flush is 'auto', and finally initializes the standard metrics.

   The process exits with status 2 if the target type is "task" and either
   the service name or the job name flag is missing.

   Args:
     args (argparse.Namespace): the result of parsing the command line arguments
   """
   machine_config = load_machine_config(args.ts_mon_config_file)
   autogen_hostname = machine_config.get('autogen_hostname', False)

   # Flags given on the command line win over the machine config file.
   endpoint = args.ts_mon_endpoint
   if endpoint is None:
     endpoint = machine_config.get('endpoint', '')
   credentials = args.ts_mon_credentials
   if credentials is None:
     credentials = machine_config.get('credentials', '')

   def _target_hostname(name):
     # Autogenerated host names are reported with an 'autogen:' prefix.
     if args.ts_mon_autogen_hostname or autogen_hostname:
       return 'autogen:' + name
     return name

   def _usage_error(message):
     # Reimplement ArgumentParser.error, since we don't have access to the parser
     print(message, file=sys.stderr)
     sys.exit(2)

   target_type = args.ts_mon_target_type
   if target_type == 'device':
     interface.state.target = targets.DeviceTarget(
         args.ts_mon_device_region,
         args.ts_mon_device_role,
         args.ts_mon_device_network,
         _target_hostname(args.ts_mon_device_hostname))
   elif target_type == 'task':
     if not args.ts_mon_task_service_name:
       _usage_error('Argument --ts-mon-task-service-name must be provided '
                    'when the target type is "task".')
     if not args.ts_mon_task_job_name:
       _usage_error('Argument --ts-mon-task-job-name must be provided '
                    'when the target type is "task".')
     interface.state.target = targets.TaskTarget(
         args.ts_mon_task_service_name,
         args.ts_mon_task_job_name,
         args.ts_mon_task_region,
         _target_hostname(args.ts_mon_task_hostname),
         args.ts_mon_task_number)

   interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix

   # Start from a NullMonitor; it stays in place unless the endpoint selects
   # one of the real monitors below.
   interface.state.global_monitor = monitors.NullMonitor()

   file_scheme = 'file://'
   if endpoint.startswith(file_scheme):
     debug_path = endpoint[len(file_scheme):]
     interface.state.global_monitor = monitors.DebugMonitor(debug_path)
   elif endpoint.startswith('https://'):
     interface.state.global_monitor = monitors.HttpsMonitor(
         endpoint,
         monitors.CredentialFactory.from_string(credentials),
         ca_certs=args.ts_mon_ca_certs)
   elif endpoint.lower() == 'none':
     logging.info('ts_mon monitoring has been explicitly disabled')
   else:
     logging.error('ts_mon monitoring is disabled because the endpoint provided'
                   ' is invalid or not supported: %s', endpoint)

   flush_mode = args.ts_mon_flush
   interface.state.flush_mode = flush_mode

   if flush_mode == 'auto':
     interface.state.flush_thread = interface._FlushThread(
         args.ts_mon_flush_interval_secs)
     interface.state.flush_thread.start()

   standard_metrics.init()
	# Copyright 2015 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	from __future__ import print_function
	import json
	import logging
	import os
	import socket
	import sys
	import re

	import requests

	from infra_libs.ts_mon.common import interface
	from infra_libs.ts_mon.common import metric_store
	from infra_libs.ts_mon.common import monitors
	from infra_libs.ts_mon.common import standard_metrics
	from infra_libs.ts_mon.common import targets


	def load_machine_config(filename):
	  """Load the machine-wide ts_mon configuration from a JSON file.

	  Args:
	    filename (str): path of the JSON configuration file.

	  Returns:
	    The decoded JSON content, or an empty dict if |filename| does not exist.

	  Raises:
	    Exception: any error raised while opening or parsing an existing file;
	      it is logged and then re-raised unchanged.
	  """
	  if not os.path.exists(filename):
	    logging.info('Configuration file does not exist, ignoring: %s', filename)
	    return {}

	  try:
	    with open(filename) as fh:
	      return json.load(fh)
	  except Exception:
	    logging.error('Configuration file couldn\'t be read: %s', filename)
	    raise


	def _default_region(fqdn):
	  """Guess the region this machine runs in.

	  Args:
	    fqdn (str): fully qualified domain name of this machine.

	  Returns:
	    The last slash-separated component of the GCE metadata server's zone
	    response when that request succeeds with an OK status; otherwise the
	    second dot-separated component of |fqdn|, or '' if there is none.
	  """
	  # Check if we're running in a GCE instance.
	  try:
	    r = requests.get(
	        'http://metadata.google.internal/computeMetadata/v1/instance/zone',
	        headers={'Metadata-Flavor': 'Google'},
	        timeout=1.0)
	  except requests.exceptions.RequestException:
	    # Request failed: fall back to the host name below.
	    pass
	  else:
	    if r.status_code == requests.codes.ok:
	      # The zone is the last slash-separated component.
	      return r.text.split('/')[-1]

	  try:
	    return fqdn.split('.')[1]  # [chrome|golo]
	  except IndexError:
	    return ''


	def _default_network(host):
	try:
	# Regular expression that matches the vast majority of our host names.
	# Matches everything of the form 'masterN', 'masterNa', and 'foo-xN'.
	return re.match(r'^([\w-]*?-[acm]\|master)(\d+)a?$', host).group(2) # N
	except AttributeError:
	return ''


	def add_argparse_options(parser):
	  """Add monitoring related flags to a process' argument parser.

	  The flags are added to a 'Timeseries Monitoring Options' argument group.
	  Defaults for the hostname, region and network flags are computed from this
	  machine's fully qualified domain name; computing the region default may
	  issue an HTTP request (see _default_region).

	  Args:
	    parser (argparse.ArgumentParser): the parser for the main process.
	  """
	  if sys.platform == 'win32':  # pragma: no cover
	    default_config_file = 'C:\\chrome-infra\\ts-mon.json'
	  else:  # pragma: no cover
	    default_config_file = '/etc/chrome-infra/ts-mon.json'

	  # From here on |parser| refers to the argument group, not the main parser.
	  parser = parser.add_argument_group('Timeseries Monitoring Options')
	  parser.add_argument(
	      '--ts-mon-config-file',
	      default=default_config_file,
	      help='path to a JSON config file that contains suitable values for '
	           '"endpoint" and "credentials" for this machine. This config file is '
	           'intended to be shared by all processes on the machine, as the '
	           'values depend on the machine\'s position in the network, IP '
	           'whitelisting and deployment of credentials. (default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-endpoint',
	      help='url (file:// or https://) to post monitoring metrics to. If set, '
	           'overrides the value in --ts-mon-config-file')
	  parser.add_argument(
	      '--ts-mon-credentials',
	      help='path to a pkcs8 json credential file. If set, overrides the value '
	           'in --ts-mon-config-file')
	  parser.add_argument(
	      '--ts-mon-ca-certs',
	      help='path to file containing root CA certificates for SSL server '
	           'certificate validation. If not set, a CA cert file bundled with '
	           'httplib2 is used.')
	  parser.add_argument(
	      '--ts-mon-flush',
	      choices=('manual', 'auto'), default='auto',
	      help=('metric push behavior: manual (only send when flush() is called), '
	            'or auto (send automatically every --ts-mon-flush-interval-secs '
	            'seconds). (default: %(default)s)'))
	  parser.add_argument(
	      '--ts-mon-flush-interval-secs',
	      type=int,
	      default=60,
	      help=('automatically push metrics on this interval if '
	            '--ts-mon-flush=auto.'))
	  parser.add_argument(
	      '--ts-mon-autogen-hostname',
	      action="store_true",
	      help=('Indicate that the hostname is autogenerated. '
	            'This option must be set on autoscaled GCE VMs, Kubernetes pods, '
	            'or any other hosts with dynamically generated names.'))

	  parser.add_argument(
	      '--ts-mon-target-type',
	      choices=('device', 'task'),
	      default='device',
	      help='the type of target that is being monitored ("device" or "task").'
	           ' (default: %(default)s)')

	  fqdn = socket.getfqdn().lower()  # foo-[a|m]N.[chrome|golo].chromium.org
	  host = fqdn.split('.')[0]  # foo-[a|m]N
	  region = _default_region(fqdn)
	  network = _default_network(host)

	  parser.add_argument(
	      '--ts-mon-device-hostname',
	      default=host,
	      help='name of this device, (default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-device-region',
	      default=region,
	      help='name of the region this devices lives in. (default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-device-role',
	      default='default',
	      help='Role of the device. (default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-device-network',
	      default=network,
	      help='name of the network this device is connected to. '
	           '(default: %(default)s)')

	  parser.add_argument(
	      '--ts-mon-task-service-name',
	      help='name of the service being monitored')
	  parser.add_argument(
	      '--ts-mon-task-job-name',
	      help='name of this job instance of the task')
	  parser.add_argument(
	      '--ts-mon-task-region',
	      default=region,
	      help='name of the region in which this task is running '
	           '(default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-task-hostname',
	      default=host,
	      help='name of the host on which this task is running '
	           '(default: %(default)s)')
	  parser.add_argument(
	      '--ts-mon-task-number', type=int, default=0,
	      help='number (e.g. for replication) of this instance of this task '
	           '(default: %(default)s)')

	  parser.add_argument(
	      '--ts-mon-metric-name-prefix',
	      default='/chrome/infra/',
	      help='metric name prefix for all metrics (default: %(default)s)')

	  parser.add_argument(
	      '--ts-mon-use-new-proto',
	      default=True, action='store_true',
	      help='deprecated and ignored')


	def process_argparse_options(args):
	  """Process command line arguments to initialize the global monitor.

	  Also initializes the default target.

	  Starts a background thread to automatically flush monitoring metrics if not
	  disabled by command line arguments.

	  Exits the process with status 2 when the target type is "task" and the
	  service name or job name flag is missing.

	  Args:
	    args (argparse.Namespace): the result of parsing the command line arguments
	  """
	  # Parse the config file if it exists.
	  config = load_machine_config(args.ts_mon_config_file)
	  endpoint = config.get('endpoint', '')
	  credentials = config.get('credentials', '')
	  autogen_hostname = config.get('autogen_hostname', False)

	  # Command-line args override the values in the config file.
	  if args.ts_mon_endpoint is not None:
	    endpoint = args.ts_mon_endpoint
	  if args.ts_mon_credentials is not None:
	    credentials = args.ts_mon_credentials

	  if args.ts_mon_target_type == 'device':
	    hostname = args.ts_mon_device_hostname
	    # Autogenerated host names are reported with an 'autogen:' prefix.
	    if args.ts_mon_autogen_hostname or autogen_hostname:
	      hostname = 'autogen:' + hostname
	    interface.state.target = targets.DeviceTarget(
	        args.ts_mon_device_region,
	        args.ts_mon_device_role,
	        args.ts_mon_device_network,
	        hostname)
	  if args.ts_mon_target_type == 'task':
	    # Reimplement ArgumentParser.error, since we don't have access to the parser
	    if not args.ts_mon_task_service_name:
	      print('Argument --ts-mon-task-service-name must be provided '
	            'when the target type is "task".',
	            file=sys.stderr)
	      sys.exit(2)
	    if not args.ts_mon_task_job_name:
	      print('Argument --ts-mon-task-job-name must be provided '
	            'when the target type is "task".',
	            file=sys.stderr)
	      sys.exit(2)
	    hostname = args.ts_mon_task_hostname
	    if args.ts_mon_autogen_hostname or autogen_hostname:
	      hostname = 'autogen:' + hostname
	    interface.state.target = targets.TaskTarget(
	        args.ts_mon_task_service_name,
	        args.ts_mon_task_job_name,
	        args.ts_mon_task_region,
	        hostname,
	        args.ts_mon_task_number)

	  interface.state.metric_name_prefix = args.ts_mon_metric_name_prefix
	  # Default to a NullMonitor; it is replaced below only for a file:// or
	  # https:// endpoint.
	  interface.state.global_monitor = monitors.NullMonitor()

	  if endpoint.startswith('file://'):
	    interface.state.global_monitor = monitors.DebugMonitor(
	        endpoint[len('file://'):])
	  elif endpoint.startswith('https://'):
	    interface.state.global_monitor = monitors.HttpsMonitor(
	        endpoint, monitors.CredentialFactory.from_string(credentials),
	        ca_certs=args.ts_mon_ca_certs)
	  elif endpoint.lower() == 'none':
	    logging.info('ts_mon monitoring has been explicitly disabled')
	  else:
	    logging.error('ts_mon monitoring is disabled because the endpoint provided'
	                  ' is invalid or not supported: %s', endpoint)

	  interface.state.flush_mode = args.ts_mon_flush

	  if args.ts_mon_flush == 'auto':
	    interface.state.flush_thread = interface._FlushThread(
	        args.ts_mon_flush_interval_secs)
	    interface.state.flush_thread.start()

	  standard_metrics.init()