dashboard/dashboard/add_point.py - catapult.git - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """URL endpoint to allow Buildbot slaves to post data to the dashboard."""
 from __future__ import print_function
 from __future__ import division
 from __future__ import absolute_import

 import copy
 import json
 import logging
 import math
 import re
 import six

 from google.appengine.api import datastore_errors
 from google.appengine.api import taskqueue
 from google.appengine.ext import ndb

 from dashboard.api import api_auth
 from dashboard.common import request_handler
 from dashboard.common import datastore_hooks
 from dashboard.common import histogram_helpers
 from dashboard.common import math_utils
 from dashboard.models import graph_data

 from flask import request, make_response

 # Name of the task queue that the new-point tasks are added to.
 _TASK_QUEUE_NAME = 'new-points-queue'

 # Number of rows to process per task queue task. This limits the task size
 # and execution time (Limits: 100KB object size and 10 minutes execution time).
 _TASK_QUEUE_SIZE = 32

 # Max length for a Row property name.
 _MAX_COLUMN_NAME_LENGTH = 25

 # Maximum length of a value for a string property.
 _STRING_COLUMN_MAX_LENGTH = 400

 # Maximum number of properties for a Row.
 _MAX_NUM_COLUMNS = 30

 # Maximum length for a test path. This limit is required because the test path
 # is used as the string ID for TestContainer (the parent in the datastore for
 # Row entities), and datastore imposes a maximum string ID length.
 _MAX_TEST_PATH_LENGTH = 500


 class BadRequestError(Exception):
   """Raised when uploaded data is invalid and a 400 should be returned."""


 def AddPointPost():
   """Handles a POST of new points: authorizes, then hands off to AddData.

   The points arrive in the "data" request parameter as a JSON string. Two
   shapes are accepted.

   1. A list of row dicts, each describing one point on a graph:

     [
       {
         "master": "ChromiumPerf",
         "bot": "xp-release-dual-core",
         "test": "dromaeo/dom/modify",
         "revision": 123456789,
         "value": 24.66,
         "error": 2.33,
         "units": "ms",
         "supplemental_columns": {
           "d_median": 24234.12,
           "d_mean": 23.553,
           "r_webkit": 423340,
           ...
         },
         ...
       },
       ...
     ]

   "master", "bot" and "test" together form the test path that identifies the
   series; "revision" and "value" are the X and Y values of the point.

   2. A single Dashboard JSON v1.0 dict (go/telemetry-json), first produced by
   Telemetry. Here the test, value, error and units fields are replaced by a
   "chart_data" field holding a Chart JSON dict. It is converted into a list
   of rows (i.e. format 1) before being processed:

     {
       "master": "ChromiumPerf",
       ...other row fields...,
       "chart_data": {
         "foo": {
           "bar": {
             "type": "scalar",
             "name": "foo.bar",
             "units": "ms",
             "value": 4.2,
           },
           "summary": {
             "type": "list_of_scalar_values",
             "name": "foo",
             "units": "ms",
             "values": [4.2, 5.7, 6.8],
             "std": 1.30512,
           },
         },
       },
     }

   Request parameters:
     data: JSON encoding of a list of row dicts or of a Dashboard JSON dict.

   Outputs:
     Whatever AddData returns for the payload (an empty response on success,
     a 400 error response when the data is invalid),
     403 response with error message if API authorization fails,
     400 response with error message if the "data" parameter is missing or
     empty.
   """
   datastore_hooks.SetPrivilegedRequest()

   try:
     api_auth.Authorize()
   except api_auth.ApiAuthException as auth_error:
     logging.error('Auth error: %s', auth_error)
     return request_handler.RequestHandlerReportError('User unauthorized.', 403)

   payload = request.values.get('data')
   if payload:
     return AddData(payload)

   return request_handler.RequestHandlerReportError(
       'Missing "data" parameter.', status=400)


 def AddData(data_str):
   """Parses uploaded JSON, validates it and queues the rows for processing.

   Args:
     data_str: A JSON string that encodes either a list of row dicts or a
         single Dashboard JSON v1.0 dict (a dict with a "chart_data" field).

   Returns:
     An empty response when the rows were queued (or when there was nothing
     to add), otherwise the 400 error response produced by
     request_handler.RequestHandlerReportError.
   """
   try:
     data = json.loads(data_str)
   except ValueError:
     return request_handler.RequestHandlerReportError(
         'Invalid JSON string.', status=400)

   try:
     if isinstance(data, dict):
       if data.get('chart_data'):
         data = _DashboardJsonToRawRows(data)
         if not data:
           return make_response('')  # No data to add, bail out.
       else:
         return request_handler.RequestHandlerReportError(
             'Data should be a list of rows or a Dashboard JSON v1.0 dict.',
             status=400)

     # Any other JSON value (number, string, null, ...) or a list holding
     # non-dict entries is a client error; reject it here instead of letting
     # it surface as an uncaught TypeError further down.
     if not isinstance(data, list):
       raise BadRequestError(
           'Data should be a list of rows or a Dashboard JSON v1.0 dict.')
     if not all(isinstance(row_dict, dict) for row_dict in data):
       raise BadRequestError('Each row should be a dict.')

     if data:
       # We only need to validate the row ID for one point, since all points
       # being handled by this upload should have the same row ID.
       last_added_entity = _GetLastAddedEntityForRow(data[0])
       _ValidateRowId(data[0], last_added_entity)

     for row_dict in data:
       ValidateRowDict(row_dict)
     _AddTasks(data)
     return make_response('')
   except BadRequestError as e:
     # If any of the data was invalid, abort immediately and return an error.
     return request_handler.RequestHandlerReportError(str(e), status=400)


 def _ValidateNameString(value, name):
   """Checks that |value| is a non-empty string without slashes.

   Args:
     value: The value to check.
     name: What the value represents; only used in the error messages.

   Raises:
     BadRequestError: |value| is empty, not a string, or contains a slash.
   """
   problem = None
   if not value:
     problem = 'No %s name given.' % name
   elif not isinstance(value, six.string_types):
     problem = 'Error: %s must be a string' % name
   elif '/' in value:
     problem = 'Illegal slash in %s' % name

   if problem is not None:
     raise BadRequestError(problem)


 def _ValidateDashboardJson(dash_json_dict):
   """Checks the top-level structure of a Dashboard JSON v1.0 dict.

   Args:
     dash_json_dict: A dashboard JSON v1.0 dict.

   Raises:
     AssertionError: The given argument wasn't a dict.
     BadRequestError: master, bot, point_id or chart_data is missing or
         invalid, or chart_data / charts / an individual chart isn't a dict.
   """
   assert isinstance(dash_json_dict, dict)
   # A Dashboard JSON dict should at least have all charts coming from the
   # same master, bot and rev. It can contain multiple charts, however.
   _ValidateNameString(dash_json_dict.get('master'), 'master')
   _ValidateNameString(dash_json_dict.get('bot'), 'bot')

   if not dash_json_dict.get('point_id'):
     raise BadRequestError('No point_id number given.')

   chart_data = dash_json_dict.get('chart_data')
   if not chart_data:
     raise BadRequestError('No chart data given.')
   # The lookups below call dict methods; reject other JSON types up front so
   # that malformed input is reported as a bad request rather than crashing.
   if not isinstance(chart_data, dict):
     raise BadRequestError('Expected chart_data to be a dict.')

   charts = chart_data.get('charts', {})
   if not isinstance(charts, dict):
     raise BadRequestError('Expected charts to be a dict.')

   for chart in charts.values():
     if not isinstance(chart, dict):
       raise BadRequestError('Expected be dict: %s' % str(chart))


 def _DashboardJsonToRawRows(dash_json_dict):
   """Converts a Dashboard JSON v1.0 dict into a list of row dicts.

   Every trace of every chart becomes one row dict in the format used for
   plain row uploads, so that data in the Telemetry Dashboard JSON format
   (go/telemetry-json) can go through the same processing as a list of rows.

   Args:
     dash_json_dict: A dashboard JSON v1.0 dict.

   Returns:
     A list of dicts, each of which represents a point. Traces whose value or
     error is NaN produce no row.

   Raises:
     AssertionError: The given argument wasn't a dict.
     BadRequestError: The content of the input wasn't valid.
   """
   _ValidateDashboardJson(dash_json_dict)

   suite = _TestSuiteName(dash_json_dict)

   chart_data = dash_json_dict.get('chart_data', {})
   charts = chart_data.get('charts', {})
   if not charts:
     # Nothing charted, so there are no points to add.
     return []

   # The 'trace' entry holds links to about:tracing traces rather than data
   # points. Take a copy for later lookups and remove it from the charts so it
   # isn't processed as a chart.
   trace_entry = charts.pop('trace', None)
   tracing_links = trace_entry.copy() if trace_entry is not None else None

   template = _MakeRowTemplate(dash_json_dict)
   description = chart_data.get('benchmark_description', '')
   from_ref_build = bool(dash_json_dict.get('is_ref'))

   converted = []
   for chart_name, traces in charts.items():
     for trace_name, trace in traces.items():
       # Deep copy, so that an a_tracing_uri set for one row doesn't leak into
       # the supplemental columns shared via the template.
       new_row = copy.deepcopy(template)
       flattened = _FlattenTrace(suite, chart_name, trace_name, trace,
                                 from_ref_build, tracing_links, description)

       # Telemetry may validly produce NaN values. Such rows are left out to
       # avoid getting into messy situations with alerts.
       if math.isnan(flattened['value']) or math.isnan(flattened['error']):
         continue

       uri = flattened['tracing_uri']
       if uri:
         new_row['supplemental_columns']['a_tracing_uri'] = uri
       new_row.update(flattened)
       converted.append(new_row)

   return converted


 def _TestSuiteName(dash_json_dict):
   """Returns the validated test suite name for a Dashboard JSON dict.

   A non-empty "test_suite_name" field takes precedence. When it is absent or
   empty, "benchmark_name" from the "chart_data" dict is used instead.

   Raises:
     BadRequestError: No name could be found, or the name isn't valid.
   """
   suite_name = dash_json_dict.get('test_suite_name')
   if not suite_name:
     try:
       suite_name = dash_json_dict['chart_data']['benchmark_name']
     except KeyError as e:
       six.raise_from(
           BadRequestError('Could not find test suite name. ' + str(e)), e)

   _ValidateNameString(suite_name, 'test_suite_name')
   return suite_name


 def _AddTasks(data):
   """Queues tasks that add the given points.

   The points are split into groups of _TASK_QUEUE_SIZE, one task per group,
   and the tasks are added in batches of taskqueue.MAX_TASKS_PER_ADD. The
   function returns once every batch add has completed.

   Args:
     data: A list of dictionaries, each of which represents one point.
   """
   tasks = [
       taskqueue.Task(
           url='/add_point_queue', params={'data': json.dumps(rows)})
       for rows in _Chunk(data, _TASK_QUEUE_SIZE)
   ]

   queue = taskqueue.Queue(_TASK_QUEUE_NAME)
   pending = []
   for batch in _Chunk(tasks, taskqueue.MAX_TASKS_PER_ADD):
     pending.append(queue.add_async(batch))

   # Block until each asynchronous add has finished.
   for future in pending:
     future.get_result()


 def _Chunk(items, chunk_size):
   """Breaks a long list into sub-lists of a particular size."""
   chunks = []
   for i in range(0, len(items), chunk_size):
     chunks.append(items[i:i + chunk_size])
   return chunks


 def _MakeRowTemplate(dash_json_dict):
   """Produces a template for rows created from a Dashboard JSON v1.0 dict.

   _DashboardJsonToRawRows adds metadata fields to every row that it creates.
   These include things like master, bot, point ID, versions, and other
   supplementary data. This method produces a dict containing this metadata
   to which row-specific information (like value and error) can be added.
   Some metadata needs to be transformed to conform to the v0 format, and this
   method is also responsible for that transformation.

   Some validation is deferred until after the input is converted to a list
   of row dicts, since revision format correctness is checked on a per-point
   basis.

   Args:
     dash_json_dict: A dashboard JSON v1.0 dict.

   Returns:
     A dict containing data to include in each row dict that is created from
     |dash_json_dict|.
   """
   row_template = dash_json_dict.copy()

   del row_template['chart_data']
   del row_template['point_id']

   row_template['revision'] = dash_json_dict['point_id']

   annotations = row_template['supplemental']
   versions = row_template['versions']

   del row_template['supplemental']
   del row_template['versions']
   row_template['supplemental_columns'] = {}
   supplemental = row_template['supplemental_columns']

   for annotation in annotations:
     supplemental['a_' + annotation] = annotations[annotation]

   for version in versions:
     supplemental['r_' + version] = versions[version]

   return row_template


 def _FlattenTrace(test_suite_name,
                   chart_name,
                   trace_name,
                   trace,
                   is_ref=False,
                   tracing_links=None,
                   benchmark_description=''):
   """Turns one trace of a dashboard JSON chart into row fields.

   The value and error come from _ExtractValueAndError. The remaining fields
   are ones that belong to the row dict in the plain row format but live in
   the trace (or alongside it) in the v1.0 format.

   Args:
     test_suite_name: The name of the test suite (benchmark).
     chart_name: The name of the chart to which this trace belongs. A name of
         the form "label@@chart" is rewritten to "chart/label".
     trace_name: The name of the passed trace.
     trace: A trace dict extracted from a dashboard JSON chart.
     is_ref: A boolean which indicates whether this trace comes from a
         reference build.
     tracing_links: A dictionary mapping trace names to about:tracing trace
         urls in cloud storage
     benchmark_description: A string documenting the benchmark suite to which
         this trace belongs.

   Returns:
     A dict with the keys test, value, error, units, tracing_uri and
     benchmark_description, plus higher_is_better when the trace specifies an
     improvement direction and unescaped_story_name when escaping changed the
     trace name.

   Raises:
     BadRequestError: The data wasn't valid.
   """
   if '@@' in chart_name:
     grouping_label, base_chart = chart_name.split('@@')
     chart_name = base_chart + '/' + grouping_label

   value, error = _ExtractValueAndError(trace)

   # Look up the about:tracing link in cloud storage for this trace, if any.
   # This uses the trace name as given, i.e. before it is escaped below.
   tracing_uri = None
   if tracing_links:
     if trace_name in tracing_links:
       link_info = tracing_links[trace_name]
       if 'cloud_url' in link_info:
         tracing_uri = link_info['cloud_url'].replace('\\/', '/')

   story_name = trace_name
   trace_name = histogram_helpers.EscapeName(trace_name)
   is_summary = trace_name == 'summary'

   # The summary trace stands for the chart itself; any other trace is a
   # subtest below the chart.
   subtest_name = chart_name if is_summary else chart_name + '/' + trace_name
   name = test_suite_name + '/' + subtest_name
   if is_ref:
     name += '/ref' if is_summary else '_ref'

   units = trace.get('units')
   if units is None:
     raise BadRequestError('Units must be specified in the chart data')

   flattened = {
       'test': name,
       'value': value,
       'error': error,
       'units': units,
       'tracing_uri': tracing_uri,
       'benchmark_description': benchmark_description,
   }

   if 'improvement_direction' in trace:
     direction = trace['improvement_direction']
     if direction is None:
       raise BadRequestError('improvement_direction must not be None')
     flattened['higher_is_better'] = _ImprovementDirectionToHigherIsBetter(
         direction)

   if story_name != trace_name:
     flattened['unescaped_story_name'] = story_name

   return flattened


 def _ExtractValueAndError(trace):
   """Returns the value and measure of error from a chartjson trace dict.

   Args:
     trace: A dict that has one "result" from a performance test, e.g. one
         "value" in a Telemetry test, with the keys "trace_type", "value", etc.

   Returns:
     A pair (value, error) where |value| is a float and |error| is some measure
     of variance used to show error bars; |error| could be None.

   Raises:
     BadRequestError: Data format was invalid.
   """
   trace_type = trace.get('type')

   if trace_type == 'scalar':
     value = trace.get('value')
     if value is None and trace.get('none_value_reason'):
       return float('nan'), 0
     try:
       return float(value), 0
     except Exception as e:  # pylint: disable=broad-except
       six.raise_from(
           BadRequestError('Expected scalar value, got: %r' % value), e)

   if trace_type == 'list_of_scalar_values':
     values = trace.get('values')
     if not isinstance(values, list) and values is not None:
       # Something else (such as a single scalar, or string) was given.
       raise BadRequestError('Expected list of scalar values, got: %r' % values)
     if not values or None in values:
       # None was included or values is None; this is not an error if there
       # is a reason.
       if trace.get('none_value_reason'):
         return float('nan'), float('nan')
       raise BadRequestError('Expected list of scalar values, got: %r' % values)
     if not all(_IsNumber(v) for v in values):
       raise BadRequestError('Non-number found in values list: %r' % values)
     value = math_utils.Mean(values)
     std = trace.get('std')
     if std is not None:
       error = std
     else:
       error = math_utils.StandardDeviation(values)
     return value, error

   if trace_type == 'histogram':
     return _GeomMeanAndStdDevFromHistogram(trace)

   raise BadRequestError('Invalid value type in chart object: %r' % trace_type)


 def _IsNumber(v):
   return isinstance(v, (float, int))


 def _GeomMeanAndStdDevFromHistogram(histogram):
   """Generates the geom. mean and std. dev. for a histogram.

   A histogram is a collection of numerical buckets with associated
   counts; a bucket can either represent a number of instances of a single
   value ('low'), or from within a range of values (in which case 'high' will
   specify the upper bound). We compute the statistics by treating the
   histogram analogously to a list of individual values, where the counts tell
   us how many of each value there are.

   Args:
     histogram: A histogram dict with a list 'buckets' of buckets.

   Returns:
     The geometric mean and standard deviation of the given histogram.
   """
   # Note: This code comes originally from
   # build/scripts/common/chromium_utils.py and was used initially for
   # processing histogram results on the buildbot side previously.
   if 'buckets' not in histogram:
     return 0.0, 0.0
   count = 0
   sum_of_logs = 0
   for bucket in histogram['buckets']:
     if 'high' in bucket:
       bucket['mean'] = (bucket['low'] + bucket['high']) / 2.0
     else:
       bucket['mean'] = bucket['low']
     if bucket['mean'] > 0:
       sum_of_logs += math.log(bucket['mean']) * bucket['count']
       count += bucket['count']

   if count == 0:
     return 0.0, 0.0

   sum_of_squares = 0
   geom_mean = math.exp(sum_of_logs / count)
   for bucket in histogram['buckets']:
     if bucket['mean'] > 0:
       sum_of_squares += (bucket['mean'] - geom_mean)**2 * bucket['count']
   return geom_mean, math.sqrt(sum_of_squares / count)


 def _ImprovementDirectionToHigherIsBetter(improvement_direction_str):
   """Converts an improvement direction string to a higher_is_better boolean.

   Args:
     improvement_direction_str: a string, either 'up' or 'down'.

   Returns:
     A boolean expressing the appropriate higher_is_better value.

   Raises:
     BadRequestError: if improvement_direction_str is invalid.
   """
   # We use improvement_direction if given. Otherwise, by not providing it here
   # we'll fall back to a default from dashboard.units_to_direction module.
   # TODO(eakuefner): Fail instead of falling back after fixing crbug.com/459450.
   if improvement_direction_str == 'up':
     return True
   if improvement_direction_str == 'down':
     return False
   raise BadRequestError('Invalid improvement direction string: ' +
                         improvement_direction_str)


 def _GetLastAddedEntityForRow(row):
   if not ('master' in row and 'bot' in row and 'test' in row):
     return None
   path = '%s/%s/%s' % (row['master'], row['bot'], row['test'].strip('/'))
   if len(path) > _MAX_TEST_PATH_LENGTH:
     return None

   try:
     last_added_revision_entity = ndb.Key('LastAddedRevision', path).get()
   except datastore_errors.BadRequestError:
     logging.warning('Datastore BadRequestError when getting %s', path)
     return None

   return last_added_revision_entity


 def ValidateRowDict(row):
   """Validates every field of a dictionary that represents one point.

   The row must name its master, bot and test, those must form a valid test
   path, and its properties must pass GetAndValidateRowProperties.

   Args:
     row: A dictionary which represents one point.

   Raises:
     BadRequestError: The input was not valid.
   """
   missing = [
       field for field in ('master', 'bot', 'test') if field not in row
   ]
   if missing:
     # Report the first missing field only.
     raise BadRequestError('No "%s" field in row dict.' % missing[0])

   _ValidateMasterBotTest(row['master'], row['bot'], row['test'])
   GetAndValidateRowProperties(row)


 def _ValidateMasterBotTest(master, bot, test):
   """Validates the master, bot, and test properties of a row dict.

   Args:
     master: The "master" value of the row.
     bot: The "bot" value of the row.
     test: The "test" value of the row, a slash-separated test name.

   Raises:
     BadRequestError: The test isn't a string, has fewer than two parts or
         more than graph_data.MAX_TEST_ANCESTORS parts, master or bot isn't a
         valid name, or the resulting test path isn't valid.
   """
   # The values come from uploaded JSON, so the test may be of any JSON type;
   # the string operations below would raise an AttributeError for
   # non-strings.
   if not isinstance(test, six.string_types):
     raise BadRequestError('Error: test must be a string')

   # Trailing and leading slashes in the test name are ignored.
   # The test name must consist of at least a test suite plus sub-test.
   test = test.strip('/')
   if '/' not in test:
     raise BadRequestError('Test name must have more than one part.')

   if len(test.split('/')) > graph_data.MAX_TEST_ANCESTORS:
     raise BadRequestError('Invalid test name: %s' % test)

   _ValidateNameString(master, 'master')
   _ValidateNameString(bot, 'bot')
   _ValidateTestPath('%s/%s/%s' % (master, bot, test))


 def _ValidateTestPath(test_path):
   """Validates a full test path and each of its slash-separated parts.

   Raises:
     BadRequestError: The path is too long, contains an asterisk, or one of
         its parts is not an acceptable name.
   """
   # The test path is used in TestContainer keys, so a path longer than the
   # maximum key length must not be created.
   path_length = len(test_path)
   if path_length > _MAX_TEST_PATH_LENGTH:
     raise BadRequestError('Test path too long: %s' % test_path)

   # Asterisks are reserved for test path patterns and so can't appear in
   # names.
   has_star = '*' in test_path
   if has_star:
     raise BadRequestError('Illegal asterisk in test name.')

   for part in test_path.split('/'):
     _ValidateTestPathPartName(part)


 def _ValidateTestPathPartName(name):
   """Checks whether a Master, Bot or TestMetadata name is OK."""
   # NDB Datastore doesn't allow key names to start and with "__" and "__".
   if name.startswith('__') and name.endswith('__'):
     raise BadRequestError(
         'Invalid name: "%s". Names cannot start and end with "__".' % name)


 def _ValidateRowId(row_dict, last_added_entity):
   """Checks whether the ID for a Row is OK.

   Args:
     row_dict: A dictionary with new point properties, including "revision".
     last_added_entity: The last previous added revision entity for the test,
         or None if there is none.

   Raises:
     BadRequestError: The revision is not acceptable for some reason.
   """
   row_id = GetAndValidateRowId(row_dict)

   # Get the last added revision number for this test.
   last_row_id = None
   if not last_added_entity:
     # Could be first point in test. The row has not been validated at this
     # point and may lack master/bot/test, so read them with .get(): this is
     # only a log line and must not fail with a KeyError before the missing
     # field can be reported as a bad request.
     test_path = '%s/%s/%s' % (row_dict.get('master'), row_dict.get('bot'),
                               row_dict.get('test'))
     logging.warning('Test %s has no last added revision entry.', test_path)
   else:
     last_row_id = last_added_entity.revision

   if not _IsAcceptableRowId(row_id, last_row_id):
     raise BadRequestError(
         'Invalid ID (revision) %d; compared to previous ID %s, it was larger '
         'or smaller by too much and must not be <= 0.' % (row_id, last_row_id))


 def _IsAcceptableRowId(row_id, last_row_id):
   """Checks whether the given row id (aka revision) is not too large or small.

   For each data series (i.e. TestMetadata entity), we assume that row IDs are
   monotonically increasing. On a given chart, points are sorted by these
   row IDs. This way, points can arrive out of order but still be shown
   correctly in the chart.

   However, sometimes a bot might start to use a different *type* of row ID;
   for example it might change from revision numbers or build numbers to
   timestamps, or from timestamps to build numbers. This causes a lot of
   problems, including points being put out of order.

   If a sender of data actually wants to switch to a different type of
   row ID, it would be much cleaner for them to start sending it under a new
   chart name.

   Args:
     row_id: The proposed Row entity id (usually sent as "revision")
     last_row_id: The previous Row id, or None if there were none previous.

   Returns:
     True if acceptable, False otherwise.
   """
   if row_id <= 0:
     return False
   if last_row_id is None:
     return True
   # Too big of a decrease.
   if row_id < 0.5 * last_row_id:
     return False
   # Too big of an increase.
   if row_id > 2 * last_row_id:
     return False
   return True


 def GetAndValidateRowId(row_dict):
   """Returns the integer ID for a new Row.

   This method is also responsible for validating the input fields related
   to making the new row ID.

   Args:
     row_dict: A dictionary obtained from the input JSON.

   Returns:
     An integer row ID, converted from the "revision" field.

   Raises:
     BadRequestError: The input wasn't formatted properly.
   """
   if 'revision' not in row_dict:
     raise BadRequestError('Required field "revision" missing.')
   try:
     return int(row_dict['revision'])
   except (ValueError, TypeError, OverflowError) as e:
     # OverflowError: int() of an infinite float, which json.loads produces
     # for an "Infinity" literal.
     raise BadRequestError(
         'Bad value for "revision", should be numerical.') from e


 def GetAndValidateRowProperties(row):
   """From the object received, make a dictionary of properties for a Row.

   This includes the default "value" and "error" columns as well as all
   supplemental columns, but it doesn't include "revision", and it doesn't
   include input fields that are properties of the parent TestMetadata, such as
   "units".

   This method is responsible for validating all properties that are to be
   properties of the new Row. Only a missing or non-numerical "value" is
   treated as an error; a bad "error" or "swarming_bot_id" is logged and left
   out.

   Args:
     row: A dictionary obtained from the input JSON.

   Returns:
     A dictionary of the properties and property values to set when creating
     a Row. This will include "value", plus "error" and "swarming_bot_id" when
     given and valid, as well as all valid supplemental columns.

   Raises:
     BadRequestError: The properties weren't formatted correctly.
   """
   columns = {}

   # Value and error must be floating point numbers. OverflowError is raised
   # by float() for integers too large to represent, which JSON can contain.
   if 'value' not in row:
     raise BadRequestError('No "value" given.')
   try:
     columns['value'] = float(row['value'])
   except (ValueError, TypeError, OverflowError) as e:
     six.raise_from(
         BadRequestError('Bad value for "value", should be numerical.'), e)
   if 'error' in row:
     try:
       error = float(row['error'])
       columns['error'] = error
     except (ValueError, TypeError, OverflowError):
       logging.warning('Bad value for "error".')
   if 'swarming_bot_id' in row:
     try:
       swarming_bot_id = str(row['swarming_bot_id'])
       columns['swarming_bot_id'] = swarming_bot_id
     except (ValueError, TypeError):
       logging.warning('Bad value for "swarming_bot_id".')
   columns.update(_GetSupplementalColumns(row))
   return columns


 def _GetSupplementalColumns(row):
   """Gets a dict of supplemental columns.

   If any columns are invalid, a warning is logged and they just aren't included,
   but no exception is raised.

   Individual rows may specify up to _MAX_NUM_COLUMNS extra data, revision,
   and annotation columns. These columns must follow formatting rules for
   their type. Invalid columns are dropped with a warning log, but the valid
   data will still be graphed.

   Args:
     row: A dict, possibly with the key "supplemental_columns", the value of
         which should be a dict.

   Returns:
     A dict of valid supplemental columns. Empty if "supplemental_columns" is
     absent or is not a dict.
   """
   supplemental = row.get('supplemental_columns', {})
   if not isinstance(supplemental, dict):
     # Keep the promise of not raising: treat a malformed container like any
     # other invalid optional data.
     logging.warning('Bad value for "supplemental_columns", should be a dict.')
     return {}

   columns = {}
   for (name, value) in supplemental.items():
     # Don't allow too many columns
     if len(columns) == _MAX_NUM_COLUMNS:
       logging.warning('Too many columns, some being dropped.')
       break
     value = _CheckSupplementalColumn(name, value)
     if value is None:
       continue
     # Empty strings and lists are still left out, but a number is valid data
     # even when it is zero and must not be discarded for being falsy.
     if value or isinstance(value, (int, float)):
       columns[name] = value
   return columns


 def _CheckSupplementalColumn(name, value):
   """Returns a possibly modified value for a supplemental column, or None.

   The two-character prefix of the column name decides how the value is
   checked:
     d_: data column; the value is converted to a float.
     r_: revision column; the value must look like a number, a four-part
         version number or a 40-character hex hash, and is converted to a
         string.
     a_: annotation column; any value is accepted, but one whose string form
         is longer than _STRING_COLUMN_MAX_LENGTH is truncated. A list is
         truncated in place, so the caller's list is shortened too.

   Args:
     name: The column name.
     value: The column value from the input JSON.

   Returns:
     The value to store, or None (after logging a warning) if the name or the
     value is not acceptable.
   """
   # Check length of column name.
   name = str(name)
   if len(name) > _MAX_COLUMN_NAME_LENGTH:
     logging.warning('Supplemental column name too long.')
     return None

   # The column name has a prefix which indicates type of value.
   if name[:2] not in ('d_', 'r_', 'a_'):
     logging.warning('Bad column name "%s", invalid prefix.', name)
     return None

   # The d_ prefix means "data column", intended to hold numbers.
   if name.startswith('d_'):
     try:
       value = float(value)
     except (ValueError, TypeError, OverflowError):
       # OverflowError: float() of an integer too large to represent, which
       # JSON input can contain.
       logging.warning('Bad value for column "%s", should be numerical.', name)
       return None

   # The r_ prefix means "revision", and the value should look like a number,
   # a version number, or a git commit hash.
   if name.startswith('r_'):
     revision_patterns = [
         r'^\d+$',
         r'^\d+\.\d+\.\d+\.\d+$',
         r'^[A-Fa-f0-9]{40}$',
     ]
     if (not value or len(str(value)) > _STRING_COLUMN_MAX_LENGTH
         or not any(re.match(p, str(value)) for p in revision_patterns)):
       logging.warning('Bad value for revision column "%s". Value: %s', name,
                       value)
       return None
     value = str(value)

   if name.startswith('a_'):
     # Annotation column, is typically a short string.
     # Bot_ID lists can be long, truncate if exceed max length
     if len(str(value)) > _STRING_COLUMN_MAX_LENGTH:
       logging.warning('Value for "%s" too long, truncated to max length %d.',
                       name,
                       _STRING_COLUMN_MAX_LENGTH)
       if isinstance(value, list):
         # Drop trailing items until the string form of the list fits.
         while len(str(value)) > _STRING_COLUMN_MAX_LENGTH:
           value.pop()
       elif isinstance(value, str):
         value = value[:_STRING_COLUMN_MAX_LENGTH]
       else:
         logging.warning('Value for "%s" is not truncatable', name)
         return None

   return value