# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""URL endpoint to allow Buildbot slaves to post data to the dashboard."""
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import copy
import json
import logging
import math
import re
import six
from google.appengine.api import datastore_errors
from google.appengine.api import taskqueue
from google.appengine.ext import ndb
from dashboard.api import api_auth
from dashboard.common import request_handler
from dashboard.common import datastore_hooks
from dashboard.common import histogram_helpers
from dashboard.common import math_utils
from dashboard.models import graph_data
from flask import request, make_response
_TASK_QUEUE_NAME = 'new-points-queue'
# Number of rows to process per task queue task. This limits the task size
# and execution time (Limits: 100KB object size and 10 minutes execution time).
_TASK_QUEUE_SIZE = 32
# Max length for a Row property name.
_MAX_COLUMN_NAME_LENGTH = 25
# Maximum length of a value for a string property.
_STRING_COLUMN_MAX_LENGTH = 400
# Maximum number of properties for a Row.
_MAX_NUM_COLUMNS = 30
# Maximum length for a test path. This limit is required because the test path
# is used as the string ID for TestContainer (the parent in the datastore for
# Row entities), and datastore imposes a maximum string ID length.
_MAX_TEST_PATH_LENGTH = 500
class BadRequestError(Exception):
"""An error indicating that a 400 response status should be returned."""
def AddPointPost():
"""Validates data parameter and add task to queue to process points.
The row data comes from a "data" parameter, which is a JSON encoding of a
list of dictionaries, each of which represents one performance result
(one point in a graph) and associated data.
[
{
"master": "ChromiumPerf",
"bot": "xp-release-dual-core",
"test": "dromaeo/dom/modify",
"revision": 123456789,
"value": 24.66,
"error": 2.33,
"units": "ms",
"supplemental_columns": {
"d_median": 24234.12,
"d_mean": 23.553,
"r_webkit": 423340,
...
},
...
},
...
]
In general, the required fields are "master", "bot", "test" (which together
form the test path which identifies the series that this point belongs to),
and "revision" and "value", which are the X and Y values for the point.
This API also supports the Dashboard JSON v1.0 format (go/telemetry-json),
the first producer of which is Telemetry. Telemetry provides lightweight
serialization of values it produces, as JSON. If a dashboard JSON object is
passed, it will be a single dict rather than a list, with the test,
value, error, and units fields replaced by a chart_data field containing a
Chart JSON dict (see design doc, and example below). Dashboard JSON v1.0 is
processed by converting it into rows (which can be viewed as Dashboard JSON
v0).
{
"master": "ChromiumPerf",
<other row fields>,
"chart_data": {
"foo": {
"bar": {
"type": "scalar",
"name": "foo.bar",
"units": "ms",
"value": 4.2,
},
"summary": {
"type": "list_of_scalar_values",
"name": "foo",
"units": "ms",
"values": [4.2, 5.7, 6.8],
"std": 1.30512,
},
},
}
Request parameters:
data: JSON encoding of a list of dictionaries.
Outputs:
    Empty 200 response if successful,
    200 response with warning message if optional data is invalid,
    403 response with error message if sender IP is not white-listed,
    400 response with error message if required data is invalid,
    500 response with error message otherwise.
"""
datastore_hooks.SetPrivilegedRequest()
try:
api_auth.Authorize()
except api_auth.ApiAuthException as error:
logging.error('Auth error: %s', error)
return request_handler.RequestHandlerReportError('User unauthorized.', 403)
data_str = request.values.get('data')
if not data_str:
return request_handler.RequestHandlerReportError(
'Missing "data" parameter.', status=400)
return AddData(data_str)
def AddData(data_str):
try:
data = json.loads(data_str)
except ValueError:
return request_handler.RequestHandlerReportError(
'Invalid JSON string.', status=400)
try:
if isinstance(data, dict):
if data.get('chart_data'):
data = _DashboardJsonToRawRows(data)
if not data:
return make_response('') # No data to add, bail out.
else:
return request_handler.RequestHandlerReportError(
'Data should be a list of rows or a Dashboard JSON v1.0 dict.',
status=400)
if data:
# We only need to validate the row ID for one point, since all points
# being handled by this upload should have the same row ID.
last_added_entity = _GetLastAddedEntityForRow(data[0])
_ValidateRowId(data[0], last_added_entity)
for row_dict in data:
ValidateRowDict(row_dict)
_AddTasks(data)
return make_response('')
except BadRequestError as e:
# If any of the data was invalid, abort immediately and return an error.
return request_handler.RequestHandlerReportError(str(e), status=400)
def _ValidateNameString(value, name):
if not value:
raise BadRequestError('No %s name given.' % name)
if not isinstance(value, six.string_types):
raise BadRequestError('Error: %s must be a string' % name)
if '/' in value:
raise BadRequestError('Illegal slash in %s' % name)
def _ValidateDashboardJson(dash_json_dict):
assert isinstance(dash_json_dict, dict)
# A Dashboard JSON dict should at least have all charts coming from the
# same master, bot and rev. It can contain multiple charts, however.
_ValidateNameString(dash_json_dict.get('master'), 'master')
_ValidateNameString(dash_json_dict.get('bot'), 'bot')
if not dash_json_dict.get('point_id'):
raise BadRequestError('No point_id number given.')
if not dash_json_dict.get('chart_data'):
raise BadRequestError('No chart data given.')
charts = dash_json_dict.get('chart_data', {}).get('charts', {})
for _, v in charts.items():
if not isinstance(v, dict):
      raise BadRequestError('Expected a dict: %s' % str(v))
def _DashboardJsonToRawRows(dash_json_dict):
"""Formats a Dashboard JSON dict as a list of row dicts.
  To let the dashboard accept the Telemetry Dashboard JSON format as per
  go/telemetry-json, this function converts a Dashboard JSON dict into a list
  of row dicts, which the caller then passes to _AddTasks.
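
  For instance (an illustrative sketch), the chart_data example in the
  AddPointPost docstring above yields one row whose "test" ends in
  "foo/bar" with value 4.2, plus a summary row whose "test" ends in "foo"
  with the mean of "values" as its value.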
Args:
dash_json_dict: A dashboard JSON v1.0 dict.
Returns:
A list of dicts, each of which represents a point.
Raises:
AssertionError: The given argument wasn't a dict.
BadRequestError: The content of the input wasn't valid.
"""
_ValidateDashboardJson(dash_json_dict)
test_suite_name = _TestSuiteName(dash_json_dict)
chart_data = dash_json_dict.get('chart_data', {})
charts = chart_data.get('charts', {})
if not charts:
return [] # No charts implies no data to add.
  # Links to about:tracing traces are listed under 'trace'; if they exist,
  # copy them to a separate dictionary and delete them from the chartjson
  # so that we don't try to process them as data points.
tracing_links = None
if 'trace' in charts:
tracing_links = charts['trace'].copy()
del charts['trace']
row_template = _MakeRowTemplate(dash_json_dict)
benchmark_description = chart_data.get('benchmark_description', '')
is_ref = bool(dash_json_dict.get('is_ref'))
rows = []
for chart in charts:
for trace in charts[chart]:
      # Deep-copy the template so that each row gets its own
      # supplemental_columns dict; otherwise setting a_tracing_uri on one
      # row would affect every row built from the template.
row = copy.deepcopy(row_template)
specific_vals = _FlattenTrace(test_suite_name, chart, trace,
charts[chart][trace], is_ref, tracing_links,
benchmark_description)
# Telemetry may validly produce rows that represent a value of NaN. To
# avoid getting into messy situations with alerts, we do not add such
# rows to be processed.
if not (math.isnan(specific_vals['value'])
or math.isnan(specific_vals['error'])):
if specific_vals['tracing_uri']:
row['supplemental_columns']['a_tracing_uri'] = specific_vals[
'tracing_uri']
row.update(specific_vals)
rows.append(row)
return rows
def _TestSuiteName(dash_json_dict):
"""Extracts a test suite name from Dashboard JSON.
The dashboard JSON may contain a field "test_suite_name". If this is not
present or it is None, the dashboard will fall back to using "benchmark_name"
in the "chart_data" dict.
"""
name = None
if dash_json_dict.get('test_suite_name'):
name = dash_json_dict['test_suite_name']
else:
try:
name = dash_json_dict['chart_data']['benchmark_name']
except KeyError as e:
six.raise_from(
BadRequestError('Could not find test suite name. ' + str(e)), e)
_ValidateNameString(name, 'test_suite_name')
return name
def _AddTasks(data):
"""Puts tasks on queue for adding data.
Args:
data: A list of dictionaries, each of which represents one point.
"""
task_list = []
for data_sublist in _Chunk(data, _TASK_QUEUE_SIZE):
task_list.append(
taskqueue.Task(
url='/add_point_queue', params={'data': json.dumps(data_sublist)}))
queue = taskqueue.Queue(_TASK_QUEUE_NAME)
futures = [
queue.add_async(t) for t in _Chunk(task_list, taskqueue.MAX_TASKS_PER_ADD)
]
for f in futures:
f.get_result()
def _Chunk(items, chunk_size):
"""Breaks a long list into sub-lists of a particular size."""
chunks = []
for i in range(0, len(items), chunk_size):
chunks.append(items[i:i + chunk_size])
return chunks
def _MakeRowTemplate(dash_json_dict):
"""Produces a template for rows created from a Dashboard JSON v1.0 dict.
_DashboardJsonToRawRows adds metadata fields to every row that it creates.
These include things like master, bot, point ID, versions, and other
supplementary data. This method produces a dict containing this metadata
to which row-specific information (like value and error) can be added.
Some metadata needs to be transformed to conform to the v0 format, and this
method is also responsible for that transformation.
Some validation is deferred until after the input is converted to a list
of row dicts, since revision format correctness is checked on a per-point
basis.
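
  As an illustrative sketch (made-up values), an input with
  "point_id": 123, "versions": {"chrome": "53.0.2785.63"} and
  "supplemental": {"os": "linux"} produces a template containing
  "revision": 123 and "supplemental_columns" with
  "r_chrome": "53.0.2785.63" and "a_os": "linux".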
Args:
dash_json_dict: A dashboard JSON v1.0 dict.
Returns:
A dict containing data to include in each row dict that is created from
|dash_json_dict|.
"""
row_template = dash_json_dict.copy()
del row_template['chart_data']
del row_template['point_id']
row_template['revision'] = dash_json_dict['point_id']
  annotations = row_template.pop('supplemental', {})
  versions = row_template.pop('versions', {})
row_template['supplemental_columns'] = {}
supplemental = row_template['supplemental_columns']
for annotation in annotations:
supplemental['a_' + annotation] = annotations[annotation]
for version in versions:
supplemental['r_' + version] = versions[version]
return row_template
def _FlattenTrace(test_suite_name,
chart_name,
trace_name,
trace,
is_ref=False,
tracing_links=None,
benchmark_description=''):
"""Takes a trace dict from dashboard JSON and readies it for display.
Traces can be either scalars or lists; if scalar we take the value directly;
if list we average the values and compute their standard deviation. We also
extract fields that are normally part of v0 row dicts that are uploaded
using add_point but are actually part of traces in the v1.0 format.
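
  For example (illustrative names), a trace "modify" in chart "dom" of
  suite "dromaeo" produces the test path "dromaeo/dom/modify", while the
  special trace name "summary" produces just "dromaeo/dom"; for a
  reference build these become "dromaeo/dom/modify_ref" and
  "dromaeo/dom/ref" respectively.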
Args:
test_suite_name: The name of the test suite (benchmark).
chart_name: The name of the chart to which this trace belongs.
trace_name: The name of the passed trace.
trace: A trace dict extracted from a dashboard JSON chart.
is_ref: A boolean which indicates whether this trace comes from a
reference build.
tracing_links: A dictionary mapping trace names to about:tracing trace
      urls in cloud storage.
benchmark_description: A string documenting the benchmark suite to which
this trace belongs.
Returns:
A dict containing units, value, and error for this trace.
Raises:
BadRequestError: The data wasn't valid.
"""
if '@@' in chart_name:
grouping_label, chart_name = chart_name.split('@@')
chart_name = chart_name + '/' + grouping_label
value, error = _ExtractValueAndError(trace)
  # If there is a link to an about:tracing trace in cloud storage for this
  # trace_name, record it so it can be attached to the row.
tracing_uri = None
if (tracing_links and trace_name in tracing_links
and 'cloud_url' in tracing_links[trace_name]):
tracing_uri = tracing_links[trace_name]['cloud_url'].replace('\\/', '/')
story_name = trace_name
trace_name = histogram_helpers.EscapeName(trace_name)
if trace_name == 'summary':
subtest_name = chart_name
else:
subtest_name = chart_name + '/' + trace_name
name = test_suite_name + '/' + subtest_name
if trace_name == 'summary' and is_ref:
name += '/ref'
elif trace_name != 'summary' and is_ref:
name += '_ref'
units = trace.get('units')
if units is None:
raise BadRequestError('Units must be specified in the chart data')
row_dict = {
'test': name,
'value': value,
'error': error,
'units': units,
'tracing_uri': tracing_uri,
'benchmark_description': benchmark_description,
}
if 'improvement_direction' in trace:
improvement_direction_str = trace['improvement_direction']
if improvement_direction_str is None:
raise BadRequestError('improvement_direction must not be None')
row_dict['higher_is_better'] = _ImprovementDirectionToHigherIsBetter(
improvement_direction_str)
if story_name != trace_name:
row_dict['unescaped_story_name'] = story_name
return row_dict
def _ExtractValueAndError(trace):
"""Returns the value and measure of error from a chartjson trace dict.
Args:
trace: A dict that has one "result" from a performance test, e.g. one
"value" in a Telemetry test, with the keys "trace_type", "value", etc.
Returns:
A pair (value, error) where |value| is a float and |error| is some measure
of variance used to show error bars; |error| could be None.
Raises:
BadRequestError: Data format was invalid.
"""
trace_type = trace.get('type')
if trace_type == 'scalar':
value = trace.get('value')
if value is None and trace.get('none_value_reason'):
return float('nan'), 0
try:
return float(value), 0
except Exception as e: # pylint: disable=broad-except
six.raise_from(
BadRequestError('Expected scalar value, got: %r' % value), e)
if trace_type == 'list_of_scalar_values':
values = trace.get('values')
if not isinstance(values, list) and values is not None:
# Something else (such as a single scalar, or string) was given.
raise BadRequestError('Expected list of scalar values, got: %r' % values)
if not values or None in values:
# None was included or values is None; this is not an error if there
# is a reason.
if trace.get('none_value_reason'):
return float('nan'), float('nan')
raise BadRequestError('Expected list of scalar values, got: %r' % values)
if not all(_IsNumber(v) for v in values):
raise BadRequestError('Non-number found in values list: %r' % values)
value = math_utils.Mean(values)
std = trace.get('std')
if std is not None:
error = std
else:
error = math_utils.StandardDeviation(values)
return value, error
if trace_type == 'histogram':
return _GeomMeanAndStdDevFromHistogram(trace)
raise BadRequestError('Invalid value type in chart object: %r' % trace_type)
def _IsNumber(v):
return isinstance(v, (float, int))
def _GeomMeanAndStdDevFromHistogram(histogram):
"""Generates the geom. mean and std. dev. for a histogram.
A histogram is a collection of numerical buckets with associated
  counts; a bucket can represent either a number of instances of a single
  value ('low') or a number of instances within a range of values (in which
  case 'high' specifies the upper bound). We compute the statistics by
  treating the
histogram analogously to a list of individual values, where the counts tell
us how many of each value there are.
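
  As an illustrative example, {'buckets': [{'low': 2, 'count': 1},
  {'low': 2, 'high': 6, 'count': 1}]} has bucket means 2 and 4, so the
  geometric mean is sqrt(2 * 4), about 2.83.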
Args:
histogram: A histogram dict with a list 'buckets' of buckets.
Returns:
The geometric mean and standard deviation of the given histogram.
"""
  # Note: This code originally comes from
  # build/scripts/common/chromium_utils.py, where it was used for
  # processing histogram results on the buildbot side.
if 'buckets' not in histogram:
return 0.0, 0.0
count = 0
sum_of_logs = 0
for bucket in histogram['buckets']:
if 'high' in bucket:
bucket['mean'] = (bucket['low'] + bucket['high']) / 2.0
else:
bucket['mean'] = bucket['low']
if bucket['mean'] > 0:
sum_of_logs += math.log(bucket['mean']) * bucket['count']
count += bucket['count']
if count == 0:
return 0.0, 0.0
sum_of_squares = 0
geom_mean = math.exp(sum_of_logs / count)
for bucket in histogram['buckets']:
if bucket['mean'] > 0:
sum_of_squares += (bucket['mean'] - geom_mean)**2 * bucket['count']
return geom_mean, math.sqrt(sum_of_squares / count)
def _ImprovementDirectionToHigherIsBetter(improvement_direction_str):
"""Converts an improvement direction string to a higher_is_better boolean.
Args:
improvement_direction_str: a string, either 'up' or 'down'.
Returns:
A boolean expressing the appropriate higher_is_better value.
Raises:
BadRequestError: if improvement_direction_str is invalid.
"""
# We use improvement_direction if given. Otherwise, by not providing it here
# we'll fall back to a default from dashboard.units_to_direction module.
# TODO(eakuefner): Fail instead of falling back after fixing crbug.com/459450.
if improvement_direction_str == 'up':
return True
if improvement_direction_str == 'down':
return False
raise BadRequestError('Invalid improvement direction string: ' +
improvement_direction_str)
def _GetLastAddedEntityForRow(row):
if not ('master' in row and 'bot' in row and 'test' in row):
return None
path = '%s/%s/%s' % (row['master'], row['bot'], row['test'].strip('/'))
if len(path) > _MAX_TEST_PATH_LENGTH:
return None
try:
last_added_revision_entity = ndb.Key('LastAddedRevision', path).get()
except datastore_errors.BadRequestError:
logging.warning('Datastore BadRequestError when getting %s', path)
return None
return last_added_revision_entity
def ValidateRowDict(row):
"""Checks all fields in the input dictionary.
Args:
row: A dictionary which represents one point.
Raises:
BadRequestError: The input was not valid.
"""
required_fields = ['master', 'bot', 'test']
for field in required_fields:
if field not in row:
raise BadRequestError('No "%s" field in row dict.' % field)
_ValidateMasterBotTest(row['master'], row['bot'], row['test'])
GetAndValidateRowProperties(row)
def _ValidateMasterBotTest(master, bot, test):
"""Validates the master, bot, and test properties of a row dict."""
# Trailing and leading slashes in the test name are ignored.
# The test name must consist of at least a test suite plus sub-test.
test = test.strip('/')
if '/' not in test:
raise BadRequestError('Test name must have more than one part.')
if len(test.split('/')) > graph_data.MAX_TEST_ANCESTORS:
raise BadRequestError('Invalid test name: %s' % test)
_ValidateNameString(master, 'master')
_ValidateNameString(bot, 'bot')
_ValidateTestPath('%s/%s/%s' % (master, bot, test))
def _ValidateTestPath(test_path):
"""Checks whether all the parts of the test path are valid."""
# A test with a test path length over the max key length shouldn't be
# created, since the test path is used in TestContainer keys.
if len(test_path) > _MAX_TEST_PATH_LENGTH:
raise BadRequestError('Test path too long: %s' % test_path)
# Stars are reserved for test path patterns, so they can't be used in names.
if '*' in test_path:
raise BadRequestError('Illegal asterisk in test name.')
for name in test_path.split('/'):
_ValidateTestPathPartName(name)
def _ValidateTestPathPartName(name):
"""Checks whether a Master, Bot or TestMetadata name is OK."""
  # NDB Datastore doesn't allow key names to start and end with "__".
if name.startswith('__') and name.endswith('__'):
raise BadRequestError(
'Invalid name: "%s". Names cannot start and end with "__".' % name)
def _ValidateRowId(row_dict, last_added_entity):
"""Checks whether the ID for a Row is OK.
Args:
row_dict: A dictionary with new point properties, including "revision".
    last_added_entity: The LastAddedRevision entity previously stored for
      this test, or None if there is none.
Raises:
BadRequestError: The revision is not acceptable for some reason.
"""
row_id = GetAndValidateRowId(row_dict)
# Get the last added revision number for this test.
last_row_id = None
if not last_added_entity:
master, bot, test = row_dict['master'], row_dict['bot'], row_dict['test']
test_path = '%s/%s/%s' % (master, bot, test)
# Could be first point in test.
logging.warning('Test %s has no last added revision entry.', test_path)
else:
last_row_id = last_added_entity.revision
if not _IsAcceptableRowId(row_id, last_row_id):
    raise BadRequestError(
        'Invalid ID (revision) %d; it must be positive, and compared to the '
        'previous ID %s it must not be larger or smaller by more than a '
        'factor of two.' % (row_id, last_row_id))
def _IsAcceptableRowId(row_id, last_row_id):
"""Checks whether the given row id (aka revision) is not too large or small.
For each data series (i.e. TestMetadata entity), we assume that row IDs are
monotonically increasing. On a given chart, points are sorted by these
row IDs. This way, points can arrive out of order but still be shown
correctly in the chart.
However, sometimes a bot might start to use a different *type* of row ID;
for example it might change from revision numbers or build numbers to
timestamps, or from timestamps to build numbers. This causes a lot of
problems, including points being put out of order.
If a sender of data actually wants to switch to a different type of
row ID, it would be much cleaner for them to start sending it under a new
chart name.
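
  Concretely (with made-up numbers), if the last row ID was 400000, any
  new ID from 200000 through 800000 is accepted, while 100000 or 1000000
  would be rejected as a probable change of row ID type.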
Args:
    row_id: The proposed Row entity ID (usually sent as "revision").
last_row_id: The previous Row id, or None if there were none previous.
Returns:
True if acceptable, False otherwise.
"""
if row_id <= 0:
return False
if last_row_id is None:
return True
# Too big of a decrease.
if row_id < 0.5 * last_row_id:
return False
# Too big of an increase.
if row_id > 2 * last_row_id:
return False
return True
def GetAndValidateRowId(row_dict):
"""Returns the integer ID for a new Row.
This method is also responsible for validating the input fields related
to making the new row ID.
Args:
row_dict: A dictionary obtained from the input JSON.
Returns:
An integer row ID.
Raises:
BadRequestError: The input wasn't formatted properly.
"""
if 'revision' not in row_dict:
raise BadRequestError('Required field "revision" missing.')
try:
return int(row_dict['revision'])
except (ValueError, TypeError) as e:
raise BadRequestError(
'Bad value for "revision", should be numerical.') from e
def GetAndValidateRowProperties(row):
"""From the object received, make a dictionary of properties for a Row.
This includes the default "value" and "error" columns as well as all
supplemental columns, but it doesn't include "revision", and it doesn't
include input fields that are properties of the parent TestMetadata, such as
"units".
This method is responsible for validating all properties that are to be
properties of the new Row.
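
  For instance (an illustrative sketch), a row {"value": 24.66,
  "error": 2.33, "revision": 123456, "units": "ms"} yields
  {"value": 24.66, "error": 2.33}; "revision" and "units" are
  deliberately left out.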
Args:
row: A dictionary obtained from the input JSON.
Returns:
A dictionary of the properties and property values to set when creating
a Row. This will include "value" and "error" as well as all supplemental
columns.
Raises:
BadRequestError: The properties weren't formatted correctly.
"""
columns = {}
# Value and error must be floating point numbers.
if 'value' not in row:
raise BadRequestError('No "value" given.')
try:
columns['value'] = float(row['value'])
except (ValueError, TypeError) as e:
six.raise_from(
BadRequestError('Bad value for "value", should be numerical.'), e)
if 'error' in row:
try:
error = float(row['error'])
columns['error'] = error
except (ValueError, TypeError):
logging.warning('Bad value for "error".')
if 'swarming_bot_id' in row:
try:
swarming_bot_id = str(row['swarming_bot_id'])
columns['swarming_bot_id'] = swarming_bot_id
except (ValueError, TypeError):
logging.warning('Bad value for "swarming_bot_id".')
columns.update(_GetSupplementalColumns(row))
return columns
def _GetSupplementalColumns(row):
"""Gets a dict of supplemental columns.
  Individual rows may specify up to _MAX_NUM_COLUMNS extra data, revision,
  and annotation columns, and these columns must follow the formatting rules
  for their type. Invalid columns are dropped with a logged warning, but no
  exception is raised and the valid data is still graphed.
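
  For example (illustrative values), {"supplemental_columns":
  {"d_median": 24234.12, "r_webkit": 423340, "a_os": "linux",
  "x_bad": 1}} keeps the first three columns and drops "x_bad" with a
  logged warning.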
Args:
row: A dict, possibly with the key "supplemental_columns", the value of
which should be a dict.
Returns:
A dict of valid supplemental columns.
"""
columns = {}
for (name, value) in row.get('supplemental_columns', {}).items():
# Don't allow too many columns
if len(columns) == _MAX_NUM_COLUMNS:
logging.warning('Too many columns, some being dropped.')
break
value = _CheckSupplementalColumn(name, value)
if value:
columns[name] = value
return columns
def _CheckSupplementalColumn(name, value):
"""Returns a possibly modified value for a supplemental column, or None."""
# Check length of column name.
name = str(name)
if len(name) > _MAX_COLUMN_NAME_LENGTH:
logging.warning('Supplemental column name too long.')
return None
  # The column name has a prefix which indicates the type of the value.
if name[:2] not in ('d_', 'r_', 'a_'):
logging.warning('Bad column name "%s", invalid prefix.', name)
return None
# The d_ prefix means "data column", intended to hold numbers.
if name.startswith('d_'):
try:
value = float(value)
except (ValueError, TypeError):
logging.warning('Bad value for column "%s", should be numerical.', name)
return None
# The r_ prefix means "revision", and the value should look like a number,
# a version number, or a git commit hash.
if name.startswith('r_'):
    revision_patterns = [
        r'^\d+$',  # Revision or build number, e.g. 423340.
        r'^\d+\.\d+\.\d+\.\d+$',  # Dotted version number, e.g. 53.0.2785.63.
        r'^[A-Fa-f0-9]{40}$',  # Full 40-character git commit hash.
    ]
if (not value or len(str(value)) > _STRING_COLUMN_MAX_LENGTH
or not any(re.match(p, str(value)) for p in revision_patterns)):
logging.warning('Bad value for revision column "%s". Value: %s', name,
value)
return None
value = str(value)
if name.startswith('a_'):
    # Annotation columns typically hold short strings.
    # Bot ID lists can be long; truncate them if they exceed the max length.
if len(str(value)) > _STRING_COLUMN_MAX_LENGTH:
logging.warning('Value for "%s" too long, truncated to max length %d.',
name,
_STRING_COLUMN_MAX_LENGTH)
if isinstance(value, list):
while len(str(value)) > _STRING_COLUMN_MAX_LENGTH:
value.pop()
elif isinstance(value, str):
value = value[:_STRING_COLUMN_MAX_LENGTH]
else:
logging.warning('Value for "%s" is not truncatable', name)
return None
return value