blob: fe435c3e134f603201d734a03937f65e4f8cc972 [file] [log] [blame]
#!/bin/sh
# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
set -eu
cd "$(dirname $0)"
if ! (which bq) > /dev/null; then
echo "Please install 'bq' from gcloud SDK"
echo " https://cloud.google.com/sdk/install"
exit 1
fi
if ! (which bqschemaupdater) > /dev/null; then
echo "Please install 'bqschemaupdater' from Chrome's infra.git"
echo " Checkout infra.git then run: eval \`./go/env.py\`"
exit 1
fi
if [ $# != 1 ]; then
echo "usage: setup_bigquery.sh <instanceid>"
echo ""
echo "Pass one argument which is the instance name"
exit 1
fi
APPID=$1
echo "- Make sure the BigQuery API is enabled for the project:"
# It is enabled by default for new projects, but it wasn't for older projects.
gcloud services enable --project ${APPID} bigquery-json.googleapis.com
# TODO(maruel): The stock role "roles/bigquery.dataEditor" grants too much
# rights. Create a new custom role with only access
# "bigquery.tables.updateData".
#echo "- Create a BQ write-only role account:"
# https://cloud.google.com/iam/docs/understanding-custom-roles
# https://cloud.google.com/iam/docs/granting-roles-to-service-accounts
# https://cloud.google.com/bigquery/docs/access-control
echo "- Grant access to the AppEngine app to the role account:"
gcloud projects add-iam-policy-binding ${APPID} \
--member serviceAccount:${APPID}@appspot.gserviceaccount.com \
--role roles/bigquery.dataEditor
echo "- Create the dataset:"
echo ""
echo " Warning: On first 'bq' invocation, it'll try to find out default"
echo " credentials and will ask to select a default app; just press enter to"
echo " not select a default."
if ! (bq --location=US mk --dataset \
--description 'Swarming statistics' ${APPID}:swarming); then
echo ""
echo "Dataset creation failed. Assuming the dataset already exists. At worst"
echo "the following command will fail."
fi
echo "- Populate the BigQuery schema:"
echo ""
echo " Warning: On first 'bqschemaupdater' invocation, it'll request default"
echo " credentials which is stored independently than 'bq'."
cd proto/api
# 1.5 years ~= 540 days = (360 + 180) * 24 hours.
if ! (bqschemaupdater -force \
-partitioning-expiration 12960h \
-message swarming.v1.BotEvent \
-table ${APPID}.swarming.bot_events \
-partitioning-field event_time); then
echo ""
echo ""
echo "Oh no! You may need to restart from scratch. You can do so with:"
echo ""
echo " bq rm ${APPID}:swarming.bot_events"
echo ""
echo "and run this script again."
exit 1
fi
if ! (bqschemaupdater -force \
-partitioning-expiration 12960h \
-message swarming.v1.TaskRequest \
-table ${APPID}.swarming.task_requests \
-partitioning-field create_time); then
echo ""
echo ""
echo "Oh no! You may need to restart from scratch. You can do so with:"
echo ""
echo " bq rm ${APPID}:swarming.task_requests"
echo ""
echo "and run this script again."
exit 1
fi
if ! (bqschemaupdater -force \
-partitioning-expiration 12960h \
-message swarming.v1.TaskResult \
-table ${APPID}.swarming.task_results_run \
-partitioning-field end_time); then
echo ""
echo ""
echo "Oh no! You may need to restart from scratch. You can do so with:"
echo ""
echo " bq rm ${APPID}:swarming.task_results_run"
echo ""
echo "and run this script again."
exit 1
fi
if ! (bqschemaupdater -force \
-partitioning-expiration 12960h \
-message swarming.v1.TaskResult \
-table ${APPID}.swarming.task_results_summary \
-partitioning-field end_time); then
echo ""
echo ""
echo "Oh no! You may need to restart from scratch. You can do so with:"
echo ""
echo " bq rm ${APPID}:swarming.task_results_summary"
echo ""
echo "and run this script again."
exit 1
fi
cd -
echo "- Create BigQuery views:"
echo ""
echo " - swarming.bot_events_delta"
QUERY='SELECT
DATE(event_time) AS day,
TIMESTAMP_TRUNC(event_time, SECOND) AS time,
ROUND(
TIMESTAMP_DIFF(
event_time,
CAST(
LEAD(event_time) OVER(PARTITION BY bot.bot_id ORDER BY event_time DESC)
AS TIMESTAMP),
MICROSECOND)*0.000001,
3) AS since_last,
ROUND(
TIMESTAMP_DIFF(
CAST(
LAG(event_time) OVER(PARTITION BY bot.bot_id ORDER BY event_time DESC)
AS TIMESTAMP),
event_time,
MICROSECOND)*0.000001,
3) AS until_next,
ARRAY(
SELECT FORMAT("%s:%s", d.key, v) FROM e.bot.dimensions AS d, d.values AS v)
AS dimensions_flat,
*
FROM `'${APPID}'.swarming.bot_events` AS e
WHERE event_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 22 DAY)
ORDER BY event_time DESC'
DESC="Augmented view of last 3 weeks with deltas between events and \
dimensions_flat"
if !(bq mk --use_legacy_sql=false --view "${QUERY}" \
--description "{$DESC}" \
--project_id ${APPID} swarming.bot_events_delta); then
echo ""
echo "The view already exists. You can delete it with:"
echo ""
echo " bq rm ${APPID}:swarming.bot_events_delta"
echo ""
echo "and run this script again."
# Don't fail here.
fi
echo ""
echo " - swarming.task_requests_flat"
QUERY='SELECT
DATE(create_time) AS day,
TIMESTAMP_TRUNC(create_time, SECOND) AS time,
ARRAY(
SELECT DISTINCT FORMAT("%s:%s", dimensions.key, value)
FROM
UNNEST(r.task_slices) AS slice,
UNNEST(slice.properties.dimensions) AS dimensions,
UNNEST(dimensions.values) AS value)
AS dimensions_flat,
*
FROM `'${APPID}'.swarming.task_requests` AS r
WHERE create_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 22 DAY)
ORDER BY create_time DESC'
DESC="Augmented view of last 3 weeks with dimensions_flat"
if !(bq mk --use_legacy_sql=false --view "${QUERY}" \
--description "{$DESC}" \
--project_id ${APPID} swarming.task_requests_flat); then
echo ""
echo "The view already exists. You can delete it with:"
echo ""
echo " bq rm ${APPID}:swarming.task_requests_flat"
echo ""
echo "and run this script again."
# Don't fail here.
fi
echo ""
echo " - swarming.task_results_run_flat"
QUERY='SELECT
DATE(end_time) AS day,
TIMESTAMP_TRUNC(end_time, SECOND) AS time,
ARRAY(
SELECT DISTINCT FORMAT("%s:%s", dimensions.key, value)
FROM
UNNEST(r.request.task_slices[
OFFSET(r.current_task_slice)].properties.dimensions) AS dimensions,
UNNEST(dimensions.values) AS value)
AS dimensions_flat,
*
FROM `'${APPID}'.swarming.task_results_run` AS r
WHERE end_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 22 DAY)
ORDER BY end_time DESC'
DESC="Augmented view of last 3 weeks with dimensions_flat"
if !(bq mk --use_legacy_sql=false --view "${QUERY}" \
--description "{$DESC}" \
--project_id ${APPID} swarming.task_results_run_flat); then
echo ""
echo "The view already exists. You can delete it with:"
echo ""
echo " bq rm ${APPID}:swarming.task_results_run_flat"
echo ""
echo "and run this script again."
# Don't fail here.
fi
echo ""
echo " - swarming.task_results_summary_flat"
QUERY='SELECT
DATE(end_time) AS day,
TIMESTAMP_TRUNC(end_time, SECOND) AS time,
ARRAY(
SELECT DISTINCT FORMAT("%s:%s", dimensions.key, value)
FROM
UNNEST(r.request.task_slices[
OFFSET(r.current_task_slice)].properties.dimensions) AS dimensions,
UNNEST(dimensions.values) AS value)
AS dimensions_flat,
*
FROM `'${APPID}'.swarming.task_results_summary` AS r
WHERE end_time >= TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 22 DAY)
ORDER BY end_time DESC'
DESC="Augmented view of last 3 weeks with dimensions_flat"
if !(bq mk --use_legacy_sql=false --view "${QUERY}" \
--description "{$DESC}" \
--project_id ${APPID} swarming.task_results_summary_flat); then
echo ""
echo "The view already exists. You can delete it with:"
echo ""
echo " bq rm ${APPID}:swarming.task_results_summary_flat"
echo ""
echo "and run this script again."
# Don't fail here.
fi