blob: b1fb8a27ba672e2e94abe2efad4f2966bb3d9f98 [file] [log] [blame]
// Copyright 2021 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"context"
"fmt"
"hash/fnv"
"reflect"
"strings"
"time"
"golang.org/x/sync/errgroup"
"go.chromium.org/luci/common/clock"
"go.chromium.org/luci/common/data/stringset"
"go.chromium.org/luci/common/errors"
"go.chromium.org/luci/common/sync/parallel"
"go.chromium.org/luci/common/tsmon"
"go.chromium.org/luci/common/tsmon/target"
tsmonpb "go.chromium.org/luci/common/tsmon/ts_mon_proto"
"go.chromium.org/luci/common/tsmon/types"
"go.chromium.org/luci/gae/service/datastore"
"go.chromium.org/luci/buildbucket/appengine/model"
pb "go.chromium.org/luci/buildbucket/proto"
"go.chromium.org/luci/buildbucket/protoutil"
)
// Builder is the tsmon target describing a single LUCI Builder, together
// with the Buildbucket service instance that reported it.
type Builder struct {
	// Project is the LUCI project that owns the Builder.
	Project string

	// Bucket is the name of the bucket the Builder belongs to.
	Bucket string

	// Builder is the Builder's own name.
	Builder string

	// ServiceName is the Cloud project ID of the Buildbucket service.
	ServiceName string

	// JobName is the Cloud service module ID of the Buildbucket service.
	JobName string

	// InstanceID identifies the worker instance that reported the Builder
	// to metrics.
	InstanceID string
}
// Clone returns an independent copy of the Builder.
//
// Every field of Builder is a string, so copying the struct value is
// enough to obtain a copy that shares no mutable state with the receiver.
func (b *Builder) Clone() types.Target {
	dup := new(Builder)
	*dup = *b
	return dup
}
// Type returns the target type identification of Builder: the name
// "buildbucket.Builder" paired with the reflected *Builder type.
func (b *Builder) Type() types.TargetType {
	return types.TargetType{
		Name: "buildbucket.Builder",
		Type: reflect.TypeOf((*Builder)(nil)),
	}
}
// Hash computes a 64-bit FNV-1a hash over all fields of the Builder.
//
// The fields are hashed in declaration order and each one is terminated
// with a NUL byte. Without the terminator, adjacent fields would run
// together and distinct targets (e.g. Project "ab" + Bucket "c" versus
// Project "a" + Bucket "bc") would feed identical bytes to the hash.
func (b *Builder) Hash() uint64 {
	h := fnv.New64a()
	fields := [...]string{
		b.Project,
		b.Bucket,
		b.Builder,
		b.ServiceName,
		b.JobName,
		b.InstanceID,
	}
	for _, f := range fields {
		// hash.Hash.Write is documented to never return an error.
		h.Write([]byte(f))
		h.Write([]byte{0})
	}
	return h.Sum64()
}
// PopulateProto replaces d.RootLabels with one root label per target
// field, in the order: project, bucket, builder, service_name, job_name,
// instance_id.
func (b *Builder) PopulateProto(d *tsmonpb.MetricsCollection) {
	pairs := []struct{ key, value string }{
		{"project", b.Project},
		{"bucket", b.Bucket},
		{"builder", b.Builder},
		{"service_name", b.ServiceName},
		{"job_name", b.JobName},
		{"instance_id", b.InstanceID},
	}
	labels := make([]*tsmonpb.MetricsCollection_RootLabels, 0, len(pairs))
	for _, p := range pairs {
		labels = append(labels, target.RootLabel(p.key, p.value))
	}
	d.RootLabels = labels
}
// ReportBuilderMetrics computes and reports the per-Builder metrics.
//
// It first resets V2.BuilderPresence, then loads the set of buckets that
// have a swarming config, and finally walks all BuilderStat keys. For each
// key it marks the builder as present and queues two tasks on a work pool:
// one reporting the max age of scheduled builds and one reporting build
// counts. The returned error is whatever parallel.WorkPool yields for the
// queued tasks, including a failure of the key iteration itself.
func ReportBuilderMetrics(ctx context.Context) error {
	// Wipe previously reported presence values so that builders which no
	// longer exist stop being reported.
	tsmon.GetState(ctx).Store().Reset(ctx, V2.BuilderPresence)

	swarmingBuckets, err := fetchLUCIBuckets(ctx)
	if err != nil {
		return errors.Annotate(err, "fetching LUCI buckets w/ swarming config").Err()
	}

	return parallel.WorkPool(256, func(work chan<- func() error) {
		// visit handles a single BuilderStat key.
		visit := func(key *datastore.Key) error {
			project, bucket, builder := mustParseBuilderStatID(key.StringID())
			bctx := WithBuilder(ctx, project, bucket, builder)

			// V1 metrics use the "luci.$project.$bucket" bucket name when
			// the bucket config has a swarming config, and the plain bucket
			// name otherwise.
			v1Bucket := bucket
			if swarmingBuckets.Has(protoutil.FormatBucketID(project, bucket)) {
				v1Bucket = legacyBucketName(project, bucket)
			}

			V2.BuilderPresence.Set(bctx, true)

			work <- func() error {
				ageErr := reportMaxAge(bctx, project, bucket, v1Bucket, builder)
				return errors.Annotate(ageErr, "reportMaxAge").Err()
			}
			work <- func() error {
				countErr := reportBuildCount(bctx, project, bucket, v1Bucket, builder)
				return errors.Annotate(countErr, "reportBuildCount").Err()
			}
			return nil
		}

		query := datastore.NewQuery(model.BuilderStatKind)
		batchErr := datastore.RunBatch(ctx, 256, query, visit)
		if batchErr == nil {
			return
		}
		// Surface the iteration failure through the pool like any other
		// task error.
		work <- func() error {
			return errors.Annotate(batchErr, "datastore.RunBatch").Err()
		}
	})
}
// mustParseBuilderStatID splits a BuilderStat ID of the form
// "project:bucket:builder" into its three components.
//
// It panics with an error if the ID does not consist of exactly three
// colon-separated parts.
func mustParseBuilderStatID(id string) (project, bucket, builder string) {
	fields := strings.Split(id, ":")
	if len(fields) == 3 {
		return fields[0], fields[1], fields[2]
	}
	panic(fmt.Errorf("invalid BuilderStatID: %s", id))
}
// fetchLUCIBuckets scans all Bucket entities and returns the IDs (as
// formatted by protoutil.FormatBucketID) of those whose config has a
// swarming section.
//
// The set collected so far is returned together with any error reported
// by the query.
func fetchLUCIBuckets(ctx context.Context) (stringset.Set, error) {
	ids := stringset.Set{}

	// collect records the bucket ID when the bucket has a swarming config.
	collect := func(bkt *model.Bucket) error {
		if bkt.Proto.GetSwarming() == nil {
			return nil
		}
		ids.Add(protoutil.FormatBucketID(bkt.Parent.StringID(), bkt.ID))
		return nil
	}

	query := datastore.NewQuery(model.BucketKind)
	err := datastore.RunBatch(ctx, 128, query, collect)
	return ids, err
}
// reportMaxAge computes and reports the age, in seconds, of the oldest
// non-experimental SCHEDULED build of the given builder.
//
// Two queries run concurrently, one for builds with never_leased == false
// and one for never_leased == true, each fetching the build with the
// earliest create_time. The results feed three metric cells:
//
//   - V1.MaxAgeScheduled with must_be_never_leased == false: the age of the
//     oldest build regardless of never_leased.
//   - V1.MaxAgeScheduled with must_be_never_leased == true: the age of the
//     oldest never-leased build.
//   - V2.MaxAgeScheduled: same value as the first V1 cell.
//
// An age of 0 is reported when no matching build exists. legacyBucket is
// the bucket name used for the V1 metric fields. The first query error, if
// any, is returned and nothing is reported.
func reportMaxAge(ctx context.Context, project, bucket, legacyBucket, builder string) error {
	q := datastore.NewQuery(model.BuildKind).
		Eq("bucket_id", protoutil.FormatBucketID(project, bucket)).
		Eq("tags", "builder:"+builder).
		Eq("status_v2", pb.Status_SCHEDULED).
		Eq("experimental", false).
		Order("create_time").
		Limit(1)

	// The group context deliberately does not shadow ctx: errgroup cancels
	// it as soon as Wait returns, and the clock lookup and metric updates
	// below must not run on an already-cancelled context.
	eg, qctx := errgroup.WithContext(ctx)

	var leasedCT, neverLeasedCT time.Time
	// oldestCreateTime returns a task that stores into out the create time
	// of the oldest build matching q with the given never_leased value. out
	// is left untouched when there is no such build.
	oldestCreateTime := func(neverLeased bool, out *time.Time) func() error {
		oq := q.Eq("never_leased", neverLeased)
		return func() error {
			var builds []*model.Build
			if err := datastore.GetAll(qctx, oq, &builds); err != nil {
				return err
			}
			if len(builds) > 0 {
				*out = builds[0].CreateTime
			}
			return nil
		}
	}
	eg.Go(oldestCreateTime(false, &leasedCT))
	eg.Go(oldestCreateTime(true, &neverLeasedCT))
	if err := eg.Wait(); err != nil {
		return err
	}

	now := clock.Now(ctx)
	var maxAge, neverLeasedMaxAge float64
	if !neverLeasedCT.IsZero() {
		neverLeasedMaxAge = now.Sub(neverLeasedCT).Seconds()
	}
	// In V1, the metric value of a stream with "must_be_never_leased == false"
	// is the age of the oldest build w/ "must_be_never_leased == true|false".
	//
	// That is, it's the age of the oldest build regardless of the value
	// in must_be_never_leased.
	if !leasedCT.IsZero() {
		maxAge = now.Sub(leasedCT).Seconds()
	}
	if maxAge < neverLeasedMaxAge {
		maxAge = neverLeasedMaxAge
	}

	V1.MaxAgeScheduled.Set(ctx, maxAge, legacyBucket, builder, false /*must_be_never_leased*/)
	V1.MaxAgeScheduled.Set(ctx, neverLeasedMaxAge, legacyBucket, builder, true /*must_be_never_leased*/)
	V2.MaxAgeScheduled.Set(ctx, maxAge)
	return nil
}
// reportBuildCount computes and reports the number of non-experimental
// builds of the given builder that are in SCHEDULED and in STARTED status.
//
// The two counts are queried concurrently. Each is reported to
// V1.BuildCount (keyed by legacyBucket, builder and status name) and to
// V2.BuildCount (keyed by status name). The first query error, if any, is
// returned and nothing is reported.
func reportBuildCount(ctx context.Context, project, bucket, legacyBucket, builder string) error {
	q := datastore.NewQuery(model.BuildKind).
		Eq("bucket_id", protoutil.FormatBucketID(project, bucket)).
		Eq("experimental", false).
		Eq("tags", "builder:"+builder)

	// The group context deliberately does not shadow ctx: errgroup cancels
	// it as soon as Wait returns, and the metric updates below must not run
	// on an already-cancelled context.
	eg, qctx := errgroup.WithContext(ctx)

	var nScheduled, nStarted int64
	// countWithStatus returns a task that stores into out the number of
	// builds matching q that have the given status.
	countWithStatus := func(status pb.Status, out *int64) func() error {
		sq := q.Eq("status_v2", status)
		return func() error {
			n, err := datastore.Count(qctx, sq)
			*out = n
			return err
		}
	}
	eg.Go(countWithStatus(pb.Status_SCHEDULED, &nScheduled))
	eg.Go(countWithStatus(pb.Status_STARTED, &nStarted))
	if err := eg.Wait(); err != nil {
		return err
	}

	scheduled := pb.Status_name[int32(pb.Status_SCHEDULED)]
	started := pb.Status_name[int32(pb.Status_STARTED)]
	V1.BuildCount.Set(ctx, nScheduled, legacyBucket, builder, scheduled)
	V1.BuildCount.Set(ctx, nStarted, legacyBucket, builder, started)
	V2.BuildCount.Set(ctx, nScheduled, scheduled)
	V2.BuildCount.Set(ctx, nStarted, started)
	return nil
}