| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "bufio" |
| "context" |
| "io/ioutil" |
| "os" |
| "path/filepath" |
| |
| "cloud.google.com/go/bigquery" |
| "github.com/maruel/subcommands" |
| "golang.org/x/sync/errgroup" |
| "google.golang.org/protobuf/encoding/protojson" |
| |
| "go.chromium.org/luci/auth" |
| "go.chromium.org/luci/common/cli" |
| "go.chromium.org/luci/common/data/text" |
| "go.chromium.org/luci/common/errors" |
| "go.chromium.org/luci/common/logging" |
| |
| "infra/rts/filegraph/git" |
| "infra/rts/presubmit/eval" |
| ) |
| |
| func cmdCreateModel(authOpt *auth.Options) *subcommands.Command { |
| return &subcommands.Command{ |
| UsageLine: `create-model -model-dir <path>`, |
| ShortDesc: "create a model to be used by select subcommand", |
| LongDesc: "Create a model to be used by select subcommand", |
| CommandRun: func() subcommands.CommandRun { |
| r := &createModelRun{authOpt: authOpt} |
| r.Flags.StringVar(&r.modelDir, "model-dir", "", text.Doc(` |
| Path to the directory where to write the model files. |
| The directory will be created if it does not exist. |
| `)) |
| |
| r.Flags.StringVar(&r.checkout, "checkout", "", "Path to a src.git checkout") |
| r.Flags.IntVar(&r.loadOptions.MaxCommitSize, "fg-max-commit-size", 100, text.Doc(` |
| Maximum number of files touched by a commit. |
| Commits that exceed this limit are ignored. |
| The rationale is that large commits provide a weak signal of file |
| relatedness and are expensive to process, O(N^2). |
| `)) |
| |
| r.ev.LogProgressInterval = 100 |
| r.ev.RegisterFlags(&r.Flags) |
| return r |
| }, |
| } |
| } |
| |
| type createModelRun struct { |
| baseCommandRun |
| modelDir string |
| |
| checkout string |
| loadOptions git.LoadOptions |
| fg *git.Graph |
| |
| ev eval.Eval |
| |
| authOpt *auth.Options |
| bqClient *bigquery.Client |
| } |
| |
| func (r *createModelRun) validateFlags() error { |
| if err := r.ev.ValidateFlags(); err != nil { |
| return err |
| } |
| switch { |
| case r.modelDir == "": |
| return errors.New("-model-dir is required") |
| case r.checkout == "": |
| return errors.New("-checkout is required") |
| default: |
| return nil |
| } |
| } |
| |
| func (r *createModelRun) Run(a subcommands.Application, args []string, env subcommands.Env) int { |
| ctx := cli.GetContext(a, r, env) |
| if len(args) != 0 { |
| return r.done(errors.New("unexpected positional arguments")) |
| } |
| if err := r.validateFlags(); err != nil { |
| return r.done(err) |
| } |
| |
| var err error |
| if r.bqClient, err = newBQClient(ctx, auth.NewAuthenticator(ctx, auth.InteractiveLogin, *r.authOpt)); err != nil { |
| return r.done(errors.Annotate(err, "failed to create BigQuery client").Err()) |
| } |
| |
| return r.done(r.writeModel(ctx, r.modelDir)) |
| } |
| |
| // writeModel writes the model files to the directory. |
| func (r *createModelRun) writeModel(ctx context.Context, dir string) error { |
| // Ensure model dir exists. |
| if err := os.MkdirAll(dir, 0777); err != nil { |
| return errors.Annotate(err, "failed to create model dir at %q", dir).Err() |
| } |
| |
| eg, ctx := errgroup.WithContext(ctx) |
| defer eg.Wait() |
| |
| eg.Go(func() error { |
| err := r.writeFileGraphModel(ctx, filepath.Join(dir, "git-file-graph")) |
| return errors.Annotate(err, "failed to write file graph model").Err() |
| }) |
| |
| eg.Go(func() error { |
| err := r.writeTestFileSet(ctx, filepath.Join(dir, "test-files.jsonl")) |
| return errors.Annotate(err, "failed to write test file set").Err() |
| }) |
| |
| return eg.Wait() |
| } |
| |
| // writeFileGraphModel writes the file graph model to the model dir. |
| func (r *createModelRun) writeFileGraphModel(ctx context.Context, dir string) error { |
| var err error |
| if r.fg, err = git.Load(ctx, r.checkout, r.loadOptions); err != nil { |
| return err |
| } |
| |
| if err := os.MkdirAll(dir, 0777); err != nil { |
| return err |
| } |
| |
| eg, ctx := errgroup.WithContext(ctx) |
| defer eg.Wait() |
| |
| eg.Go(func() error { |
| err := r.writeFileGraph(ctx, filepath.Join(dir, "graph.fg")) |
| return errors.Annotate(err, "failed to write file graph").Err() |
| }) |
| |
| eg.Go(func() error { |
| err := r.writeStrategyConfig(ctx, filepath.Join(dir, "config.json")) |
| return errors.Annotate(err, "failed to write strategy config").Err() |
| }) |
| |
| return eg.Wait() |
| } |
| |
| // writeFileGraph writes the graph file. |
| func (r *createModelRun) writeFileGraph(ctx context.Context, fileName string) error { |
| f, err := os.Create(fileName) |
| if err != nil { |
| return err |
| } |
| defer f.Close() |
| bufW := bufio.NewWriter(f) |
| if err := r.fg.Write(bufW); err != nil { |
| return err |
| } |
| return bufW.Flush() |
| } |
| |
| // writeStrategyConfig computes and writes the GitBasedStrategyConfig. |
| func (r *createModelRun) writeStrategyConfig(ctx context.Context, fileName string) error { |
| // Compute max distance for change-log-based strategy. |
| logging.Infof(ctx, "Computing stats for the change-log-based strategy...") |
| changeLogRes, err := r.ev.EvaluateSafety(ctx, r.evalStrategy(&git.EdgeReader{ |
| ChangeLogDistanceFactor: 1, |
| })) |
| if err != nil { |
| return err |
| } |
| |
| // Compute max distance for file-structured-based strategy. |
| logging.Infof(ctx, "Computing stats for the file-structure-based strategy...") |
| fsRes, err := r.ev.EvaluateSafety(ctx, r.evalStrategy(&git.EdgeReader{ |
| FileStructureDistanceFactor: 1, |
| })) |
| if err != nil { |
| return err |
| } |
| |
| // Use both strategies with normalized distances. |
| logging.Infof(ctx, "Evaluating the combined strategy...") |
| er := &git.EdgeReader{ |
| // Normalize distances, but also use the scale [0, 100] for readability. |
| ChangeLogDistanceFactor: 100 / float64(changeLogRes.RejectionClosestDistanceStats.MaxNonInf), |
| FileStructureDistanceFactor: 100 / float64(fsRes.RejectionClosestDistanceStats.MaxNonInf), |
| } |
| res, err := r.ev.Run(ctx, r.evalStrategy(er)) |
| if err != nil { |
| return err |
| } |
| |
| eval.PrintResults(res, os.Stdout, 0.97) |
| cfgBytes, err := protojson.Marshal(&GitBasedStrategyConfig{ |
| ChangeLogDistanceFactor: float32(er.ChangeLogDistanceFactor), |
| FileStructureDistanceFactor: float32(er.FileStructureDistanceFactor), |
| Thresholds: res.Thresholds, |
| }) |
| if err != nil { |
| return err |
| } |
| return ioutil.WriteFile(fileName, cfgBytes, 0777) |
| } |
| |
| // writeTestFileSet writes the test file set in Chromium to the file. |
| // It skips tests that match neverSkipTestFileRegexp. |
| // |
| // The file format is JSON Lines of TestFile protobufs. |
| func (r *createModelRun) writeTestFileSet(ctx context.Context, fileName string) error { |
| f, err := os.Create(fileName) |
| if err != nil { |
| return err |
| } |
| defer f.Close() |
| bufW := bufio.NewWriter(f) |
| |
| if err := writeTestFiles(ctx, r.bqClient, bufW); err != nil { |
| return err |
| } |
| |
| if err := bufW.Flush(); err != nil { |
| return err |
| } |
| return f.Close() |
| } |