// Copyright 2019 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package crash contains utilities common to tests that use crash_reporter and
// crash_sender.
package crash
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/shirou/gopsutil/process"
"chromiumos/tast/errors"
"chromiumos/tast/fsutil"
"chromiumos/tast/local/session"
"chromiumos/tast/testing"
)
const (
crashTestInProgressDir = "/run/crash_reporter"
// crashTestInProgressFile is a special control file that tells crash_reporter
// to act normally during a crash test. Usually, crash_reporter is being told
// (by /mnt/stateful_partition/etc/collect_chrome_crashes) to be more
// aggressive about gathering crash data so that we can debug other, non-
// crash_reporter tests more easily.
crashTestInProgressFile = "crash-test-in-progress"
// anomalyDetectorReadyFile is an indicator that the anomaly detector
// has started and can detect any new anomalies.
anomalyDetectorReadyFile = "anomaly-detector-ready"
// mockConsentFile is a special control file that tells crash_reporter and
// crash_sender to act as if the user has given consent for crash collection
// and uploading.
mockConsentFile = "mock-consent"
// senderPausePath is the path to the file whose existence indicates that
// crash_sender should be paused.
senderPausePath = "/var/lib/crash_sender_paused"
// senderProcName is the name of the crash_sender process.
senderProcName = "crash_sender"
// EarlyCrashDir is the directory where system crashes are stored in the absence of persistent storage.
EarlyCrashDir = "/run/crash_reporter/crash"
// SystemCrashDir is the directory where system crash reports go.
SystemCrashDir = "/var/spool/crash"
// systemCrashStash is a directory to stash pre-existing system crashes during crash tests.
systemCrashStash = "/var/spool/crash.real"
// LocalCrashDir is the directory where user crash reports go.
LocalCrashDir = "/home/chronos/crash"
// localCrashStash is a directory to stash pre-existing user crashes during crash tests.
localCrashStash = "/home/chronos/crash.real"
// UserCrashDir is the directory where crash reports of the currently logged-in user go.
UserCrashDir = "/home/chronos/user/crash"
// userCrashStash is a directory to stash pre-existing crash reports of the currently logged-in user during crash tests.
userCrashStash = "/home/chronos/user/crash.real"
// ClobberCrashDir is a directory where crash reports after an FS clobber go.
ClobberCrashDir = "/mnt/stateful_partition/reboot_vault/crash"
// clobberCrashStash is a directory used to stash pre-existing crash reports after an FS clobber. Used in crash tests.
clobberCrashStash = "/mnt/stateful_partition/reboot_vault/crash.real"
// userCrashDirs is used for finding the directory name containing a hash for the currently logged-in user,
// in order to compare it with the crash reporter log.
userCrashDirs = "/home/chronos/u-*/crash"
// FilterInPath is the path to the filter-in file.
FilterInPath = "/run/crash_reporter/filter-in"
// FilterOutPath is the path to the filter-out file.
FilterOutPath = "/run/crash_reporter/filter-out"
// testInProgressPath is the path to a file containing the name of the
// currently-running test, if any.
testInProgressPath = "/run/crash_reporter/test-in-prog"
// BIOSExt is the extension for bios crash files.
BIOSExt = ".bios_log"
// CoreExt is the extension for core files.
CoreExt = ".core"
// MinidumpExt is the extension for minidump crash files.
MinidumpExt = ".dmp"
// LogExt is the extension for log files containing additional information that are written by crash_reporter.
LogExt = ".log"
// InfoExt is the extension for info files.
InfoExt = ".info"
// ProclogExt is the extension for proclog files.
ProclogExt = ".proclog"
// KCrashExt is the extension for log files created by kernel warnings and crashes.
KCrashExt = ".kcrash"
// GPUStateExt is the extension for GPU state files written by crash_reporter.
GPUStateExt = ".i915_error_state.log.xz"
// MetadataExt is the extension for metadata files written by crash collectors and read by crash_sender.
MetadataExt = ".meta"
// CompressedTxtExt is an extension on the compressed text files written by crash_reporter.
CompressedTxtExt = ".txt.gz"
// CompressedLogExt is an extension on the compressed log files written by crash_reporter.
CompressedLogExt = ".log.gz"
// DevCoredumpExt is an extension for device coredump files.
DevCoredumpExt = ".devcore.gz"
// ECCrashExt is the extension for EC crash dumps.
ECCrashExt = ".eccrash"
// JavaScriptStackExt is the extension for JavaScript stacks.
JavaScriptStackExt = ".js_stack"
// ChromeVerboseConsentFlags provides the flags to enable verbose logging about consent.
ChromeVerboseConsentFlags = "--vmodule=stats_reporting_controller=1,autotest_private_api=1"
// FilterInIgnoreAllCrashes is a value to put in the filter-in file if
// you wish to ignore all crashes that happen during a test.
FilterInIgnoreAllCrashes = "none"
)
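
// A minimal sketch (not taken from an existing test) of how the filter-in
// constants above might be used: a test that wants crash_reporter to ignore
// every crash can write FilterInIgnoreAllCrashes to FilterInPath. The 0644
// file mode here is an assumption for illustration only.
//
//	if err := ioutil.WriteFile(FilterInPath, []byte(FilterInIgnoreAllCrashes), 0644); err != nil {
//		// Handle the write failure.
//	}
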
var (
markTestInProgressVar = testing.RegisterVarString(
"crash.markTestInProgress", // The variable controls whether the test-in-prog file will be created.
// The default value is "true": create the test-in-prog file by default.
// When the var is set to "false", the test-in-prog file will not be created.
"true",
"The variable that controls whether the test-in-prog file will be created")
testInProgressPrefixVar = testing.RegisterVarString(
"crash.testInProgressPrefix", // The string prefixed to the test case name in the test-in-prog file.
"", // By default no prefix is added.
"The string that will be prefixed to the test case name in the test-in-prog file")
)
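
// The runtime variables above are typically overridden on the tast command
// line, for example (illustrative invocation):
//
//	tast run -var=crash.markTestInProgress=false <target> crash.SomeTest
//
// "crash.SomeTest" is a placeholder test name, not a real test.
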
// DefaultDirs returns all standard directories to which crashes are written.
func DefaultDirs() []string {
return []string{SystemCrashDir, LocalCrashDir, UserCrashDir}
}
// isCrashFile returns true if filename could be the name of a file generated by
// crashes or crash_reporter.
func isCrashFile(filename string) bool {
knownExts := []string{
BIOSExt,
CoreExt,
MinidumpExt,
LogExt,
ProclogExt,
InfoExt,
KCrashExt,
GPUStateExt,
MetadataExt,
CompressedTxtExt,
CompressedLogExt,
DevCoredumpExt,
ECCrashExt,
JavaScriptStackExt,
}
for _, ext := range knownExts {
if strings.HasSuffix(filename, ext) {
return true
}
}
return false
}
// GetCrashes returns the paths of all files in dirs generated in response to crashes.
// Nonexistent directories are skipped.
func GetCrashes(dirs ...string) ([]string, error) {
var crashFiles []string
for _, dir := range dirs {
files, err := ioutil.ReadDir(dir)
if os.IsNotExist(err) {
continue
} else if err != nil {
return nil, err
}
for _, fi := range files {
if isCrashFile(fi.Name()) {
crashFiles = append(crashFiles, filepath.Join(dir, fi.Name()))
}
}
}
return crashFiles, nil
}
// GetCrashDir gives the path to the crash directory for the given username.
func GetCrashDir(username string) (string, error) {
if username == "root" || username == "crash" {
return SystemCrashDir, nil
}
p, err := filepath.Glob(userCrashDirs)
if err != nil {
// This only happens when userCrashDirs is malformed.
return "", errors.Wrapf(err, "failed to list files with pattern [%s]", userCrashDirs)
}
if len(p) == 0 {
return LocalCrashDir, nil
}
if len(p) > 1 {
return "", errors.Errorf("wrong number of users logged in; got %d, want 1 or 0", len(p))
}
return p[0], nil
}
// GetDaemonStoreCrashDirs gives the paths to the daemon store crash directories for the currently active sessions.
func GetDaemonStoreCrashDirs(ctx context.Context) ([]string, error) {
sessionManager, err := session.NewSessionManager(ctx)
if err != nil {
return []string{}, errors.Wrap(err, "couldn't start session manager")
}
sessions, err := sessionManager.RetrieveActiveSessions(ctx)
if err != nil {
return []string{}, errors.Wrap(err, "couldn't retrieve active sessions")
}
var ret []string
for _, userhash := range sessions {
ret = append(ret, fmt.Sprintf("/home/root/%s/crash", userhash))
}
return ret, nil
}
// RegexesNotFound is an error type, used to indicate that
// WaitForCrashFiles didn't find matches for all of the regexes.
type RegexesNotFound struct {
// Missing lists all the regexes that weren't matched.
Missing []string
// Files lists all the files that were checked against the regexes.
Files []string
// PartialMatches gives all the regexes that were matched and the files that
// matched them.
PartialMatches map[string][]string
// Dirs lists all directories where files are searched.
Dirs []string
}
// Error returns a string describing the error. The classic Error function for
// the error interface.
func (e RegexesNotFound) Error() string {
return fmt.Sprintf("timed out while waiting for crash files: no file matched %s (dirs %s) (found %s)",
strings.Join(e.Missing, ", "), strings.Join(e.Dirs, ", "), strings.Join(e.Files, ", "))
}
// waitForCrashFilesOptions is the set of options for the WaitForCrashFiles
// function. External users can manipulate it via the WaitForCrashFilesOpt-returning
// functions below.
type waitForCrashFilesOptions struct {
timeout time.Duration
optionalRegexes []string
}
// WaitForCrashFilesOpt is a self-referential function that can be used to configure WaitForCrashFiles.
// See https://commandcenter.blogspot.com.au/2014/01/self-referential-functions-and-design.html
// for details about this pattern.
type WaitForCrashFilesOpt func(w *waitForCrashFilesOptions)
// Timeout returns a WaitForCrashFilesOpt that sets the timeout of WaitForCrashFiles
// to the indicated duration.
func Timeout(timeout time.Duration) WaitForCrashFilesOpt {
return func(w *waitForCrashFilesOptions) {
w.timeout = timeout
}
}
// OptionalRegexes instructs WaitForCrashFiles to look for files matching the
// given regexes and return those as normal in the return map. However, if
// the optional regexes are not matched, the polling loop will still exit and
// WaitForCrashFiles will not return an error.
func OptionalRegexes(optionalRegexes []string) WaitForCrashFilesOpt {
return func(w *waitForCrashFilesOptions) {
w.optionalRegexes = optionalRegexes
}
}
// WaitForCrashFiles waits for each regex in regexes to match a file in dirs.
// Each regex is matched against file basenames only (the directory part is not
// matched) and is anchored at the start of the name, so
// /var/spool/crash/hello_world.20200331.1234.log will NOT match
// 'world\.\d{1,8}\.\d{1,8}\.log'.
// One might use it by
// 1. Doing some operation that will create new files in that directory (e.g. inducing a crash).
// 2. Calling this method to wait for the expected files to appear (see the sketch below).
// On success, WaitForCrashFiles returns a map from a regex to a list of files that matched that regex.
// If any regex was not matched, instead returns an error of type RegexesNotFound.
//
// When it comes to deleting files, tests should:
// * Remove matching files that they expect to generate
// * Leave matching files they do not expect to generate
// If there are more matches than expected and the test can't tell which are expected, it shouldn't delete any.
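//
// A minimal sketch of steps 1 and 2 above (the directory, regex, and timeout
// are illustrative, not taken from a real test):
//
//	// ... induce a crash here ...
//	files, err := crash.WaitForCrashFiles(ctx,
//		[]string{crash.SystemCrashDir},
//		[]string{`my_proc\..*\.meta`},
//		crash.Timeout(30*time.Second))
//	if err != nil {
//		return err
//	}
//	defer crash.RemoveAllFiles(ctx, files)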
func WaitForCrashFiles(ctx context.Context, dirs, regexes []string, opts ...WaitForCrashFilesOpt) (map[string][]string, error) {
w := &waitForCrashFilesOptions{timeout: 15 * time.Second}
for _, opt := range opts {
opt(w)
}
var files map[string][]string
err := testing.Poll(ctx, func(c context.Context) error {
var newFiles []string
for _, dir := range dirs {
dirFiles, err := GetCrashes(dir)
if err != nil {
return testing.PollBreak(errors.Wrap(err, "failed to get new crashes"))
}
newFiles = append(newFiles, dirFiles...)
}
// Reset files each time the poll function is invoked, to avoid
// repeatedly adding the same file
files = make(map[string][]string)
// track regexes that weren't matched.
var missing []string
for _, rx := range []struct {
regexp []string
optional bool
}{
{regexes, false},
{w.optionalRegexes, true},
} {
for _, re := range rx.regexp {
match := false
for _, f := range newFiles {
base := filepath.Base(f)
matchThisFile, err := regexp.MatchString("^"+re, base)
if err != nil {
return testing.PollBreak(errors.Wrapf(err, "invalid regexp %s", re))
}
if matchThisFile {
// Wait for meta files to have "done=1".
if strings.HasSuffix(f, ".meta") {
var contents []byte
if contents, err = ioutil.ReadFile(f); err != nil {
// There's a known issue with cryptohome 'flickering'
// occasionally. (b/189707927) If one process writes a file, a
// different process trying to read it the instant the file
// shows up may not be able to. So don't testing.PollBreak here,
// just retry and see if we can read on the next go-round.
return errors.Wrap(err, "failed to read .meta file")
}
if !strings.Contains(string(contents), "done=1") {
// Not there yet.
matchThisFile = false
}
}
}
if matchThisFile {
files[re] = append(files[re], f)
match = true
}
}
if !match && !rx.optional {
missing = append(missing, re)
}
}
}
if len(missing) != 0 {
return &RegexesNotFound{Missing: missing, Files: newFiles, PartialMatches: files, Dirs: dirs}
}
return nil
}, &testing.PollOptions{Timeout: w.timeout})
if err != nil {
var regexesNotFoundError *RegexesNotFound
if errors.As(err, &regexesNotFoundError) {
// Return unwrapped error, since we promise to return a RegexesNotFound
// error, not an error that wraps a RegexesNotFound error. testing.Poll
// will run errors.Wrap on the error returned from the lambda.
return nil, *regexesNotFoundError
}
return nil, errors.Wrap(err, "unable to find crash files")
}
return files, nil
}
// MoveFilesToOut moves all given files to outDir (typically s.OutDir()). Useful
// when further investigation of some files is needed to debug a test failure.
func MoveFilesToOut(ctx context.Context, outDir string, files ...string) error {
var firstErr error
for _, f := range files {
base := filepath.Base(f)
testing.ContextLogf(ctx, "Saving %s", base)
if err := fsutil.MoveFile(f, filepath.Join(outDir, base)); err != nil {
if firstErr == nil {
firstErr = errors.Wrapf(err, "couldn't save %s", base)
}
testing.ContextLogf(ctx, "Couldn't save %s: %v", base, err)
}
}
return firstErr
}
// RemoveAllFiles removes all files listed in the values of the given map.
func RemoveAllFiles(ctx context.Context, files map[string][]string) error {
var firstErr error
for _, v := range files {
for _, f := range v {
if err := os.Remove(f); err != nil && !os.IsNotExist(err) {
if firstErr == nil {
firstErr = errors.Wrapf(err, "couldn't clean up %s", f)
}
testing.ContextLogf(ctx, "Couldn't clean up %s: %v", f, err)
}
}
}
return firstErr
}
// DeleteCoreDumps deletes core dumps whose corresponding minidumps are available.
// It waits for crash_reporter to finish if it is running, in order to avoid
// deleting intermediate core dumps used to generate minidumps. Deleted core
// dumps are logged via ctx.
func DeleteCoreDumps(ctx context.Context) error {
reporterRunning := func() (bool, error) {
return processRunning("crash_reporter")
}
return deleteCoreDumps(ctx, DefaultDirs(), reporterRunning)
}
func deleteCoreDumps(ctx context.Context, dirs []string, reporterRunning func() (bool, error)) error {
// First, take a snapshot of core dumps to be deleted.
paths, size := findCoreDumps(dirs)
if len(paths) == 0 {
return nil
}
testing.ContextLogf(ctx, "Found %d core dumps (%d bytes)", len(paths), size)
// Wait for crash_reporter to finish if it is running, in order to avoid
// deleting intermediate core dumps used to generate minidumps.
if err := testing.Poll(ctx, func(ctx context.Context) error {
running, err := reporterRunning()
if err != nil {
return testing.PollBreak(err)
}
if running {
return errors.New("crash_reporter is still running")
}
return nil
}, &testing.PollOptions{Timeout: 10 * time.Second}); err != nil {
return errors.Wrap(err, "failed to wait for crash_reporter to finish")
}
// Finally delete core dumps. Note that it is important to use the snapshot
// taken at the beginning to avoid removing coredumps created by
// a crash_reporter process started after the wait.
var firstErr error
for _, path := range paths {
if err := os.Remove(path); err != nil {
testing.ContextLogf(ctx, "Failed to delete %s: %v", path, err)
if firstErr == nil {
firstErr = err
}
continue
}
testing.ContextLog(ctx, "Deleted ", path)
}
return firstErr
}
// findCoreDumps returns a list of paths of core dumps whose corresponding
// minidumps are available, and the total size of them.
func findCoreDumps(dirs []string) (paths []string, size int64) {
const extension = ".core"
for _, dir := range dirs {
fis, err := ioutil.ReadDir(dir)
if err != nil {
continue
}
nameSet := make(map[string]struct{})
for _, fi := range fis {
nameSet[fi.Name()] = struct{}{}
}
for _, fi := range fis {
if !strings.HasSuffix(fi.Name(), extension) {
continue
}
dmpName := strings.TrimSuffix(fi.Name(), extension) + ".dmp"
if _, ok := nameSet[dmpName]; !ok {
continue
}
paths = append(paths, filepath.Join(dir, fi.Name()))
size += fi.Size()
}
}
sort.Strings(paths)
return paths, size
}
// processRunning checks if a process named procName is running.
func processRunning(procName string) (bool, error) {
ps, err := process.Processes()
if err != nil {
return false, err
}
for _, p := range ps {
n, err := p.Name()
if err != nil {
continue
}
if n == procName {
return true, nil
}
}
return false, nil
}
// shouldMarkTestInProgress parses markTestInProgressVar into a boolean value.
// If the value of markTestInProgressVar is not accepted by strconv.ParseBool(), it returns true.
func shouldMarkTestInProgress(ctx context.Context) bool {
markTestInProgress, err := strconv.ParseBool(markTestInProgressVar.Value())
// If any parse error happens, fall back to true.
if err != nil {
testing.ContextLogf(ctx, "Failed to parse crash.markTestInProgress value %q, using default value true", markTestInProgressVar.Value())
markTestInProgress = true
}
return markTestInProgress
}
// MarkTestInProgress writes |name| to |testInProgressPath|, indicating to crash_reporter
// that the given test is in progress.
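//
// A minimal sketch of the expected pairing in a test (the test name is
// hypothetical):
//
//	if err := crash.MarkTestInProgress(ctx, "example.MyTest"); err != nil {
//		return err
//	}
//	defer crash.MarkTestDone(ctx)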
func MarkTestInProgress(ctx context.Context, name string) error {
if !shouldMarkTestInProgress(ctx) {
return nil
}
if err := ioutil.WriteFile(testInProgressPath, []byte(testInProgressPrefixVar.Value()+name), 0644); err != nil {
return errors.Wrap(err, "failed to write in-progress test name")
}
return nil
}
// MarkTestDone removes the file indicating which test is running.
func MarkTestDone(ctx context.Context) error {
// Don't remove the test-in-prog file if it wasn't created by Tast in the MarkTestInProgress function.
if !shouldMarkTestInProgress(ctx) {
return nil
}
if err := os.Remove(testInProgressPath); err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to remove in-progress test name")
}
return nil
}