// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

package crash

import (
	"context"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/golang/protobuf/ptypes/empty"

	crash_service "go.chromium.org/tast-tests/cros/services/cros/crash"
	"go.chromium.org/tast/core/ctxutil"
	"go.chromium.org/tast/core/dut"
	"go.chromium.org/tast/core/rpc"
	"go.chromium.org/tast/core/ssh/linuxssh"
	"go.chromium.org/tast/core/testing"
	"go.chromium.org/tast/core/testing/hwdep"
)

func init() {
	testing.AddTest(&testing.Test{
		Func:         WatchdogCrash,
		Desc:         "Verify artificial watchdog crash creates crash files",
		Contacts:     []string{"chromeos-data-eng@google.com", "swboyd@chromium.org", "dianders@chromium.org"},
		BugComponent: "b:1032705",
		Attr:         []string{"group:mainline", "informational"},
		SoftwareDeps: []string{"device_crash", "pstore", "reboot", "watchdog"},
		ServiceDeps:  []string{"tast.cros.crash.FixtureService"},
		HardwareDeps: hwdep.D(hwdep.SkipOnPlatform(
			// See https://crbug.com/1069618 for discussion of bob, scarlet, kevin issues.
			"bob",
			"scarlet",
			"kevin"),
			hwdep.SkipOnModel(
				"crystaldrift", /* TODO(b/282821025): Watchdog broken on Skyrim */
				"frostflow",    /* TODO(b/282821025): Watchdog broken on Skyrim */
				"markarth",     /* TODO(b/282821025): Watchdog broken on Skyrim */
				"skyrim15w",    /* TODO(b/282821025): Watchdog broken on Skyrim */
				"skyrim15w360", /* TODO(b/282821025): Watchdog broken on Skyrim */
				"skyrim6w",     /* TODO(b/282821025): Watchdog broken on Skyrim */
				"skyrim6w360",  /* TODO(b/282821025): Watchdog broken on Skyrim */
				"whiterun",     /* TODO(b/282821025): Watchdog broken on Skyrim */
				"jax",          /* TODO(b/340814753): Re-enable after FW uprev */
				"kench",        /* TODO(b/340814753): Re-enable after FW uprev */
				"sion",         /* TODO(b/340814753): Re-enable after FW uprev */
				"wukong",       /* TODO(b/340814753): Re-enable after FW uprev */
			)),
		Timeout: 10 * time.Minute,
	})
}
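
// saveAllFiles copies every file referenced by matches from the DUT into dir.
// It logs and returns the first error encountered while continuing with the
// remaining files.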
func saveAllFiles(ctx context.Context, d *dut.DUT, matches []*crash_service.RegexMatch, dir string) error {
	var firstErr error
	for _, m := range matches {
		for _, f := range m.Files {
			if err := linuxssh.GetFile(ctx, d.Conn(), f, filepath.Join(dir, path.Base(f)), linuxssh.PreserveSymlinks); err != nil {
				testing.ContextLogf(ctx, "Failed to save file %s: %s", f, err)
				if firstErr == nil {
					firstErr = err
				}
			}
		}
	}
	return firstErr
}
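
// WatchdogCrash triggers a hardware watchdog reset on the DUT and verifies
// that the expected kernel crash report files are written afterwards.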
func WatchdogCrash(ctx context.Context, s *testing.State) {
	const systemCrashDir = "/var/spool/crash"

	d := s.DUT()

	cl, err := rpc.Dial(ctx, d, s.RPCHint())
	if err != nil {
		s.Fatal("Failed to connect to the RPC service on the DUT: ", err)
	}

	fs := crash_service.NewFixtureServiceClient(cl.Conn)

	req := crash_service.SetUpCrashTestRequest{
		Consent: crash_service.SetUpCrashTestRequest_MOCK_CONSENT,
	}

	// Shorten deadline to leave time for cleanup
	cleanupCtx := ctx
	ctx, cancel := ctxutil.Shorten(ctx, 5*time.Second)
	defer cancel()

	if _, err := fs.SetUp(ctx, &req); err != nil {
		s.Error("Failed to set up: ", err)
		cl.Close(cleanupCtx)
		return
	}

	// Cleanup is a bit delicate. If the test fails _before_ we panic the
	// machine, TearDown must run then, on the same connection (so that we
	// can close Chrome).
	//
	// If the test fails to reconnect after the reboot, there is nothing to
	// clean up, so cl and fs are nilled out below.
	//
	// Otherwise, we re-establish a connection to the machine and run
	// TearDown over it.
	defer func() {
		s.Log("Cleaning up")
		if fs != nil {
			if _, err := fs.TearDown(cleanupCtx, &empty.Empty{}); err != nil {
				s.Error("Couldn't tear down: ", err)
			}
		}
		if cl != nil {
			cl.Close(cleanupCtx)
		}
	}()

	// Sync filesystem to minimize impact of the crash on other tests
	if out, err := d.Conn().CommandContext(ctx, "sync").CombinedOutput(); err != nil {
		s.Fatalf("Failed to sync filesystems: %s. err: %v", out, err)
	}

	// Trigger a watchdog reset: stop daisydog (the service that pets the
	// hardware watchdog), then hold /dev/watchdog open without petting it
	// so the watchdog fires and resets the DUT.
	cmd := "stop daisydog; sleep 60 > /dev/watchdog"
	if err := d.RebootWithCommand(ctx, "sh", "-c", cmd); err != nil {
		s.Fatal("Failed to panic DUT: ", err)
	}

	// The reboot broke the existing RPC connection and fixture client, so
	// drop them before reconnecting.
	cl.Close(ctx)
	cl = nil
	fs = nil

	cl, err = rpc.Dial(ctx, d, s.RPCHint())
	if err != nil {
		s.Fatal("Failed to connect to the RPC service on the DUT: ", err)
	}
	fs = crash_service.NewFixtureServiceClient(cl.Conn)
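
	// Kernel crash reports are named kernel.<YYYYMMDD>.<HHMMSS>.<id>.0 with
	// .kcrash, .meta, .log, and sometimes .bios_log extensions.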
	base := `kernel\.\d{8}\.\d{6}\.\d+\.0`
	biosLogMatches := &crash_service.RegexMatch{
		Regex: base + `\.bios_log`,
		Files: nil,
	}
	waitReq := &crash_service.WaitForCrashFilesRequest{
		Dirs:    []string{systemCrashDir},
		Regexes: []string{base + `\.kcrash`, base + `\.meta`, base + `\.log`},
	}
s.Log("Waiting for files to become present")
res, err := fs.WaitForCrashFiles(ctx, waitReq)
if err != nil {
if err := d.GetFile(cleanupCtx, "/var/log/messages",
filepath.Join(s.OutDir(), "messages")); err != nil {
s.Log("Failed to get messages log")
}
s.Fatal("Failed to find crash files: ", err.Error())
}
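
	// Verify that the .meta file reports a watchdog crash signature.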
	for _, m := range res.Matches {
		if strings.HasSuffix(m.Regex, ".meta") {
			// Record the corresponding .bios_log name for each meta file so it
			// can be saved on failure and removed during cleanup.
			for _, f := range m.Files {
				biosLogMatches.Files = append(biosLogMatches.Files, strings.TrimSuffix(f, filepath.Ext(f))+".bios_log")
			}
			if len(m.Files) != 1 {
				s.Errorf("Unexpected number of kernel crashes: %d, want 1", len(m.Files))
				continue
			}
			if err := d.Conn().CommandContext(ctx, "/bin/grep", "-q", "sig=kernel-(WATCHDOG)", m.Files[0]).Run(); err != nil {
				// Save all crash files to the output directory to help debug the failure.
				if err := saveAllFiles(cleanupCtx, d, append(res.Matches, biosLogMatches), s.OutDir()); err != nil {
					s.Log("Failed to save crash files: ", err)
				}
				s.Error("Did not find correct pattern in meta file: ", err)
			}
		}
	}
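
	// Remove the crash reports (and any bios logs) so they do not affect
	// later tests.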
	removeReq := &crash_service.RemoveAllFilesRequest{
		Matches: append(res.Matches, biosLogMatches),
	}
	if _, err := fs.RemoveAllFiles(ctx, removeReq); err != nil {
		s.Error("Error removing files: ", err)
	}
}