blob: 63cda23e7c9de7eb3cc236cbb81e898b0c2ee067 [file] [log] [blame]
// Copyright 2019 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Package diagnose implements diagnosis logic for run failures.
package diagnose
import (
"context"
"fmt"
"io/ioutil"
"path/filepath"
"regexp"
"strings"
"go.chromium.org/tast/core/internal/linuxssh"
"go.chromium.org/tast/core/internal/logging"
"go.chromium.org/tast/core/internal/minidriver/target"
"go.chromium.org/tast/core/internal/testingutil"
)
// SSHDrop investigates an SSH connection that was lost during a local test
// run and returns a human-readable diagnosis message.
//
// It gives up immediately when cc has no initial boot_id. Otherwise it keeps
// trying to reconnect for up to cc.DefaultTimeout(), reads the current boot_id
// from the target and compares it with the initial one: an unchanged boot_id
// is reported as a probable network issue, a changed one is handed over to
// diagnoseReboot, which might save files useful for diagnosis under outDir.
func SSHDrop(ctx context.Context, cc *target.ConnCache, outDir string) string {
	// Without the boot_id recorded at the start there is nothing to compare to.
	if cc.InitBootID() == "" {
		return "failed to diagnose: initial boot_id is not available"
	}

	logging.Info(ctx, "Reconnecting to diagnose lost SSH connection")
	reconnect := func(ctx context.Context) error {
		return cc.EnsureConn(ctx, true)
	}
	pollOpts := &testingutil.PollOptions{Timeout: cc.DefaultTimeout()}
	if err := testingutil.Poll(ctx, reconnect, pollOpts); err != nil {
		return fmt.Sprint("target did not come back: ", err)
	}

	// A boot_id different from the initial one means the target rebooted.
	currentBootID, err := linuxssh.ReadBootID(ctx, cc.Conn().SSHConn())
	switch {
	case err != nil:
		return fmt.Sprint("failed to diagnose: failed to read boot_id: ", err)
	case currentBootID == cc.InitBootID():
		return "target did not reboot, probably network issue"
	default:
		// Target rebooted.
		return diagnoseReboot(ctx, cc, outDir)
	}
}
// shutdownReasonRe matches the pre-shutdown message in the system log.
// Submatch 1 is the type of shutdown (e.g. reboot or halt) and submatch 2 is
// the reason given for it.
var shutdownReasonRe = regexp.MustCompile(`pre-shutdown.*Shutting down for (.*): (.*)`)

// hungRe matches a kernel hung-task report that ends in a hung_task panic.
// Submatch 1 is the name of the blocked task and submatch 2 is the run of
// bracket-prefixed lines directly following "Call Trace:", i.e. its backtrace.
// The pattern contains literal newlines, so it must stay a multi-line raw
// string starting at column 0.
var hungRe = regexp.MustCompile(`(?s)INFO: task ([^\n]+) blocked for more than \d+ seconds\.
.*?Call Trace:
((?:\[[^]]*\] [^\n]*\n)+).*Kernel panic - not syncing: hung_task: blocked tasks`)

// crashSymbolRe matches the instruction pointer line of a kernel crash log
// ("RIP:" or "PC is at"). Submatch 1 is the last whitespace-separated token on
// that line, which is the symbolized location of the instruction pointer,
// e.g. "sysrq_handle_crash+0x10/0x20".
var crashSymbolRe = regexp.MustCompile(`(?:RIP:|PC is at).* (\S+)`)
// diagnoseReboot works out why the target rebooted during a local test run
// and returns a diagnosis message.
//
// Over the SSH connection held by cc it collects two texts: the last 1000
// lines of the system log of the boot identified by cc.InitBootID() (via
// croslog, falling back to journalctl) and the contents of console-ramoops.
// Whatever was read is saved under outDir; failures to read or save are only
// logged. The reboot is then classified, in this order, as a normal shutdown,
// a hung-task kernel panic, another kernel crash, or an unknown crash.
func diagnoseReboot(ctx context.Context, cc *target.ConnCache, outDir string) string {
	sshConn := cc.Conn().SSHConn()

	// save stores non-empty text as outDir/name. A failure is just logged
	// because the diagnosis itself does not depend on the saved file.
	save := func(name, text string) {
		if text == "" {
			return
		}
		if err := ioutil.WriteFile(filepath.Join(outDir, name), []byte(text), 0666); err != nil {
			logging.Infof(ctx, "Failed to save %s: %v", name, err)
		}
	}

	// Read the unified system log just before the reboot. The boot ID is
	// passed to the log commands with its dashes removed.
	bootArg := "--boot=" + strings.ReplaceAll(cc.InitBootID(), "-", "")
	logOut, err := sshConn.CommandContext(ctx, "croslog", "--quiet", bootArg, "--lines=1000").Output()
	if err != nil {
		logging.Info(ctx, "Failed to execute croslog command: ", err)
		logOut, err = sshConn.CommandContext(ctx, "journalctl", "--quiet", bootArg, "--lines=1000").Output()
		if err == nil {
			logging.Info(ctx, "Fell back to journalctl command")
		} else {
			logging.Info(ctx, "Failed to execute journalctl command: ", err)
		}
	}
	logs := string(logOut)
	save("unified-logs.before-reboot.txt", logs)

	// Read console-ramoops. Its path varies by systems, and it might not exist
	// for normal reboots, so try both known locations.
	oopsOut, err := sshConn.CommandContext(ctx, "cat", "/sys/fs/pstore/console-ramoops-0").Output()
	if err != nil {
		oopsOut, err = sshConn.CommandContext(ctx, "cat", "/sys/fs/pstore/console-ramoops").Output()
		if err != nil {
			logging.Info(ctx, "console-ramoops not found")
		}
	}
	ramOops := string(oopsOut)
	save("console-ramoops.txt", ramOops)

	// A pre-shutdown message in the system log means an orderly shutdown.
	if reason := shutdownReasonRe.FindStringSubmatch(logs); reason != nil {
		return fmt.Sprintf("target normally shut down for %s (%s)", reason[1], reason[2])
	}

	// Hung task: report the first backtrace entry that is not a schedule
	// function, taking the last word of each backtrace line as its symbol.
	if hung := hungRe.FindStringSubmatch(ramOops); hung != nil {
		task, trace := hung[1], hung[2]
		for _, entry := range strings.Split(trace, "\n") {
			words := strings.Fields(entry)
			if len(words) == 0 {
				continue
			}
			if sym := words[len(words)-1]; !strings.Contains(sym, "schedule") {
				return fmt.Sprintf("kernel crashed: %s hung in %s", task, sym)
			}
		}
		return fmt.Sprintf("kernel crashed: %s hung", task)
	}

	// Other kernel crashes: use the last instruction pointer line found.
	if crashes := crashSymbolRe.FindAllStringSubmatch(ramOops, -1); len(crashes) > 0 {
		last := crashes[len(crashes)-1]
		return fmt.Sprint("kernel crashed in ", last[1])
	}

	return "target rebooted for unknown crash"
}