// blob: e23977bfe2222284d716a728136331928e7e215e [file] [edit]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package cros
import (
"context"
"fmt"
"slices"
"time"
"go.chromium.org/luci/common/errors"
provision_utils "go.chromium.org/infra/cros/cmd/provision/common-utils"
"go.chromium.org/infra/cros/recovery/internal/components"
"go.chromium.org/infra/cros/recovery/internal/components/cros/storage"
"go.chromium.org/infra/cros/recovery/internal/components/servo"
"go.chromium.org/infra/cros/recovery/internal/retry"
"go.chromium.org/infra/cros/recovery/logger"
"go.chromium.org/infra/cros/recovery/logger/metrics"
"go.chromium.org/infra/cros/recovery/tlw"
)
// BootInRecoveryRequest holds the info needed to boot a device in recovery mode.
//
// Note: BootInRecoveryMode writes to some fields of the request it receives
// (BootRetry, AddObservation and AfterRebootAllowUseServoReset).
type BootInRecoveryRequest struct {
// DUT is the device to boot. Its ChromeOS data (servo info, IsAndroidBased)
// and model are read while booting.
DUT *tlw.Dut
// Number of attempts to boot in recovery mode.
// Values lower than 1 are replaced by 1.
BootRetry int
// Passed to WaitUntilDutAccessible when waiting for the device after
// power_state:rec was requested.
BootTimeout time.Duration
// Passed to retry.LimitCount together with BootRetry and to
// WaitUntilDutAccessible (both during boot and during after-reboot verification).
BootInterval time.Duration
// Prevent PD switch to snk before boot.
// When set, the check whether the device requires PD off is skipped.
PreventPowerSnk bool
// Function to call after the device booted in recovery mode.
// Can be nil. An error returned by the callback fails the whole boot action.
Callback func(context.Context) error
// Receiver of the observations (metrics) created during the boot.
// When nil, it is replaced by a function which only prints observations to the logs.
AddObservation func(*metrics.Observation)
// Options to ignore errors which happened during the restoring stage.
// When IgnoreServoRestoreFailure is set, a failure to restore the servo states
// is only logged and never returned.
IgnoreServoRestoreFailure bool
// NOTE(review): IgnoreRebootFailure is not read by BootInRecoveryMode;
// presumably it is consumed by callers or is obsolete - verify.
IgnoreRebootFailure bool
// After-reboot params specify whether and how to check that the device booted
// after the servo states were restored.
// The check runs only when AfterRebootVerify is set and no error happened before.
AfterRebootVerify bool
// Passed to WaitUntilDutAccessible during the after-reboot verification.
AfterRebootTimeout time.Duration
// Allows one power_state reset by servo when the device is not accessible
// after reboot. The flag is cleared once the reset was used.
AfterRebootAllowUseServoReset bool
// The image on the USB stick is a Light-Provision image used for booting.
// For devices which are not Android based the boot is then verified by
// presence of `al-install` on the device.
UseLightProvisionImage bool
}
// Delays applied after a servo control was changed, before the next step is performed.
const (
	// powerStateChangeInterval is the time to sleep after the power state
	// of the DUT was changed by servo.
	powerStateChangeInterval = 10 * time.Second
	// pdRoleChangeInterval is the time to sleep after the PD role was changed by servo.
	pdRoleChangeInterval = 5 * time.Second
)
// BootInRecoveryMode boots the device in recovery mode from the image on the
// servo USB drive and then passes control to req.Callback.
//
// Boot in recovery mode is performed by RO firmware and in some cases requires stopping PD negotiation.
// Please specify a callback function to perform the needed actions while the device is booted in recovery mode.
//
// Flow:
//  1. Decide if PD needs to be switched to `snk` before the boot
//     (skipped when req.PreventPowerSnk is set).
//  2. Try to boot the device, at most req.BootRetry times (at least once).
//  3. Call req.Callback, if provided.
//  4. Always, on exit: restore the servo states (PD on, USB drive off,
//     power off/on of the DUT) and, when req.AfterRebootVerify is set and no
//     error happened, wait till the DUT is accessible again.
//
// The function writes to req: BootRetry (when lower than 1), AddObservation
// (when nil) and AfterRebootAllowUseServoReset (cleared once the reset was used).
//
// dutBackgroundRun and ha are accepted but not used by this function.
//
// The returned error (rErr) is a named result because the deferred restore and
// verification steps can set it.
func BootInRecoveryMode(ctx context.Context, req *BootInRecoveryRequest, dutRun, dutBackgroundRun components.Runner, dutPing components.Pinger, ha components.HostAccess, servod components.Servod, log logger.Logger) (rErr error) {
if req.BootRetry < 1 {
// We try at least once when the method is called.
req.BootRetry = 1
}
// If the observation receiver is not provided then we create a fake one which prints to the logs.
if req.AddObservation == nil {
req.AddObservation = func(observation *metrics.Observation) {
if observation != nil {
log.Debugf("Observation created kind:%q with %v", observation.MetricKind, observation.Value)
}
}
}
// Flag specifies if we need to set PD to `snk` before boot in recovery mode.
// The flag is shared with retryBootFunc, which can reset it to false.
var needSink bool
if req.PreventPowerSnk {
log.Infof("Recovery boot will be performed without PD:snk by request.")
needSink = false
} else {
var err error
needSink, err = RecoveryModeRequiredPDOff(ctx, dutRun, servod, req.DUT)
if err != nil {
return errors.WrapIf(err, "boot in recovery mode")
}
}
// Deferred calls run in LIFO order, so this one runs after the
// restore/verification defer registered below.
defer func() {
// Record the label at the end as it can be changed.
req.AddObservation(metrics.NewStringObservation("recovery_boot_power_snk_used", fmt.Sprintf("%v", needSink)))
}()
// restoreStates brings the servo back to the regular state and power cycles the DUT.
// Only a failure of the final power on is returned; all other failures are just logged.
restoreStates := func() error {
log.Debugf("Boot in recovery mode: recover servo states...")
// Turn on the DUT at the end in case it was not.
// All errors are just logged as the action is a clean up of the state.
if err := servo.SetPDRole(ctx, servod, servo.PD_ON); err != nil {
log.Debugf("Restore PD for DUT failed: %s", err)
}
time.Sleep(pdRoleChangeInterval)
if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueOFF); err != nil {
log.Debugf("Turn off DUT failed: %s", err)
}
// Turn off the USB drive while the DUT is powered off.
if err := servo.UpdateUSBVisibility(ctx, servo.USBVisibleOff, servod); err != nil {
log.Debugf("Turn off USB drive on servo failed: %s", err)
}
time.Sleep(powerStateChangeInterval)
if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueON); err != nil {
return errors.WrapIf(err, "restore DUT state")
}
time.Sleep(powerStateChangeInterval)
log.Debugf("Boot in recovery mode: DUT should start booting.")
return nil
}
// Always restore servo state by the end!
defer func() {
if err := restoreStates(); err != nil {
log.Debugf("Boot in recovery mode: %s", err)
// Don't override the original error.
if !req.IgnoreServoRestoreFailure && rErr == nil {
// We cannot return it, so we set it.
// If we fail when restoring the states then we have issues.
rErr = err
return
}
}
// Verify the boot only if the execution (and the restore of states) passed.
if rErr == nil && req.AfterRebootVerify {
log.Debugf("Boot in recovery mode: starting verification of the boot...")
// The loop runs at most twice: the second pass happens only after
// the one allowed reset by servo.
for {
if err := WaitUntilDutAccessible(ctx, req.DUT, req.AfterRebootTimeout, req.BootInterval, dutRun, dutPing); err != nil {
if req.AfterRebootAllowUseServoReset {
// Clear the flag so the reset is used only once.
req.AfterRebootAllowUseServoReset = false
if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueReset); err != nil {
log.Infof("Fail to reset by servo: %s", err)
}
continue
}
log.Debugf("Device is not accessible after reboot!")
rErr = err
} else {
log.Debugf("Device is accessible!")
}
break
}
}
}()
// retryBootFunc performs a single attempt to boot the device in recovery mode.
retryBootFunc := func() error {
// On Android everything tries to use ADB, so switch to Chrome OS to be able to use SSH access.
// The original value is restored at the end of every attempt, so the
// callback and the after-reboot verification see the original value.
cacheIsAndroid := req.DUT.GetChromeos().GetIsAndroidBased()
defer func() {
req.DUT.GetChromeos().IsAndroidBased = cacheIsAndroid
}()
// Here we need to understand if we are expecting an Android install and
// if the provision image of the model is a special one.
// By default use SSH.
useSSHAccessWehnBootFromUSBDrive := true
if req.UseLightProvisionImage && cacheIsAndroid {
// For certain models, their provision image expects adb access instead of ssh so we don't need to switch.
// TODO: b/477653120, to consolidate this logic with the one in cros_install_exec.go
if slices.Contains(provision_utils.GetAdbOnlyModelList(), req.DUT.GetModel()) {
log.Debugf("USB-drive image for model %s is expected to use adb connection!", req.DUT.GetModel())
useSSHAccessWehnBootFromUSBDrive = false
}
}
if useSSHAccessWehnBootFromUSBDrive {
log.Infof("Boot in Recovery Mode: will use SSH access to verify the boot.")
req.DUT.GetChromeos().IsAndroidBased = false
} else {
log.Infof("Boot in Recovery Mode: will use ADB access to verify the boot.")
req.DUT.GetChromeos().IsAndroidBased = true
}
log.Infof("Boot in Recovery Mode: starting retry...")
// Read the servo type: it is used to validate capabilities and,
// if c2d2, to set cold_reset_select:gsc_ecrst_pulse.
sType, err := servo.WrappedServoType(ctx, servod, req.DUT.GetChromeos().GetServo())
if err != nil {
log.Infof("Check for C2D2: could not determine the servo type.")
return errors.WrapIf(err, "retry boot: reading servo type")
}
// Validate required capabilities for recovery boot.
if err := sType.HasCapability(servo.CapabilityUSBRecBoot, servo.CapabilityUSBImageMux); err != nil {
return errors.WrapIf(err, "retry boot: validate capabilities")
}
if sType.IsC2D2() {
if err := servod.Set(ctx, "cold_reset_select", "gsc_ecrst_pulse"); err != nil {
return errors.WrapIf(err, "retry boot: adjust reset for C2D2")
}
}
// Turn power off.
if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueOFF); err != nil {
return errors.WrapIf(err, "retry boot")
}
time.Sleep(powerStateChangeInterval)
// Doing the battery check after powering off the DUT as certain models have limited EC capability
// and the EC only reports the battery level when the AP is shut down.
// Any change of needSink made here stays in effect for the following attempts.
if needSink {
if batteryLevel, err := servo.BatteryChargePercent(ctx, servod); err != nil {
// -1 is recorded when the battery level could not be read.
req.AddObservation(metrics.NewInt64Observation("battery_level", -1))
log.Debugf("Fail to read battery level from device %s.", err)
log.Debugf("We will not set PD to snk mode when boot in recovery mode.")
needSink = false
} else {
req.AddObservation(metrics.NewInt64Observation("battery_level", int64(batteryLevel)))
// If the device has less than 30% of battery then we will not switch PD to snk.
// If the device loses power in the middle of an install it can damage the disk.
const minBatterLevel = int32(30)
if batteryLevel < minBatterLevel {
log.Debugf("Battery level %d%% is lower minimum expectation of %d%%.", batteryLevel, minBatterLevel)
log.Debugf("We will not set PD to snk mode when boot in recovery mode.")
needSink = false
}
}
}
log.Debugf("Servo OS Install Repair: needSink :%t", needSink)
// Next: Boot in an image loaded on servo USB drive via recovery mode. The steps are:
// Step 1. Switch the USB to DUT on the servo multiplexer
if err := servo.UpdateUSBVisibility(ctx, servo.USBVisibleDUT, servod); err != nil {
return errors.WrapIf(err, "retry boot")
}
// Step 2. Switch power delivery to sink mode if supported by the servo.
// Context: b/187900184.
if needSink && sType.HasCapability(servo.CapabilityPDRole) == nil {
if err := servo.SetPDRole(ctx, servod, servo.PD_OFF); err != nil {
return errors.WrapIf(err, "retry boot")
}
} else if needSink {
// NOTE(review): needSink stays true here, so the
// recovery_boot_power_snk_used observation reports true even though
// the PD role was not changed - confirm this is intended.
log.Infof("Skipping PD Role routing: servo type %q lacks capability.", sType.String())
}
// The sleep is performed even when the PD role was not changed.
time.Sleep(pdRoleChangeInterval)
// Step 3. Boot in recovery mode via servo power_state control.
log.Infof("Boot in Recovery Mode: Started try to boot in recovery mode by power_state:rec.")
// A failure to set power_state:rec is only logged; the result of the attempt
// is decided by the accessibility check below.
if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueRecoveryMode); err != nil {
log.Debugf("Boot in Recovery Mode: Failure when trying to set power_state:rec with error: %s", err)
}
log.Debugf("Boot in Recovery Mode: Waiting to device to be accessable.")
if err := WaitUntilDutAccessible(ctx, req.DUT, req.BootTimeout, req.BootInterval, dutRun, dutPing); err != nil {
return errors.WrapIf(err, "retry boot")
}
// Step 4. Verify that the device booted from the expected image.
if cacheIsAndroid {
// With Android we can SSH only to the provision image.
// The provision image is very limited and does not have many tools,
// so no additional verification is performed.
} else if req.UseLightProvisionImage {
// Presence of `al-install` is used as the sign of the light-provision image.
if _, err := dutRun(ctx, 10*time.Second, "which al-install"); err != nil {
return errors.WrapIf(err, "retry boot: device is not booted from light-provision image")
}
} else {
if err := storage.IsBootedFromExternalStorage(ctx, dutRun); err != nil {
log.Infof("Device booted from internal storage.")
return errors.WrapIf(err, "retry boot")
}
// List information about block devices.
// This information helps to understand which devices are present and visible on the DUT.
// A failure here is only logged and does not fail the attempt.
if out, err := dutRun(ctx, 10*time.Second, "lsblk"); err != nil {
log.Infof("Fail to list device of the DUT: %s", err)
} else {
log.Debugf("lsblk output:\n%s", out)
}
}
log.Infof("Device successfully booted in recovery mode from USB-drive.")
return nil
}
if retryErr := retry.LimitCount(ctx, req.BootRetry, req.BootInterval, retryBootFunc, "boot in recovery mode"); retryErr != nil {
return errors.WrapIf(retryErr, "boot in recovery mode")
}
// The device is booted in recovery mode: pass control to the caller's logic.
// The servo states are restored by the deferred function after the callback returns.
if req.Callback != nil {
log.Infof("Boot in recovery mode: passing control to call back.")
if err := req.Callback(ctx); err != nil {
return errors.WrapIf(err, "boot in recovery mode: callback")
}
log.Infof("Boot in recovery mode: control returned.")
}
return nil
}