| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| package cros |
| |
| import ( |
| "context" |
| "fmt" |
| "slices" |
| "time" |
| |
| "go.chromium.org/luci/common/errors" |
| |
| provision_utils "go.chromium.org/infra/cros/cmd/provision/common-utils" |
| "go.chromium.org/infra/cros/recovery/internal/components" |
| "go.chromium.org/infra/cros/recovery/internal/components/cros/storage" |
| "go.chromium.org/infra/cros/recovery/internal/components/servo" |
| "go.chromium.org/infra/cros/recovery/internal/retry" |
| "go.chromium.org/infra/cros/recovery/logger" |
| "go.chromium.org/infra/cros/recovery/logger/metrics" |
| "go.chromium.org/infra/cros/recovery/tlw" |
| ) |
| |
| // BootInRecoveryRequest holds info to boot device in recovery mode. |
| type BootInRecoveryRequest struct { |
| DUT *tlw.Dut |
| // Booting time value to verify when device booted and available for SSH. |
| BootRetry int |
| BootTimeout time.Duration |
| BootInterval time.Duration |
| // Prevent PD switch to snk before boot. |
| PreventPowerSnk bool |
| // Call function to cal after device booted in recovery mode. |
| Callback func(context.Context) error |
| AddObservation func(*metrics.Observation) |
| // Options to ignore errors happened during restoring stage. |
| IgnoreServoRestoreFailure bool |
| IgnoreRebootFailure bool |
| // After reboot params specified to check if device booted or not. |
| AfterRebootVerify bool |
| AfterRebootTimeout time.Duration |
| AfterRebootAllowUseServoReset bool |
| // The image on the USB stick is a Light-Provision image used for booting. |
| UseLightProvisionImage bool |
| } |
| |
const (
	// powerStateChangeInterval is how long to wait after changing the DUT
	// power state by servo.
	powerStateChangeInterval = 10 * time.Second
	// pdRoleChangeInterval is how long to wait after changing the servo PD role.
	pdRoleChangeInterval = 5 * time.Second
)
| |
| // BootInRecoveryMode perform boot device in recovery mode. |
| // |
| // Boot in recovery mode performed by RO firmware and in some cases required stopPD negotiation. |
| // Please specify callback function to perform needed actions when device booted in recovery mode. |
| func BootInRecoveryMode(ctx context.Context, req *BootInRecoveryRequest, dutRun, dutBackgroundRun components.Runner, dutPing components.Pinger, ha components.HostAccess, servod components.Servod, log logger.Logger) (rErr error) { |
| if req.BootRetry < 1 { |
| // We retry at least once when method called. |
| req.BootRetry = 1 |
| } |
| // If observation is not provided then we create fake to print to logs |
| if req.AddObservation == nil { |
| req.AddObservation = func(observation *metrics.Observation) { |
| if observation != nil { |
| log.Debugf("Observation created kind:%q with %v", observation.MetricKind, observation.Value) |
| } |
| } |
| } |
| // Flag specified if we need set PD to `snk` before boot in recovery mode. |
| var needSink bool |
| if req.PreventPowerSnk { |
| log.Infof("Recovery boot will be performed without PD:snk by request.") |
| needSink = false |
| } else { |
| var err error |
| needSink, err = RecoveryModeRequiredPDOff(ctx, dutRun, servod, req.DUT) |
| if err != nil { |
| return errors.WrapIf(err, "boot in recovery mode") |
| } |
| } |
| defer func() { |
| // Record the label at the end as it can be changed. |
| req.AddObservation(metrics.NewStringObservation("recovery_boot_power_snk_used", fmt.Sprintf("%v", needSink))) |
| }() |
| restoreStates := func() error { |
| log.Debugf("Boot in recovery mode: recover servo states...") |
| // Turn on the DUT at the end in case it was not. |
| // All errors just logging as the action to clean up the state. |
| if err := servo.SetPDRole(ctx, servod, servo.PD_ON); err != nil { |
| log.Debugf("Restore PD for DUT failed: %s", err) |
| } |
| time.Sleep(pdRoleChangeInterval) |
| if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueOFF); err != nil { |
| log.Debugf("Turn off DUT failed: %s", err) |
| } |
| if err := servo.UpdateUSBVisibility(ctx, servo.USBVisibleOff, servod); err != nil { |
| log.Debugf("Turn off USB drive on servo failed: %s", err) |
| } |
| time.Sleep(powerStateChangeInterval) |
| if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueON); err != nil { |
| return errors.WrapIf(err, "restore DUT state") |
| } |
| time.Sleep(powerStateChangeInterval) |
| log.Debugf("Boot in recovery mode: DUT should start booting.") |
| return nil |
| } |
| // Always restore servo state by the end! |
| defer func() { |
| if err := restoreStates(); err != nil { |
| log.Debugf("Boot in recovery mode: %s", err) |
| // Don't override the original error. |
| if !req.IgnoreServoRestoreFailure && rErr == nil { |
| // We cannot return it, so we set it. |
| // If we fail when restored the states then we have issues. |
| rErr = err |
| return |
| } |
| } |
| // Verify the boot only if pass the execution or restore states. |
| if rErr == nil && req.AfterRebootVerify { |
| log.Debugf("Boot in recovery mode: starting verification of the boot...") |
| for { |
| if err := WaitUntilDutAccessible(ctx, req.DUT, req.AfterRebootTimeout, req.BootInterval, dutRun, dutPing); err != nil { |
| if req.AfterRebootAllowUseServoReset { |
| req.AfterRebootAllowUseServoReset = false |
| if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueReset); err != nil { |
| log.Infof("Fail to reset by servo: %s", err) |
| } |
| continue |
| } |
| log.Debugf("Device is not accessible after reboot!") |
| rErr = err |
| } else { |
| log.Debugf("Device is accessible!") |
| } |
| break |
| } |
| } |
| }() |
| retryBootFunc := func() error { |
| // On Android everything tries to use ADB, so switch to Chrome OS to be able to use SSH access. |
| cacheIsAndroid := req.DUT.GetChromeos().GetIsAndroidBased() |
| defer func() { |
| req.DUT.GetChromeos().IsAndroidBased = cacheIsAndroid |
| }() |
| //here we need understand if we expecting android install and if provision image is expecting to be a special model |
| // By default use SSH. |
| useSSHAccessWehnBootFromUSBDrive := true |
| if req.UseLightProvisionImage && cacheIsAndroid { |
| // For certain models, their provision image expects adb access instead of ssh so we don't need to switch. |
| // TODO: b/477653120, to consolidate this logic with the one in cros_install_exec.go |
| if slices.Contains(provision_utils.GetAdbOnlyModelList(), req.DUT.GetModel()) { |
| log.Debugf("USB-drive image for model %s is expected to use adb connection!", req.DUT.GetModel()) |
| useSSHAccessWehnBootFromUSBDrive = false |
| } |
| } |
| if useSSHAccessWehnBootFromUSBDrive { |
| log.Infof("Boot in Recovery Mode: will use SSH access to verify the boot.") |
| req.DUT.GetChromeos().IsAndroidBased = false |
| } else { |
| log.Infof("Boot in Recovery Mode: will use ADB access to verify the boot.") |
| req.DUT.GetChromeos().IsAndroidBased = true |
| } |
| |
| log.Infof("Boot in Recovery Mode: starting retry...") |
| // If c2d2, then set cold_reset_select:gsc_ecrst_pulse |
| sType, err := servo.WrappedServoType(ctx, servod, req.DUT.GetChromeos().GetServo()) |
| if err != nil { |
| log.Infof("Check for C2D2: could not determine the servo type.") |
| return errors.WrapIf(err, "retry boot: reading servo type") |
| } |
| // Validate required capabilities for recovery boot. |
| if err := sType.HasCapability(servo.CapabilityUSBRecBoot, servo.CapabilityUSBImageMux); err != nil { |
| return errors.WrapIf(err, "retry boot: validate capabilities") |
| } |
| |
| if sType.IsC2D2() { |
| if err := servod.Set(ctx, "cold_reset_select", "gsc_ecrst_pulse"); err != nil { |
| return errors.WrapIf(err, "retry boot: adjust reset for C2D2") |
| } |
| } |
| // Turn power off. |
| if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueOFF); err != nil { |
| return errors.WrapIf(err, "retry boot") |
| } |
| time.Sleep(powerStateChangeInterval) |
| // Doing the battery check after power off the DUT as certain models has limited EC capability |
| // and EC only report battery level when AP is shutdown. |
| if needSink { |
| if batteryLevel, err := servo.BatteryChargePercent(ctx, servod); err != nil { |
| req.AddObservation(metrics.NewInt64Observation("battery_level", -1)) |
| log.Debugf("Fail to read battery level from device %s.", err) |
| log.Debugf("We will not set PD to snk mode when boot in recovery mode.") |
| needSink = false |
| } else { |
| req.AddObservation(metrics.NewInt64Observation("battery_level", int64(batteryLevel))) |
| // If device has less 30% of battery then we will not try to recover it. |
| // If device lost power in middle of install it damage the disk. |
| const minBatterLevel = int32(30) |
| if batteryLevel < minBatterLevel { |
| log.Debugf("Battery level %d%% is lower minimum expectation of %d%%.", batteryLevel, minBatterLevel) |
| log.Debugf("We will not set PD to snk mode when boot in recovery mode.") |
| needSink = false |
| } |
| } |
| } |
| log.Debugf("Servo OS Install Repair: needSink :%t", needSink) |
| // Next:Boot in an image loaded on servo USB drive via recovery mode. The steps are: |
| // Step 1. Switch the USB to DUT on the servo multiplexer |
| if err := servo.UpdateUSBVisibility(ctx, servo.USBVisibleDUT, servod); err != nil { |
| return errors.WrapIf(err, "retry boot") |
| } |
| // Step 2. Switch power delivery to sink mode if supported by the servo. |
| // Context: b/187900184. |
| if needSink && sType.HasCapability(servo.CapabilityPDRole) == nil { |
| if err := servo.SetPDRole(ctx, servod, servo.PD_OFF); err != nil { |
| return errors.WrapIf(err, "retry boot") |
| } |
| } else if needSink { |
| log.Infof("Skipping PD Role routing: servo type %q lacks capability.", sType.String()) |
| } |
| time.Sleep(pdRoleChangeInterval) |
| // Step 3. Boot in recovery mode via servo power_state control. |
| log.Infof("Boot in Recovery Mode: Started try to boot in recovery mode by power_state:rec.") |
| if err := servo.SetPowerState(ctx, servod, servo.PowerStateValueRecoveryMode); err != nil { |
| log.Debugf("Boot in Recovery Mode: Failure when trying to set power_state:rec with error: %s", err) |
| } |
| log.Debugf("Boot in Recovery Mode: Waiting to device to be accessable.") |
| if err := WaitUntilDutAccessible(ctx, req.DUT, req.BootTimeout, req.BootInterval, dutRun, dutPing); err != nil { |
| return errors.WrapIf(err, "retry boot") |
| } |
| if cacheIsAndroid { |
| // With Android we can SSH only to the provision image. |
| // The provision image is very limited and does not have many tools. |
| } else if req.UseLightProvisionImage { |
| if _, err := dutRun(ctx, 10*time.Second, "which al-install"); err != nil { |
| return errors.WrapIf(err, "retry boot: device is not booted from light-provision image") |
| } |
| } else { |
| if err := storage.IsBootedFromExternalStorage(ctx, dutRun); err != nil { |
| log.Infof("Device booted from internal storage.") |
| return errors.WrapIf(err, "retry boot") |
| } |
| // List information about block devices. |
| // This informcation helps to understand which devices present and visible on the DUT. |
| if out, err := dutRun(ctx, 10*time.Second, "lsblk"); err != nil { |
| log.Infof("Fail to list device of the DUT: %s", err) |
| } else { |
| log.Debugf("lsblk output:\n%s", out) |
| } |
| } |
| log.Infof("Device successfully booted in recovery mode from USB-drive.") |
| return nil |
| } |
| if retryErr := retry.LimitCount(ctx, req.BootRetry, req.BootInterval, retryBootFunc, "boot in recovery mode"); retryErr != nil { |
| return errors.WrapIf(retryErr, "boot in recovery mode") |
| } |
| if req.Callback != nil { |
| log.Infof("Boot in recovery mode: passing control to call back.") |
| if err := req.Callback(ctx); err != nil { |
| return errors.WrapIf(err, "boot in recovery mode: callback") |
| } |
| log.Infof("Boot in recovery mode: control returned.") |
| } |
| return nil |
| } |