blob: 64e1e4342d34d33ddf8ce5ffcbac01b2987193f1 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Suspends and resumes the device an adjustable number of times for
adjustable random lengths of time.
This uses the powerd_suspend utility and the rtc's wakealarm entry in sysfs.
Note that the rtc sysfs entry may vary from device to device, the test_list
must define the path to the correct sysfs entry for the specific device, the
default assumes a typical /sys/class/rtc/rtc0 entry.
"""
import errno
import logging
import os
import random
import re
import threading
import time
import unittest
import factory_common # pylint: disable=unused-import
from cros.factory.test import event_log
from cros.factory.test import factory
from cros.factory.test.i18n import test_ui as i18n_test_ui
from cros.factory.test import test_ui
from cros.factory.test import ui_templates
from cros.factory.utils.arg_utils import Arg
from cros.factory.utils import debug_utils
from cros.factory.utils import file_utils
from cros.factory.utils import process_utils
from cros.factory.utils import sync_utils
_TEST_TITLE = i18n_test_ui.MakeI18nLabel('Suspend/Resume Test')
_MSG_CYCLE = i18n_test_ui.MakeI18nLabel('Suspend/Resume:')
_ID_CYCLES = 'sr_cycles'
_ID_RUN = 'sr_run'
_TEST_BODY = ('<font size="20">%s <div id="%s"></div> of \n'
'<div id="%s"></div></font>') % (_MSG_CYCLE, _ID_RUN, _ID_CYCLES)
_MIN_SUSPEND_MARGIN_SECS = 5
_MESSAGES = '/var/log/messages'
_KERNEL_DEBUG_WAKEUP_SOURCES = '/sys/kernel/debug/wakeup_sources'
_MAX_EARLY_RESUME_RETRY_COUNT = 3
class SuspendResumeTest(unittest.TestCase):
ARGS = [
Arg('cycles', int, 'Number of cycles to suspend/resume', default=1),
Arg('suspend_delay_max_secs', int,
'Max time in sec during suspend per '
'cycle', default=10),
Arg('suspend_delay_min_secs', int,
'Min time in sec during suspend per '
'cycle', default=5),
Arg('resume_delay_max_secs', int,
'Max time in sec during resume per cycle', default=10),
Arg('resume_delay_min_secs', int,
'Min time in sec during resume per cycle', default=5),
Arg('resume_early_margin_secs', int,
'The allowable margin for the '
'DUT to wake early', default=0),
Arg('resume_worst_case_secs', int,
'The worst case time a device is '
'expected to take to resume', default=30),
Arg('suspend_worst_case_secs', int,
'The worst case time a device is '
'expected to take to suspend', default=60),
Arg('wakealarm_path', str, 'Path to the wakealarm file',
default='/sys/class/rtc/rtc0/wakealarm'),
Arg('time_path', str, 'Path to the time (since_epoch) file',
default='/sys/class/rtc/rtc0/since_epoch'),
Arg('wakeup_count_path', str, 'Path to the wakeup_count file',
default='/sys/power/wakeup_count'),
Arg('suspend_type', str, 'Suspend type',
default='mem'),
Arg('ignore_wakeup_source', str, 'Wakeup source to ignore',
default=None, optional=True),
Arg('early_resume_retry_wait_secs', int,
'Time to wait before re-suspending after early resume',
default=3, optional=True)]
def setUp(self):
self.assertTrue(os.path.exists(self.args.wakealarm_path), 'wakealarm_path '
'%s is not found, bad path?' % (self.args.wakealarm_path))
self.assertTrue(os.path.exists(self.args.time_path), 'time_path %s is not '
'found, bad path?' % (self.args.time_path))
self.assertGreaterEqual(self.args.suspend_delay_min_secs,
_MIN_SUSPEND_MARGIN_SECS, 'The '
'suspend_delay_min_secs is too low, bad '
'test_list?')
self.assertGreaterEqual(self.args.suspend_delay_max_secs,
self.args.suspend_delay_min_secs, 'Invalid suspend '
'timings provided in test_list (max < min).')
self.assertGreaterEqual(self.args.resume_delay_max_secs,
self.args.resume_delay_min_secs, 'Invalid resume '
'timings provided in test_list (max < min).')
self.goofy = factory.get_state_instance()
self._ui = test_ui.UI()
self._template = ui_templates.OneSection(self._ui)
self._template.SetTitle(_TEST_TITLE)
self._template.SetState(_TEST_BODY)
# Remove lid-opened, which will prevent suspend.
file_utils.TryUnlink('/run/power_manager/lid_opened')
# Create this directory so powerd_suspend doesn't fail.
file_utils.TryMakeDirs('/run/power_manager/root')
self.done = False
self.wakeup_count = ''
self.start_time = 0
self.resume_at = 0
self.attempted_wake_extensions = 0
self.actual_wake_extensions = 0
self.initial_suspend_count = 0
self.alarm_started = threading.Event()
self.alarm_thread = threading.Thread()
self.messages = None
def tearDown(self):
# Always log the last suspend/resume block we saw. This is most
# useful for failures, of course, but we log the last block for
# successes too to make them easier to compare.
if self.messages:
# Remove useless lines that have any of these right after the square
# bracket:
# call
# G[A-Z]{2}\d? (a register name)
# save
messages = re.sub(r'^.*\] (call|G[A-Z]{2}\d?|save).*$\n?', '',
self.messages, flags=re.MULTILINE)
logging.info('Last suspend block:\n' +
re.sub('^', ' ', messages, flags=re.MULTILINE))
self.done = True
self.alarm_thread.join(5)
self.assertFalse(self.alarm_thread.isAlive(), 'Alarm thread failed join.')
# Clear any active wake alarms
open(self.args.wakealarm_path, 'w').write('0')
def _GetIgnoredWakeupSourceCount(self):
"""Return the recorded wakeup count for the ignored wakeup source."""
if not self.args.ignore_wakeup_source:
return None
with open(_KERNEL_DEBUG_WAKEUP_SOURCES, 'r') as f:
# The output has the format of:
#
# name active_count event_count wakeup_count expire_count active_since \
# total_time max_time last_change prevent_suspend_time
# mmc2:0001:1 0 0 0 0 0 00 33154 0
# ...
#
# We want to get the 'wakeup_count' column
for line in f.readlines()[1:]:
parts = line.split()
if parts[0] == self.args.ignore_wakeup_source:
return int(parts[3])
raise RuntimeError('Ignore wakeup source %s not found' %
self.args.ignore_wakeup_source)
def _MonitorWakealarm(self):
"""Start and extend the wakealarm as needed for the main thread."""
open(self.args.wakealarm_path, 'w').write(str(self.resume_at))
self.alarm_started.set()
# CAUTION: the loop below is subject to race conditions with suspend time.
while (self._ReadSuspendCount() < self.initial_suspend_count + self.run
and not self.done):
cur_time = self._ReadCurrentTime()
if cur_time >= self.resume_at - 1:
self.attempted_wake_extensions += 1
logging.warn('Late suspend detected, attempting wake extension')
try:
with open(self.args.wakealarm_path, 'w') as f:
f.write('+=' + str(_MIN_SUSPEND_MARGIN_SECS))
except IOError:
# The write to wakealarm returns EINVAL (22) if no alarm is active
logging.warn('Write to wakealarm failed, assuming we woke: %s',
debug_utils.FormatExceptionOnly())
break
if (self._ReadSuspendCount() >= self.initial_suspend_count + self.run
and self._ReadCurrentTime() < cur_time + _MIN_SUSPEND_MARGIN_SECS):
logging.info('Attempted wake time extension, but suspended before.')
break
self.resume_at = self.resume_at + _MIN_SUSPEND_MARGIN_SECS
self.actual_wake_extensions += 1
logging.info('Attempted extending the wake timer %d s, resume is now '
'at %d.', _MIN_SUSPEND_MARGIN_SECS, self.resume_at)
self.assertGreaterEqual(
self.start_time + self.args.suspend_worst_case_secs,
cur_time,
'Suspend timeout, device did not suspend within %d sec.' %
self.args.suspend_worst_case_secs)
time.sleep(0.1)
self.alarm_started.clear()
def _Suspend(self, retry_count=0):
"""Suspend the device by writing to /sys/power/state."""
# Explicitly sync the filesystem
process_utils.Spawn(['sync'], check_call=True, log_stderr_on_error=True)
# Write out our expected wakeup_count
try:
with open(self.args.wakeup_count_path, 'w') as f:
f.write(self.wakeup_count)
except IOError:
# This happens if wakeup_count does not match, typically this means
# there was an unexpected early wake event.
raise IOError('Failed to write to wakeup_count (early wake): %s' %
debug_utils.FormatExceptionOnly())
prev_suspend_ignore_count = self._GetIgnoredWakeupSourceCount()
logging.info('Suspending at %d', self._ReadCurrentTime())
try:
with open('/sys/power/state', 'w') as f:
f.write(self.args.suspend_type)
except IOError as err:
if err.errno == errno.EBUSY:
if prev_suspend_ignore_count:
logging.info('Early wake event when attempting suspend')
if prev_suspend_ignore_count != self._GetIgnoredWakeupSourceCount():
if retry_count == _MAX_EARLY_RESUME_RETRY_COUNT:
raise RuntimeError('Maximum re-suspend retry exceeded for '
'ignored wakeup source %s',
self.args.ignore_wakeup_source)
logging.info('Wakeup source ignored, re-suspending...')
time.sleep(self.args.early_resume_retry_wait_secs)
with open(self.args.wakeup_count_path, 'r') as f:
self.wakeup_count = f.read().strip()
return self._Suspend(retry_count + 1)
else:
raise IOError('EBUSY: Early wake event when attempting suspend: %s'
% debug_utils.FormatExceptionOnly())
else:
raise IOError('Failed to write to /sys/power/state: %s' %
debug_utils.FormatExceptionOnly())
logging.info('Returning from suspend at %d', self._ReadCurrentTime())
def _ReadSuspendCount(self):
"""Read the current suspend count from /sys/kernel/debug/suspend_stats.
This assumes the first line of suspend_stats contains the number of
successfull suspend cycles.
Args:
None.
Returns:
Int, the number of suspends the system has executed since last reboot.
"""
self.assertTrue(os.path.exists('/sys/kernel/debug/suspend_stats'),
'suspend_stats file not found.')
# If we just resumed, the suspend_stats file can take some time to update.
time.sleep(0.1)
line_content = open('/sys/kernel/debug/suspend_stats').read().strip()
return int(re.search(r'[0-9]+', line_content).group(0))
def _ReadCurrentTime(self):
"""Read the current time in seconds since_epoch.
Args:
None.
Returns:
Int, the time since_epoch in seconds.
"""
with open(self.args.time_path, 'r') as f:
return int(f.read().strip())
def _VerifySuspended(self, wake_time, wake_source, count, resume_at):
"""Verify that a reasonable suspend has taken place.
Args:
wake_time: the time at which the device resumed
wake_source: the wake source, if known
count: expected number of suspends the system has executed
resume_at: expected time since epoch to have resumed
Returns:
Boolean, True if suspend was valid, False if not.
"""
self.assertGreaterEqual(
wake_time, resume_at - self.args.resume_early_margin_secs,
'Premature wake detected (%d s early, source=%s), spurious event? '
'(got touched?)' % (resume_at - wake_time, wake_source or 'unknown'))
self.assertLessEqual(
wake_time, resume_at + self.args.resume_worst_case_secs,
'Late wake detected (%ds > %ds delay, source=%s), timer failure?' % (
wake_time - resume_at, self.args.resume_worst_case_secs,
wake_source or 'unknown'))
actual_count = self._ReadSuspendCount()
self.assertEqual(
count, actual_count,
'Incorrect suspend count: ' + (
'no suspend?' if actual_count < count else 'spurious suspend?'))
def _HandleMessages(self, messages_start):
"""Finds the suspend/resume chunk in /var/log/messages.
The contents are saved to self.messages to be logged on failure.
Returns:
The wake source, or none if unknown.
"""
# The last chunk we read. In a list so it can be written from
# ReadMessages.
last_messages = ['']
def ReadMessages(messages_start):
try:
with open(_MESSAGES) as f:
# Read from messages_start to the end of the file.
f.seek(messages_start)
last_messages[0] = messages = f.read()
# If we see this, we can consider resume to be done.
match = re.search(
r'\] Restarting tasks \.\.\.' # "Restarting tasks" line
r'.+' # Any number of charcaters
r'\] done\.\n', # "done." line
messages, re.DOTALL | re.MULTILINE)
if match:
messages = messages[:match.end()]
return messages
except IOError:
logging.exception('Unable to read %s', _MESSAGES)
return None
messages = sync_utils.Retry(10, 0.2, None, ReadMessages, messages_start)
if not messages:
# We never found it. Just use the entire last chunk read
messages = last_messages[0]
logging.info(
'To view suspend/resume messages: '
'dd if=/var/log/messages skip=%d count=%d '
'iflag=skip_bytes,count_bytes', messages_start, len(messages))
# Find the wake source
match = re.search('active wakeup source: (.+)', messages)
wake_source = match.group(1) if match else None
logging.info('Wakeup source: %s', wake_source or 'unknown')
self.messages = messages
return wake_source
def runTest(self):
self._ui.Run(blocking=False)
self._ui.SetHTML(self.args.cycles, id=_ID_CYCLES)
self.initial_suspend_count = self._ReadSuspendCount()
logging.info('The initial suspend count is %d.', self.initial_suspend_count)
random.seed(0) # Make test deterministic
for self.run in range(1, self.args.cycles + 1):
self.attempted_wake_extensions = 0
self.actual_wake_extensions = 0
alarm_suspend_delays = 0
self.alarm_thread = threading.Thread(target=self._MonitorWakealarm)
self._ui.SetHTML(self.run, id=_ID_RUN)
self.start_time = self._ReadCurrentTime()
suspend_time = random.randint(self.args.suspend_delay_min_secs,
self.args.suspend_delay_max_secs)
resume_time = random.randint(self.args.resume_delay_min_secs,
self.args.resume_delay_max_secs)
self.resume_at = suspend_time + self.start_time
logging.info('Suspend %d of %d for %d seconds, starting at %d.',
self.run, self.args.cycles, suspend_time, self.start_time)
self.wakeup_count = open(self.args.wakeup_count_path).read().strip()
self.alarm_thread.start()
self.assertTrue(self.alarm_started.wait(_MIN_SUSPEND_MARGIN_SECS),
'Alarm thread timed out.')
messages_start = os.path.getsize(_MESSAGES)
self.goofy.SuspendDUTMonitoring(suspend_time +
self.args.resume_worst_case_secs)
logging.info('Monitoring suspended for %d seconds',
suspend_time + self.args.resume_worst_case_secs)
self._Suspend()
wake_time = self._ReadCurrentTime()
wake_source = self._HandleMessages(messages_start)
self._VerifySuspended(wake_time,
wake_source,
self.initial_suspend_count + self.run,
self.resume_at)
logging.info('Resumed %d of %d for %d seconds.',
self.run, self.args.cycles, resume_time)
time.sleep(resume_time)
try:
logging.info('Resuming monitoring')
self.goofy.ResumeDUTMonitoring()
except: # pylint: disable=bare-except
# Monitoring suspension will time out eventually
logging.exception('Failed to resume monitoring. Ignore.')
while self.alarm_thread.isAlive():
alarm_suspend_delays += 1
logging.warn('alarm thread is taking a while to return, waiting 1s.')
time.sleep(1)
self.assertGreaterEqual(self.start_time +
self.args.suspend_worst_case_secs,
int(open(self.args.time_path).read().strip()),
'alarm thread did not return within %d sec.' %
self.args.suspend_worst_case_secs)
event_log.Log('suspend_resume_cycle',
run=self.run, start_time=self.start_time,
suspend_time=suspend_time, resume_time=resume_time,
resume_at=self.resume_at, wakeup_count=self.wakeup_count,
suspend_count=self._ReadSuspendCount(),
initial_suspend_count=self.initial_suspend_count,
attempted_wake_extensions=self.attempted_wake_extensions,
actual_wake_extensions=self.actual_wake_extensions,
alarm_suspend_delays=alarm_suspend_delays,
wake_source=wake_source)