blob: 15d43763ad6024373a54d8ab74883e84a922b26c [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/threading/hang_watcher.h"
#include <atomic>
#include <memory>
#include "base/barrier_closure.h"
#include "base/bind.h"
#include "base/callback.h"
#include "base/callback_helpers.h"
#include "base/run_loop.h"
#include "base/strings/string_number_conversions.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/test/bind_test_util.h"
#include "base/test/simple_test_tick_clock.h"
#include "base/test/task_environment.h"
#include "base/test/test_timeouts.h"
#include "base/threading/platform_thread.h"
#include "base/threading/thread_checker.h"
#include "base/time/tick_clock.h"
#include "base/time/time.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace base {
namespace {
// Waits on provided WaitableEvent before executing and signals when done.
class BlockingThread : public DelegateSimpleThread::Delegate {
public:
explicit BlockingThread(base::WaitableEvent* unblock_thread,
base::TimeDelta timeout)
: thread_(this, "BlockingThread"),
unblock_thread_(unblock_thread),
timeout_(timeout) {}
~BlockingThread() override = default;
void Run() override {
// (Un)Register the thread here instead of in ctor/dtor so that the action
// happens on the right thread.
base::ScopedClosureRunner unregister_closure =
base::HangWatcher::GetInstance()->RegisterThread();
HangWatchScope scope(timeout_);
wait_until_entered_scope_.Signal();
unblock_thread_->Wait();
run_event_.Signal();
}
bool IsDone() { return run_event_.IsSignaled(); }
void StartAndWaitForScopeEntered() {
thread_.Start();
// Block until this thread registered itself for hang watching and has
// entered a HangWatchScope.
wait_until_entered_scope_.Wait();
}
void Join() { thread_.Join(); }
PlatformThreadId GetId() { return thread_.tid(); }
private:
base::DelegateSimpleThread thread_;
// Will be signaled once the thread is properly registered for watching and
// scope has been entered.
WaitableEvent wait_until_entered_scope_;
// Will be signaled once ThreadMain has run.
WaitableEvent run_event_;
base::WaitableEvent* const unblock_thread_;
base::TimeDelta timeout_;
};
class HangWatcherTest : public testing::Test {
public:
const base::TimeDelta kTimeout = base::TimeDelta::FromSeconds(10);
const base::TimeDelta kHangTime = kTimeout + base::TimeDelta::FromSeconds(1);
HangWatcherTest() {
hang_watcher_.SetAfterMonitorClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&monitor_event_)));
hang_watcher_.SetOnHangClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&hang_event_)));
// We're not testing the monitoring loop behavior in this test so we want to
// trigger monitoring manually.
hang_watcher_.SetMonitoringPeriodForTesting(base::TimeDelta::Max());
}
HangWatcherTest(const HangWatcherTest& other) = delete;
HangWatcherTest& operator=(const HangWatcherTest& other) = delete;
protected:
// Used to wait for monitoring. Will be signaled by the HangWatcher thread and
// so needs to outlive it.
WaitableEvent monitor_event_;
// Signaled from the HangWatcher thread when a hang is detected. Needs to
// outlive the HangWatcher thread.
WaitableEvent hang_event_;
HangWatcher hang_watcher_;
// Used exclusively for MOCK_TIME. No tasks will be run on the environment.
// Single threaded to avoid ThreadPool WorkerThreads registering.
test::SingleThreadTaskEnvironment task_environment_{
test::TaskEnvironment::TimeSource::MOCK_TIME};
};
class HangWatcherBlockingThreadTest : public HangWatcherTest {
public:
HangWatcherBlockingThreadTest() : thread_(&unblock_thread_, kTimeout) {}
HangWatcherBlockingThreadTest(const HangWatcherBlockingThreadTest& other) =
delete;
HangWatcherBlockingThreadTest& operator=(
const HangWatcherBlockingThreadTest& other) = delete;
protected:
void JoinThread() {
unblock_thread_.Signal();
// Thread is joinable since we signaled |unblock_thread_|.
thread_.Join();
// If thread is done then it signaled.
ASSERT_TRUE(thread_.IsDone());
}
void StartBlockedThread() {
// Thread has not run yet.
ASSERT_FALSE(thread_.IsDone());
// Start the thread. It will block since |unblock_thread_| was not
// signaled yet.
thread_.StartAndWaitForScopeEntered();
// Thread registration triggered a call to HangWatcher::Monitor() which
// signaled |monitor_event_|. Reset it so it's ready for waiting later on.
monitor_event_.Reset();
}
void MonitorHangsAndJoinThread() {
// HangWatcher::Monitor() should not be set which would mean a call to
// HangWatcher::Monitor() happened and was unacounted for.
ASSERT_FALSE(monitor_event_.IsSignaled());
// Triger a monitoring on HangWatcher thread and verify results.
hang_watcher_.SignalMonitorEventForTesting();
monitor_event_.Wait();
JoinThread();
}
// Used to unblock the monitored thread. Signaled from the test main thread.
WaitableEvent unblock_thread_;
BlockingThread thread_;
};
} // namespace
TEST_F(HangWatcherTest, NoRegisteredThreads) {
ASSERT_FALSE(monitor_event_.IsSignaled());
// Signal to advance the Run() loop.
base::HangWatcher::GetInstance()->SignalMonitorEventForTesting();
// Monitoring should just not happen when there are no registered threads.
// Wait a while to make sure it does not.
ASSERT_FALSE(monitor_event_.TimedWait(base::TimeDelta::FromSeconds(1)));
ASSERT_FALSE(hang_event_.IsSignaled());
}
TEST_F(HangWatcherTest, NestedScopes) {
// Create a state object for the test thread since this test is single
// threaded.
auto current_hang_watch_state =
base::internal::HangWatchState::CreateHangWatchStateForCurrentThread();
ASSERT_FALSE(current_hang_watch_state->IsOverDeadline());
base::TimeTicks original_deadline = current_hang_watch_state->GetDeadline();
constexpr base::TimeDelta kFirstTimeout(
base::TimeDelta::FromMilliseconds(500));
base::TimeTicks first_deadline = base::TimeTicks::Now() + kFirstTimeout;
constexpr base::TimeDelta kSecondTimeout(
base::TimeDelta::FromMilliseconds(250));
base::TimeTicks second_deadline = base::TimeTicks::Now() + kSecondTimeout;
// At this point we have not set any timeouts.
{
// Create a first timeout which is more restrictive than the default.
HangWatchScope first_scope(kFirstTimeout);
// We are on mock time. There is no time advancement and as such no hangs.
ASSERT_FALSE(current_hang_watch_state->IsOverDeadline());
ASSERT_EQ(current_hang_watch_state->GetDeadline(), first_deadline);
{
// Set a yet more restrictive deadline. Still no hang.
HangWatchScope second_scope(kSecondTimeout);
ASSERT_FALSE(current_hang_watch_state->IsOverDeadline());
ASSERT_EQ(current_hang_watch_state->GetDeadline(), second_deadline);
}
// First deadline we set should be restored.
ASSERT_FALSE(current_hang_watch_state->IsOverDeadline());
ASSERT_EQ(current_hang_watch_state->GetDeadline(), first_deadline);
}
// Original deadline should now be restored.
ASSERT_FALSE(current_hang_watch_state->IsOverDeadline());
ASSERT_EQ(current_hang_watch_state->GetDeadline(), original_deadline);
}
TEST_F(HangWatcherBlockingThreadTest, Hang) {
StartBlockedThread();
// Simulate hang.
task_environment_.FastForwardBy(kHangTime);
MonitorHangsAndJoinThread();
ASSERT_TRUE(hang_event_.IsSignaled());
}
TEST_F(HangWatcherBlockingThreadTest, NoHang) {
StartBlockedThread();
MonitorHangsAndJoinThread();
ASSERT_FALSE(hang_event_.IsSignaled());
}
namespace {
class HangWatcherSnapshotTest : public testing::Test {
public:
HangWatcherSnapshotTest() = default;
HangWatcherSnapshotTest(const HangWatcherSnapshotTest& other) = delete;
HangWatcherSnapshotTest& operator=(const HangWatcherSnapshotTest& other) =
delete;
protected:
void TriggerMonitorAndWaitForCompletion() {
monitor_event_.Reset();
hang_watcher_.SignalMonitorEventForTesting();
monitor_event_.Wait();
}
// Verify that a capture takes place and that at the time of the capture the
// list of hung thread ids is correct.
void TestIDList(const std::string& id_list) {
list_of_hung_thread_ids_during_capture_ = id_list;
TriggerMonitorAndWaitForCompletion();
ASSERT_EQ(++reference_capture_count_, hang_capture_count_);
}
// Verify that even if hang monitoring takes place no hangs are detected.
void ExpectNoCapture() {
int old_capture_count = hang_capture_count_;
TriggerMonitorAndWaitForCompletion();
ASSERT_EQ(old_capture_count, hang_capture_count_);
}
std::string ConcatenateThreadIds(
const std::vector<base::PlatformThreadId>& ids) const {
std::string result;
constexpr char kSeparator{'|'};
for (PlatformThreadId id : ids) {
result += base::NumberToString(id) + kSeparator;
}
return result;
}
// Will be signaled once monitoring took place. Marks the end of the test.
WaitableEvent monitor_event_;
const PlatformThreadId test_thread_id_ = PlatformThread::CurrentId();
// This is written to by the test main thread and read from the hang watching
// thread. It does not need to be protected because access to it is
// synchronized by always setting before triggering the execution of the
// reading code through HangWatcher::SignalMonitorEventForTesting().
std::string list_of_hung_thread_ids_during_capture_;
// This is written to by from the hang watching thread and read the test main
// thread. It does not need to be protected because access to it is
// synchronized by always reading after monitor_event_ has been signaled.
int hang_capture_count_ = 0;
// Increases at the same time as |hang_capture_count_| to test that capture
// actually took place.
int reference_capture_count_ = 0;
HangWatcher hang_watcher_;
};
} // namespace
// TODO(crbug.com/2193655): Test flaky on iPad.
TEST_F(HangWatcherSnapshotTest, DISABLED_HungThreadIDs) {
// During hang capture the list of hung threads should be populated.
hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([this]() {
EXPECT_EQ(hang_watcher_.GrabWatchStateSnapshotForTesting()
.PrepareHungThreadListCrashKey(),
list_of_hung_thread_ids_during_capture_);
++hang_capture_count_;
}));
// When hang capture is over the list should be empty.
hang_watcher_.SetAfterMonitorClosureForTesting(
base::BindLambdaForTesting([this]() {
EXPECT_EQ(hang_watcher_.GrabWatchStateSnapshotForTesting()
.PrepareHungThreadListCrashKey(),
"");
monitor_event_.Signal();
}));
// Register the main test thread for hang watching.
auto unregister_thread_closure_ = hang_watcher_.RegisterThread();
BlockingThread blocking_thread(&monitor_event_, base::TimeDelta{});
blocking_thread.StartAndWaitForScopeEntered();
{
// Start a hang watch scope that expires right away. Ensures that
// the first monitor will detect a hang. This scope will naturally have a
// later deadline than the one in |blocking_thread_| since it was created
// after.
HangWatchScope expires_instantly(base::TimeDelta{});
// Hung thread list should contain the id the blocking thread and then the
// id of the test main thread since that is the order of increasing
// deadline.
TestIDList(
ConcatenateThreadIds({blocking_thread.GetId(), test_thread_id_}));
// |expires_instantly| and the scope from |blocking_thread| are still live
// but already recorded so should be ignored.
ExpectNoCapture();
// Thread is joinable since we signaled |monitor_event_|. This closes the
// scope in |blocking_thread|.
blocking_thread.Join();
// |expires_instantly| is still live but already recorded so should be
// ignored.
ExpectNoCapture();
}
// All hang watch scopes are over. There should be no capture.
ExpectNoCapture();
// Once all recorded scopes are over creating a new one and monitoring will
// trigger a hang detection.
HangWatchScope expires_instantly(base::TimeDelta{});
TestIDList(ConcatenateThreadIds({test_thread_id_}));
}
namespace {
// Determines how long the HangWatcher will wait between calls to
// Monitor(). Choose a low value so that that successive invocations happens
// fast. This makes tests that wait for monitoring run fast and makes tests that
// expect no monitoring fail fast.
const base::TimeDelta kMonitoringPeriod = base::TimeDelta::FromMilliseconds(1);
// Test if and how often the HangWatcher periodically monitors for hangs.
class HangWatcherPeriodicMonitoringTest : public testing::Test {
public:
HangWatcherPeriodicMonitoringTest() {
hang_watcher_.SetMonitoringPeriodForTesting(kMonitoringPeriod);
hang_watcher_.SetOnHangClosureForTesting(base::BindRepeating(
&WaitableEvent::Signal, base::Unretained(&hang_event_)));
// HangWatcher uses a TickClock to detect how long it slept in between calls
// to Monitor(). Override that clock to control its subjective passage of
// time.
hang_watcher_.SetTickClockForTesting(&test_clock_);
}
HangWatcherPeriodicMonitoringTest(
const HangWatcherPeriodicMonitoringTest& other) = delete;
HangWatcherPeriodicMonitoringTest& operator=(
const HangWatcherPeriodicMonitoringTest& other) = delete;
protected:
// Setup the callback invoked after waiting in HangWatcher to advance the
// tick clock by the desired time delta.
void InstallAfterWaitCallback(base::TimeDelta time_delta) {
hang_watcher_.SetAfterWaitCallbackForTesting(base::BindLambdaForTesting(
[this, time_delta](base::TimeTicks time_before_wait) {
test_clock_.Advance(time_delta);
}));
}
base::SimpleTestTickClock test_clock_;
// Single threaded to avoid ThreadPool WorkerThreads registering. Will run
// delayed tasks created by the tests.
test::SingleThreadTaskEnvironment task_environment_;
std::unique_ptr<base::TickClock> fake_tick_clock_;
HangWatcher hang_watcher_;
// Signaled when a hang is detected.
WaitableEvent hang_event_;
base::ScopedClosureRunner unregister_thread_closure_;
};
} // namespace
// Don't register any threads for hang watching. HangWatcher should not monitor.
TEST_F(HangWatcherPeriodicMonitoringTest,
NoPeriodicMonitoringWithoutRegisteredThreads) {
RunLoop run_loop;
// If a call to HangWatcher::Monitor() takes place the test will instantly
// fail.
hang_watcher_.SetAfterMonitorClosureForTesting(
base::BindLambdaForTesting([&run_loop]() {
ADD_FAILURE() << "Monitoring took place!";
run_loop.Quit();
}));
// Make the HangWatcher tick clock advance by exactly the monitoring period
// after waiting so it will never detect oversleeping between attempts to call
// Monitor(). This would inhibit monitoring and make the test pass for the
// wrong reasons.
InstallAfterWaitCallback(kMonitoringPeriod);
// Unblock the test thread. No thread ever registered after the HangWatcher
// was created in the test's constructor. No monitoring should have taken
// place.
task_environment_.GetMainThreadTaskRunner()->PostDelayedTask(
FROM_HERE, run_loop.QuitClosure(), TestTimeouts::tiny_timeout());
run_loop.Run();
// NOTE:
// A lack of calls could technically also be caused by the HangWatcher thread
// executing too slowly / being descheduled. This is a known limitation.
// It's expected for |TestTimeouts::tiny_timeout()| to be large enough that
// this is rare.
}
// During normal execution periodic monitorings should take place.
TEST_F(HangWatcherPeriodicMonitoringTest, PeriodicCallsTakePlace) {
// HangWatcher::Monitor() will run once right away on thread registration.
// We want to make sure it runs at a couple more times from being scheduled.
constexpr int kMinimumMonitorCount = 3;
RunLoop run_loop;
// Setup the HangWatcher to unblock run_loop when the Monitor() has been
// invoked enough times.
hang_watcher_.SetAfterMonitorClosureForTesting(BarrierClosure(
kMinimumMonitorCount, base::BindLambdaForTesting([&run_loop, this]() {
// Test condition are confirmed, stop monitoring.
hang_watcher_.StopMonitoringForTesting();
// Unblock the test main thread.
run_loop.Quit();
})));
// Make the HangWatcher tick clock advance by exactly the monitoring period
// after waiting so it will never detect oversleeping between attempts to call
// Monitor(). This would inhibit monitoring.
InstallAfterWaitCallback(kMonitoringPeriod);
// Register a thread, kicks off monitoring.
unregister_thread_closure_ = hang_watcher_.RegisterThread();
run_loop.Run();
// No monitored scope means no possible hangs.
ASSERT_FALSE(hang_event_.IsSignaled());
}
// If the HangWatcher detects it slept for longer than expected it will not
// monitor.
TEST_F(HangWatcherPeriodicMonitoringTest, NoMonitorOnOverSleep) {
RunLoop run_loop;
// If a call to HangWatcher::Monitor() takes place the test will instantly
// fail.
hang_watcher_.SetAfterMonitorClosureForTesting(
base::BindLambdaForTesting([&run_loop]() {
ADD_FAILURE() << "Monitoring took place!";
run_loop.Quit();
}));
// Make the HangWatcher tick clock advance so much after waiting that it will
// detect oversleeping every time. This will keep it from monitoring.
InstallAfterWaitCallback(base::TimeDelta::FromMinutes(1));
// Register a thread, kicks off monitoring.
unregister_thread_closure_ = hang_watcher_.RegisterThread();
// Unblock the test thread. All waits were perceived as oversleeping so all
// monitoring was inhibited.
task_environment_.GetMainThreadTaskRunner()->PostDelayedTask(
FROM_HERE, run_loop.QuitClosure(), TestTimeouts::tiny_timeout());
run_loop.Run();
// NOTE: A lack of calls could technically also be caused by the HangWatcher
// thread executing too slowly / being descheduled. This is a known
// limitation. It's expected for |TestTimeouts::tiny_timeout()| to be large
// enough that this happens rarely.
}
namespace {
class HangWatchScopeBlockingTest : public testing::Test {
public:
HangWatchScopeBlockingTest() {
hang_watcher_.SetOnHangClosureForTesting(base::BindLambdaForTesting([&] {
capture_started_.Signal();
// Simulate capturing that takes a long time.
PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(100));
completed_capture_ = true;
}));
hang_watcher_.SetAfterMonitorClosureForTesting(
base::BindLambdaForTesting([&]() {
// Simulate monitoring that takes a long time.
PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(100));
completed_monitoring_.Signal();
}));
// Make sure no periodic monitoring takes place.
hang_watcher_.SetMonitoringPeriodForTesting(base::TimeDelta::Max());
// Register the test main thread for hang watching.
unregister_thread_closure_ = hang_watcher_.RegisterThread();
}
HangWatchScopeBlockingTest(const HangWatchScopeBlockingTest& other) = delete;
HangWatchScopeBlockingTest& operator=(
const HangWatchScopeBlockingTest& other) = delete;
void VerifyScopesDontBlock() {
// Start a hang watch scope that cannot possibly cause a hang to be
// detected.
{
HangWatchScope long_scope(base::TimeDelta::Max());
// Manually trigger a monitoring.
hang_watcher_.SignalMonitorEventForTesting();
// Execution has to continue freely here as no capture is in progress.
}
// Monitoring should not be over yet because the test code should execute
// faster when not blocked.
EXPECT_FALSE(completed_monitoring_.IsSignaled());
// Wait for the full monitoring process to be complete. This is to prove
// that monitoring truly executed and that we raced the signaling.
completed_monitoring_.Wait();
// No hang means no capture.
EXPECT_FALSE(completed_capture_);
}
protected:
base::WaitableEvent capture_started_;
base::WaitableEvent completed_monitoring_;
// No need for this to be atomic because in tests with no capture the variable
// is not even written to by the HangWatcher thread and in tests with a
// capture the accesses are serialized by the blocking in ~HangWatchScope().
bool completed_capture_ = false;
HangWatcher hang_watcher_;
base::ScopedClosureRunner unregister_thread_closure_;
};
} // namespace
// Tests that execution is unimpeded by ~HangWatchScope() when no capture ever
// takes place.
TEST_F(HangWatchScopeBlockingTest, ScopeDoesNotBlocksWithoutCapture) {
VerifyScopesDontBlock();
}
// Test that execution blocks in ~HangWatchScope() for a thread under watch
// during the capturing of a hang.
TEST_F(HangWatchScopeBlockingTest, ScopeBlocksDuringCapture) {
// Start a hang watch scope that expires in the past already. Ensures that the
// first monitor will detect a hang.
{
// Start a hang watch scope that expires immediately . Ensures that
// the first monitor will detect a hang.
BlockingThread blocking_thread(&capture_started_,
base::TimeDelta::FromMilliseconds(0));
blocking_thread.StartAndWaitForScopeEntered();
// Manually trigger a monitoring.
hang_watcher_.SignalMonitorEventForTesting();
// Ensure that the hang capturing started.
capture_started_.Wait();
// Execution will get stuck in this scope because execution does not escape
// ~HangWatchScope() if a hang capture is under way.
blocking_thread.Join();
}
// A hang was in progress so execution should have been blocked in
// BlockWhileCaptureInProgress() until capture finishes.
EXPECT_TRUE(completed_capture_);
completed_monitoring_.Wait();
// Reset expectations
completed_monitoring_.Reset();
capture_started_.Reset();
completed_capture_ = false;
// Verify that scopes don't block just because a capture happened in the past.
VerifyScopesDontBlock();
}
} // namespace base