// (code-viewer header, not part of the source) blob: 46d0d1df84a7f61e5a2e093eb240e5ccbdd6452c [file] [log] [blame]
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_H_
#define GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_H_
#include "base/atomicops.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ptr_exclusion.h"
#include "base/memory/weak_ptr.h"
#include "base/power_monitor/power_observer.h"
#include "base/task/task_observer.h"
#include "base/threading/thread.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "build/chromecast_buildflags.h"
#include "gpu/ipc/common/gpu_watchdog_timeout.h"
#include "gpu/ipc/service/gpu_ipc_service_export.h"
#include "ui/gfx/native_widget_types.h"
#include "ui/gl/progress_reporter.h"
namespace gpu {
// Events of the GPU watchdog thread, recorded to the
// "GPU.WatchdogThread.Event" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused. Every enumerator is given an explicit
// value so that inserting or reordering entries cannot silently renumber the
// values already persisted.
enum class GpuWatchdogThreadEvent {
  kGpuWatchdogStart = 0,
  kGpuWatchdogKill = 1,
  kGpuWatchdogEnd = 2,
  // Must always alias the highest-numbered entry above.
  kMaxValue = kGpuWatchdogEnd,
};
// Outcomes recorded while the watchdog handles a timeout; reported through the
// "GPU.WatchdogThread.Timeout" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class GpuWatchdogTimeoutEvent {
// Recorded each time OnWatchdogTimeout() is called.
kTimeout = 0,
// Recorded when a GPU main thread is killed for a detected hang.
kKill = 1,
// Windows only: Recorded when a hang is detected but we allow the GPU main
// thread to continue until it has spent the full thread time doing the work.
kMoreThreadTime = 2,
// Windows only: The GPU makes progress after being given more thread time.
// The GPU main thread is not killed.
kProgressAfterMoreThreadTime = 3,
// Deprecated. A gpu hang is detected but watchdog waits for 60 seconds before
// taking action.
// kTimeoutWait = 4,
// Deprecated. The GPU makes progress within 60 sec in OnWatchdogTimeout().
// The GPU main thread is not killed.
// kProgressAfterWait = 5,
// Just continue if it's not on the TTY of our host X11 server.
kContinueOnNonHostServerTty = 6,
// Windows only: After detecting GPU hang and continuing running through
// OnWatchdogTimeout() for the max cycles, the GPU main thread still cannot
// get the full thread time.
kLessThanFullThreadTimeAfterCapped = 7,
// Windows only: The GPU main thread went through the
// kLessThanFullThreadTimeAfterCapped stage before the process is killed.
kKillOnLessThreadTime = 8,
// OnWatchdogTimeout() is called long after the expected time. The GPU is not
// killed this time because of the slow system.
kSlowWatchdogThread = 9,
// NOTE(review): undocumented in the original. Judging by the name, recorded
// when the kill is skipped because the GPU made progress while the crash dump
// was being taken -- confirm against the implementation.
kNoKillForGpuProgressDuringCrashDumping = 10,
// Must always alias the highest-numbered entry above.
kMaxValue = kNoKillForGpuProgressDuringCrashDumping,
};
#if BUILDFLAG(IS_WIN)
// If the actual time the watched GPU thread spent doing actual work is less
// than the watchdog timeout, the GPU thread can continue running through
// OnWatchdogTimeout() for at most 4 times before the gpu thread is killed.
// NOTE(review): the text above says "at most 4 times" while the constant is 3;
// confirm whether the count includes the initial timeout or the comment is
// stale.
constexpr int kMaxCountOfMoreGpuThreadTimeAllowed = 3;
#endif
// NOTE(review): undocumented in the original. Presumably the number of extra
// timeout cycles granted before the kill, with 0 meaning none -- confirm
// against the implementation.
constexpr int kMaxExtraCyclesBeforeKill = 0;
// If the scheduled timeout function is delayed by more than
// kUnreasonableTimeoutDelay, we assume the system is in an unexpected state
// and the GPU watchdog will NOT terminate the GPU process if no progress is
// made in the GPU main thread or in the GPU display compositor thread. This is
// used in determining SlowWatchdogThread.
constexpr base::TimeDelta kUnreasonableTimeoutDelay = base::Seconds(5);
// A thread that intermittently sends tasks to a group of watched message loops
// and deliberately crashes if one of them does not respond after a timeout.
//
// Besides being a base::Thread, the class observes power suspend/resume
// (base::PowerSuspendObserver), task execution (base::TaskObserver) and GL
// progress reports (gl::ProgressReporter).
class GPU_IPC_SERVICE_EXPORT GpuWatchdogThread
: public base::Thread,
public base::PowerSuspendObserver,
public base::TaskObserver,
public gl::ProgressReporter {
public:
// Factory functions. Each returns a new watchdog owned by the caller.
//
// Creates a watchdog from the backgrounded state, the software-rendering flag
// and the name to use for the thread.
static std::unique_ptr<GpuWatchdogThread> Create(
bool start_backgrounded,
bool software_rendering,
const std::string& thread_name);
// Use the existing GpuWatchdogThread to create a second one. This is used
// for DrDC thread only.
static std::unique_ptr<GpuWatchdogThread> Create(
bool start_backgrounded,
const GpuWatchdogThread* existing_watchdog,
const std::string& thread_name);
// Creates a watchdog with an explicit timeout, restart factor and test-mode
// flag.
static std::unique_ptr<GpuWatchdogThread> Create(
bool start_backgrounded,
base::TimeDelta timeout,
int restart_factor,
bool test_mode,
const std::string& thread_name);
// Not copyable or assignable.
GpuWatchdogThread(const GpuWatchdogThread&) = delete;
GpuWatchdogThread& operator=(const GpuWatchdogThread&) = delete;
~GpuWatchdogThread() override;
// Notifies the watchdog when Chrome is backgrounded / foregrounded. Should
// only be used if Chrome is completely backgrounded and not expected to
// render (all windows backgrounded and not producing frames).
void OnBackgrounded();
void OnForegrounded();
// The watchdog starts armed to catch startup hangs, and needs to be disarmed
// once init is complete, before executing tasks.
void OnInitComplete();
// Notifies the watchdog when the GPU child process is being destroyed.
// This function is called directly from
// viz::GpuServiceImpl::~GpuServiceImpl()
void OnGpuProcessTearDown();
// Pause the GPU watchdog to stop the timeout task. If the current heavy task
// is not running on the GPU driver, the watchdog can be paused to avoid
// unneeded crash.
void PauseWatchdog();
// Continue the watchdog after a pause.
void ResumeWatchdog();
// In this mode, when the GPU detects a hang, it will record the crash state
// and report it without terminating the GPU process, and things will move on
// as if the Watchdog thread did not interfere.
void EnableReportOnlyMode();
// Disable report only mode.
void DisableReportOnlyMode();
// For gpu testing only. Return status for the watchdog tests
bool IsGpuHangDetectedForTesting();
// For gpu testing only. Return status for the watchdog tests
bool IsGpuHangDetectedWithoutKillForTesting();
// Implements base::Thread.
void Init() override;
void CleanUp() override;
// Implements gl::ProgressReporter.
void ReportProgress() override;
// Implements TaskObserver.
void WillProcessTask(const base::PendingTask& pending_task,
bool was_blocked_or_low_priority) override;
void DidProcessTask(const base::PendingTask& pending_task) override;
// Implements base::PowerSuspendObserver.
void OnSuspend() override;
void OnResume() override;
protected:
// Default constructor; reachable from subclasses only.
GpuWatchdogThread();
private:
// Identifies which caller asked for the watchdog timeout task to be stopped
// or restarted (see StopWatchdogTimeoutTask / RestartWatchdogTimeoutTask).
enum PauseResumeSource {
kAndroidBackgroundForeground = 0,
kPowerSuspendResume = 1,
kGeneralGpuFlow = 2,
};
// Constructor taking the explicit timeout, restart factor, test-mode flag and
// thread name; private, so instances are obtained through Create().
GpuWatchdogThread(base::TimeDelta timeout,
int restart_factor,
bool test_mode,
const std::string& thread_name);
void AddPowerObserver();
void RestartWatchdogTimeoutTask(PauseResumeSource source_of_request);
void StopWatchdogTimeoutTask(PauseResumeSource source_of_request);
void SetReportOnlyModeTask(bool enabled);
void UpdateInitializationFlag();
// NOTE(review): presumably these operate on arm_disarm_counter_ -- confirm
// against the implementation.
void Arm();
void Disarm();
void InProgress();
bool IsArmed();
base::subtle::Atomic32 ReadArmDisarmCounter();
void OnWatchdogTimeout();
bool SlowWatchdogThread();
bool WatchedThreadNeedsMoreThreadTime(bool no_gpu_hang_detected);
#if BUILDFLAG(IS_WIN)
base::ThreadTicks GetWatchedThreadTime();
#endif
// Do not change the function name. It is used for [GPU HANG] crash reports.
void DeliberatelyTerminateToRecoverFromHang();
void ContinueWithNextWatchdogTimeoutTask();
// Records "GPU.WatchdogThread.Event".
void GpuWatchdogThreadEventHistogram(GpuWatchdogThreadEvent thread_event);
// Histogram recorded in OnWatchdogTimeout()
// Records "GPU.WatchdogThread.Timeout"
void GpuWatchdogTimeoutHistogram(GpuWatchdogTimeoutEvent timeout_event);
#if BUILDFLAG(IS_WIN)
// Histograms recorded for WatchedThreadNeedsMoreThreadTime() function.
void WatchedThreadNeedsMoreThreadTimeHistogram(
bool no_gpu_hang_detected,
bool start_of_more_thread_time);
#endif
// Used for metrics. It's 1 minute after the event.
bool WithinOneMinFromPowerResumed();
bool WithinOneMinFromForegrounded();
#if BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS)
void UpdateActiveTTY();
#endif
// The watchdog continues when it's not on the TTY of our host X11 server.
bool ContinueOnNonHostX11ServerTty();
// This counter is only written on the gpu thread, and read on both threads.
volatile base::subtle::Atomic32 arm_disarm_counter_ = 0;
// The counter number read in the last OnWatchdogTimeout() on the watchdog
// thread.
int32_t last_arm_disarm_counter_ = 0;
// Timeout on the watchdog thread to check if gpu hangs.
base::TimeDelta watchdog_timeout_;
// The one-time watchdog timeout multiplier after the watchdog pauses and
// restarts.
const int watchdog_restart_factor_;
// The time the gpu watchdog was created.
base::TimeTicks watchdog_start_timeticks_;
// The time the last OnSuspend and OnResume was called.
base::TimeTicks power_suspend_timeticks_;
base::TimeTicks power_resume_timeticks_;
// The time the last OnBackgrounded and OnForegrounded was called.
base::TimeTicks backgrounded_timeticks_;
base::TimeTicks foregrounded_timeticks_;
// The time PauseWatchdog and ResumeWatchdog was called.
base::TimeTicks watchdog_pause_timeticks_;
base::TimeTicks watchdog_resume_timeticks_;
// TimeTicks: Tracking the amount of time a task runs. Executing delayed
// tasks at the right time.
// ThreadTicks: Use this timer to (approximately) measure how much time the
// calling thread spent doing actual work vs. being de-scheduled.
// The time the last OnWatchdogTimeout() was called.
base::TimeTicks last_on_watchdog_timeout_timeticks_;
// The wall-clock time the next OnWatchdogTimeout() will be called.
base::Time next_on_watchdog_timeout_time_;
#if BUILDFLAG(IS_WIN)
// Thread-time counterpart of last_on_watchdog_timeout_timeticks_.
// NOTE(review): presumably the watched thread's ThreadTicks sampled at the
// last OnWatchdogTimeout() -- confirm against the implementation.
base::ThreadTicks last_on_watchdog_timeout_thread_ticks_;
// The difference between the timeout and the actual time the watched thread
// spent doing actual work.
base::TimeDelta remaining_watched_thread_ticks_;
// The Windows thread handle of the watched GPU main thread.
// This field is not a raw_ptr<> because it was filtered by the rewriter for:
// #addr-of
RAW_PTR_EXCLUSION void* watched_thread_handle_ = nullptr;
// After GPU hang detected, how many times has the GPU thread been allowed to
// continue due to not enough thread time.
int count_of_more_gpu_thread_time_allowed_ = 0;
// After detecting GPU hang and continuing running through
// OnWatchdogTimeout() for the max cycles, the GPU main thread still cannot
// get the full thread time.
bool less_than_full_thread_time_after_capped_ = false;
#endif
#if BUILDFLAG(IS_LINUX) && !BUILDFLAG(IS_CASTOS)
// Deleter for tty_file_: closes the FILE with fclose(); a null pointer is
// ignored.
struct Deleter {
inline void operator()(FILE* f) {
if (f)
fclose(f);
}
};
// File handle closed through Deleter when released.
// NOTE(review): presumably the file UpdateActiveTTY() reads the active TTY
// from -- confirm against the implementation.
std::unique_ptr<FILE, Deleter> tty_file_;
// TTY numbers; -1 is the initial value of each.
int host_tty_ = -1;
int active_tty_ = -1;
int last_active_tty_ = -1;
#endif
// The system has entered the power suspension mode.
bool in_power_suspension_ = false;
// The GPU process has started tearing down. Accessed only in the gpu process.
bool in_gpu_process_teardown_ = false;
// Chrome is running on the background on Android. Gpu is probably very slow
// or stalled.
bool is_backgrounded_ = false;
// The GPU watchdog is paused. The timeout task is temporarily stopped.
bool is_paused_ = false;
// The lock between the GpuMainThread and GpuWatchdogThread for stopping
// GpuWatchdog.
base::Lock skip_lock_;
// Both flags below may only be accessed with skip_lock_ held.
bool skip_for_pause_ GUARDED_BY(skip_lock_) = false;
bool skip_for_backgrounded_ GUARDED_BY(skip_lock_) = false;
// The GPU watchdog is in report only mode. The watchdog will behave as though
// the thread which it found to be hung has made progress during crash
// reporting.
bool in_report_only_mode_ = false;
// Whether GpuWatchdogThreadEvent::kGpuWatchdogStart has been recorded.
bool is_watchdog_start_histogram_recorded_ = false;
// Read/Write by the watchdog thread only after initialized in the
// constructor.
bool in_gpu_initialization_ = false;
// For the experiment and the debugging purpose
size_t num_of_timeout_after_power_resume_ = 0;
size_t num_of_timeout_after_foregrounded_ = 0;
bool foregrounded_event_ = false;
bool power_resumed_event_ = false;
// The watched thread name string used for UMA and crash key.
std::string watched_thread_name_str_uma_;
// The thread id string of the watched thread.
std::string watched_thread_id_str_;
// For gpu testing only.
const bool is_test_mode_;
// Set by the watchdog thread and Read by the test thread.
base::AtomicFlag test_result_timeout_and_gpu_hang_;
// Set by the watchdog thread and Read by the test thread.
base::AtomicFlag test_result_timeout_and_gpu_hang_without_kill_;
SEQUENCE_CHECKER(watched_thread_sequence_checker_);
// NOTE(review): presumably a cached weak pointer obtained from
// weak_factory_ -- confirm against the implementation.
base::WeakPtr<GpuWatchdogThread> weak_ptr_;
// Declared last, so it is the first member destroyed.
base::WeakPtrFactory<GpuWatchdogThread> weak_factory_{this};
};
} // namespace gpu
#endif // GPU_IPC_SERVICE_GPU_WATCHDOG_THREAD_H_