| // Copyright 2016 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/offline_pages/background_loader_offliner.h" |
| |
| #include <memory> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/functional/bind.h" |
| #include "base/json/json_writer.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/system/sys_info.h" |
| #include "base/task/single_thread_task_runner.h" |
| #include "base/time/time.h" |
| #include "chrome/browser/content_settings/page_specific_content_settings_delegate.h" |
| #include "chrome/browser/offline_pages/offline_page_mhtml_archiver.h" |
| #include "chrome/browser/offline_pages/offliner_helper.h" |
| #include "chrome/browser/offline_pages/offliner_user_data.h" |
| #include "chrome/browser/profiles/profile.h" |
| #include "chrome/browser/renderer_preferences_util.h" |
| #include "chrome/browser/ssl/security_state_tab_helper.h" |
| #include "chrome/common/chrome_isolated_world_ids.h" |
| #include "components/content_settings/browser/page_specific_content_settings.h" |
| #include "components/offline_pages/core/background/offliner_policy.h" |
| #include "components/offline_pages/core/background/save_page_request.h" |
| #include "components/offline_pages/core/client_namespace_constants.h" |
| #include "components/offline_pages/core/offline_page_client_policy.h" |
| #include "components/offline_pages/core/offline_page_feature.h" |
| #include "components/offline_pages/core/offline_page_model.h" |
| #include "components/security_state/core/security_state.h" |
| #include "content/public/browser/browser_context.h" |
| #include "content/public/browser/mhtml_extra_parts.h" |
| #include "content/public/browser/navigation_entry.h" |
| #include "content/public/browser/navigation_handle.h" |
| #include "content/public/browser/render_frame_host.h" |
| #include "content/public/browser/web_contents.h" |
| #include "content/public/browser/web_contents_user_data.h" |
| #include "net/cert/cert_status_flags.h" |
| #include "net/http/http_response_headers.h" |
| |
| namespace offline_pages { |
| |
| namespace { |
| |
| void HandleLoadTerminationCancel( |
| Offliner::CompletionCallback completion_callback, |
| const SavePageRequest& canceled_request) { |
| std::move(completion_callback) |
| .Run(canceled_request, Offliner::RequestStatus::FOREGROUND_CANCELED); |
| } |
| |
| } // namespace |
| |
| BackgroundLoaderOffliner::BackgroundLoaderOffliner( |
| content::BrowserContext* browser_context, |
| const OfflinerPolicy* policy, |
| OfflinePageModel* offline_page_model, |
| std::unique_ptr<LoadTerminationListener> load_termination_listener) |
| : browser_context_(browser_context), |
| offline_page_model_(offline_page_model), |
| policy_(policy), |
| load_termination_listener_(std::move(load_termination_listener)), |
| save_state_(NONE), |
| page_load_state_(SUCCESS), |
| network_bytes_(0LL), |
| is_low_bar_met_(false), |
| did_snapshot_on_last_retry_(false) { |
| DCHECK(offline_page_model_); |
| DCHECK(browser_context_); |
| // When the offliner is created for test harness runs, the |
| // |load_termination_listener_| will be set to nullptr, in order to prevent |
| // crashing, adding a check here. |
| if (load_termination_listener_) |
| load_termination_listener_->set_offliner(this); |
| |
| for (int i = 0; i < ResourceDataType::RESOURCE_DATA_TYPE_COUNT; ++i) { |
| stats_[i].requested = 0; |
| stats_[i].completed = 0; |
| } |
| } |
| |
| BackgroundLoaderOffliner::~BackgroundLoaderOffliner() {} |
| |
| // static |
| BackgroundLoaderOffliner* BackgroundLoaderOffliner::FromWebContents( |
| content::WebContents* contents) { |
| Offliner* offliner = OfflinerUserData::OfflinerFromWebContents(contents); |
| // Today we only have one kind of offliner that uses OfflinerUserData. If we |
| // add other types, revisit this cast. |
| if (offliner) |
| return static_cast<BackgroundLoaderOffliner*>(offliner); |
| return nullptr; |
| } |
| |
| bool BackgroundLoaderOffliner::LoadAndSave( |
| const SavePageRequest& request, |
| CompletionCallback completion_callback, |
| const ProgressCallback& progress_callback) { |
| DCHECK(completion_callback); |
| DCHECK(progress_callback); |
| DCHECK(offline_page_model_); |
| |
| if (pending_request_) { |
| DVLOG(1) << "Already have pending request"; |
| return false; |
| } |
| |
| if (GetPolicy(request.client_id().name_space) |
| .requires_specific_user_settings && |
| (AreThirdPartyCookiesBlocked(browser_context_) || |
| IsNetworkPredictionDisabled(browser_context_))) { |
| DVLOG(1) << "WARNING: Unable to load when 3rd party cookies blocked or " |
| << "prediction disabled"; |
| return false; |
| } |
| |
| if (!OfflinePageModel::CanSaveURL(request.url())) { |
| DVLOG(1) << "Not able to save page for requested url: " << request.url(); |
| return false; |
| } |
| |
| ResetLoader(); |
| AttachObservers(); |
| |
| MarkLoadStartTime(); |
| |
| // Track copy of pending request. |
| pending_request_ = std::make_unique<SavePageRequest>(request); |
| completion_callback_ = std::move(completion_callback); |
| progress_callback_ = progress_callback; |
| |
| // Load page attempt. |
| loader_.get()->LoadPage(request.url()); |
| |
| snapshot_controller_ = std::make_unique<BackgroundSnapshotController>( |
| base::SingleThreadTaskRunner::GetCurrentDefault(), this, false); |
| |
| return true; |
| } |
| |
| bool BackgroundLoaderOffliner::Cancel(CancelCallback callback) { |
| DCHECK(pending_request_); |
| // We ignore the case where pending_request_ is not set, but given the checks |
| // in RequestCoordinator this should not happen. |
| if (!pending_request_) |
| return false; |
| |
| // TODO(chili): We are not able to cancel a pending |
| // OfflinePageModel::SaveSnapshot() operation. We will notify caller that |
| // cancel completed once the SavePage operation returns. |
| if (save_state_ != NONE) { |
| save_state_ = DELETE_AFTER_SAVE; |
| cancel_callback_ = std::move(callback); |
| return true; |
| } |
| |
| // Post the cancel callback right after this call concludes. |
| base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
| FROM_HERE, base::BindOnce(std::move(callback), *pending_request_.get())); |
| ResetState(); |
| return true; |
| } |
| |
| void BackgroundLoaderOffliner::TerminateLoadIfInProgress() { |
| if (!pending_request_) |
| return; |
| |
| Cancel(base::BindOnce(HandleLoadTerminationCancel, |
| std::move(completion_callback_))); |
| } |
| |
| bool BackgroundLoaderOffliner::HandleTimeout(int64_t request_id) { |
| if (pending_request_) { |
| DCHECK(request_id == pending_request_->request_id()); |
| if (is_low_bar_met_ && (pending_request_->started_attempt_count() + 1 >= |
| policy_->GetMaxStartedTries() || |
| pending_request_->completed_attempt_count() + 1 >= |
| policy_->GetMaxCompletedTries())) { |
| // If we are already in the middle of a save operation, let it finish |
| // but do not return SAVED_ON_LAST_RETRY |
| if (save_state_ == NONE) { |
| did_snapshot_on_last_retry_ = true; |
| StartSnapshot(); |
| } |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void BackgroundLoaderOffliner::CanDownload( |
| base::OnceCallback<void(bool)> callback) { |
| if (!pending_request_.get()) { |
| std::move(callback).Run(false); // Shouldn't happen though... |
| return; |
| } |
| |
| bool should_allow_downloads = false; |
| Offliner::RequestStatus final_status = |
| Offliner::RequestStatus::LOADING_FAILED_DOWNLOAD; |
| // Check whether we should allow file downloads for this save page request. |
| // If we want to proceed with the file download, fail with |
| // DOWNLOAD_THROTTLED. If we don't want to proceed with the file download, |
| // fail with LOADING_FAILED_DOWNLOAD. |
| if (GetPolicy(pending_request_.get()->client_id().name_space) |
| .allows_conversion_to_background_file_download) { |
| should_allow_downloads = true; |
| final_status = Offliner::RequestStatus::DOWNLOAD_THROTTLED; |
| } |
| |
| std::move(callback).Run(should_allow_downloads); |
| SavePageRequest request(*pending_request_.get()); |
| std::move(completion_callback_).Run(request, final_status); |
| base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( |
| FROM_HERE, base::BindOnce(&BackgroundLoaderOffliner::ResetState, |
| weak_ptr_factory_.GetWeakPtr())); |
| } |
| |
| void BackgroundLoaderOffliner::MarkLoadStartTime() { |
| load_start_time_ = base::TimeTicks::Now(); |
| } |
| |
| void BackgroundLoaderOffliner::PrimaryMainDocumentElementAvailable() { |
| is_low_bar_met_ = true; |
| |
| // Add this signal to signal_data_. |
| AddLoadingSignal("PrimaryMainDocumentElementAvailable"); |
| } |
| |
| void BackgroundLoaderOffliner::DocumentOnLoadCompletedInPrimaryMainFrame() { |
| if (!pending_request_.get()) { |
| DVLOG(1) << "DidStopLoading called even though no pending request."; |
| return; |
| } |
| |
| // Add this signal to signal_data_. |
| AddLoadingSignal("DocumentOnLoadCompletedInPrimaryMainFrame"); |
| |
| snapshot_controller_->DocumentOnLoadCompletedInPrimaryMainFrame(); |
| } |
| |
| void BackgroundLoaderOffliner::PrimaryMainFrameRenderProcessGone( |
| base::TerminationStatus status) { |
| if (pending_request_) { |
| SavePageRequest request(*pending_request_.get()); |
| switch (status) { |
| case base::TERMINATION_STATUS_OOM: |
| case base::TERMINATION_STATUS_PROCESS_CRASHED: |
| case base::TERMINATION_STATUS_STILL_RUNNING: |
| std::move(completion_callback_) |
| .Run(request, Offliner::RequestStatus::LOADING_FAILED_NO_NEXT); |
| break; |
| case base::TERMINATION_STATUS_PROCESS_WAS_KILLED: |
| default: |
| std::move(completion_callback_) |
| .Run(request, Offliner::RequestStatus::LOADING_FAILED); |
| } |
| ResetState(); |
| } |
| } |
| |
| void BackgroundLoaderOffliner::WebContentsDestroyed() { |
| if (pending_request_) { |
| SavePageRequest request(*pending_request_.get()); |
| std::move(completion_callback_) |
| .Run(request, Offliner::RequestStatus::LOADING_FAILED); |
| ResetState(); |
| } |
| } |
| |
| void BackgroundLoaderOffliner::DidFinishNavigation( |
| content::NavigationHandle* navigation_handle) { |
| if (!navigation_handle->IsInPrimaryMainFrame()) |
| return; |
| // If there was an error of any kind (certificate, client, DNS, etc), |
| // Mark as error page. Resetting here causes RecordNavigationMetrics to crash. |
| if (navigation_handle->IsErrorPage()) { |
| page_load_state_ = RETRIABLE_NET_ERROR; |
| } else { |
| int status_code = 200; // Default to OK. |
| // No response header can imply intermediate navigation state. |
| if (navigation_handle->GetResponseHeaders()) |
| status_code = navigation_handle->GetResponseHeaders()->response_code(); |
| // 2XX and 3XX are ok because they indicate success or redirection. |
| // We track 301 because it's MOVED_PERMANENTLY and usually accompanies an |
| // error page with new address. |
| // 400+ codes are client and server errors. |
| // We skip 418 because it's a teapot. |
| if (status_code == 301 || (status_code >= 400 && status_code != 418)) { |
| page_load_state_ = RETRIABLE_HTTP_ERROR; |
| } |
| } |
| } |
| |
| void BackgroundLoaderOffliner::SetBackgroundSnapshotControllerForTest( |
| std::unique_ptr<BackgroundSnapshotController> controller) { |
| snapshot_controller_ = std::move(controller); |
| } |
| |
| void BackgroundLoaderOffliner::ObserveResourceLoading( |
| ResourceLoadingObserver::ResourceDataType type, |
| bool started) { |
| // Add the signal to extra data, and use for tracking. |
| |
| RequestStats& found_stats = stats_[type]; |
| if (started) |
| ++found_stats.requested; |
| else |
| ++found_stats.completed; |
| } |
| |
| void BackgroundLoaderOffliner::OnNetworkBytesChanged(int64_t bytes) { |
| if (pending_request_ && save_state_ != SAVING) { |
| network_bytes_ += bytes; |
| progress_callback_.Run(*pending_request_, network_bytes_); |
| } |
| } |
| |
| void BackgroundLoaderOffliner::StartSnapshot() { |
| if (!pending_request_.get()) { |
| DVLOG(1) << "Pending request was cleared during delay."; |
| return; |
| } |
| DCHECK(is_low_bar_met_) |
| << "Minimum quality must have been reached before saving a snapshot"; |
| |
| // Add this signal to signal_data_. |
| AddLoadingSignal("Snapshotting"); |
| |
| SavePageRequest request(*pending_request_.get()); |
| // If there was an error navigating to page, return loading failed. |
| if (page_load_state_ != SUCCESS) { |
| Offliner::RequestStatus status; |
| switch (page_load_state_) { |
| case RETRIABLE_NET_ERROR: |
| status = Offliner::RequestStatus::LOADING_FAILED_NET_ERROR; |
| break; |
| case RETRIABLE_HTTP_ERROR: |
| status = Offliner::RequestStatus::LOADING_FAILED_HTTP_ERROR; |
| break; |
| case NONRETRIABLE: |
| status = Offliner::RequestStatus::LOADING_FAILED_NO_RETRY; |
| break; |
| default: |
| // We should've already checked for Success before entering here. |
| NOTREACHED(); |
| status = Offliner::RequestStatus::LOADING_FAILED; |
| } |
| |
| std::move(completion_callback_).Run(request, status); |
| ResetState(); |
| return; |
| } |
| |
| content::WebContents* web_contents( |
| content::WebContentsObserver::web_contents()); |
| |
| Offliner::RequestStatus loaded_page_error = |
| CanSavePageInBackground(web_contents); |
| if (loaded_page_error != Offliner::RequestStatus::UNKNOWN) { |
| std::move(completion_callback_).Run(request, loaded_page_error); |
| ResetState(); |
| return; |
| } |
| |
| save_state_ = SAVING; |
| |
| std::unique_ptr<OfflinePageArchiver> archiver(new OfflinePageMHTMLArchiver()); |
| |
| OfflinePageModel::SavePageParams params; |
| params.url = web_contents->GetLastCommittedURL(); |
| params.client_id = request.client_id(); |
| params.proposed_offline_id = request.request_id(); |
| params.is_background = true; |
| params.request_origin = request.request_origin(); |
| |
| // Pass in the original URL if it's different from last committed |
| // when redirects occur. |
| if (!request.original_url().is_empty()) |
| params.original_url = request.original_url(); |
| else if (params.url != request.url()) |
| params.original_url = request.url(); |
| |
| offline_page_model_->SavePage( |
| params, std::move(archiver), web_contents, |
| base::BindOnce(&BackgroundLoaderOffliner::OnPageSaved, |
| weak_ptr_factory_.GetWeakPtr())); |
| } |
| |
| void BackgroundLoaderOffliner::OnPageSaved(SavePageResult save_result, |
| int64_t offline_id) { |
| if (!pending_request_) |
| return; |
| |
| SavePageRequest request(*pending_request_.get()); |
| bool did_snapshot_on_last_retry = did_snapshot_on_last_retry_; |
| ResetState(); |
| |
| if (save_state_ == DELETE_AFTER_SAVE) { |
| // Delete the saved page off disk and from the OPM. |
| PageCriteria criteria; |
| criteria.offline_ids = std::vector<int64_t>{offline_id}; |
| offline_page_model_->DeletePagesWithCriteria( |
| criteria, |
| base::BindOnce(&BackgroundLoaderOffliner::DeleteOfflinePageCallback, |
| weak_ptr_factory_.GetWeakPtr(), request)); |
| save_state_ = NONE; |
| return; |
| } |
| |
| save_state_ = NONE; |
| |
| Offliner::RequestStatus save_status; |
| if (save_result == SavePageResult::ALREADY_EXISTS) { |
| save_status = RequestStatus::SAVED; |
| } else if (save_result == SavePageResult::SUCCESS) { |
| if (did_snapshot_on_last_retry) |
| save_status = RequestStatus::SAVED_ON_LAST_RETRY; |
| else |
| save_status = RequestStatus::SAVED; |
| } else { |
| save_status = RequestStatus::SAVE_FAILED; |
| } |
| |
| std::move(completion_callback_).Run(request, save_status); |
| } |
| |
| void BackgroundLoaderOffliner::DeleteOfflinePageCallback( |
| const SavePageRequest& request, |
| DeletePageResult result) { |
| std::move(cancel_callback_).Run(request); |
| } |
| |
| void BackgroundLoaderOffliner::ResetState() { |
| pending_request_.reset(); |
| // Stop snapshot controller from triggering any more events. |
| snapshot_controller_->Stop(); |
| // Delete the snapshot controller after stack unwinds, so we don't |
| // corrupt stack in some edge cases. Deleting it soon should be safe because |
| // we check against pending_request_ with every action, and snapshot |
| // controller is configured to only call StartSnapshot once for BGL. |
| base::SingleThreadTaskRunner::GetCurrentDefault()->DeleteSoon( |
| FROM_HERE, snapshot_controller_.release()); |
| page_load_state_ = SUCCESS; |
| network_bytes_ = 0LL; |
| is_low_bar_met_ = false; |
| did_snapshot_on_last_retry_ = false; |
| content::WebContentsObserver::Observe(nullptr); |
| loader_.reset(); |
| |
| for (int i = 0; i < ResourceDataType::RESOURCE_DATA_TYPE_COUNT; ++i) { |
| stats_[i].requested = 0; |
| stats_[i].completed = 0; |
| } |
| } |
| |
| void BackgroundLoaderOffliner::ResetLoader() { |
| loader_ = std::make_unique<background_loader::BackgroundLoaderContents>( |
| browser_context_); |
| loader_->SetDelegate(this); |
| |
| // Initialize web contents settings. |
| renderer_preferences_util::UpdateFromSystemSettings( |
| loader_->web_contents()->GetMutableRendererPrefs(), |
| Profile::FromBrowserContext(browser_context_)); |
| content_settings::PageSpecificContentSettings::CreateForWebContents( |
| loader_->web_contents(), |
| std::make_unique<chrome::PageSpecificContentSettingsDelegate>( |
| loader_->web_contents())); |
| } |
| |
| void BackgroundLoaderOffliner::AttachObservers() { |
| content::WebContents* contents = loader_->web_contents(); |
| content::WebContentsObserver::Observe(contents); |
| OfflinerUserData::CreateForWebContents(contents, this); |
| } |
| |
| void BackgroundLoaderOffliner::AddLoadingSignal(const char* signal_name) { |
| base::TimeTicks current_time = base::TimeTicks::Now(); |
| base::TimeDelta delay_so_far = current_time - load_start_time_; |
| // We would prefer to use int64_t here, but JSON does not support that type. |
| // Given the choice between int and double, we choose to implicitly convert to |
| // a double since it maintains more precision (we can get a longer time in |
| // milliseconds than we can with a 2 bit int, 53 bits vs 32). |
| signal_data_.Set(signal_name, delay_so_far.InMillisecondsF()); |
| } |
| |
| void BackgroundLoaderOffliner::RenovationsCompleted() { |
| snapshot_controller_->RenovationsCompleted(); |
| } |
| |
| Offliner::RequestStatus BackgroundLoaderOffliner::CanSavePageInBackground( |
| content::WebContents* web_contents) { |
| DCHECK(is_low_bar_met_) |
| << "Minimum quality must have been reached before checking loaded page"; |
| std::unique_ptr<security_state::VisibleSecurityState> visible_security_state = |
| GetVisibleSecurityState(web_contents); |
| // Checks for HTTPS certificate errors (HTTP connections are not affected). |
| if (security_state::HasMajorCertificateError(*visible_security_state)) |
| return Offliner::RequestStatus::LOADED_PAGE_HAS_CERTIFICATE_ERROR; |
| |
| // Checks if the page is blocked by SafeBrowsing. |
| if (visible_security_state->malicious_content_status != |
| security_state::MaliciousContentStatus::MALICIOUS_CONTENT_STATUS_NONE) { |
| return Offliner::RequestStatus::LOADED_PAGE_IS_BLOCKED; |
| } |
| |
| // Don't save Chrome error or interstitial pages. |
| if (GetPageType(web_contents) != content::PageType::PAGE_TYPE_NORMAL) |
| return Offliner::RequestStatus::LOADED_PAGE_IS_CHROME_INTERNAL; |
| |
| return Offliner::RequestStatus::UNKNOWN; |
| } |
| |
| std::unique_ptr<security_state::VisibleSecurityState> |
| BackgroundLoaderOffliner::GetVisibleSecurityState( |
| content::WebContents* web_contents) { |
| // Note: this tab helper needs to be created here as in the background it is |
| // not created by default. |
| SecurityStateTabHelper::CreateForWebContents(web_contents); |
| SecurityStateTabHelper* helper = |
| SecurityStateTabHelper::FromWebContents(web_contents); |
| DCHECK(helper); |
| return helper->GetVisibleSecurityState(); |
| } |
| |
| content::PageType BackgroundLoaderOffliner::GetPageType( |
| content::WebContents* web_contents) { |
| DCHECK(web_contents->GetController().GetVisibleEntry()) |
| << "An entry must have committed at this WebContents"; |
| return web_contents->GetController().GetVisibleEntry()->GetPageType(); |
| } |
| |
| } // namespace offline_pages |