# Source blob: a294dc8887a9955e27770840a199fb57c0a45e3f
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import time
from profile_creators import profile_extender
from telemetry.core import exceptions
from telemetry.core import util
class FastNavigationProfileExtender(profile_extender.ProfileExtender):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.

  Subclasses must override GetUrlIterator() and
  ShouldExitAfterBatchNavigation(); VerifyProfileWasExtended() and
  CleanUpAfterBatchNavigation() are optional hooks.
  """

  def __init__(self, finder_options, maximum_batch_size):
    """Initializer.

    Args:
      finder_options: Forwarded unchanged to the superclass initializer.
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__(finder_options)

    # The instance keeps a list of Tabs that can be navigated successfully.
    # This means that the Tab is not crashed, and is processing JavaScript in a
    # timely fashion.
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of additional time to wait for a batch of pages to finish
    # loading for each page in the batch.
    self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20

    # The amount of time to wait for a page to quiesce. Some pages will never
    # quiesce.
    self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10

  def Run(self):
    """Superclass override.

    Sets up the browser, performs the batched navigations, and always tears
    the browser down again, even when a navigation raises.
    """
    try:
      self.SetUpBrowser()
      self._PerformNavigations()
    finally:
      self.TearDownBrowser()

    # When there hasn't been an exception, verify that the profile was
    # correctly extended.
    # TODO(erikchen): I've intentionally omitted my implementation of
    # VerifyProfileWasExtended() in small_profile_extender, since the profile
    # is not being correctly extended. http://crbug.com/484833
    # http://crbug.com/484880
    self.VerifyProfileWasExtended()

  def VerifyProfileWasExtended(self):
    """Verifies that the profile was correctly extended.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to.

    Raises:
      NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.

    Raises:
      NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError()

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()]
    self._navigation_tabs = live_tabs

    # Top the pool back up with fresh tabs to replace any that were dropped.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._navigation_tabs.append(self._browser.tabs.New())

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a usable state from
    self._navigation_tabs. The tab is not removed from self.browser.tabs,
    since there is no guarantee that the tab can be safely removed.

    Raises:
      ValueError: If |tab| is not in self._navigation_tabs.
    """
    self._navigation_tabs.remove(tab)

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab.

    Args:
      tab: The tab to query.
      timeout: Passed to tab.EvaluateJavaScript() as its timeout; callers in
          this class pass a number of seconds.

    Returns:
      The result of evaluating 'document.URL' in the tab.
    """
    # TODO(erikchen): Use tab.url instead, which talks to the browser process
    # instead of the renderer process. http://crbug.com/486119
    return tab.EvaluateJavaScript('document.URL', timeout)

  def _WaitForUrlToChange(self, tab, initial_url, end_time):
    """Waits for the tab to navigate away from its initial url.

    If time.time() is larger than end_time, the function does nothing.
    Otherwise, the function tries to return no later than end_time.

    Args:
      tab: The tab that was sent a navigation command.
      initial_url: The url of |tab| before the navigation command was sent.
      end_time: A time.time() timestamp after which waiting stops.
    """
    while True:
      seconds_to_wait = end_time - time.time()
      if seconds_to_wait <= 0:
        break

      try:
        current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
      except exceptions.TimeoutException:
        # The tab did not answer within the remaining budget. Treat this as
        # the deadline expiring rather than aborting the whole run.
        break

      if current_url != initial_url and current_url != '':
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      time.sleep(0.01)

  def _WaitForTabToBeReady(self, tab, end_time):
    """Waits for the tab to be ready.

    If time.time() is larger than end_time, the function does nothing.
    Otherwise, the function tries to return no later than end_time.

    Args:
      tab: The tab to wait on.
      end_time: A time.time() timestamp after which waiting stops.
    """
    seconds_to_wait = end_time - time.time()
    if seconds_to_wait <= 0:
      return

    try:
      tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)
    except exceptions.TimeoutException:
      # The page did not finish loading before the deadline, so there is no
      # time left to wait for quiescence either.
      return

    # Wait up to _TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS for the page to
    # quiesce, but never past end_time. If the page hasn't quiesced by then,
    # it will probably never quiesce.
    seconds_to_wait = min(self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS,
                          max(0, end_time - time.time()))
    try:
      util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait)
    except exceptions.TimeoutException:
      # Failing to quiesce is expected for some pages and is not an error.
      pass

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    A tab whose current url cannot be retrieved in time is considered stalled:
    it is dropped from self._navigation_tabs, its url is skipped, and it is
    left out of the returned list.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it.
    """
    # Attempting to pass in a timeout of 0 seconds results in a synchronous
    # socket error from the websocket library. Pass in a very small timeout
    # instead so that the websocket library raises a Timeout exception. This
    # prevents the logic from accidentally catching different socket
    # exceptions.
    timeout_in_seconds = 0.01

    queued_tabs = []
    for tab, url in batch:
      try:
        # A generous timeout: a tab that cannot report its url within 20
        # seconds is not processing JavaScript in a timely fashion.
        initial_url = self._RetrieveTabUrl(tab, 20)
      except exceptions.TimeoutException:
        # Stop using the stalled tab. _RefreshNavigationTabs() will open a
        # replacement before the next batch.
        if tab in self._navigation_tabs:
          self._RemoveNavigationTab(tab)
        continue

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except exceptions.TimeoutException:
        # We expect to receive a timeout exception, since we're not waiting for
        # the navigation to complete.
        pass
      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    All tabs share a single deadline, which is
    _BATCH_TIMEOUT_PER_PAGE_IN_SECONDS multiplied by the number of tabs.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
          to have already been sent a navigation command.
    """
    total_batch_timeout = (len(queued_tabs) *
                           self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS)
    end_time = time.time() + total_batch_timeout
    for tab, initial_url in queued_tabs:
      # Since we didn't wait any time for the tab url navigation to commit, it's
      # possible that the tab hasn't started navigating yet.
      self._WaitForUrlToChange(tab, initial_url, end_time)
      self._WaitForTabToBeReady(tab, end_time)

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns a list of urls to navigate to, given a url_iterator.

    Args:
      url_iterator: An iterator yielding urls.

    Returns:
      A list of at most self._NUM_TABS urls; shorter (possibly empty) if the
      iterator is exhausted first.
    """
    urls = []
    for _ in range(self._NUM_TABS):
      try:
        # The next() builtin works with both Python 2 and Python 3 iterators.
        urls.append(next(url_iterator))
      except StopIteration:
        break
    return urls

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    url_iterator = self.GetUrlIterator()
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair each url with its own tab. The result is a separate list, so tabs
      # dropped from self._navigation_tabs during the batch do not disturb it.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break