| # Copyright 2015 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| import time |
| |
| from profile_creators import profile_extender |
| from telemetry.core import exceptions |
| from telemetry.core import util |
| |
| |
class FastNavigationProfileExtender(profile_extender.ProfileExtender):
  """Extends a Chrome profile.

  This class creates or extends an existing profile by performing a set of tab
  navigations in large batches. This is accomplished by opening a large number
  of tabs, simultaneously navigating all the tabs, and then waiting for all the
  tabs to load. This provides two benefits:
    - Takes advantage of the high number of logical cores on modern CPUs.
    - The total time spent waiting for navigations to time out scales linearly
      with the number of batches, but does not scale with the size of the
      batch.

  Subclasses must implement GetUrlIterator() and
  ShouldExitAfterBatchNavigation(); they may also override
  VerifyProfileWasExtended() and CleanUpAfterBatchNavigation().
  """

  def __init__(self, finder_options, maximum_batch_size):
    """Initializer.

    Args:
      finder_options: Forwarded unchanged to the ProfileExtender initializer.
      maximum_batch_size: A positive integer indicating the number of tabs to
          simultaneously perform navigations.
    """
    super(FastNavigationProfileExtender, self).__init__(finder_options)

    # The instance keeps a list of Tabs that can be navigated successfully.
    # This means that the Tab is not crashed, and is processing JavaScript in a
    # timely fashion.
    self._navigation_tabs = []

    # The number of tabs to use.
    self._NUM_TABS = maximum_batch_size

    # The amount of additional time to wait for a batch of pages to finish
    # loading for each page in the batch.
    self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20

    # The amount of time to wait for a page to quiesce. Some pages will never
    # quiesce.
    self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10

  def Run(self):
    """Superclass override.

    Sets up the browser, performs the batched navigations, and always tears
    the browser down again. Verification only runs when no exception escaped
    from the set up / navigation phase.
    """
    try:
      self.SetUpBrowser()
      self._PerformNavigations()
    finally:
      self.TearDownBrowser()

    # When there hasn't been an exception, verify that the profile was
    # correctly extended.
    # TODO(erikchen): I've intentionally omitted my implementation of
    # VerifyProfileWasExtended() in small_profile_extender, since the profile
    # is not being correctly extended. http://crbug.com/484833
    # http://crbug.com/484880
    self.VerifyProfileWasExtended()

  def VerifyProfileWasExtended(self):
    """Verifies that the profile was correctly extended.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  def GetUrlIterator(self):
    """Gets URLs for the browser to navigate to.

    Intended for subclass override.

    Returns:
      An iterator whose elements are urls to be navigated to. It must be a
      true iterator (usable with next()), since it is consumed incrementally
      across batches.
    """
    raise NotImplementedError()

  def ShouldExitAfterBatchNavigation(self):
    """Returns a boolean indicating whether profile extension is finished.

    Intended for subclass override.
    """
    raise NotImplementedError()

  def CleanUpAfterBatchNavigation(self):
    """A hook for subclasses to perform cleanup after each batch of
    navigations.

    Can be overridden by subclasses. The default implementation does nothing.
    """
    pass

  def _RefreshNavigationTabs(self):
    """Updates the member self._navigation_tabs to contain self._NUM_TABS
    elements, each of which is not crashed. The crashed tabs are intentionally
    leaked, since Telemetry doesn't have a good way of killing crashed tabs.

    It is also possible for a tab to be stalled in an infinite JavaScript loop.
    These tabs will be in self.browser.tabs, but not in self._navigation_tabs.
    There is no way to kill these tabs, so they are also leaked. This method is
    careful to only use tabs in self._navigation_tabs, or newly created tabs.
    """
    self._navigation_tabs = [
        tab for tab in self._navigation_tabs if tab.IsAlive()]

    # Top the list back up with fresh tabs.
    while len(self._navigation_tabs) < self._NUM_TABS:
      self._navigation_tabs.append(self._browser.tabs.New())

  def _RemoveNavigationTab(self, tab):
    """Removes a tab which is no longer in a useable state from
    self._navigation_tabs. The tab is not removed from self.browser.tabs,
    since there is no guarantee that the tab can be safely removed."""
    self._navigation_tabs.remove(tab)

  def _RetrieveTabUrl(self, tab, timeout):
    """Retrieves the URL of the tab by evaluating JavaScript in it.

    Args:
      tab: The tab to query.
      timeout: The timeout, in seconds, passed to the JavaScript evaluation.

    Returns:
      The value of document.URL in the tab.
    """
    # TODO(erikchen): Use tab.url instead, which talks to the browser process
    # instead of the renderer process. http://crbug.com/486119
    return tab.EvaluateJavaScript('document.URL', timeout)

  def _WaitForUrlToChange(self, tab, initial_url, end_time):
    """Waits for the tab to navigate away from its initial url.

    If time.time() is larger than end_time, the function does nothing.
    Otherwise, the function tries to return no later than end_time.

    Args:
      tab: The tab that was sent a navigation command.
      initial_url: The url of the tab before the navigation command was sent.
      end_time: Absolute deadline, as a time.time() value.
    """
    while True:
      seconds_to_wait = end_time - time.time()
      if seconds_to_wait <= 0:
        break

      # An empty url is treated as "navigation not committed yet".
      current_url = self._RetrieveTabUrl(tab, seconds_to_wait)
      if current_url != initial_url and current_url != '':
        break

      # Retrieving the current url is a non-trivial operation. Add a small
      # sleep here to prevent this method from contending with the actual
      # navigation.
      time.sleep(0.01)

  def _WaitForTabToBeReady(self, tab, end_time):
    """Waits for the tab to be ready.

    If time.time() is larger than end_time, the function does nothing.
    Otherwise, the function tries to return no later than end_time.

    Args:
      tab: The tab to wait on.
      end_time: Absolute deadline, as a time.time() value.
    """
    seconds_to_wait = end_time - time.time()
    if seconds_to_wait <= 0:
      return
    tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait)

    # Wait for the page to quiesce, but never longer than
    # _TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS: a page that hasn't
    # quiesced by then will probably never quiesce, and spending the whole
    # remaining batch budget on it would starve the tabs still to be checked.
    seconds_to_wait = min(
        max(0, end_time - time.time()),
        self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS)
    try:
      util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait)
    except exceptions.TimeoutException:
      # Failing to quiesce is expected for some pages.
      pass

  def _BatchNavigateTabs(self, batch):
    """Performs a batch of tab navigations with minimal delay.

    Args:
      batch: A list of tuples (tab, url).

    Returns:
      A list of tuples (tab, initial_url). |initial_url| is the url of the
      |tab| prior to a navigation command being sent to it. Tabs that fail to
      report their url in time are dropped from self._navigation_tabs and are
      not included in the result.
    """
    # Attempting to pass in a timeout of 0 seconds results in a synchronous
    # socket error from the websocket library. Pass in a very small timeout
    # instead so that the websocket library raises a Timeout exception. This
    # prevents the logic from accidentally catching different socket
    # exceptions.
    timeout_in_seconds = 0.01

    queued_tabs = []
    for tab, url in batch:
      try:
        initial_url = self._RetrieveTabUrl(tab, 20)
      except exceptions.TimeoutException:
        # NOTE(review): presumably a timed out JavaScript evaluation surfaces
        # as TimeoutException, like Navigate() below - confirm. A tab that
        # can't report its url within 20 seconds is not processing JavaScript
        # in a timely fashion, so stop using it. The url assigned to it is
        # skipped for this run.
        self._RemoveNavigationTab(tab)
        continue

      try:
        tab.Navigate(url, None, timeout_in_seconds)
      except exceptions.TimeoutException:
        # We expect to receive a timeout exception, since we're not waiting for
        # the navigation to complete.
        pass
      queued_tabs.append((tab, initial_url))
    return queued_tabs

  def _WaitForQueuedTabsToLoad(self, queued_tabs):
    """Waits for all the batch navigated tabs to finish loading.

    The whole batch shares a single deadline of
    len(queued_tabs) * _BATCH_TIMEOUT_PER_PAGE_IN_SECONDS seconds from now.

    Args:
      queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed
      to have already been sent a navigation command.
    """
    total_batch_timeout = (len(queued_tabs) *
                           self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS)
    end_time = time.time() + total_batch_timeout
    for tab, initial_url in queued_tabs:
      try:
        # Since we didn't wait any time for the tab url navigation to commit,
        # it's possible that the tab hasn't started navigating yet.
        self._WaitForUrlToChange(tab, initial_url, end_time)
        self._WaitForTabToBeReady(tab, end_time)
      except exceptions.TimeoutException:
        # A navigation that doesn't finish inside the batch budget is an
        # expected cost of batching, not a reason to abort the whole profile
        # extension. Move on to the next tab; a tab that is genuinely stalled
        # is weeded out by the next call to _BatchNavigateTabs().
        pass

  def _GetUrlsToNavigate(self, url_iterator):
    """Returns a list of at most self._NUM_TABS urls taken from url_iterator.

    The list is shorter (possibly empty) when the iterator is exhausted.
    """
    urls = []
    for _ in range(self._NUM_TABS):
      try:
        # The next() builtin works for both Python 2 and Python 3 iterators.
        urls.append(next(url_iterator))
      except StopIteration:
        break
    return urls

  def _PerformNavigations(self):
    """Repeatedly fetches a batch of urls, and navigates to those urls. This
    will run until an empty batch is returned, or
    ShouldExitAfterBatchNavigation() returns True.
    """
    url_iterator = self.GetUrlIterator()
    while True:
      self._RefreshNavigationTabs()
      urls = self._GetUrlsToNavigate(url_iterator)

      if not urls:
        break

      # Pair the i-th url with the i-th navigation tab. There are never more
      # urls than tabs, since both are bounded by self._NUM_TABS.
      batch = list(zip(self._navigation_tabs, urls))

      queued_tabs = self._BatchNavigateTabs(batch)
      self._WaitForQueuedTabsToLoad(queued_tabs)

      self.CleanUpAfterBatchNavigation()

      if self.ShouldExitAfterBatchNavigation():
        break