// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| |
| import {CrWebApi, gCrWeb} from '//ios/web/public/js_messaging/resources/gcrweb.js'; |
| import {sendWebKitMessage} from '//ios/web/public/js_messaging/resources/utils.js'; |
| |
// Mark: Private properties

/**
 * The cache of the text content that was extracted from the page. It starts
 * out as `null` (nothing buffered) so that a read made before the first
 * `detectLanguage` call observes a value of the declared `string|null` type
 * rather than `undefined`.
 */
let bufferedTextContent: string|null = null;

/**
 * The number of active requests that have populated the cache. This is
 * incremented every time a call to
 * `__gCrWeb.getRegisteredApi('languageDetection').getFunction('detectLanguage')`
 * populates the buffer. This is decremented every time there is a call to
 * retrieve the buffer. The buffer is purged when this goes down to 0.
 */
let activeRequests = 0;
| |
/**
 * Reports whether the page has opted out of translation.
 * @return true when the root <html> element carries a `translate` attribute
 *     equal to "no" (compared case-insensitively), or when a meta tag named
 *     "google" has "notranslate" as its `content` or as its `value`
 *     attribute; false otherwise.
 */
function hasNoTranslate(): boolean {
  const root = document.documentElement;
  const translateAttr =
      root.hasAttribute('translate') ? root.getAttribute('translate') : null;
  if (translateAttr !== null && translateAttr.toLowerCase() === 'no') {
    return true;
  }

  // The meta tag name and the "notranslate" token are matched exactly as
  // written (case-sensitive).
  return Array.from(document.getElementsByTagName('meta'))
      .some(
          (meta) => meta.name === 'google' &&
              (meta.content === 'notranslate' ||
               meta.getAttribute('value') === 'notranslate'));
}
| |
/**
 * Looks up the first meta tag whose `httpEquiv` matches the requested name and
 * returns its content. Each tag's `httpEquiv` is lower-cased before the
 * comparison, so the casing used in the page markup does not matter. Tags with
 * an empty `httpEquiv` are skipped.
 * @param httpEquiv Value of the "httpEquiv" attribute to look for; has to be
 *     lower case.
 * @return Value of the "content" attribute of the first matching meta tag, or
 *     an empty string when no tag matches.
 */
function getMetaContentByHttpEquiv(httpEquiv: string): string {
  const match =
      Array.from(document.getElementsByTagName('meta'))
          .find(
              (meta) => !!meta.httpEquiv &&
                  meta.httpEquiv.toLowerCase() === httpEquiv);
  return match ? match.content : '';
}
| |
// Upper-case names of elements whose subtrees are skipped during text
// extraction. Used by the `getTextContent` function below.
const NON_TEXT_NODE_NAMES = new Set([
  'EMBED',
  'NOSCRIPT',
  'OBJECT',
  'SCRIPT',
  'STYLE',
]);

/**
 * Walks a DOM tree to extract the text content.
 * Does not walk into an element when its upper-cased name is in
 * `NON_TEXT_NODE_NAMES`, nor when its computed style has `display: none` or
 * `visibility: hidden`. A BR element yields a single line break, and every
 * element whose computed display is not `inline` (other than BODY) is
 * preceded by a line break.
 * @param node The DOM tree
 * @param maxLen Output will be truncated to `maxLen`
 * @return The text content; an empty string when `node` is missing or
 *     `maxLen` is not positive.
 */
function getTextContent(node: ChildNode, maxLen: number): string {
  if (!node || maxLen <= 0) {
    return '';
  }

  let txt = '';
  // Formatting and filtering.
  if (node.nodeType === Node.ELEMENT_NODE && node instanceof Element) {
    if (!node.nodeName) {
      return '';
    }
    // `nodeName` is only guaranteed to be upper case for HTML elements in HTML
    // documents; XHTML documents and foreign (e.g. SVG) elements keep the
    // authored casing. Normalize once so that every name check below agrees.
    const elementName = node.nodeName.toUpperCase();
    // Reject non-text nodes such as scripts.
    if (NON_TEXT_NODE_NAMES.has(elementName)) {
      return '';
    }
    if (elementName === 'BR') {
      return '\n';
    }
    const style = window.getComputedStyle(node);
    // Only proceed if the element is visible.
    if (style.display === 'none' || style.visibility === 'hidden') {
      return '';
    }
    // No need to add a line break before |body| as it is the first element.
    if (elementName !== 'BODY' && style.display !== 'inline') {
      txt = '\n';
    }
  }

  if (node.hasChildNodes()) {
    for (const childNode of node.childNodes) {
      // Each child only gets the budget that is still left.
      txt += getTextContent(childNode, maxLen - txt.length);
      if (txt.length >= maxLen) {
        break;
      }
    }
  } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
    txt += node.textContent.substring(0, maxLen - txt.length);
  }

  return txt;
}
| |
/**
 * Captures the page text for language detection and informs the native side.
 * The extracted text is not part of the message: it is cached in
 * `bufferedTextContent`, to be retrieved at a later time directly from the
 * Obj-C side, which avoids sending it back via async messaging. Each call
 * also increments `activeRequests`.
 *
 * The 'LanguageDetectionTextCaptured' message carries the page's notranslate
 * state, the `lang` of the root element, the "content-language" meta value
 * and the frame id.
 */
function detectLanguage(): void {
  // Constant for the maximum length of the extracted text returned by
  // `detectLanguage` to the native side.
  // Matches desktop implementation.
  // Note: This should stay in sync with the constant in
  // ios_language_detection_tab_helper.mm .
  const kMaxIndexChars = 65535;

  activeRequests += 1;
  bufferedTextContent = getTextContent(document.body, kMaxIndexChars);

  // Gather the message fields first, then send them in one literal.
  const contentLanguage = getMetaContentByHttpEquiv('content-language');
  const htmlLang = document.documentElement.lang;
  const frameId = gCrWeb.getFrameId();
  const noTranslate = hasNoTranslate();

  sendWebKitMessage('LanguageDetectionTextCaptured', {
    'hasNoTranslate': noTranslate,
    'htmlLang': htmlLang,
    'httpContentLanguage': contentLanguage,
    'frameId': frameId,
  });
}
| |
/**
 * Retrieves the cached text content of a page and releases one outstanding
 * request. The cache is purged once no requests remain outstanding.
 *
 * The request counter is never driven below zero. If it were, a retrieval
 * that has no matching `detectLanguage` call would leave it negative, every
 * later matched retrieval would then land on a negative value instead of 0,
 * and the cache would never be purged again.
 * @return The text that was buffered at the time of the call, if any.
 */
function retrieveBufferedTextContent(): string|null {
  const textContent = bufferedTextContent;
  if (activeRequests > 0) {
    activeRequests -= 1;
  }
  if (activeRequests === 0) {
    bufferedTextContent = null;
  }
  return textContent;
}
| |
// Registers `detectLanguage` and `retrieveBufferedTextContent` on a CrWebApi
// instance and publishes that instance through gCrWeb under the name
// 'languageDetection'.
const languageDetectionApi = new CrWebApi();

languageDetectionApi.addFunction('detectLanguage', detectLanguage);
languageDetectionApi.addFunction(
    'retrieveBufferedTextContent', retrieveBufferedTextContent);

gCrWeb.registerApi('languageDetection', languageDetectionApi);