components/language/ios/browser/resources/language_detection.ts - chromium/src - Git at Google

 // Copyright 2013 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 import {CrWebApi, gCrWeb} from '//ios/web/public/js_messaging/resources/gcrweb.js';
 import {sendWebKitMessage} from '//ios/web/public/js_messaging/resources/utils.js';

 // Mark: Private properties

 /**
  * The cache of the text content that was extracted from the page, or null
  * when nothing is cached. Starts out as null so that a read which happens
  * before any extraction sees a value matching the declared type instead of
  * `undefined`.
  */
 let bufferedTextContent: string|null = null;

 /**
  * The number of active requests that have populated the cache. This is
  * incremented every time a call to
  * `__gCrWeb.getRegisteredApi('languageDetection').getFunction('detectLanguage')`
  * populates the buffer. This is decremented every time there is a call to
  * retrieve the buffer. The buffer is purged when this goes down to 0.
  */
 let activeRequests = 0;

 /**
  * Reports whether the page has opted out of translation.
  * @return true when the root element carries a `translate` attribute equal to
  * "no" (compared case-insensitively), or when a meta tag named "google" has
  * "notranslate" as its `content` or as its `value` attribute.
  */
 function hasNoTranslate(): boolean {
   // `getAttribute` returns null when the attribute is absent.
   const translateAttr = document.documentElement.getAttribute('translate');
   if (translateAttr !== null && translateAttr.toLowerCase() === 'no') {
     return true;
   }

   return Array.from(document.getElementsByTagName('meta'))
       .some(
           (meta) => meta.name === 'google' &&
               (meta.content === 'notranslate' ||
                meta.getAttribute('value') === 'notranslate'));
 }

 /**
  * Gets the content of the first meta tag whose `httpEquiv` matches.
  * The comparison is case insensitive on both sides: the argument and the
  * value read from each tag are lower-cased before being compared.
  * @param httpEquiv Value of the "httpEquiv" attribute, in any letter case.
  * @return Value of the "content" attribute of the first matching meta tag, or
  * the empty string when no meta tag matches.
  */
 function getMetaContentByHttpEquiv(httpEquiv: string): string {
   // Normalise once, outside the loop.
   const wanted = httpEquiv.toLowerCase();
   for (const metaTag of document.getElementsByTagName('meta')) {
     // Tags with an empty `httpEquiv` are skipped.
     if (metaTag.httpEquiv && metaTag.httpEquiv.toLowerCase() === wanted) {
       return metaTag.content;
     }
   }
   return '';
 }

 // Node names that `getTextContent` refuses to walk into: it returns no text
 // for an element whose name is in this set.
 const NON_TEXT_NODE_NAMES = new Set<string>(
     ['EMBED', 'NOSCRIPT', 'OBJECT', 'SCRIPT', 'STYLE']);

 /**
  * Walks a DOM tree to extract the text content.
  * Does not walk into a node when its name is in `NON_TEXT_NODE_NAMES`, nor
  * into an element whose computed style is `display: none` or
  * `visibility: hidden`. A BR element contributes a single line break, and
  * every element other than BODY whose computed `display` is not 'inline' is
  * preceded by a line break.
  * Element names are compared in upper case, so elements that report a
  * lower-case `nodeName` are filtered the same way as upper-case ones.
  * @param node The DOM tree
  * @param maxLen Output will be truncated to `maxLen`
  * @return The text content, at most `maxLen` characters long
  */
 function getTextContent(node: ChildNode, maxLen: number): string {
   if (!node || maxLen <= 0) {
     return '';
   }

   let txt = '';
   // Formatting and filtering.
   if (node.nodeType === Node.ELEMENT_NODE && node instanceof Element) {
     // Normalise the name once so that every check below agrees on its case.
     const tagName = node.nodeName ? node.nodeName.toUpperCase() : '';
     // Reject non-text nodes such as scripts.
     if (!tagName || NON_TEXT_NODE_NAMES.has(tagName)) {
       return '';
     }
     if (tagName === 'BR') {
       return '\n';
     }
     const style = window.getComputedStyle(node);
     // Only proceed if the element is visible.
     if (style.display === 'none' || style.visibility === 'hidden') {
       return '';
     }
     // No need to add a line break before |body| as it is the first element.
     if (tagName !== 'BODY' && style.display !== 'inline') {
       txt = '\n';
     }
   }

   if (node.hasChildNodes()) {
     for (const childNode of node.childNodes) {
       // Each child only gets the budget that is still left.
       txt += getTextContent(childNode, maxLen - txt.length);
       if (txt.length >= maxLen) {
         break;
       }
     }
   } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
     txt += node.textContent.substring(0, maxLen - txt.length);
   }

   return txt;
 }

 /**
  * Extracts the text of the page, caches it in `bufferedTextContent` and
  * informs the native side with a 'LanguageDetectionTextCaptured' message. The
  * message carries the language hints and the frame id but not the text: the
  * text stays in the cache and is retrieved at a later time directly from the
  * Obj-C side, which avoids sending it back via async messaging.
  */
 function detectLanguage(): void {
   // Maximum length of the extracted text handed to the native side.
   // Matches desktop implementation.
   // Note: This should stay in sync with the constant in
   // ios_language_detection_tab_helper.mm .
   const kMaxIndexChars = 65535;

   // Every call that populates the cache counts as one active request.
   activeRequests += 1;
   bufferedTextContent = getTextContent(document.body, kMaxIndexChars);

   const message = {
     'hasNoTranslate': hasNoTranslate(),
     'htmlLang': document.documentElement.lang,
     'httpContentLanguage': getMetaContentByHttpEquiv('content-language'),
     'frameId': gCrWeb.getFrameId(),
   };
   sendWebKitMessage('LanguageDetectionTextCaptured', message);
 }

 /**
  * Retrieves the cached text content of a page and releases one active
  * request. The cache is purged once no active request is left.
  * A call that has no matching active request leaves the counter at 0 instead
  * of driving it negative, so later populate/retrieve pairs still purge the
  * cache.
  * @return The cached text, or null when nothing is cached.
  */
 function retrieveBufferedTextContent(): string|null {
   // Anything that is not a string (including a cache that was never written)
   // is reported as null, matching the declared return type.
   const textContent =
       typeof bufferedTextContent === 'string' ? bufferedTextContent : null;
   if (activeRequests > 0) {
     activeRequests -= 1;
   }
   if (activeRequests === 0) {
     bufferedTextContent = null;
   }
   return textContent;
 }

 // Expose the two entry points of this file through a `CrWebApi` object that
 // is registered on `gCrWeb` under the name 'languageDetection'.
 const languageDetectionApi = new CrWebApi();

 // The names given here are the ones callers use to look the functions up.
 languageDetectionApi.addFunction('detectLanguage', detectLanguage);
 languageDetectionApi.addFunction('retrieveBufferedTextContent', retrieveBufferedTextContent);

 gCrWeb.registerApi('languageDetection', languageDetectionApi);
	// Copyright 2013 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	import {CrWebApi, gCrWeb} from '//ios/web/public/js_messaging/resources/gcrweb.js';
	import {sendWebKitMessage} from '//ios/web/public/js_messaging/resources/utils.js';

	// Mark: Private properties

	/**
	 * The cache of the text content that was extracted from the page, or null
	 * when nothing is cached. Starts out as null so that a read which happens
	 * before any extraction sees a value matching the declared type instead of
	 * `undefined`.
	 */
	let bufferedTextContent: string|null = null;

	/**
	 * The number of active requests that have populated the cache. This is
	 * incremented every time a call to
	 * `__gCrWeb.getRegisteredApi('languageDetection').getFunction('detectLanguage')`
	 * populates the buffer. This is decremented every time there is a call to
	 * retrieve the buffer. The buffer is purged when this goes down to 0.
	 */
	let activeRequests = 0;

	/**
	 * Reports whether the page has opted out of translation.
	 * @return true when the root element carries a `translate` attribute equal to
	 * "no" (compared case-insensitively), or when a meta tag named "google" has
	 * "notranslate" as its `content` or as its `value` attribute.
	 */
	function hasNoTranslate(): boolean {
	  // `getAttribute` returns null when the attribute is absent.
	  const translateAttr = document.documentElement.getAttribute('translate');
	  if (translateAttr !== null && translateAttr.toLowerCase() === 'no') {
	    return true;
	  }

	  return Array.from(document.getElementsByTagName('meta'))
	      .some(
	          (meta) => meta.name === 'google' &&
	              (meta.content === 'notranslate' ||
	               meta.getAttribute('value') === 'notranslate'));
	}

	/**
	 * Gets the content of the first meta tag whose `httpEquiv` matches.
	 * The comparison is case insensitive on both sides: the argument and the
	 * value read from each tag are lower-cased before being compared.
	 * @param httpEquiv Value of the "httpEquiv" attribute, in any letter case.
	 * @return Value of the "content" attribute of the first matching meta tag, or
	 * the empty string when no meta tag matches.
	 */
	function getMetaContentByHttpEquiv(httpEquiv: string): string {
	  // Normalise once, outside the loop.
	  const wanted = httpEquiv.toLowerCase();
	  for (const metaTag of document.getElementsByTagName('meta')) {
	    // Tags with an empty `httpEquiv` are skipped.
	    if (metaTag.httpEquiv && metaTag.httpEquiv.toLowerCase() === wanted) {
	      return metaTag.content;
	    }
	  }
	  return '';
	}

	// Node names that `getTextContent` refuses to walk into: it returns no text
	// for an element whose name is in this set.
	const NON_TEXT_NODE_NAMES = new Set<string>(
	    ['EMBED', 'NOSCRIPT', 'OBJECT', 'SCRIPT', 'STYLE']);

	/**
	 * Walks a DOM tree to extract the text content.
	 * Does not walk into a node when its name is in `NON_TEXT_NODE_NAMES`, nor
	 * into an element whose computed style is `display: none` or
	 * `visibility: hidden`. A BR element contributes a single line break, and
	 * every element other than BODY whose computed `display` is not 'inline' is
	 * preceded by a line break.
	 * Element names are compared in upper case, so elements that report a
	 * lower-case `nodeName` are filtered the same way as upper-case ones.
	 * @param node The DOM tree
	 * @param maxLen Output will be truncated to `maxLen`
	 * @return The text content, at most `maxLen` characters long
	 */
	function getTextContent(node: ChildNode, maxLen: number): string {
	  if (!node || maxLen <= 0) {
	    return '';
	  }

	  let txt = '';
	  // Formatting and filtering.
	  if (node.nodeType === Node.ELEMENT_NODE && node instanceof Element) {
	    // Normalise the name once so that every check below agrees on its case.
	    const tagName = node.nodeName ? node.nodeName.toUpperCase() : '';
	    // Reject non-text nodes such as scripts.
	    if (!tagName || NON_TEXT_NODE_NAMES.has(tagName)) {
	      return '';
	    }
	    if (tagName === 'BR') {
	      return '\n';
	    }
	    const style = window.getComputedStyle(node);
	    // Only proceed if the element is visible.
	    if (style.display === 'none' || style.visibility === 'hidden') {
	      return '';
	    }
	    // No need to add a line break before |body| as it is the first element.
	    if (tagName !== 'BODY' && style.display !== 'inline') {
	      txt = '\n';
	    }
	  }

	  if (node.hasChildNodes()) {
	    for (const childNode of node.childNodes) {
	      // Each child only gets the budget that is still left.
	      txt += getTextContent(childNode, maxLen - txt.length);
	      if (txt.length >= maxLen) {
	        break;
	      }
	    }
	  } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
	    txt += node.textContent.substring(0, maxLen - txt.length);
	  }

	  return txt;
	}

	/**
	 * Extracts the text of the page, caches it in `bufferedTextContent` and
	 * informs the native side with a 'LanguageDetectionTextCaptured' message. The
	 * message carries the language hints and the frame id but not the text: the
	 * text stays in the cache and is retrieved at a later time directly from the
	 * Obj-C side, which avoids sending it back via async messaging.
	 */
	function detectLanguage(): void {
	  // Maximum length of the extracted text handed to the native side.
	  // Matches desktop implementation.
	  // Note: This should stay in sync with the constant in
	  // ios_language_detection_tab_helper.mm .
	  const kMaxIndexChars = 65535;

	  // Every call that populates the cache counts as one active request.
	  activeRequests += 1;
	  bufferedTextContent = getTextContent(document.body, kMaxIndexChars);

	  const message = {
	    'hasNoTranslate': hasNoTranslate(),
	    'htmlLang': document.documentElement.lang,
	    'httpContentLanguage': getMetaContentByHttpEquiv('content-language'),
	    'frameId': gCrWeb.getFrameId(),
	  };
	  sendWebKitMessage('LanguageDetectionTextCaptured', message);
	}

	/**
	 * Retrieves the cached text content of a page and releases one active
	 * request. The cache is purged once no active request is left.
	 * A call that has no matching active request leaves the counter at 0 instead
	 * of driving it negative, so later populate/retrieve pairs still purge the
	 * cache.
	 * @return The cached text, or null when nothing is cached.
	 */
	function retrieveBufferedTextContent(): string|null {
	  // Anything that is not a string (including a cache that was never written)
	  // is reported as null, matching the declared return type.
	  const textContent =
	      typeof bufferedTextContent === 'string' ? bufferedTextContent : null;
	  if (activeRequests > 0) {
	    activeRequests -= 1;
	  }
	  if (activeRequests === 0) {
	    bufferedTextContent = null;
	  }
	  return textContent;
	}

	// Expose the two entry points of this file through a `CrWebApi` object that
	// is registered on `gCrWeb` under the name 'languageDetection'.
	const languageDetectionApi = new CrWebApi();

	// The names given here are the ones callers use to look the functions up.
	languageDetectionApi.addFunction('detectLanguage', detectLanguage);
	languageDetectionApi.addFunction('retrieveBufferedTextContent', retrieveBufferedTextContent);

	gCrWeb.registerApi('languageDetection', languageDetectionApi);