components/translate/ios/browser/resources/language_detection.js - chromium/src - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 /**
  * @fileoverview Installs Language Detection management functions on the
  * __gCrWeb object.
  *
  * TODO(crbug.com/659442): Enable checkTypes error for this file.
  * @suppress {checkTypes}
  */

 // Create the languageDetection namespace on the global __gCrWeb object, which
 // must already exist when this script runs. All state and functions defined
 // below are attached to this namespace.
 __gCrWeb.languageDetection = {};

 // Store languageDetection namespace object in a global __gCrWeb object
 // referenced by a string, so it does not get renamed by closure compiler during
 // the minification.
 __gCrWeb['languageDetection'] = __gCrWeb.languageDetection;

 (function() {
 /**
  * The cache of the text content that was extracted from the page.
  * It is filled by |__gCrWeb.languageDetection.detectLanguage| and reset to
  * null by |__gCrWeb.languageDetection.retrieveBufferedTextContent| once
  * |activeRequests| reaches 0. Starts out as null (nothing cached).
  */
 __gCrWeb.languageDetection.bufferedTextContent = null;

 /**
  * The number of active requests that have populated the cache. This is
  * incremented every time a call to |__gCrWeb.languageDetection.detectLanguage|
  * populates the buffer. This is decremented every time there is a call to
  * retrieve the buffer. The buffer is purged when this goes down to 0.
  */
 __gCrWeb.languageDetection.activeRequests = 0;

 /**
  * Reports whether the page permits translation.
  * A page opts out with a <meta name="google"> tag whose "content" property or
  * "value" attribute equals "notranslate".
  * @return {boolean} false if such an opt-out tag exists, true otherwise.
  */
 __gCrWeb.languageDetection['translationAllowed'] = function() {
   var optOutValue = 'notranslate';
   var metas = document.getElementsByTagName('meta');
   for (var idx = 0; idx < metas.length; ++idx) {
     var meta = metas[idx];
     // Only tags addressed to Google can carry the opt-out.
     if (meta.name !== 'google') {
       continue;
     }
     if (meta.content === optOutValue ||
         meta.getAttribute('value') === optOutValue) {
       return false;
     }
   }
   return true;
 };

 /**
  * Returns the "content" of the first meta tag whose "httpEquiv" matches.
  * Each tag's httpEquiv is lower-cased before the comparison, so the match is
  * case insensitive provided the caller passes a lower-case |httpEquiv|.
  * @param {String} httpEquiv Value of the "httpEquiv" attribute, has to be
  *     lower case.
  * @return {string} Value of the "content" attribute of the first matching
  *     meta tag, or the empty string when no tag matches.
  */
 __gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
   var metas = document.getElementsByTagName('meta');
   var idx = 0;
   while (idx < metas.length) {
     var meta = metas[idx];
     if (meta.httpEquiv.toLowerCase() === httpEquiv) {
       return meta.content;
     }
     ++idx;
   }
   return '';
 };

 // Used by the |getTextContent| function below.
 // Lookup table of upper-case element names whose subtrees are skipped during
 // text extraction. Only the keys matter; the value 1 merely makes a lookup
 // truthy.
 __gCrWeb.languageDetection['nonTextNodeNames'] = {
   'SCRIPT': 1,
   'NOSCRIPT': 1,
   'STYLE': 1,
   'EMBED': 1,
   'OBJECT': 1
 };

 /**
  * Walks a DOM tree to extract the text content.
  * Does not walk into a node when its name is in |nonTextNodeNames| or when the
  * element's computed style is display: none or visibility: hidden.
  * A line break is emitted for every BR element and in front of every element
  * whose computed display is not 'inline' (BODY excepted).
  * @param {Node} node Root of the DOM (sub)tree to extract text from.
  * @param {number} maxLen Output will be truncated to |maxLen|
  * @return {string} The text content
  */
 __gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
   if (!node || maxLen <= 0) {
     return '';
   }

   var txt = '';
   // Formatting and filtering.
   if (node.nodeType === Node.ELEMENT_NODE) {
     // nodeName keeps its original casing in XML/XHTML documents, so normalize
     // it once and use the same upper-case name for every comparison below.
     var nodeName = node.nodeName.toUpperCase();
     // Reject non-text nodes such as scripts.
     if (__gCrWeb.languageDetection.nonTextNodeNames[nodeName]) {
       return '';
     }
     if (nodeName === 'BR') {
       return '\n';
     }
     var style = window.getComputedStyle(node);
     // Only proceed if the element is visible.
     if (style.display === 'none' || style.visibility === 'hidden') {
       return '';
     }
     // No need to add a line break before |body| as it is the first element.
     if (nodeName !== 'BODY' && style.display !== 'inline') {
       txt = '\n';
     }
   }

   if (node.hasChildNodes()) {
     // Recurse with the remaining budget so the total never exceeds |maxLen|.
     for (var childIdx = 0;
          childIdx < node.childNodes.length && txt.length < maxLen;
          childIdx++) {
       txt += __gCrWeb.languageDetection.getTextContent(
           node.childNodes[childIdx], maxLen - txt.length);
     }
   } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
     txt += node.textContent.substring(0, maxLen - txt.length);
   }

   return txt;
 };

 /**
  * Detects if a page has content that needs translation and informs the native
  * side. The text content of a page is cached in
  * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
  * time directly from the Obj-C side, rather than being sent through
  * |invokeOnHost|.
  *
  * A 'languageDetection.textCaptured' message is always sent. When translation
  * is not allowed it only carries 'translationAllowed': false and no text is
  * captured. Otherwise it also carries the capture duration in milliseconds,
  * the document element's lang and the content-language meta value.
  */
 __gCrWeb.languageDetection['detectLanguage'] = function() {
   if (!__gCrWeb.languageDetection.translationAllowed()) {
     __gCrWeb.message.invokeOnHost({
         'command': 'languageDetection.textCaptured',
         'translationAllowed': false});
     return;
   }

   // Constant for the maximum length of the extracted text returned by
   // |-detectLanguage| to the native side.
   // Matches desktop implementation.
   // Note: This should stay in sync with the constant in
   // js_language_detection_manager.mm .
   var kMaxIndexChars = 65535;
   // Use epoch timestamps: Date#getMilliseconds() only yields the 0-999
   // millisecond component, so subtracting two of those gives a wrong (possibly
   // negative) duration whenever the capture crosses a second boundary.
   var captureBeginTime = Date.now();
   __gCrWeb.languageDetection.activeRequests += 1;
   __gCrWeb.languageDetection.bufferedTextContent =
       __gCrWeb.languageDetection.getTextContent(document.body,
           kMaxIndexChars);
   var captureTextTime = Date.now() - captureBeginTime;
   var httpContentLanguage =
       __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
           'content-language');
   __gCrWeb.message.invokeOnHost({
       'command': 'languageDetection.textCaptured',
       'translationAllowed': true,
       'captureTextTime': captureTextTime,
       'htmlLang': document.documentElement.lang,
       'httpContentLanguage': httpContentLanguage});
 };

 /**
  * Retrieves the cached text content of a page. Returns it and, when this was
  * the last outstanding request, purges the cache.
  * @return {?string} The cached text content, or null if nothing is cached.
  */
 __gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
   var textContent = __gCrWeb.languageDetection.bufferedTextContent;
   var remaining = __gCrWeb.languageDetection.activeRequests - 1;
   if (remaining <= 0) {
     // Clamp at zero: if an unmatched call drove the counter negative, later
     // requests would never bring it back to exactly 0 and the buffer would
     // never be purged.
     __gCrWeb.languageDetection.activeRequests = 0;
     __gCrWeb.languageDetection.bufferedTextContent = null;
   } else {
     __gCrWeb.languageDetection.activeRequests = remaining;
   }
   return textContent;
 };

 }())  // End of anonymous function.
	// Copyright 2013 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	/**
	* @fileoverview Installs Language Detection management functions on the
	* __gCrWeb object.
	*
	* TODO(crbug.com/659442): Enable checkTypes error for this file.
	* @suppress {checkTypes}
	*/

	// Create the languageDetection namespace on the global __gCrWeb object, which
	// must already exist when this script runs. All state and functions defined
	// below are attached to this namespace.
	__gCrWeb.languageDetection = {};

	// Store languageDetection namespace object in a global __gCrWeb object
	// referenced by a string, so it does not get renamed by closure compiler during
	// the minification.
	__gCrWeb['languageDetection'] = __gCrWeb.languageDetection;

	(function() {
	/**
	 * The cache of the text content that was extracted from the page.
	 * It is filled by |__gCrWeb.languageDetection.detectLanguage| and reset to
	 * null by |__gCrWeb.languageDetection.retrieveBufferedTextContent| once
	 * |activeRequests| reaches 0. Starts out as null (nothing cached).
	 */
	__gCrWeb.languageDetection.bufferedTextContent = null;

	/**
	 * The number of active requests that have populated the cache. This is
	 * incremented every time a call to |__gCrWeb.languageDetection.detectLanguage|
	 * populates the buffer. This is decremented every time there is a call to
	 * retrieve the buffer. The buffer is purged when this goes down to 0.
	 */
	__gCrWeb.languageDetection.activeRequests = 0;

	/**
	 * Returns true if translation of the page is allowed.
	 * Translation is not allowed when a <meta name="google"> tag has its
	 * "content" property or "value" attribute set to "notranslate".
	 * @return {boolean} true if translation of the page is allowed.
	 */
	__gCrWeb.languageDetection['translationAllowed'] = function() {
	  var metaTags = document.getElementsByTagName('meta');
	  for (var i = 0; i < metaTags.length; ++i) {
	    if (metaTags[i].name === 'google') {
	      if (metaTags[i].content === 'notranslate' ||
	          metaTags[i].getAttribute('value') === 'notranslate') {
	        return false;
	      }
	    }
	  }
	  return true;
	};

	/**
	 * Returns the "content" of the first meta tag whose "httpEquiv" matches.
	 * Each tag's httpEquiv is lower-cased before the comparison, so the match is
	 * case insensitive provided the caller passes a lower-case |httpEquiv|.
	 * @param {String} httpEquiv Value of the "httpEquiv" attribute, has to be
	 *     lower case.
	 * @return {string} Value of the "content" attribute of the first matching
	 *     meta tag, or the empty string when no tag matches.
	 */
	__gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
	  var metas = document.getElementsByTagName('meta');
	  var idx = 0;
	  while (idx < metas.length) {
	    var meta = metas[idx];
	    if (meta.httpEquiv.toLowerCase() === httpEquiv) {
	      return meta.content;
	    }
	    ++idx;
	  }
	  return '';
	};

	// Used by the |getTextContent| function below.
	// Lookup table of upper-case element names whose subtrees are skipped during
	// text extraction. Only the keys matter; the value 1 merely makes a lookup
	// truthy.
	__gCrWeb.languageDetection['nonTextNodeNames'] = {
	'SCRIPT': 1,
	'NOSCRIPT': 1,
	'STYLE': 1,
	'EMBED': 1,
	'OBJECT': 1
	};

	/**
	 * Walks a DOM tree to extract the text content.
	 * Does not walk into a node when its name is in |nonTextNodeNames| or when the
	 * element's computed style is display: none or visibility: hidden.
	 * A line break is emitted for every BR element and in front of every element
	 * whose computed display is not 'inline' (BODY excepted).
	 * @param {Node} node Root of the DOM (sub)tree to extract text from.
	 * @param {number} maxLen Output will be truncated to |maxLen|
	 * @return {string} The text content
	 */
	__gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
	  if (!node || maxLen <= 0) {
	    return '';
	  }

	  var txt = '';
	  // Formatting and filtering.
	  if (node.nodeType === Node.ELEMENT_NODE) {
	    // nodeName keeps its original casing in XML/XHTML documents, so normalize
	    // it once and use the same upper-case name for every comparison below.
	    var nodeName = node.nodeName.toUpperCase();
	    // Reject non-text nodes such as scripts.
	    if (__gCrWeb.languageDetection.nonTextNodeNames[nodeName]) {
	      return '';
	    }
	    if (nodeName === 'BR') {
	      return '\n';
	    }
	    var style = window.getComputedStyle(node);
	    // Only proceed if the element is visible.
	    if (style.display === 'none' || style.visibility === 'hidden') {
	      return '';
	    }
	    // No need to add a line break before |body| as it is the first element.
	    if (nodeName !== 'BODY' && style.display !== 'inline') {
	      txt = '\n';
	    }
	  }

	  if (node.hasChildNodes()) {
	    // Recurse with the remaining budget so the total never exceeds |maxLen|.
	    for (var childIdx = 0;
	         childIdx < node.childNodes.length && txt.length < maxLen;
	         childIdx++) {
	      txt += __gCrWeb.languageDetection.getTextContent(
	          node.childNodes[childIdx], maxLen - txt.length);
	    }
	  } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
	    txt += node.textContent.substring(0, maxLen - txt.length);
	  }

	  return txt;
	};

	/**
	 * Detects if a page has content that needs translation and informs the native
	 * side. The text content of a page is cached in
	 * |__gCrWeb.languageDetection.bufferedTextContent| and retrieved at a later
	 * time directly from the Obj-C side, rather than being sent through
	 * |invokeOnHost|.
	 *
	 * A 'languageDetection.textCaptured' message is always sent. When translation
	 * is not allowed it only carries 'translationAllowed': false and no text is
	 * captured. Otherwise it also carries the capture duration in milliseconds,
	 * the document element's lang and the content-language meta value.
	 */
	__gCrWeb.languageDetection['detectLanguage'] = function() {
	  if (!__gCrWeb.languageDetection.translationAllowed()) {
	    __gCrWeb.message.invokeOnHost({
	        'command': 'languageDetection.textCaptured',
	        'translationAllowed': false});
	    return;
	  }

	  // Constant for the maximum length of the extracted text returned by
	  // |-detectLanguage| to the native side.
	  // Matches desktop implementation.
	  // Note: This should stay in sync with the constant in
	  // js_language_detection_manager.mm .
	  var kMaxIndexChars = 65535;
	  // Use epoch timestamps: Date#getMilliseconds() only yields the 0-999
	  // millisecond component, so subtracting two of those gives a wrong (possibly
	  // negative) duration whenever the capture crosses a second boundary.
	  var captureBeginTime = Date.now();
	  __gCrWeb.languageDetection.activeRequests += 1;
	  __gCrWeb.languageDetection.bufferedTextContent =
	      __gCrWeb.languageDetection.getTextContent(document.body,
	          kMaxIndexChars);
	  var captureTextTime = Date.now() - captureBeginTime;
	  var httpContentLanguage =
	      __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
	          'content-language');
	  __gCrWeb.message.invokeOnHost({
	      'command': 'languageDetection.textCaptured',
	      'translationAllowed': true,
	      'captureTextTime': captureTextTime,
	      'htmlLang': document.documentElement.lang,
	      'httpContentLanguage': httpContentLanguage});
	};

	/**
	 * Retrieves the cached text content of a page. Returns it and, when this was
	 * the last outstanding request, purges the cache.
	 * @return {?string} The cached text content, or null if nothing is cached.
	 */
	__gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
	  var textContent = __gCrWeb.languageDetection.bufferedTextContent;
	  var remaining = __gCrWeb.languageDetection.activeRequests - 1;
	  if (remaining <= 0) {
	    // Clamp at zero: if an unmatched call drove the counter negative, later
	    // requests would never bring it back to exactly 0 and the buffer would
	    // never be purged.
	    __gCrWeb.languageDetection.activeRequests = 0;
	    __gCrWeb.languageDetection.bufferedTextContent = null;
	  } else {
	    __gCrWeb.languageDetection.activeRequests = remaining;
	  }
	  return textContent;
	};

	}()) // End of anonymous function.