blob: 6e2f37289213641f4ee31b65bbf765c67a767659 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
/**
* @fileoverview Installs Language Detection management functions on the
* __gCrWeb object.
*
* TODO(crbug.com/659442): Enable checkTypes error for this file.
* @suppress {checkTypes}
*/
// Namespace object that the language detection state and functions below are
// attached to.
__gCrWeb.languageDetection = {};
// Store the languageDetection namespace object in the global __gCrWeb object
// under a string key, so that it does not get renamed by the Closure Compiler
// during minification.
__gCrWeb['languageDetection'] = __gCrWeb.languageDetection;
(function() {
/**
 * The cache of the text content that was extracted from the page. It is null
 * until |__gCrWeb.languageDetection.detectLanguage| fills it, and it is reset
 * to null by |__gCrWeb.languageDetection.retrieveBufferedTextContent|.
 */
__gCrWeb.languageDetection.bufferedTextContent = null;
/**
 * The number of active requests that have populated the cache. This is
 * incremented every time a call to |__gCrWeb.languageDetection.detectLanguage|
 * populates the buffer. This is decremented every time there is a call to
 * retrieve the buffer. The buffer is purged when this goes down to 0.
 */
__gCrWeb.languageDetection.activeRequests = 0;
/**
 * Tells whether the page may be translated.
 * A page opts out of translation with a meta tag named "google" whose
 * "content" property or "value" attribute is "notranslate".
 * @return {boolean} false when such a meta tag exists, true otherwise.
 */
__gCrWeb.languageDetection['translationAllowed'] = function() {
  var tags = document.getElementsByTagName('meta');
  var index = 0;
  while (index < tags.length) {
    var tag = tags[index];
    index += 1;
    // Only meta tags named "google" can carry the opt-out.
    if (tag.name !== 'google') {
      continue;
    }
    if (tag.content === 'notranslate') {
      return false;
    }
    if (tag.getAttribute('value') === 'notranslate') {
      return false;
    }
  }
  return true;
};
/**
 * Gets the content of the first meta tag whose "http-equiv" attribute matches
 * |httpEquiv|. The comparison ignores letter case on both sides.
 * @param {string} httpEquiv Value of the "http-equiv" attribute to look for,
 *     in any letter case.
 * @return {string} Value of the "content" attribute of the first matching
 *     meta tag, or an empty string when no meta tag matches.
 */
__gCrWeb.languageDetection['getMetaContentByHttpEquiv'] = function(httpEquiv) {
  // A non-string can never equal an attribute value, so nothing can match.
  if (typeof httpEquiv !== 'string') {
    return '';
  }
  // Lower-case the requested name once so that callers are not required to
  // pass an already lower-cased value.
  var wanted = httpEquiv.toLowerCase();
  var metaTags = document.getElementsByTagName('meta');
  for (var i = 0; i < metaTags.length; ++i) {
    if (metaTags[i].httpEquiv.toLowerCase() === wanted) {
      return metaTags[i].content;
    }
  }
  return '';
};
// Used by the |getTextContent| function below: an element whose node name is
// a key of this object is skipped together with its whole subtree. The keys
// are looked up by node name; the value 1 only serves as a truthy marker.
__gCrWeb.languageDetection['nonTextNodeNames'] = {
'SCRIPT': 1,
'NOSCRIPT': 1,
'STYLE': 1,
'EMBED': 1,
'OBJECT': 1
};
/**
 * Walks a DOM tree to extract the text content.
 * Does not walk into an element when its name is in |nonTextNodeNames| or
 * when its computed style hides it (display "none" or visibility "hidden").
 * A "\n" is emitted for every BR element and before every element, other than
 * BODY, whose computed display is not "inline".
 * @param {Node} node The root of the DOM tree to walk. Both element nodes and
 *     text nodes are handled; a null or undefined node yields ''.
 * @param {number} maxLen Output will be truncated to |maxLen| characters.
 * @return {string} The text content, at most |maxLen| characters long.
 */
__gCrWeb.languageDetection['getTextContent'] = function(node, maxLen) {
  if (!node || maxLen <= 0) {
    return '';
  }
  var txt = '';
  // Formatting and filtering.
  if (node.nodeType === Node.ELEMENT_NODE) {
    // Node names are only upper case for HTML elements in HTML documents, so
    // normalize once and use the same value for every name comparison.
    var nodeName = node.nodeName.toUpperCase();
    // Reject non-text nodes such as scripts.
    if (__gCrWeb.languageDetection.nonTextNodeNames[nodeName]) {
      return '';
    }
    if (nodeName === 'BR') {
      return '\n';
    }
    var style = window.getComputedStyle(node);
    // Only proceed if the element is visible.
    if (style.display === 'none' || style.visibility === 'hidden') {
      return '';
    }
    // No need to add a line break before |body| as it is the first element.
    if (nodeName !== 'BODY' && style.display !== 'inline') {
      txt = '\n';
    }
  }
  if (node.hasChildNodes()) {
    // Each child only receives the budget that is still left, and the walk
    // stops as soon as the budget is used up.
    for (var childIdx = 0;
         childIdx < node.childNodes.length && txt.length < maxLen;
         childIdx++) {
      txt += __gCrWeb.languageDetection.getTextContent(
          node.childNodes[childIdx], maxLen - txt.length);
    }
  } else if (node.nodeType === Node.TEXT_NODE && node.textContent) {
    txt += node.textContent.substring(0, maxLen - txt.length);
  }
  return txt;
};
/**
 * Detects if a page has content that needs translation and informs the native
 * side with a 'languageDetection.textCaptured' message.
 * When translation is allowed, the text content of the page is cached in
 * |__gCrWeb.languageDetection.bufferedTextContent| so that it can be retrieved
 * at a later time directly from the Obj-C side; the text itself is not part of
 * the message passed to |invokeOnHost|.
 */
__gCrWeb.languageDetection['detectLanguage'] = function() {
  if (!__gCrWeb.languageDetection.translationAllowed()) {
    __gCrWeb.message.invokeOnHost({
      'command': 'languageDetection.textCaptured',
      'translationAllowed': false});
  } else {
    // Constant for the maximum length of the extracted text returned by
    // |-detectLanguage| to the native side.
    // Matches desktop implementation.
    // Note: This should stay in sync with the constant in
    // js_language_detection_manager.mm .
    var kMaxIndexChars = 65535;
    var captureBeginTime = Date.now();
    __gCrWeb.languageDetection.activeRequests += 1;
    __gCrWeb.languageDetection.bufferedTextContent =
        __gCrWeb.languageDetection.getTextContent(document.body,
                                                  kMaxIndexChars);
    // Elapsed wall-clock time in milliseconds. Timestamps are subtracted
    // rather than |getMilliseconds()| values, because the latter are only the
    // 0-999 millisecond component of a date and wrap around every second.
    var captureTextTime = Date.now() - captureBeginTime;
    var httpContentLanguage =
        __gCrWeb.languageDetection.getMetaContentByHttpEquiv(
            'content-language');
    __gCrWeb.message.invokeOnHost({
      'command': 'languageDetection.textCaptured',
      'translationAllowed': true,
      'captureTextTime': captureTextTime,
      'htmlLang': document.documentElement.lang,
      'httpContentLanguage': httpContentLanguage});
  }
};
/**
 * Retrieves the cached text content of a page. Each call decrements
 * |__gCrWeb.languageDetection.activeRequests|; the cache is purged once no
 * active request remains.
 * @return {?string} The cached text content as it was before this call, or
 *     null when nothing is cached.
 */
__gCrWeb.languageDetection['retrieveBufferedTextContent'] = function() {
  var textContent = __gCrWeb.languageDetection.bufferedTextContent;
  __gCrWeb.languageDetection.activeRequests -= 1;
  // Compare with <= and clamp to 0: a retrieval without a matching
  // |detectLanguage| call must not leave the counter negative, otherwise later
  // request/retrieve pairs would never reach 0 and the cache would never be
  // purged.
  if (__gCrWeb.languageDetection.activeRequests <= 0) {
    __gCrWeb.languageDetection.activeRequests = 0;
    __gCrWeb.languageDetection.bufferedTextContent = null;
  }
  return textContent;
};
}()) // End of anonymous function.