blob: 1f70c31e9d78f70abf92a6b32f39b5cf009379b8 [file] [log] [blame]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/language_detection/language_detection_model.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/path_service.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/metrics/histogram_tester.h"
#include "components/translate/core/common/translate_constants.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace translate {
// Creates "model_file.tflite" inside a fresh unique temporary directory,
// writes the five bytes "12345" into it and returns the still-open handle.
// Tests use the result as a file whose contents are not a usable model.
//
// NOTE(review): |scratch_dir| is a local and is destroyed when this function
// returns, while the returned handle is still open. Confirm that this
// lifetime is intended on every platform.
base::File CreateInvalidModelFile() {
  base::ScopedTempDir scratch_dir;
  EXPECT_TRUE(scratch_dir.CreateUniqueTempDir());

  const base::FilePath bogus_model_path =
      scratch_dir.GetPath().AppendASCII("model_file.tflite");

  // Create the file for reading and writing, and allow it to be deleted while
  // the handle is open.
  const auto open_flags = base::File::FLAG_CREATE | base::File::FLAG_READ |
                          base::File::FLAG_WRITE |
                          base::File::FLAG_CAN_DELETE_ON_CLOSE;
  base::File bogus_model(bogus_model_path, open_flags);
  EXPECT_TRUE(bogus_model.WriteAtCurrentPos("12345", 5));
  return bogus_model;
}
// Opens the checked-in test model for reading and returns the handle. The
// model is looked up relative to the source root at
// components/test/data/translate/valid_model.tflite.
//
// The result of the source-root lookup is not checked here; callers observe a
// failure only through the returned file.
base::File GetValidModelFile() {
  base::FilePath src_root;
  base::PathService::Get(base::DIR_SOURCE_ROOT, &src_root);

  const base::FilePath valid_model_path = src_root.AppendASCII("components")
                                              .AppendASCII("test")
                                              .AppendASCII("data")
                                              .AppendASCII("translate")
                                              .AppendASCII("valid_model.tflite");

  return base::File(valid_model_path,
                    base::File::FLAG_OPEN | base::File::FLAG_READ);
}
// A freshly constructed model that was never handed a file must report itself
// as unavailable.
TEST(LanguageDetectionModelTest, ModelUnavailable) {
  LanguageDetectionModel model_without_file;
  EXPECT_FALSE(model_without_file.IsAvailable());
}
// Updating the model with a default-constructed base::File must leave it
// unavailable and record exactly one kModelFileInvalid sample in the model
// state histogram.
TEST(LanguageDetectionModelTest, EmptyFileProvided) {
  base::HistogramTester histograms;
  LanguageDetectionModel model;

  model.UpdateWithFile(base::File());

  EXPECT_FALSE(model.IsAvailable());
  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.LanguageDetectionModelState",
      LanguageDetectionModelState::kModelFileInvalid, 1);
}
// TODO(crbug.com/1240561): Fix flaky test.
//
// Feeds the model a file whose contents are not a usable model. The model is
// expected to stay unavailable. The state histogram is expected to record
// kModelFileValidAndMemoryMapped, and the InvalidModelFile histogram is
// expected to record a single `true` sample.
TEST(LanguageDetectionModelTest, DISABLED_UnsupportedModelFileProvided) {
  base::HistogramTester histograms;
  base::File bogus_model_file = CreateInvalidModelFile();
  LanguageDetectionModel model;

  model.UpdateWithFile(std::move(bogus_model_file));

  EXPECT_FALSE(model.IsAvailable());
  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.LanguageDetectionModelState",
      LanguageDetectionModelState::kModelFileValidAndMemoryMapped, 1);
  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.InvalidModelFile", true, 1);
}
// Loads the valid test model and runs detection on a short English sentence
// while passing "ja" as the page-provided language code. The prediction is
// expected to be reliable and equal to "en", and the returned page language is
// expected to be kUnknownLanguageCode (presumably because the provided code
// and the prediction disagree).
TEST(LanguageDetectionModelTest, ReliableLanguageDetermination) {
  base::HistogramTester histogram_tester;
  base::File file = GetValidModelFile();
  LanguageDetectionModel language_detection_model;
  language_detection_model.UpdateWithFile(std::move(file));
  EXPECT_TRUE(language_detection_model.IsAvailable());

  // Seed the out-parameter with the opposite of the expected result. If the
  // model ever fails to write it, the expectation below fails deterministically
  // instead of reading an indeterminate value.
  bool is_prediction_reliable = false;
  float model_reliability_score = 0.0f;
  std::string predicted_language;
  std::u16string contents = u"This is a page apparently written in English.";
  std::string language = language_detection_model.DeterminePageLanguage(
      std::string("ja"), std::string(), contents, &predicted_language,
      &is_prediction_reliable, model_reliability_score);

  EXPECT_TRUE(is_prediction_reliable);
  EXPECT_EQ("en", predicted_language);
  EXPECT_EQ(translate::kUnknownLanguageCode, language);
  histogram_tester.ExpectUniqueSample(
      "LanguageDetection.TFLite.DidAttemptDetection", true, 1);
}
// Loads the valid test model and runs detection on a single-character page
// ("e") while passing "ja" as the page-provided language code. The prediction
// is expected to be unreliable with an unknown predicted language, and the
// returned page language is expected to fall back to the provided "ja".
TEST(LanguageDetectionModelTest, UnreliableLanguageDetermination) {
  base::HistogramTester histogram_tester;
  base::File file = GetValidModelFile();
  LanguageDetectionModel language_detection_model;
  language_detection_model.UpdateWithFile(std::move(file));
  EXPECT_TRUE(language_detection_model.IsAvailable());

  // Seed the out-parameter with the opposite of the expected result. If the
  // model ever fails to write it, the expectation below fails deterministically
  // instead of reading an indeterminate value.
  bool is_prediction_reliable = true;
  float model_reliability_score = 0.0f;
  std::string predicted_language;
  std::u16string contents = u"e";
  std::string language = language_detection_model.DeterminePageLanguage(
      std::string("ja"), std::string(), contents, &predicted_language,
      &is_prediction_reliable, model_reliability_score);

  EXPECT_FALSE(is_prediction_reliable);
  EXPECT_EQ(translate::kUnknownLanguageCode, predicted_language);
  // Rely on the provided language code if the model is unreliable.
  EXPECT_EQ("ja", language);
  histogram_tester.ExpectUniqueSample(
      "LanguageDetection.TFLite.DidAttemptDetection", true, 1);
}
// Loads the valid test model and runs detection on a long, predominantly
// Chinese page that also contains a few English sentences, while passing "ja"
// as the page-provided language code. The text is asserted to be at least
// 250 * 3 UTF-16 code units long. The prediction is expected to be reliable
// and equal to "zh-CN", and the returned page language is expected to be
// kUnknownLanguageCode.
TEST(LanguageDetectionModelTest, LongTextLanguageDetemination) {
  base::HistogramTester histogram_tester;
  base::File file = GetValidModelFile();
  LanguageDetectionModel language_detection_model;
  language_detection_model.UpdateWithFile(std::move(file));
  EXPECT_TRUE(language_detection_model.IsAvailable());

  // Seed the out-parameter with the opposite of the expected result. If the
  // model ever fails to write it, the expectation below fails deterministically
  // instead of reading an indeterminate value.
  bool is_prediction_reliable = false;
  float model_reliability_score = 0.0f;
  std::string predicted_language;
  const char* const zh_content_string =
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "对于要提交的图书 我确认 我是版权所有者或已得到版权所有者的授权 "
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "This is a page apparently written in English."
      "This is a page apparently written in English."
      "This is a page apparently written in English."
      "要更改您的国家 地区 请在此表的最上端更改您的"
      "产品的简报和公告 提交该申请后无法进行更改 请确认您的选择是正确的 "
      "要更改您的国家 地区 请在此表的最上端更改您的";
  std::u16string contents = base::UTF8ToUTF16(zh_content_string);
  // Guard that the input really is "long" for the purposes of this test.
  EXPECT_GE(contents.length(), 250u * 3u);

  std::string language = language_detection_model.DeterminePageLanguage(
      std::string("ja"), std::string(), contents, &predicted_language,
      &is_prediction_reliable, model_reliability_score);

  EXPECT_TRUE(is_prediction_reliable);
  EXPECT_EQ("zh-CN", predicted_language);
  EXPECT_EQ(translate::kUnknownLanguageCode, language);
  histogram_tester.ExpectUniqueSample(
      "LanguageDetection.TFLite.DidAttemptDetection", true, 1);
}
} // namespace translate