LanguageDetector: Update und logic to match new spec logic
The spec's logic for calculating the detection results and the "und"
confidence has been updated. This updates our implementation to match.
Fixed: 413151736
Change-Id: I99ac48978104cf9e599b20880382ca6ea3a54f65
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6490186
Reviewed-by: Christine Hollingsworth <christinesm@chromium.org>
Commit-Queue: Nathan Memmott <memmott@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1451925}
diff --git a/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc b/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc
index 004b7a64..8b743d0 100644
--- a/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc
+++ b/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc
@@ -441,6 +441,16 @@
double last_score = 1;
double cumulative_confidence = 0;
+ const WTF::UncheckedIterator<LanguageDetectionModel::LanguagePrediction>&
+ unknown_iter = std::find_if(
+ predictions.begin(), predictions.end(),
+ [](const LanguageDetectionModel::LanguagePrediction& prediction) {
+ return prediction.language == "unknown";
+ });
+
+ CHECK_NE(unknown_iter, predictions.end());
+ double unknown = unknown_iter->score;
+
HeapVector<Member<LanguageDetectionResult>> results;
for (const auto& prediction : predictions) {
CHECK_GE(prediction.score, 0);
@@ -448,9 +458,10 @@
CHECK_LE(prediction.score, last_score);
last_score = prediction.score;
- if (prediction.score == 0 || prediction.language == "unknown") {
+ if (prediction.score == 0 || prediction.score < unknown) {
break;
}
+
auto* result = MakeGarbageCollected<LanguageDetectionResult>();
results.push_back(result);
result->setDetectedLanguage(String(prediction.language));
@@ -463,12 +474,17 @@
}
}
+ CHECK_GE(1 - cumulative_confidence, unknown);
+ if (!results.empty()) {
+ CHECK_GE(results.back()->confidence(), unknown);
+ }
+
// Append "und" to end. Set it's confidence so that the total confidences add
// up to 1.
auto* und_result = MakeGarbageCollected<LanguageDetectionResult>();
results.push_back(und_result);
und_result->setDetectedLanguage(String("und"));
- und_result->setConfidence(1 - cumulative_confidence);
+ und_result->setConfidence(unknown);
return results;
}
diff --git a/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js b/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js
index 7c86d01..e85ea6d 100644
--- a/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js
+++ b/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js
@@ -1,6 +1,7 @@
// META: title=Detect english
// META: global=window
// META: script=../resources/util.js
+// META: script=../resources/locale-util.js
'use strict';
@@ -15,6 +16,9 @@
const detector = await LanguageDetector.create();
const results = await detector.detect('Hello world!');
+ // There must be at least one result: the 'und' result.
+ assert_greater_than_equal(results.length, 1);
+
// The last result should be 'und'.
const undResult = results.pop();
assert_equals(undResult.detectedLanguage, 'und');
@@ -22,24 +26,31 @@
let total_confidence_without_und = 0;
let last_confidence = 1;
- for (const {confidence} of results) {
+ for (const {detectedLanguage, confidence} of results) {
+ // All results must be in canonical form.
+ assert_is_canonical(detectedLanguage);
+
assert_greater_than(confidence, 0);
+ assert_greater_than(confidence, undResult.confidence);
total_confidence_without_und += confidence;
- // Except for 'und', results should be from high to low confidence.
+ // Except for 'und', results must be from high to low confidence.
assert_greater_than_equal(last_confidence, confidence);
last_confidence = confidence;
}
- // Confidences, excluding both 'und' and the last non-'und' result, should be
- // less than 0.99.
- assert_less_than(
- total_confidence_without_und - results.at(-1).confidence, 0.99);
+ // If we have non-und results, their confidences, excluding the last non-'und'
+ // result, must be less than 0.99.
+ if (results.length > 0) {
+ assert_less_than(
+ total_confidence_without_und - results.at(-1).confidence, 0.99);
+ }
- // Confidences, including 'und', should add up to 1.
- assert_equals(total_confidence_without_und + undResult.confidence, 1);
-}, 'Simple LanguageDetector.detect() call');
+ // The sum of all confidences, including 'und', must be at most 1.
+ assert_less_than_equal(
+ total_confidence_without_und + undResult.confidence, 1);
+}, 'LanguageDetector.detect() returns valid results');
promise_test(async t => {
const error = new Error('CreateMonitorCallback threw an error');