LanguageDetector: Update und logic to match new spec logic The logic to calculate the results and und has been updated in the spec. This updates our implementation to match. Fixed: 413151736 Change-Id: I99ac48978104cf9e599b20880382ca6ea3a54f65 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6490186 Reviewed-by: Christine Hollingsworth <christinesm@chromium.org> Commit-Queue: Nathan Memmott <memmott@chromium.org> Cr-Commit-Position: refs/heads/main@{#1451925}

commit: a3418441f65ddb70f7733b1002415e8fc1ba72fa [log] [tgz]
author: Nathan Memmott <memmott@chromium.org> Fri Apr 25 18:03:48 2025
committer: Chromium LUCI CQ <chromium-scoped@luci-project-accounts.iam.gserviceaccount.com> Fri Apr 25 18:03:48 2025
tree: 8027f3dc47e2eef667f2b698e207694713bec549
parent: 8e5790bd2ad70c75c61103a8fb20c36c6bdc994d [diff]
diff --git a/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc b/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc
index 004b7a64..8b743d0 100644
--- a/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc
+++ b/third_party/blink/renderer/modules/ai/on_device_translation/language_detector.cc

@@ -441,6 +441,16 @@
   double last_score = 1;
   double cumulative_confidence = 0;
 
+  const WTF::UncheckedIterator<LanguageDetectionModel::LanguagePrediction>&
+      unknown_iter = std::find_if(
+          predictions.begin(), predictions.end(),
+          [](const LanguageDetectionModel::LanguagePrediction& prediction) {
+            return prediction.language == "unknown";
+          });
+
+  CHECK_NE(unknown_iter, predictions.end());
+  double unknown = unknown_iter->score;
+
   HeapVector<Member<LanguageDetectionResult>> results;
   for (const auto& prediction : predictions) {
     CHECK_GE(prediction.score, 0);
@@ -448,9 +458,10 @@
     CHECK_LE(prediction.score, last_score);
     last_score = prediction.score;
 
-    if (prediction.score == 0 || prediction.language == "unknown") {
+    if (prediction.score == 0 || prediction.score < unknown) {
       break;
     }
+
     auto* result = MakeGarbageCollected<LanguageDetectionResult>();
     results.push_back(result);
     result->setDetectedLanguage(String(prediction.language));
@@ -463,12 +474,17 @@
     }
   }
 
+  CHECK_GE(1 - cumulative_confidence, unknown);
+  if (!results.empty()) {
+    CHECK_GE(results.back()->confidence(), unknown);
+  }
+
   // Append "und" to the end. Its confidence is the "unknown" prediction's
   // score, so the confidences total at most 1.
   auto* und_result = MakeGarbageCollected<LanguageDetectionResult>();
   results.push_back(und_result);
   und_result->setDetectedLanguage(String("und"));
-  und_result->setConfidence(1 - cumulative_confidence);
+  und_result->setConfidence(unknown);
 
   return results;
 }

diff --git a/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js b/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js
index 7c86d01..e85ea6d 100644
--- a/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js
+++ b/third_party/blink/web_tests/external/wpt/ai/language_detection/detector.https.window.js

@@ -1,6 +1,7 @@
 // META: title=Detect english
 // META: global=window
 // META: script=../resources/util.js
+// META: script=../resources/locale-util.js
 
 'use strict';
 
@@ -15,6 +16,9 @@
   const detector = await LanguageDetector.create();
   const results = await detector.detect('Hello world!');
 
+  // must at least have the 'und' result.
+  assert_greater_than_equal(results.length, 1);
+
   // The last result should be 'und'.
   const undResult = results.pop();
   assert_equals(undResult.detectedLanguage, 'und');
@@ -22,24 +26,31 @@
 
   let total_confidence_without_und = 0;
   let last_confidence = 1;
-  for (const {confidence} of results) {
+  for (const {detectedLanguage, confidence} of results) {
+    // All results must be in canonical form.
+    assert_is_canonical(detectedLanguage);
+
     assert_greater_than(confidence, 0);
+    assert_greater_than(confidence, undResult.confidence);
 
     total_confidence_without_und += confidence;
 
-    // Except for 'und', results should be from high to low confidence.
+    // Except for 'und', results must be from high to low confidence.
     assert_greater_than_equal(last_confidence, confidence);
     last_confidence = confidence;
   }
 
-  // Confidences, excluding both 'und' and the last non-'und' result, should be
-  // less than 0.99.
-  assert_less_than(
-      total_confidence_without_und - results.at(-1).confidence, 0.99);
+  // If we have non-und results, their confidences, excluding the last non-'und'
+  // result, must be less than 0.99.
+  if (results.length > 0) {
+    assert_less_than(
+        total_confidence_without_und - results.at(-1).confidence, 0.99);
+  }
 
-  // Confidences, including 'und', should add up to 1.
-  assert_equals(total_confidence_without_und + undResult.confidence, 1);
-}, 'Simple LanguageDetector.detect() call');
+  // Confidences, including 'und', should be less than or equal to one.
+  assert_less_than_equal(
+      total_confidence_without_und + undResult.confidence, 1);
+}, 'LanguageDetector.detect() returns valid results');
 
 promise_test(async t => {
   const error = new Error('CreateMonitorCallback threw an error');
commit	a3418441f65ddb70f7733b1002415e8fc1ba72fa	[log] [tgz]
author	Nathan Memmott <memmott@chromium.org>	Fri Apr 25 18:03:48 2025
committer	Chromium LUCI CQ <chromium-scoped@luci-project-accounts.iam.gserviceaccount.com>	Fri Apr 25 18:03:48 2025
tree	8027f3dc47e2eef667f2b698e207694713bec549
parent	8e5790bd2ad70c75c61103a8fb20c36c6bdc994d [diff]