[Read Aloud] Workaround for bug where brackets would be split.

This bug causes opening punctuation to be split, so "Hello.[2]"
would be read aloud as "Hello opening square bracket" and "2."

With this change, the same segment will now be read out as "Hello"
and "2."

This is a temporary workaround- I'm investigating if it would be
feasible to get a more permanent fix into the ax_utils libraries.

Note: this appears to work in both LTR and RTL languages.

Bug: 1474951
Change-Id: Ie0ca0263b90d0bbbe3f0573055af54052f04eaa6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5262947
Reviewed-by: Xiang Xiao <xiangxiao@google.com>
Commit-Queue: Lauren Winston <lwinston@google.com>
Cr-Commit-Position: refs/heads/main@{#1256309}
diff --git a/chrome/renderer/accessibility/read_anything_app_controller_browsertest.cc b/chrome/renderer/accessibility/read_anything_app_controller_browsertest.cc
index de2fdac..9b895dd 100644
--- a/chrome/renderer/accessibility/read_anything_app_controller_browsertest.cc
+++ b/chrome/renderer/accessibility/read_anything_app_controller_browsertest.cc
@@ -2301,6 +2301,127 @@
   EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[0]), 0);
   EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[0]), (int)sentence3.length());
 
+  // Nodes are empty at the end of the tree.
+  next_node_ids = MoveToNextGranularityAndGetText();
+  EXPECT_EQ((int)next_node_ids.size(), 0);
+}
+
+TEST_F(ReadAnythingAppControllerTest,
+       GetCurrentText_OpeningPunctuationIgnored) {
+  std::u16string sentence1 = u"And I am almost there.";
+  std::u16string sentence2 = u"[2]";
+  ui::AXTreeUpdate update;
+  SetUpdateTreeID(&update);
+  ui::AXNodeData static_text1;
+  static_text1.id = 2;
+  static_text1.role = ax::mojom::Role::kStaticText;
+  static_text1.SetNameChecked(sentence1);
+
+  ui::AXNodeData static_text2;
+  static_text2.id = 3;
+  static_text2.role = ax::mojom::Role::kStaticText;
+  static_text2.SetNameChecked(sentence2);
+
+  update.nodes = {static_text1, static_text2};
+  AccessibilityEventReceived({update});
+  OnAXTreeDistilled({static_text1.id, static_text2.id});
+  InitAXPosition(update.nodes[0].id);
+
+  std::vector<ui::AXNodeID> next_node_ids = GetCurrentText();
+  EXPECT_EQ((int)next_node_ids.size(), 1);
+
+  // The first segment was returned correctly.
+  EXPECT_EQ(next_node_ids[0], static_text1.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[0]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[0]), (int)sentence1.length());
+
+  next_node_ids = MoveToNextGranularityAndGetText();
+  EXPECT_EQ((int)next_node_ids.size(), 1);
+
+  // The second segment was returned correctly.
+  EXPECT_EQ(next_node_ids[0], static_text2.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[0]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[0]), (int)sentence2.length());
+
+  // Nodes are empty at the end of the new tree.
+  next_node_ids = MoveToNextGranularityAndGetText();
+  EXPECT_EQ((int)next_node_ids.size(), 0);
+}
+
+TEST_F(ReadAnythingAppControllerTest,
+       GetCurrentText_OpeningPunctuationIncludedWhenEntireNode) {
+  // Simulate breaking up the brackets across a link.
+  std::u16string sentence1 = u"And I am almost there.";
+  std::u16string sentence2 = u"[";
+  std::u16string sentence3 = u"2";
+  std::u16string sentence4 = u"]";
+  ui::AXTreeUpdate update;
+  ui::AXTreeID id_1 = ui::AXTreeID::CreateNewAXTreeID();
+  SetUpdateTreeID(&update, id_1);
+
+  ui::AXNodeData static_text1;
+  static_text1.id = 2;
+  static_text1.role = ax::mojom::Role::kStaticText;
+  static_text1.SetNameChecked(sentence1);
+
+  ui::AXNodeData static_text2;
+  static_text2.id = 3;
+  static_text2.role = ax::mojom::Role::kStaticText;
+  static_text2.SetNameChecked(sentence2);
+
+  ui::AXNodeData static_text3;
+  static_text3.id = 4;
+  static_text3.role = ax::mojom::Role::kStaticText;
+  static_text3.SetNameChecked(sentence3);
+
+  ui::AXNodeData static_text4;
+  static_text4.id = 12;
+  static_text4.role = ax::mojom::Role::kStaticText;
+  static_text4.SetNameChecked(sentence4);
+
+  ui::AXNodeData superscript;
+  superscript.id = 13;
+  superscript.role = ax::mojom::Role::kSuperscript;
+  superscript.child_ids = {static_text2.id, static_text3.id, static_text4.id};
+
+  ui::AXNodeData root;
+  root.id = 10;
+  root.child_ids = {static_text1.id, superscript.id};
+  update.root_id = root.id;
+
+  update.nodes = {root,         static_text1, superscript,
+                  static_text2, static_text3, static_text4};
+  OnActiveAXTreeIDChanged(id_1);
+  AccessibilityEventReceived({update});
+  OnAXTreeDistilled(id_1, {root.id, static_text1.id, superscript.id,
+                           static_text2.id, static_text3.id, static_text4.id});
+  InitAXPosition(static_text1.id);
+
+  std::vector<ui::AXNodeID> next_node_ids = GetCurrentText();
+  EXPECT_EQ((int)next_node_ids.size(), 1);
+
+  // The first segment was returned correctly.
+  EXPECT_EQ(next_node_ids[0], static_text1.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[0]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[0]), (int)sentence1.length());
+
+  // The next segment contains the entire superscript '[2]' with both opening
+  // and closing brackets so neither bracket is read out-of-context.
+  next_node_ids = MoveToNextGranularityAndGetText();
+  EXPECT_EQ((int)next_node_ids.size(), 3);
+
+  EXPECT_EQ(next_node_ids[0], static_text2.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[0]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[0]), (int)sentence2.length());
+
+  EXPECT_EQ(next_node_ids[1], static_text3.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[1]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[1]), (int)sentence3.length());
+
+  EXPECT_EQ(next_node_ids[2], static_text4.id);
+  EXPECT_EQ(GetCurrentTextStartIndex(next_node_ids[2]), 0);
+  EXPECT_EQ(GetCurrentTextEndIndex(next_node_ids[2]), (int)sentence4.length());
+
   // Nodes are empty at the end of the new tree.
   next_node_ids = MoveToNextGranularityAndGetText();
   EXPECT_EQ((int)next_node_ids.size(), 0);
diff --git a/chrome/renderer/accessibility/read_anything_app_model.cc b/chrome/renderer/accessibility/read_anything_app_model.cc
index b972a8e..d714f20 100644
--- a/chrome/renderer/accessibility/read_anything_app_model.cc
+++ b/chrome/renderer/accessibility/read_anything_app_model.cc
@@ -1186,6 +1186,30 @@
       // Get the index of the next sentence if we're looking at the combined
       // previous and current node text.
       int combined_sentence_index = GetNextSentence(combined_text);
+
+      bool is_opening_punctuation = false;
+      // The code that checks for accessible text boundaries sometimes
+      // incorrectly includes opening punctuation (i.e. '(', '<', etc.) as part
+      // of the prior sentence.
+      // e.g. "This is a sentence.[2]" will return a sentence boundary for
+      // "This is a sentence.[", splitting the opening and closing punctuation.
+      // When opening punctuation is split like this in Read Aloud, text will
+      // be read out for the punctuation e.g. "opening square bracket," which
+      // we want to avoid.
+      // Therefore, this is a workaround that prevents adding text from the
+      // next node to the current segment if that text is a single character
+      // and also opening punctuation. The opening punctuation will then be
+      // read out as part of the next segment. If the opening punctuation is
+      // followed by text and closing punctuation, the punctuation will not be
+      // read out directly- just the text content.
+      // TODO(crbug.com/1474951): See if it's possible to fix the code
+      // in FindAccessibleTextBoundary instead so that this workaround isn't
+      // needed.
+      if (combined_sentence_index == (int)current_text.length() + 1) {
+        char c = combined_text[combined_sentence_index - 1];
+        is_opening_punctuation = IsOpeningPunctuation(c);
+      }
+
       // If the combined_sentence_index is the same as the current_text length,
       // the new node should not be considered part of the current sentence.
       // If these values differ, add the current node's text to the list of
@@ -1203,7 +1227,8 @@
       //    The current text length is 6, and the next sentence index of
       //    "Hello. Goodbye." is still 6, so the current node's text shouldn't
       //    be added to the current sentence.
-      if ((int)current_text.length() < combined_sentence_index) {
+      if (((int)current_text.length() < combined_sentence_index) &&
+          !is_opening_punctuation) {
         anchor_node = GetNodeFromCurrentPosition();
         // Calculate the new sentence index.
         int index_in_new_node = combined_sentence_index - current_text.length();
@@ -1404,3 +1429,6 @@
   // TODO(crbug.com/1474951): Can this be updated to IsText() instead?
   return (GetHtmlTag(ax_node_id).length() == 0);
 }
+bool ReadAnythingAppModel::IsOpeningPunctuation(char c) {
+  return (c == '(' || c == '{' || c == '[' || c == '<');
+}
diff --git a/chrome/renderer/accessibility/read_anything_app_model.h b/chrome/renderer/accessibility/read_anything_app_model.h
index 86aadbf..19de79d 100644
--- a/chrome/renderer/accessibility/read_anything_app_model.h
+++ b/chrome/renderer/accessibility/read_anything_app_model.h
@@ -351,6 +351,8 @@
       ReadAnythingAppModel::ReadAloudCurrentGranularity current_granularity,
       ui::AXNodeID id);
 
+  bool IsOpeningPunctuation(char c);
+
   // State.
   // Store AXTrees of web contents in the browser's tab strip as AXTreeManagers.
   std::map<ui::AXTreeID, std::unique_ptr<ui::AXTreeManager>> tree_managers_;