Preserve vector order in RemoveDuplicatePrepopulateIDs

This CL is the union of 2 other CLs. The purpose of this CL is
to ensure one clean merge to the M73 branch. The 2 source CLs are:

----- https://crrev.com/c/1428939
Preserve vector order in RemoveDuplicatePrepopulateIDs

Template URLs with nonzero prepopulate_id were implicitly
reordered by RemoveDuplicatePrepopulateIDs, though relative
order was preserved for elements with zero prepopulate_id.
This CL adjusts the algorithm to output the selected
representatives for each prepopulate_id in order of first
appearance in the original template_urls vector, stabilizing
the overall order (instead of partitioning as it did before).

Bug: 924268
Change-Id: Ic885065cbf20398dfff723a85aff35141e0cb9a5
Reviewed-on: https://chromium-review.googlesource.com/c/1428939
Commit-Queue: Orin Jaworski <orinj@chromium.org>
Reviewed-by: Vasilii Sukhanov <vasilii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#626208}
(cherry picked from commit ceb5964755062a1972ad1f7c12b5aff134ce71bf)

----- https://crrev.com/c/1437703
Add includes for IWYU: follow-on for cl/1428939

This CL just adds some includes that were harmlessly forgotten
on https://crrev.com/c/1428939 .

Change-Id: Ic885065cbf20398dfff723a85aff35141e0cb9a5
Reviewed-on: https://chromium-review.googlesource.com/c/1444696
Reviewed-by: Justin Donnelly <jdonnelly@chromium.org>
Cr-Commit-Position: refs/branch-heads/3683@{#77}
Cr-Branched-From: e51029943e0a38dd794b73caaf6373d5496ae783-refs/heads/master@{#625896}
diff --git a/components/search_engines/util.cc b/components/search_engines/util.cc
index 324409b..d1371e3 100644
--- a/components/search_engines/util.cc
+++ b/components/search_engines/util.cc
@@ -8,9 +8,11 @@
 #include <stdint.h>
 
 #include <algorithm>
+#include <limits>
 #include <map>
 #include <set>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "base/logging.h"
@@ -55,77 +57,100 @@
     const SearchTermsData& search_terms_data,
     std::set<std::string>* removed_keyword_guids) {
   DCHECK(template_urls);
+  TemplateURLService::OwnedTemplateURLVector checked_urls;
 
   // For convenience construct an ID->TemplateURL* map from |prepopulated_urls|.
   std::map<int, TemplateURLData*> prepopulated_url_map;
   for (const auto& url : prepopulated_urls)
     prepopulated_url_map[url->prepopulate_id] = url.get();
 
-  // Separate |template_urls| into prepopulated and non-prepopulated groups.
-  std::multimap<int, std::unique_ptr<TemplateURL>> unchecked_urls;
-  TemplateURLService::OwnedTemplateURLVector checked_urls;
+  constexpr size_t invalid_index = std::numeric_limits<size_t>::max();
+  // A helper structure for deduplicating elements with the same prepopulate_id.
+  struct DuplicationData {
+    DuplicationData() : index_representative(invalid_index) {}
+
+    // The index into checked_urls at which the best representative is stored.
+    size_t index_representative;
+
+    // Proper duplicates for consideration during selection phase.  This
+    // does not include the representative stored in checked_urls.
+    TemplateURLService::OwnedTemplateURLVector duplicates;
+  };
+  // Map from prepopulate_id to data for deduplication and selection.
+  std::unordered_map<int, DuplicationData> duplication_map;
+
+  const auto has_default_search_keyword = [&](const auto& turl) {
+    return default_search_provider &&
+           (default_search_provider->prepopulate_id() ==
+            turl->prepopulate_id()) &&
+           default_search_provider->HasSameKeywordAs(turl->data(),
+                                                     search_terms_data);
+  };
+
+  // Deduplication phase: move elements into new vector, preserving order while
+  // gathering duplicates into separate container for selection.
   for (auto& turl : *template_urls) {
-    int prepopulate_id = turl->prepopulate_id();
-    if (prepopulate_id)
-      unchecked_urls.insert(std::make_pair(prepopulate_id, std::move(turl)));
-    else
+    const int prepopulate_id = turl->prepopulate_id();
+    if (prepopulate_id) {
+      auto& duplication_data = duplication_map[prepopulate_id];
+      if (duplication_data.index_representative == invalid_index) {
+        // This is the first found.
+        duplication_data.index_representative = checked_urls.size();
+        checked_urls.push_back(std::move(turl));
+      } else {
+        // This is a duplicate.
+        duplication_data.duplicates.push_back(std::move(turl));
+      }
+    } else {
       checked_urls.push_back(std::move(turl));
+    }
   }
 
-  // For each group of prepopulated URLs with one ID, find the best URL to use
-  // and add it to the (initially all non-prepopulated) URLs we've already OKed.
-  // Delete the others from the service and from memory.
-  while (!unchecked_urls.empty()) {
-    // Find the best URL.
-    int prepopulate_id = unchecked_urls.begin()->first;
-    auto prepopulated_url = prepopulated_url_map.find(prepopulate_id);
-    auto end = unchecked_urls.upper_bound(prepopulate_id);
-    auto best = unchecked_urls.begin();
-    bool matched_keyword = false;
-    for (auto i = unchecked_urls.begin(); i != end; ++i) {
-      // If the user-selected DSE is a prepopulated engine its properties will
-      // either come from the prepopulation origin or from the user preferences
-      // file (see DefaultSearchManager). Those properties will end up
-      // overwriting whatever we load now anyway. If we are eliminating
-      // duplicates, then, we err on the side of keeping the thing that looks
-      // more like the value we will end up with in the end.
-      if (default_search_provider &&
-          (default_search_provider->prepopulate_id() ==
-              i->second->prepopulate_id()) &&
-          default_search_provider->HasSameKeywordAs(i->second->data(),
-                                                    search_terms_data)) {
-        best = i;
-        break;
-      }
+  // Selection and cleanup phase: swap out elements if necessary to ensure new
+  // vector contains only the best representative for each prepopulate_id.
+  // Then delete the remaining duplicates.
+  for (auto& id_data : duplication_map) {
+    const auto prepopulated_url = prepopulated_url_map.find(id_data.first);
+    const auto has_prepopulated_keyword = [&](const auto& turl) {
+      return (prepopulated_url != prepopulated_url_map.end()) &&
+             turl->HasSameKeywordAs(*prepopulated_url->second,
+                                    search_terms_data);
+    };
 
-      // Otherwise, a URL is best if it matches the prepopulated data's keyword;
-      // if none match, just fall back to using the one with the lowest ID.
-      if (matched_keyword)
-        continue;
-      if ((prepopulated_url != prepopulated_url_map.end()) &&
-          i->second->HasSameKeywordAs(*prepopulated_url->second,
-                                      search_terms_data)) {
-        best = i;
-        matched_keyword = true;
-      } else if (i->second->id() < best->second->id()) {
-        best = i;
+    // If the user-selected DSE is a prepopulated engine its properties will
+    // either come from the prepopulation origin or from the user preferences
+    // file (see DefaultSearchManager). Those properties will end up
+    // overwriting whatever we load now anyway. If we are eliminating
+    // duplicates, then, we err on the side of keeping the thing that looks
+    // more like the value we will end up with in the end.
+    // Otherwise, a URL is best if it matches the prepopulated data's keyword;
+    // if none match, just fall back to using the one with the lowest ID.
+    auto& best = checked_urls[id_data.second.index_representative];
+    if (!has_default_search_keyword(best)) {
+      bool matched_keyword = has_prepopulated_keyword(best);
+      for (auto& duplicate : id_data.second.duplicates) {
+        if (has_default_search_keyword(duplicate)) {
+          best.swap(duplicate);
+          break;
+        } else if (matched_keyword) {
+          continue;
+        } else if (has_prepopulated_keyword(duplicate)) {
+          best.swap(duplicate);
+          matched_keyword = true;
+        } else if (duplicate->id() < best->id()) {
+          best.swap(duplicate);
+        }
       }
     }
 
-    // Add the best URL to the checked group and delete the rest.
-    checked_urls.push_back(std::move(best->second));
-    for (auto i = unchecked_urls.begin(); i != end; ++i) {
-      if (i == best)
-        continue;
+    // Clean up what's left.
+    for (const auto& duplicate : id_data.second.duplicates) {
       if (service) {
-        service->RemoveKeyword(i->second->id());
+        service->RemoveKeyword(duplicate->id());
         if (removed_keyword_guids)
-          removed_keyword_guids->insert(i->second->sync_guid());
+          removed_keyword_guids->insert(duplicate->sync_guid());
       }
     }
-
-    // Done with this group.
-    unchecked_urls.erase(unchecked_urls.begin(), end);
   }
 
   // Return the checked URLs.
diff --git a/components/search_engines/util.h b/components/search_engines/util.h
index 39034e5..d3790c9 100644
--- a/components/search_engines/util.h
+++ b/components/search_engines/util.h
@@ -127,6 +127,9 @@
 // Sync GUID of each item removed from the DB will be added to it. This is a
 // helper used by GetSearchProvidersUsingKeywordResult(), but is declared here
 // so it's accessible by unittests.
+// The order of template_urls is preserved (except for duplicates) because it
+// affects order of presentation in settings web-ui.
+// See https://crbug.com/924268 for details.
 void RemoveDuplicatePrepopulateIDs(
     KeywordWebDataService* service,
     const std::vector<std::unique_ptr<TemplateURLData>>& prepopulated_urls,