| // Copyright 2019 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "services/image_annotation/annotator.h" |
| |
| #include <algorithm> |
| #include <tuple> |
| #include <utility> |
| |
| #include "base/base64.h" |
| #include "base/bind.h" |
| #include "base/feature_list.h" |
| #include "base/json/json_writer.h" |
| #include "base/location.h" |
| #include "base/logging.h" |
| #include "base/no_destructor.h" |
| #include "base/stl_util.h" |
| #include "base/strings/string_split.h" |
| #include "components/google/core/common/google_util.h" |
| #include "net/base/load_flags.h" |
| #include "net/base/net_errors.h" |
| #include "net/traffic_annotation/network_traffic_annotation.h" |
| #include "services/image_annotation/image_annotation_metrics.h" |
| #include "services/network/public/mojom/url_response_head.mojom.h" |
| #include "url/gurl.h" |
| |
| namespace image_annotation { |
| |
| namespace { |
| |
| constexpr size_t kImageAnnotationMaxResponseSize = 1024 * 1024; // 1MB. |
| constexpr size_t kServerLangsMaxResponseSize = 1024; // 1KB. |
| |
| // For a given source ID and requested description language, returns the unique |
| // image ID string that can be used to look up results from a server response. |
| std::string MakeImageId(const std::string& source_id, |
| const std::string& desc_lang_tag) { |
| return source_id + (desc_lang_tag.empty() ? "" : " " + desc_lang_tag); |
| } |
| |
| std::string NormalizeLanguageCode(std::string language) { |
| // Remove anything after a comma, in case we got more than one language |
| // like "de,de-DE". |
| language = language.substr(0, language.find(',')); |
| |
| // Split based on underscore or dash so that we catch both |
| // "zh_CN" and "zh-CN". |
| const std::vector<std::string> tokens = base::SplitString( |
| language, "-_", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
| |
| if (tokens.size() == 0) |
| return ""; |
| |
| // Normalize the language portion to lowercase. |
| const std::string language_only = base::ToLowerASCII(tokens[0]); |
| |
| // For every language other than "zh" (Chinese), return only the language |
| // and strip the locale. Image descriptions don't changed based on locale, |
| // but zh-CN and zh-TW use different character sets. |
| if (tokens.size() == 1 || language_only != "zh") |
| return language_only; |
| |
| // Normalize the locale to uppercase. |
| std::string locale_only = base::ToUpperASCII(tokens[1]); |
| |
| // Map several Chinese locales to the two most common ones used for |
| // Simplified and Traditional. |
| if (locale_only == "CN" || locale_only == "HANS" || locale_only == "SG") { |
| return "zh-CN"; |
| } else if (locale_only == "TW" || locale_only == "HANT" || |
| locale_only == "MO" || locale_only == "HK") { |
| return "zh-TW"; |
| } |
| |
| return "zh"; |
| } |
| |
| // The server returns separate OCR results for each region of the image; we |
| // naively concatenate these into one response string. |
| // |
| // Returns a null pointer if there is any unexpected structure to the |
| // annotations message. |
| mojom::AnnotationPtr ParseJsonOcrAnnotation(const base::Value& ocr_engine, |
| const double min_ocr_confidence) { |
| if (!ocr_engine.is_dict()) |
| return mojom::AnnotationPtr(nullptr); |
| |
| // No OCR regions is valid - it just means there is no text. |
| const base::Value* const ocr_regions = ocr_engine.FindKey("ocrRegions"); |
| if (!ocr_regions) { |
| ReportOcrAnnotation(1.0 /* confidence */, true /* empty */); |
| return mojom::Annotation::New(mojom::AnnotationType::kOcr, 1.0 /* score */, |
| std::string() /* text */); |
| } |
| |
| if (!ocr_regions->is_list()) |
| return mojom::AnnotationPtr(nullptr); |
| |
| std::string all_ocr_text; |
| int word_count = 0; |
| double word_confidence_sum = 0.0; |
| for (const base::Value& ocr_region : ocr_regions->GetList()) { |
| if (!ocr_region.is_dict()) |
| continue; |
| |
| const base::Value* const words = ocr_region.FindKey("words"); |
| if (!words || !words->is_list()) |
| continue; |
| |
| std::string region_ocr_text; |
| for (const base::Value& word : words->GetList()) { |
| if (!word.is_dict()) |
| continue; |
| |
| const base::Value* const detected_text = word.FindKey("detectedText"); |
| if (!detected_text || !detected_text->is_string()) |
| continue; |
| |
| // A confidence value of 0 or 1 is interpreted as an int and not a double. |
| const base::Value* const confidence = word.FindKey("confidenceScore"); |
| if (!confidence || (!confidence->is_double() && !confidence->is_int()) || |
| confidence->GetDouble() < 0.0 || confidence->GetDouble() > 1.0) |
| continue; |
| |
| if (confidence->GetDouble() < min_ocr_confidence) |
| continue; |
| |
| const std::string& detected_text_str = detected_text->GetString(); |
| |
| if (detected_text_str.empty()) |
| continue; |
| |
| if (!region_ocr_text.empty()) |
| region_ocr_text += " "; |
| |
| region_ocr_text += detected_text_str; |
| ++word_count; |
| word_confidence_sum += confidence->GetDouble(); |
| } |
| |
| if (!all_ocr_text.empty() && !region_ocr_text.empty()) |
| all_ocr_text += "\n"; |
| all_ocr_text += region_ocr_text; |
| } |
| |
| const double all_ocr_confidence = |
| word_count == 0 ? 1.0 : word_confidence_sum / word_count; |
| ReportOcrAnnotation(all_ocr_confidence, all_ocr_text.empty()); |
| return mojom::Annotation::New(mojom::AnnotationType::kOcr, all_ocr_confidence, |
| all_ocr_text); |
| } |
| |
| // Extracts annotations from the given description engine result into the second |
| // element of the return tuple. |
| // |
| // The first element of the return tuple will be true if the image was |
| // classified as containing adult content. |
| std::tuple<bool, std::vector<mojom::AnnotationPtr>> ParseJsonDescAnnotations( |
| const base::Value& desc_engine) { |
| static const base::NoDestructor<std::map<std::string, mojom::AnnotationType>> |
| kAnnotationTypes({{"OCR", mojom::AnnotationType::kOcr}, |
| {"CAPTION", mojom::AnnotationType::kCaption}, |
| {"LABEL", mojom::AnnotationType::kLabel}}); |
| |
| bool adult = false; |
| std::vector<mojom::AnnotationPtr> results; |
| |
| if (!desc_engine.is_dict()) |
| return {adult, std::move(results)}; |
| |
| // If there is a failure reason, log it and track whether it is due to adult |
| // content. |
| const base::Value* const failure_reason_value = |
| desc_engine.FindKey("failureReason"); |
| if (failure_reason_value && failure_reason_value->is_string()) { |
| const DescFailureReason failure_reason = |
| ParseDescFailureReason(failure_reason_value->GetString()); |
| ReportDescFailure(failure_reason); |
| adult = failure_reason == DescFailureReason::kAdult; |
| } |
| |
| const base::Value* const desc_list_dict = |
| desc_engine.FindKey("descriptionList"); |
| if (!desc_list_dict || !desc_list_dict->is_dict()) |
| return {adult, std::move(results)}; |
| |
| const base::Value* const desc_list = desc_list_dict->FindKey("descriptions"); |
| if (!desc_list || !desc_list->is_list()) |
| return {adult, std::move(results)}; |
| |
| for (const base::Value& desc : desc_list->GetList()) { |
| if (!desc.is_dict()) |
| continue; |
| |
| const base::Value* const type = desc.FindKey("type"); |
| if (!type || !type->is_string()) |
| continue; |
| |
| const auto type_lookup = kAnnotationTypes->find(type->GetString()); |
| if (type_lookup == kAnnotationTypes->end()) |
| continue; |
| |
| const base::Value* const score = desc.FindKey("score"); |
| if (!score || (!score->is_double() && !score->is_int())) |
| continue; |
| |
| const base::Value* const text = desc.FindKey("text"); |
| if (!text || !text->is_string()) |
| continue; |
| |
| ReportDescAnnotation(type_lookup->second, score->GetDouble(), |
| text->GetString().empty()); |
| |
| // For OCR, we allow empty text and unusual scores; at the time of writing, |
| // a score of -1 is always returned for OCR. |
| // |
| // For other annotation types, we do not allow these cases. |
| if (type_lookup->second != mojom::AnnotationType::kOcr && |
| (text->GetString().empty() || score->GetDouble() < 0.0 || |
| score->GetDouble() > 1.0)) |
| continue; |
| |
| results.push_back(mojom::Annotation::New( |
| type_lookup->second, score->GetDouble(), text->GetString())); |
| } |
| |
| return {adult, std::move(results)}; |
| } |
| |
| // Returns the integer status code for this engine, or -1 if no status can be |
| // extracted. |
| int ExtractStatusCode(const base::Value* const status_dict) { |
| if (!status_dict || !status_dict->is_dict()) |
| return -1; |
| |
| const base::Value* const code_value = status_dict->FindKey("code"); |
| |
| // A missing code is the same as a default (i.e. OK) code. |
| if (!code_value) |
| return 0; |
| |
| if (!code_value->is_int()) |
| return -1; |
| const int code = code_value->GetInt(); |
| |
| #ifndef NDEBUG |
| // Also log error status messages (which are helpful for debugging). |
| const base::Value* const message = status_dict->FindKey("message"); |
| if (code != 0 && message && message->is_string()) |
| DVLOG(1) << "Engine failed with status " << code << " and message '" |
| << message->GetString() << "'"; |
| #endif |
| |
| return code; |
| } |
| |
| // Attempts to extract annotation results from the server response, returning a |
| // map from each image ID to its annotations (if successfully extracted). |
| std::map<std::string, mojom::AnnotateImageResultPtr> UnpackJsonResponse( |
| const base::Value& json_data, |
| const double min_ocr_confidence) { |
| if (!json_data.is_dict()) |
| return {}; |
| |
| const base::Value* const results = json_data.FindKey("results"); |
| if (!results || !results->is_list()) |
| return {}; |
| |
| std::map<std::string, mojom::AnnotateImageResultPtr> out; |
| for (const base::Value& result : results->GetList()) { |
| if (!result.is_dict()) |
| continue; |
| |
| const base::Value* const image_id = result.FindKey("imageId"); |
| if (!image_id || !image_id->is_string()) |
| continue; |
| |
| const base::Value* const engine_results = result.FindKey("engineResults"); |
| if (!engine_results || !engine_results->is_list()) |
| continue; |
| |
| // We expect the engine result list to have exactly two results: one for OCR |
| // and one for image descriptions. However, we "robustly" handle missing |
| // engines, unknown engines (by skipping them) and repetitions (by |
| // overwriting data). |
| bool adult = false; |
| std::vector<mojom::AnnotationPtr> annotations; |
| mojom::AnnotationPtr ocr_annotation; |
| for (const base::Value& engine_result : engine_results->GetList()) { |
| if (!engine_result.is_dict()) |
| continue; |
| |
| // A non-zero status code means the following: |
| // -1: The status dict could not be parsed. We |
| // still try to parse an engine result in this |
| // case to be robust. |
| // any other non-zero value: The status dict was parsed and contains a |
| // known failure. We always report an error |
| // in this case. |
| const int status_code = |
| ExtractStatusCode(engine_result.FindKey("status")); |
| |
| const base::Value* const desc_engine = |
| engine_result.FindKey("descriptionEngine"); |
| const base::Value* const ocr_engine = engine_result.FindKey("ocrEngine"); |
| |
| if (desc_engine) { |
| // Add description annotations and update the adult image flag. |
| ReportDescStatus(status_code); |
| |
| if (status_code <= 0) { |
| std::tie(adult, annotations) = ParseJsonDescAnnotations(*desc_engine); |
| } |
| } else if (ocr_engine) { |
| // Update the specialized OCR annotations. |
| ReportOcrStatus(status_code); |
| |
| if (status_code <= 0) { |
| ocr_annotation = |
| ParseJsonOcrAnnotation(*ocr_engine, min_ocr_confidence); |
| } |
| } |
| |
| ReportEngineKnown(ocr_engine || desc_engine); |
| } |
| |
| // Remove any description OCR data (which is lower quality) if we have |
| // specialized OCR results. |
| if (!ocr_annotation.is_null()) { |
| base::EraseIf(annotations, [](const mojom::AnnotationPtr& a) { |
| return a->type == mojom::AnnotationType::kOcr; |
| }); |
| annotations.push_back(std::move(ocr_annotation)); |
| } |
| |
| if (adult) { |
| out[image_id->GetString()] = mojom::AnnotateImageResult::NewErrorCode( |
| mojom::AnnotateImageError::kAdult); |
| } else if (!annotations.empty()) { |
| out[image_id->GetString()] = |
| mojom::AnnotateImageResult::NewAnnotations(std::move(annotations)); |
| } |
| } |
| |
| return out; |
| } |
| |
| } // namespace |
| |
| constexpr char Annotator::kGoogApiKeyHeader[]; |
| |
| static_assert(Annotator::kDescMinDimension > 0, |
| "Description engine must accept images of some sizes."); |
| static_assert(Annotator::kDescMaxAspectRatio > 0.0, |
| "Description engine must accept images of some aspect ratios."); |
| |
| Annotator::ClientRequestInfo::ClientRequestInfo( |
| mojo::PendingRemote<mojom::ImageProcessor> in_image_processor, |
| AnnotateImageCallback in_callback) |
| : image_processor(std::move(in_image_processor)), |
| callback(std::move(in_callback)) {} |
| |
| Annotator::ClientRequestInfo::~ClientRequestInfo() = default; |
| |
| Annotator::ServerRequestInfo::ServerRequestInfo( |
| const std::string& in_source_id, |
| const bool in_desc_requested, |
| const std::string& in_desc_lang_tag, |
| const std::vector<uint8_t>& in_image_bytes) |
| : source_id(in_source_id), |
| desc_requested(in_desc_requested), |
| desc_lang_tag(in_desc_lang_tag), |
| image_bytes(in_image_bytes) {} |
| |
| Annotator::ServerRequestInfo& Annotator::ServerRequestInfo::operator=( |
| ServerRequestInfo&& other) = default; |
| |
| Annotator::ServerRequestInfo::~ServerRequestInfo() = default; |
| |
| Annotator::Annotator( |
| GURL pixels_server_url, |
| GURL langs_server_url, |
| std::string api_key, |
| const base::TimeDelta throttle, |
| const int batch_size, |
| const double min_ocr_confidence, |
| scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory, |
| std::unique_ptr<Client> client) |
| : client_(std::move(client)), |
| url_loader_factory_(std::move(url_loader_factory)), |
| pixels_server_url_(std::move(pixels_server_url)), |
| langs_server_url_(std::move(langs_server_url)), |
| api_key_(std::move(api_key)), |
| batch_size_(batch_size), |
| min_ocr_confidence_(min_ocr_confidence), |
| server_languages_({"de", "en", "es", "fr", "hi", "it"}) { |
| server_request_timer_ = std::make_unique<base::RepeatingTimer>( |
| FROM_HERE, throttle, |
| base::BindRepeating(&Annotator::SendRequestBatchToServer, |
| weak_factory_.GetWeakPtr())); |
| FetchServerLanguages(); |
| } |
| |
| Annotator::~Annotator() { |
| // Report any clients still connected at service shutdown. |
| for (const auto& request_info_kv : request_infos_) { |
| for (const auto& unused : request_info_kv.second) { |
| ReportClientResult(ClientResult::kShutdown); |
| ANALYZER_ALLOW_UNUSED(unused); |
| } |
| } |
| } |
| |
| void Annotator::BindReceiver(mojo::PendingReceiver<mojom::Annotator> receiver) { |
| receivers_.Add(this, std::move(receiver)); |
| } |
| |
| void Annotator::AnnotateImage( |
| const std::string& source_id, |
| const std::string& page_language, |
| mojo::PendingRemote<mojom::ImageProcessor> image_processor, |
| AnnotateImageCallback callback) { |
| // Compute the desired language for the description result, based on the |
| // page language, the accept languages, the top languages, and the |
| // server languages. |
| const std::string preferred_language = |
| ComputePreferredLanguage(page_language); |
| client_->RecordLanguageMetrics(page_language, preferred_language); |
| |
| const RequestKey request_key(source_id, preferred_language); |
| |
| // Return cached results if they exist. |
| const auto cache_lookup = cached_results_.find(request_key); |
| ReportCacheHit(cache_lookup != cached_results_.end()); |
| if (cache_lookup != cached_results_.end()) { |
| std::move(callback).Run(cache_lookup->second.Clone()); |
| return; |
| } |
| |
| // Register the ImageProcessor and callback to be used for this request. |
| std::list<ClientRequestInfo>& request_info_list = request_infos_[request_key]; |
| request_info_list.emplace_back(std::move(image_processor), |
| std::move(callback)); |
| |
| // If the image processor dies: automatically delete the request info and |
| // reassign local processing (for other interested clients) if the dead image |
| // processor was responsible for some ongoing work. |
| request_info_list.back().image_processor.set_disconnect_handler( |
| base::BindOnce(&Annotator::RemoveRequestInfo, weak_factory_.GetWeakPtr(), |
| request_key, --request_info_list.end(), |
| true /* canceled */)); |
| |
| // Don't start local work if it would duplicate some already-ongoing work. |
| if (base::Contains(local_processors_, request_key) || |
| base::Contains(pending_requests_, request_key)) |
| return; |
| |
| local_processors_.insert( |
| {request_key, &request_info_list.back().image_processor}); |
| |
| // TODO(crbug.com/916420): first query the public result cache by URL to |
| // improve latency. |
| |
| request_info_list.back().image_processor->GetJpgImageData(base::BindOnce( |
| &Annotator::OnJpgImageDataReceived, weak_factory_.GetWeakPtr(), |
| request_key, --request_info_list.end())); |
| } |
| |
| // static |
| bool Annotator::IsWithinDescPolicy(const int32_t width, const int32_t height) { |
| if (width < kDescMinDimension || height < kDescMinDimension) |
| return false; |
| |
| // Can't be 0 or inf because |kDescMinDimension| is guaranteed positive (via a |
| // static_assert). |
| const double aspect_ratio = static_cast<double>(width) / height; |
| if (aspect_ratio < 1.0 / kDescMaxAspectRatio || |
| aspect_ratio > kDescMaxAspectRatio) |
| return false; |
| |
| return true; |
| } |
| |
| // static |
| std::string Annotator::FormatJsonRequest( |
| const std::deque<ServerRequestInfo>::iterator begin, |
| const std::deque<ServerRequestInfo>::iterator end) { |
| base::Value image_request_list(base::Value::Type::LIST); |
| for (std::deque<ServerRequestInfo>::iterator it = begin; it != end; ++it) { |
| // Re-encode image bytes into base64, which can be represented in JSON. |
| std::string base64_data; |
| Base64Encode( |
| base::StringPiece(reinterpret_cast<const char*>(it->image_bytes.data()), |
| it->image_bytes.size()), |
| &base64_data); |
| |
| // TODO(crbug.com/916420): accept and propagate page language info to |
| // improve OCR accuracy. |
| base::Value ocr_engine_params(base::Value::Type::DICTIONARY); |
| ocr_engine_params.SetKey("ocrParameters", |
| base::Value(base::Value::Type::DICTIONARY)); |
| |
| base::Value engine_params_list(base::Value::Type::LIST); |
| engine_params_list.Append(std::move(ocr_engine_params)); |
| |
| // Also add a description annotations request if the image is within model |
| // policy. |
| if (it->desc_requested) { |
| base::Value desc_params(base::Value::Type::DICTIONARY); |
| |
| // Add preferred description language if it has been specified. |
| if (!it->desc_lang_tag.empty()) { |
| base::Value desc_lang_list(base::Value::Type::LIST); |
| desc_lang_list.Append(base::Value(it->desc_lang_tag)); |
| |
| desc_params.SetKey("preferredLanguages", std::move(desc_lang_list)); |
| } |
| |
| base::Value engine_params(base::Value::Type::DICTIONARY); |
| engine_params.SetKey("descriptionParameters", std::move(desc_params)); |
| |
| engine_params_list.Append(std::move(engine_params)); |
| } |
| ReportImageRequestIncludesDesc(it->desc_requested); |
| |
| base::Value image_request(base::Value::Type::DICTIONARY); |
| image_request.SetKey( |
| "imageId", base::Value(MakeImageId(it->source_id, it->desc_lang_tag))); |
| image_request.SetKey("imageBytes", base::Value(std::move(base64_data))); |
| image_request.SetKey("engineParameters", std::move(engine_params_list)); |
| |
| image_request_list.Append(std::move(image_request)); |
| } |
| |
| base::Value request(base::Value::Type::DICTIONARY); |
| request.SetKey("imageRequests", std::move(image_request_list)); |
| |
| std::string json_request; |
| base::JSONWriter::Write(request, &json_request); |
| |
| ReportServerRequestSizeKB(json_request.size() / 1024); |
| |
| return json_request; |
| } |
| |
| // static |
| std::unique_ptr<network::SimpleURLLoader> Annotator::MakeRequestLoader( |
| const GURL& server_url, |
| const std::string& api_key) { |
| auto resource_request = std::make_unique<network::ResourceRequest>(); |
| resource_request->method = "POST"; |
| |
| resource_request->url = server_url; |
| |
| resource_request->credentials_mode = network::mojom::CredentialsMode::kOmit; |
| |
| // Put API key in request's header if a key exists, and the endpoint is |
| // trusted by Google. |
| if (!api_key.empty() && server_url.SchemeIs(url::kHttpsScheme) && |
| google_util::IsGoogleAssociatedDomainUrl(server_url)) { |
| resource_request->headers.SetHeader(kGoogApiKeyHeader, api_key); |
| } |
| |
| const net::NetworkTrafficAnnotationTag traffic_annotation = |
| net::DefineNetworkTrafficAnnotation("image_annotation", R"( |
| semantics { |
| sender: "Get Image Descriptions from Google" |
| description: |
| "Chrome can provide image labels (which include detected objects, " |
| "extracted text and generated captions) to screen readers (for " |
| "visually-impaired users) by sending images to Google's servers. " |
| "If image labeling is enabled for a page, Chrome will send the " |
| "URLs and pixels of all images on the page to Google's servers, " |
| "which will return labels for content identified inside the " |
| "images. This content is made accessible to screen reading " |
| "software. Chrome fetches the list of supported languages from " |
| "the servers and uses that to determine what language to request " |
| "descriptions in." |
| trigger: "A page containing images is loaded for a user who has " |
| "automatic image labeling enabled. At most once per day, " |
| "Chrome fetches the list of supported languages as a " |
| "separate network request." |
| data: "Image pixels and URLs. No user identifier is sent along with " |
| "the data." |
| destination: GOOGLE_OWNED_SERVICE |
| } |
| policy { |
| cookies_allowed: NO |
| setting: |
| "You can enable or disable this feature via the context menu " |
| "for images, or via 'Get Image Descriptions' in Chrome's " |
| "settings under Accessibility. This feature is disabled by default." |
| chrome_policy { |
| AccessibilityImageLabelsEnabled { |
| AccessibilityImageLabelsEnabled: false |
| } |
| } |
| })"); |
| |
| return network::SimpleURLLoader::Create(std::move(resource_request), |
| traffic_annotation); |
| } |
| |
| void Annotator::OnJpgImageDataReceived( |
| const RequestKey& request_key, |
| const std::list<ClientRequestInfo>::iterator request_info_it, |
| const std::vector<uint8_t>& image_bytes, |
| const int32_t width, |
| const int32_t height) { |
| const std::string& source_id = request_key.first; |
| const std::string& request_language = request_key.second; |
| |
| ReportPixelFetchSuccess(!image_bytes.empty()); |
| |
| // Failed to retrieve bytes from local processor; remove dead processor and |
| // reschedule processing. |
| if (image_bytes.empty()) { |
| RemoveRequestInfo(request_key, request_info_it, false /* canceled */); |
| return; |
| } |
| |
| // Local processing is no longer ongoing. |
| local_processors_.erase(request_key); |
| |
| // Schedule a server request for this image. |
| server_request_queue_.emplace_front(source_id, |
| IsWithinDescPolicy(width, height), |
| request_language, image_bytes); |
| pending_requests_.insert(request_key); |
| |
| // Start sending batches to the server. |
| if (!server_request_timer_->IsRunning()) |
| server_request_timer_->Reset(); |
| } |
| |
| void Annotator::SendRequestBatchToServer() { |
| if (server_request_queue_.empty()) { |
| server_request_timer_->Stop(); |
| return; |
| } |
| |
| // Take last n elements (or all elements if there are less than n). |
| const auto begin = |
| server_request_queue_.end() - |
| std::min<size_t>(server_request_queue_.size(), batch_size_); |
| const auto end = server_request_queue_.end(); |
| |
| // The set of (source ID, desc lang) pairs relevant for this request. |
| std::set<RequestKey> request_keys; |
| for (std::deque<ServerRequestInfo>::iterator it = begin; it != end; it++) { |
| request_keys.insert({it->source_id, it->desc_lang_tag}); |
| } |
| |
| // Kick off server communication. |
| std::unique_ptr<network::SimpleURLLoader> url_loader = |
| MakeRequestLoader(pixels_server_url_, api_key_); |
| url_loader->AttachStringForUpload(FormatJsonRequest(begin, end), |
| "application/json"); |
| ongoing_server_requests_.push_back(std::move(url_loader)); |
| ongoing_server_requests_.back()->DownloadToString( |
| url_loader_factory_.get(), |
| base::BindOnce(&Annotator::OnServerResponseReceived, |
| weak_factory_.GetWeakPtr(), request_keys, |
| --ongoing_server_requests_.end()), |
| kImageAnnotationMaxResponseSize); |
| |
| server_request_queue_.erase(begin, end); |
| } |
| |
| void Annotator::OnServerResponseReceived( |
| const std::set<RequestKey>& request_keys, |
| const UrlLoaderList::iterator server_request_it, |
| const std::unique_ptr<std::string> json_response) { |
| ReportServerNetError(server_request_it->get()->NetError()); |
| |
| if (const network::mojom::URLResponseHead* const response_info = |
| server_request_it->get()->ResponseInfo()) { |
| ReportServerResponseCode(response_info->headers->response_code()); |
| ReportServerLatency(response_info->response_time - |
| response_info->request_time); |
| } |
| |
| ongoing_server_requests_.erase(server_request_it); |
| |
| if (!json_response) { |
| DVLOG(1) << "HTTP request to image annotation server failed."; |
| ProcessResults(request_keys, {}); |
| return; |
| } |
| |
| ReportServerResponseSizeBytes(json_response->size()); |
| |
| // Send JSON string to a dedicated service for safe parsing. |
| GetJsonParser()->Parse( |
| *json_response, base::BindOnce(&Annotator::OnResponseJsonParsed, |
| weak_factory_.GetWeakPtr(), request_keys)); |
| } |
| |
| void Annotator::OnResponseJsonParsed( |
| const std::set<RequestKey>& request_keys, |
| const base::Optional<base::Value> json_data, |
| const base::Optional<std::string>& error) { |
| const bool success = json_data.has_value() && !error.has_value(); |
| ReportJsonParseSuccess(success); |
| |
| // Extract annotation results for each request key with valid results. |
| if (success) { |
| ProcessResults(request_keys, |
| UnpackJsonResponse(*json_data, min_ocr_confidence_)); |
| } else { |
| DVLOG(1) << "Parsing server response JSON failed with error: " |
| << error.value_or("No reason reported."); |
| ProcessResults(request_keys, {}); |
| } |
| } |
| |
| void Annotator::ProcessResults( |
| const std::set<RequestKey>& request_keys, |
| const std::map<std::string, mojom::AnnotateImageResultPtr>& results) { |
| // Process each request key for which we expect to have results. |
| for (const RequestKey& request_key : request_keys) { |
| pending_requests_.erase(request_key); |
| |
| // The lookup will be successful if there is a valid result (i.e. not an |
| // error and not a malformed result) for this (source ID, desc lang) pair. |
| const auto result_lookup = |
| results.find(MakeImageId(request_key.first, request_key.second)); |
| |
| // Populate the result struct for this image and copy it into the cache if |
| // necessary. |
| if (result_lookup != results.end()) |
| cached_results_.insert( |
| std::make_pair(request_key, result_lookup->second.Clone())); |
| |
| // This should not happen, since only this method removes entries of |
| // |request_infos_|, and this method should only execute once per request |
| // key. |
| const auto request_info_it = request_infos_.find(request_key); |
| if (request_info_it == request_infos_.end()) { |
| LOG(ERROR) << "Could not find request key in request_infos_: " |
| << request_key.first << "," << request_key.second; |
| continue; |
| } |
| |
| const auto image_result = result_lookup != results.end() |
| ? result_lookup->second.Clone() |
| : mojom::AnnotateImageResult::NewErrorCode( |
| mojom::AnnotateImageError::kFailure); |
| const auto client_result = result_lookup != results.end() |
| ? ClientResult::kSucceeded |
| : ClientResult::kFailed; |
| |
| // Notify clients of success or failure. |
| // TODO(crbug.com/916420): explore server retry strategies. |
| for (auto& info : request_info_it->second) { |
| std::move(info.callback).Run(image_result.Clone()); |
| ReportClientResult(client_result); |
| } |
| request_infos_.erase(request_info_it); |
| } |
| } |
| |
| data_decoder::mojom::JsonParser* Annotator::GetJsonParser() { |
| if (!json_parser_) { |
| client_->BindJsonParser(json_parser_.BindNewPipeAndPassReceiver()); |
| json_parser_.reset_on_disconnect(); |
| } |
| |
| return json_parser_.get(); |
| } |
| |
| void Annotator::RemoveRequestInfo( |
| const RequestKey& request_key, |
| const std::list<ClientRequestInfo>::iterator request_info_it, |
| const bool canceled) { |
| // Check whether we are deleting the ImageProcessor responsible for current |
| // local processing. |
| auto local_processor_lookup = local_processors_.find(request_key); |
| const bool should_reassign = |
| local_processor_lookup != local_processors_.end() && |
| local_processor_lookup->second == &request_info_it->image_processor; |
| |
| // Notify client of cancellation / failure. |
| ReportClientResult(canceled ? ClientResult::kCanceled |
| : ClientResult::kFailed); |
| std::move(request_info_it->callback) |
| .Run(mojom::AnnotateImageResult::NewErrorCode( |
| canceled ? mojom::AnnotateImageError::kCanceled |
| : mojom::AnnotateImageError::kFailure)); |
| |
| // Delete the specified ImageProcessor. |
| std::list<ClientRequestInfo>& request_info_list = request_infos_[request_key]; |
| request_info_list.erase(request_info_it); |
| |
| // If necessary, reassign local processing. |
| if (should_reassign) { |
| if (request_info_list.empty()) { |
| local_processors_.erase(local_processor_lookup); |
| } else { |
| local_processor_lookup->second = |
| &request_info_list.front().image_processor; |
| |
| request_info_list.front().image_processor->GetJpgImageData(base::BindOnce( |
| &Annotator::OnJpgImageDataReceived, weak_factory_.GetWeakPtr(), |
| request_key, request_info_list.begin())); |
| } |
| } |
| } |
| |
| std::string Annotator::ComputePreferredLanguage( |
| const std::string& in_page_language) const { |
| DCHECK(!server_languages_.empty()); |
| if (in_page_language.empty()) |
| return ""; |
| |
| std::string page_language = NormalizeLanguageCode(in_page_language); |
| std::vector<std::string> accept_languages = client_->GetAcceptLanguages(); |
| std::transform(accept_languages.begin(), accept_languages.end(), |
| accept_languages.begin(), NormalizeLanguageCode); |
| std::vector<std::string> top_languages = client_->GetTopLanguages(); |
| std::transform(top_languages.begin(), top_languages.end(), |
| top_languages.begin(), NormalizeLanguageCode); |
| |
| // If the page language is a server language and it's in the list of accept |
| // languages or top languages for this user, return that. |
| if (base::Contains(server_languages_, page_language) && |
| (base::Contains(accept_languages, page_language) || |
| base::Contains(top_languages, page_language))) { |
| return page_language; |
| } |
| |
| // Otherwise, ignore the page language and compute the best language |
| // for this user. The accept languages are the ones the user can |
| // explicitly choose, so pick the first accept language that's a |
| // top language and a server language. |
| if (!top_languages.empty()) { |
| for (const std::string& accept_language : accept_languages) { |
| if (base::Contains(server_languages_, accept_language) && |
| base::Contains(top_languages, accept_language)) { |
| return accept_language; |
| } |
| } |
| } |
| |
| // Sometimes the top languages are empty. Try any accept language that's |
| // a server language. |
| for (const std::string& accept_language : accept_languages) { |
| if (base::Contains(server_languages_, accept_language)) |
| return accept_language; |
| } |
| |
| // If that still fails, try any top language that's a server language. |
| for (const std::string& top_language : top_languages) { |
| if (base::Contains(server_languages_, top_language)) |
| return top_language; |
| } |
| |
| // If all else fails, return the first accept language. The server can |
| // still do OCR and it can log this language request. |
| if (!accept_languages.empty()) |
| return accept_languages[0]; |
| |
| // If that fails, return the page language. The server can |
| // still do OCR and it can log this language request. |
| return page_language; |
| } |
| |
| void Annotator::FetchServerLanguages() { |
| if (langs_server_url_.is_empty()) |
| return; |
| |
| langs_url_loader_ = MakeRequestLoader(langs_server_url_, api_key_); |
| langs_url_loader_->AttachStringForUpload("", "application/json"); |
| langs_url_loader_->DownloadToString( |
| url_loader_factory_.get(), |
| base::BindOnce(&Annotator::OnServerLangsResponseReceived, |
| weak_factory_.GetWeakPtr()), |
| kServerLangsMaxResponseSize); |
| } |
| |
| void Annotator::OnServerLangsResponseReceived( |
| const std::unique_ptr<std::string> json_response) { |
| if (!json_response) { |
| DVLOG(1) << "Failed to get languages from the server."; |
| return; |
| } |
| |
| GetJsonParser()->Parse( |
| *json_response, |
| base::BindOnce(&Annotator::OnServerLangsResponseJsonParsed, |
| weak_factory_.GetWeakPtr())); |
| } |
| |
| void Annotator::OnServerLangsResponseJsonParsed( |
| base::Optional<base::Value> json_data, |
| const base::Optional<std::string>& error) { |
| if (!json_data.has_value() || error.has_value()) { |
| DVLOG(1) << "Parsing server langs response JSON failed with error: " |
| << error.value_or("No reason reported."); |
| return; |
| } |
| |
| const base::Value* const langs = json_data->FindKey("langs"); |
| if (!langs || !langs->is_list()) { |
| DVLOG(1) << "No langs in response JSON"; |
| return; |
| } |
| |
| std::vector<std::string> new_server_languages; |
| for (const base::Value& lang : langs->GetList()) { |
| if (!lang.is_string()) { |
| DVLOG(1) << "Lang in response JSON is not a string"; |
| return; |
| } |
| new_server_languages.push_back(lang.GetString()); |
| } |
| |
| if (!base::Contains(new_server_languages, "en")) { |
| DVLOG(1) << "Server langs don't even include 'en', rejecting"; |
| return; |
| } |
| |
| // Only swap in the new languages at the end, if all of the other |
| // checks passed. |
| server_languages_.swap(new_server_languages); |
| } |
| |
| } // namespace image_annotation |