blob: b277b752c7b10fb4b1892d983d29c850c30b3d90 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/services/screen_ai/screen_ai_service_impl.h"

#include <memory>
#include <utility>
#include <vector>

#include "base/check.h"
#include "base/functional/bind.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/scoped_refptr.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/histogram_functions.h"
#include "base/process/process.h"
#include "base/task/single_thread_task_runner.h"
#include "base/task/thread_pool.h"
#include "components/services/screen_ai/proto/main_content_extractor_proto_convertor.h"
#include "components/services/screen_ai/proto/visual_annotator_proto_convertor.h"
#include "components/services/screen_ai/public/cpp/utilities.h"
#include "components/services/screen_ai/screen_ai_ax_tree_serializer.h"
#include "content/public/browser/browser_thread.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "ui/accessibility/accessibility_features.h"
#include "ui/accessibility/ax_tree_id.h"
#include "ui/gfx/geometry/rect_f.h"
namespace screen_ai {
namespace {
// Outcome of loading and initializing the Screen AI library, as recorded in
// the "Accessibility.ScreenAI.LoadLibraryResult" histogram.
//
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class ScreenAILoadLibraryResult {
// The library was loaded and every enabled component initialized.
kAllOk = 0,
// Not recorded anywhere in this file; presumably retained only so that the
// numeric value 1 is never reused.
kDeprecatedVisualAnnotationFailed = 1,
// `InitMainContentExtraction()` returned false.
kMainContentExtractionFailed = 2,
// `InitLayoutExtraction()` returned false.
kLayoutExtractionFailed = 3,
// `InitOCR()` returned false.
kOcrFailed = 4,
// `ScreenAILibraryWrapper::Init()` returned false for the library path.
kFunctionsLoadFailed = 5,
// Alias of the highest-valued entry; keep it in sync when appending entries.
kMaxValue = kFunctionsLoadFailed,
};
// Loads the Screen AI library found at `library_path` and initializes each
// component whose feature flag is enabled (OCR, layout extraction, main
// content extraction), stopping at the first step that fails.
//
// `model_config` and `model_tflite` are only handed to main content extraction
// initialization. Exactly one sample is recorded to
// "Accessibility.ScreenAI.LoadLibraryResult" per call: the first failure, or
// `kAllOk` when every step succeeded.
//
// Returns the initialized library, or an empty pointer if loading or any
// initialization step fails. Must not run on the browser UI thread.
std::unique_ptr<ScreenAILibraryWrapper> LoadAndInitializeLibraryInternal(
    base::File model_config,
    base::File model_tflite,
    const base::FilePath& library_path) {
  DCHECK(!content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  const auto record_result = [](ScreenAILoadLibraryResult result) {
    base::UmaHistogramEnumeration("Accessibility.ScreenAI.LoadLibraryResult",
                                  result);
  };
  // Records `reason`, logs the failure and yields the empty result. The
  // partially initialized library is released when the caller returns.
  const auto fail = [&record_result](ScreenAILoadLibraryResult reason)
      -> std::unique_ptr<ScreenAILibraryWrapper> {
    record_result(reason);
    VLOG(0) << "Screen AI library initialization failed.";
    return nullptr;
  };

  std::unique_ptr<ScreenAILibraryWrapper> library =
      std::make_unique<ScreenAILibraryWrapper>();
  if (!library->Init(library_path)) {
    return fail(ScreenAILoadLibraryResult::kFunctionsLoadFailed);
  }

  // Zero-initialized so that a defined value is logged even if the library
  // leaves either out-parameter untouched.
  uint32_t version_major = 0;
  uint32_t version_minor = 0;
  library->GetLibraryVersion(version_major, version_minor);
  VLOG(2) << "Screen AI library version: " << version_major << "."
          << version_minor;

#if BUILDFLAG(IS_CHROMEOS_ASH)
  library->SetLogger();
#endif

  if (features::IsScreenAIDebugModeEnabled()) {
    library->EnableDebugMode();
  }

  if (features::IsPdfOcrEnabled() &&
      !library->InitOCR(library_path.DirName())) {
    return fail(ScreenAILoadLibraryResult::kOcrFailed);
  }

  if (features::IsLayoutExtractionEnabled() &&
      !library->InitLayoutExtraction()) {
    return fail(ScreenAILoadLibraryResult::kLayoutExtractionFailed);
  }

  if (features::IsReadAnythingWithScreen2xEnabled() &&
      !library->InitMainContentExtraction(model_config, model_tflite)) {
    return fail(ScreenAILoadLibraryResult::kMainContentExtractionFailed);
  }

  record_result(ScreenAILoadLibraryResult::kAllOk);
  return library;
}
} // namespace
// Binds `receiver` to this instance and creates `task_runner_` as a deferred
// runner that targets the current default single-thread task runner. The
// runner is only started in SetLibraryAndStartTaskRunner(), once the library
// has been loaded successfully.
ScreenAIService::ScreenAIService(
    mojo::PendingReceiver<mojom::ScreenAIService> receiver)
    : task_runner_(base::MakeRefCounted<base::DeferredSequencedTaskRunner>(
          base::SingleThreadTaskRunner::GetCurrentDefault())),
      receiver_(this, std::move(receiver)) {}
// Defaulted destructor; no explicit teardown is performed here.
ScreenAIService::~ScreenAIService() = default;
// Loads and initializes the library on the thread pool (the work may block),
// then hands the result to SetLibraryAndStartTaskRunner() together with
// `callback`. The reply is bound to a weak pointer to this service.
void ScreenAIService::LoadAndInitializeLibrary(
    base::File model_config,
    base::File model_tflite,
    const base::FilePath& library_path,
    LoadAndInitializeLibraryCallback callback) {
  // Runs off the calling sequence; takes ownership of both model files.
  auto load_library =
      base::BindOnce(&LoadAndInitializeLibraryInternal, std::move(model_config),
                     std::move(model_tflite), library_path);

  // Receives the loaded library (or an empty pointer on failure).
  auto on_library_loaded =
      base::BindOnce(&ScreenAIService::SetLibraryAndStartTaskRunner,
                     weak_ptr_factory_.GetWeakPtr(), std::move(callback));

  base::ThreadPool::PostTaskAndReplyWithResult(
      FROM_HERE,
      {base::MayBlock(), base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN},
      std::move(load_library), std::move(on_library_loaded));
}
// Reports the load outcome through `success_callback` and then either adopts
// `library` and starts `task_runner_`, or terminates the current process when
// `library` is empty (i.e. loading or initialization failed).
void ScreenAIService::SetLibraryAndStartTaskRunner(
    LoadAndInitializeLibraryCallback success_callback,
    std::unique_ptr<ScreenAILibraryWrapper> library) {
  const bool loaded = library != nullptr;
  // The caller is answered first, before the process is possibly terminated.
  std::move(success_callback).Run(loaded);

  if (loaded) {
    library_ = std::move(library);
    // Releases the tasks that were queued while the library was loading.
    task_runner_->Start();
  } else {
    base::Process::TerminateCurrentProcessImmediately(-1);
  }
}
// Adds `annotator` to `screen_ai_annotators_`, passing this service as the
// implementation that serves it.
// NOTE(review): `screen_ai_annotators_` is presumably a mojo::ReceiverSet
// declared in the header — confirm.
void ScreenAIService::BindAnnotator(
mojo::PendingReceiver<mojom::ScreenAIAnnotator> annotator) {
screen_ai_annotators_.Add(this, std::move(annotator));
}
// Binds `screen_ai_annotator_client_` to `annotator_client`. The remote must
// not already be bound when this is called (DCHECKed).
void ScreenAIService::BindAnnotatorClient(
mojo::PendingRemote<mojom::ScreenAIAnnotatorClient> annotator_client) {
DCHECK(!screen_ai_annotator_client_.is_bound());
screen_ai_annotator_client_.Bind(std::move(annotator_client));
}
// Adds `main_content_extractor` to `screen_2x_main_content_extractors_`,
// passing this service as the implementation that serves it.
// NOTE(review): `screen_2x_main_content_extractors_` is presumably a
// mojo::ReceiverSet declared in the header — confirm.
void ScreenAIService::BindMainContentExtractor(
mojo::PendingReceiver<mojom::Screen2xMainContentExtractor>
main_content_extractor) {
screen_2x_main_content_extractors_.Add(this,
std::move(main_content_extractor));
}
// Runs layout extraction on `image` via `task_runner_`, serializes the
// resulting nodes into an accessibility tree parented to `parent_tree_id`,
// replies to `callback` with the new tree's ID and forwards the tree update to
// the annotator client. The annotator client must already be bound.
void ScreenAIService::ExtractSemanticLayout(
    const SkBitmap& image,
    const ui::AXTreeID& parent_tree_id,
    ExtractSemanticLayoutCallback callback) {
  DCHECK(screen_ai_annotator_client_.is_bound());

  std::unique_ptr<ui::AXTreeUpdate> annotation =
      std::make_unique<ui::AXTreeUpdate>();
  // We need to get the pointer beforehand since compiler optimizations may
  // result in binding the reply function (and moving `annotation`) before
  // binding the task.
  ui::AXTreeUpdate* annotation_ptr = annotation.get();

  // `image` and `parent_tree_id` are const references, so they are copied
  // into the callbacks; there is nothing to move from.
  task_runner_->PostTaskAndReply(
      FROM_HERE,
      base::BindOnce(&ScreenAIService::VisualAnnotationInternal,
                     weak_ptr_factory_.GetWeakPtr(), image,
                     /*run_ocr=*/false, /*run_layout_extraction=*/true,
                     annotation_ptr),
      base::BindOnce(
          // The service is passed as a weak pointer rather than as a raw
          // pointer to its `screen_ai_annotator_client_` member: the reply
          // still runs when the weakly-bound task above was cancelled because
          // the service went away, and must not touch a destroyed member.
          [](base::WeakPtr<ScreenAIService> service,
             const ui::AXTreeID& parent_tree_id,
             ExtractSemanticLayoutCallback callback,
             std::unique_ptr<ui::AXTreeUpdate> update) {
            // The original caller is always replied to, and an AXTreeIDUnknown
            // is sent to tell it that the annotation function was not
            // successful. However the client is only contacted for successful
            // runs and when we have an update.
            ScreenAIAXTreeSerializer serializer(parent_tree_id,
                                                std::move(update->nodes));
            *update = serializer.Serialize();
            // `ScreenAIAXTreeSerializer` should have assigned a new tree ID to
            // `update`. Thereby, it should never be an unknown tree ID,
            // otherwise there has been an unexpected serialization bug.
            DCHECK_NE(update->tree_data.tree_id, ui::AXTreeIDUnknown())
                << "Invalid serialization.\n"
                << update->ToString();
            std::move(callback).Run(update->tree_data.tree_id);
            if (service &&
                update->tree_data.tree_id != ui::AXTreeIDUnknown()) {
              service->screen_ai_annotator_client_->HandleAXTreeUpdate(*update);
            }
          },
          weak_ptr_factory_.GetWeakPtr(), parent_tree_id, std::move(callback),
          std::move(annotation)));
}
// Runs OCR on `image` via `task_runner_` and replies to `callback` with the
// resulting AXTreeUpdate. The update is left empty when OCR did not succeed.
void ScreenAIService::PerformOcrAndReturnAXTreeUpdate(
    const SkBitmap& image,
    PerformOcrAndReturnAXTreeUpdateCallback callback) {
  std::unique_ptr<ui::AXTreeUpdate> annotation =
      std::make_unique<ui::AXTreeUpdate>();
  // We need to get the pointer beforehand since compiler optimizations may
  // result in binding the reply function (and moving `annotation`) before
  // binding the task.
  ui::AXTreeUpdate* annotation_ptr = annotation.get();

  // `image` is a const reference, so it is copied into the task; there is
  // nothing to move from.
  task_runner_->PostTaskAndReply(
      FROM_HERE,
      base::BindOnce(&ScreenAIService::VisualAnnotationInternal,
                     weak_ptr_factory_.GetWeakPtr(), image,
                     /*run_ocr=*/true, /*run_layout_extraction=*/false,
                     annotation_ptr),
      base::BindOnce(
          [](PerformOcrAndReturnAXTreeUpdateCallback callback,
             std::unique_ptr<ui::AXTreeUpdate> update) {
            // The original caller is always replied to, and an empty
            // AXTreeUpdate tells that the annotation function was not
            // successful.
            std::move(callback).Run(*update);
            // TODO(crbug.com/1434701): Send the AXTreeUpdate to the browser
            // side client for Backlight.
            VLOG(1) << "OCR returned " << update->nodes.size() << " nodes.";
          },
          std::move(callback), std::move(annotation)));
}
void ScreenAIService::VisualAnnotationInternal(const SkBitmap& image,
bool run_ocr,
bool run_layout_extraction,
ui::AXTreeUpdate* annotation) {
// Currently we only support either of OCR or LayoutExtraction features.
DCHECK_NE(run_ocr, run_layout_extraction);
chrome_screen_ai::VisualAnnotation annotation_proto;
// TODO(https://crbug.com/1278249): Consider adding a signature that
// verifies the data integrity and source.
bool result = false;
if (run_ocr) {
result = library_->PerformOcr(image, annotation_proto);
} else /* if (run_layout_extraction) */ {
result = library_->ExtractLayout(image, annotation_proto);
}
if (!result) {
DCHECK_EQ(annotation->tree_data.tree_id, ui::AXTreeIDUnknown());
VLOG(1) << "Screen AI library could not process snapshot or no OCR data.";
return;
}
gfx::Rect image_rect(image.width(), image.height());
*annotation = VisualAnnotationToAXTreeUpdate(annotation_proto, image_rect);
}
// Runs main content extraction on `snapshot` via `task_runner_` and replies to
// `callback` with the IDs of the content nodes. The list is empty when the
// snapshot has no nodes or extraction produced nothing.
void ScreenAIService::ExtractMainContent(const ui::AXTreeUpdate& snapshot,
                                         ukm::SourceId ukm_source_id,
                                         ExtractMainContentCallback callback) {
  std::unique_ptr<std::vector<int32_t>> content_node_ids =
      std::make_unique<std::vector<int32_t>>();
  std::vector<int32_t>* node_ids_ptr = content_node_ids.get();

  // Ownership of |content_node_ids| is passed to the reply function, so it's
  // safe to pass an unretained pointer to the task function.
  // `snapshot` is a const reference and `ukm_source_id` is a plain integer
  // ID, so both are simply copied into the task.
  task_runner_->PostTaskAndReply(
      FROM_HERE,
      base::BindOnce(&ScreenAIService::ExtractMainContentInternal,
                     weak_ptr_factory_.GetWeakPtr(), snapshot, ukm_source_id,
                     base::Unretained(node_ids_ptr)),
      base::BindOnce(
          [](ExtractMainContentCallback callback,
             std::unique_ptr<std::vector<int32_t>> content_node_ids) {
            std::move(callback).Run(*content_node_ids);
          },
          std::move(callback), std::move(content_node_ids)));
}
// Serializes `snapshot`, asks the library for its main content and stores the
// resulting node IDs in `*content_node_ids`, which must be non-null and empty
// on entry. The duration of the library call is reported through
// RecordMetrics() for both outcomes; an empty snapshot is skipped without
// recording anything.
void ScreenAIService::ExtractMainContentInternal(
    const ui::AXTreeUpdate& snapshot,
    const ukm::SourceId& ukm_source_id,
    std::vector<int32_t>* content_node_ids) {
  DCHECK(content_node_ids);
  DCHECK(content_node_ids->empty());

  // Nothing to extract from an empty snapshot.
  if (snapshot.nodes.empty()) {
    return;
  }

  std::string view_hierarchy = SnapshotToViewHierarchy(snapshot);

  // Only the library call itself is timed, not the serialization above.
  const base::TimeTicks call_start = base::TimeTicks::Now();
  const bool extracted =
      library_->ExtractMainContent(view_hierarchy, *content_node_ids);
  const base::TimeDelta call_duration = base::TimeTicks::Now() - call_start;

  if (extracted) {
    VLOG(2) << "Screen2x returned " << content_node_ids->size() << " node ids.";
  } else {
    VLOG(1) << "Screen2x did not return main content.";
  }
  RecordMetrics(ukm_source_id, ukm::UkmRecorder::Get(), call_duration,
                /* success= */ extracted);
}
// Records how long a Screen2x distillation took, to the UMA histogram that
// matches `success` and, when `ukm_source_id` is valid, to the corresponding
// UKM metric (in milliseconds) through `ukm_recorder`.
// static
void ScreenAIService::RecordMetrics(ukm::SourceId ukm_source_id,
                                    ukm::UkmRecorder* ukm_recorder,
                                    base::TimeDelta elapsed_time,
                                    bool success) {
  // UKM entries are only emitted when they can be attributed to a source.
  const bool has_ukm_source = ukm_source_id != ukm::kInvalidSourceId;

  if (!success) {
    base::UmaHistogramTimes(
        "Accessibility.ScreenAI.Screen2xDistillationTime.Failure",
        elapsed_time);
    if (has_ukm_source) {
      ukm::builders::Accessibility_ScreenAI(ukm_source_id)
          .SetScreen2xDistillationTime_Failure(elapsed_time.InMilliseconds())
          .Record(ukm_recorder);
    }
    return;
  }

  base::UmaHistogramTimes(
      "Accessibility.ScreenAI.Screen2xDistillationTime.Success", elapsed_time);
  if (has_ukm_source) {
    ukm::builders::Accessibility_ScreenAI(ukm_source_id)
        .SetScreen2xDistillationTime_Success(elapsed_time.InMilliseconds())
        .Record(ukm_recorder);
  }
}
} // namespace screen_ai