webnn: Implement tracing for the TFLite backend
Copies the design of the trace events recently added to the Core ML
backend over to the TFLite backend.

To reduce overhead, webnn::ScopedTrace no longer requires a memory
allocation to create a substep.
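For reference, the usage pattern this enables looks roughly like the
following sketch (simplified; the base::BindOnce plumbing used in the
diff is reduced to a hypothetical RunLater() helper):

  ScopedTrace scoped_trace("GraphImplTflite::DispatchImpl");
  // Begins a nested "Acquire resources" event. AddStep() now just
  // records the string literal and emits the trace event; it no
  // longer heap-allocates a nested ScopedTrace.
  scoped_trace.AddStep("Acquire resources");
  RunLater(base::BindOnce(
      [](ScopedTrace scoped_trace) {
        // Ends the previous step's event and begins the next one,
        // both under the same local trace id as the outer event.
        scoped_trace.AddStep("Run inference");
      },
      // Moving transfers ownership of the trace; the moved-from
      // object's destructor ends nothing.
      std::move(scoped_trace)));

Since AddStep() stores the const char* directly, step names must be
string literals (or otherwise outlive the ScopedTrace object).
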
Change-Id: I2d62f0fcc111e9914abfba595c88ffc79d19ee59
Bug: 41486052
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6191420
Auto-Submit: Reilly Grant <reillyg@chromium.org>
Commit-Queue: Reilly Grant <reillyg@chromium.org>
Commit-Queue: Austin Sullivan <asully@chromium.org>
Reviewed-by: Austin Sullivan <asully@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1410067}
diff --git a/services/webnn/public/cpp/webnn_trace.cc b/services/webnn/public/cpp/webnn_trace.cc
index 73ca78c..61a3d2f7 100644
--- a/services/webnn/public/cpp/webnn_trace.cc
+++ b/services/webnn/public/cpp/webnn_trace.cc
@@ -5,7 +5,6 @@
#include "services/webnn/public/cpp/webnn_trace.h"
#include "base/logging.h"
-#include "base/memory/ptr_util.h"
#include "base/trace_event/trace_event.h"
#include "base/trace_event/trace_id_helper.h"
@@ -13,15 +12,19 @@
constexpr char kWebNNTraceCategory[] = "webnn";
-// Reset the |id_| so the moved `ScopedTrace` object won't end the trace
-// prematurely on destruction.
+// Reset the `id_` and `step_name_` fields so the moved `ScopedTrace` object
+// won't end the trace prematurely on destruction.
ScopedTrace::ScopedTrace(ScopedTrace&& other)
: name_(other.name_),
id_(std::exchange(other.id_, std::nullopt)),
- step_(std::move(other.step_)) {}
+ step_name_(std::exchange(other.step_name_, std::nullopt)) {}
ScopedTrace::~ScopedTrace() {
if (id_.has_value()) {
+ if (step_name_.has_value()) {
+ TRACE_EVENT_NESTABLE_ASYNC_END0(kWebNNTraceCategory, *step_name_,
+ TRACE_ID_LOCAL(id_.value()));
+ }
TRACE_EVENT_NESTABLE_ASYNC_END0(kWebNNTraceCategory, name_,
TRACE_ID_LOCAL(id_.value()));
}
@@ -31,7 +34,7 @@
if (this != &other) {
name_ = other.name_;
id_ = std::exchange(other.id_, std::nullopt);
- step_ = std::move(other.step_);
+ step_name_ = std::exchange(other.step_name_, std::nullopt);
}
return *this;
}
@@ -39,8 +42,13 @@
void ScopedTrace::AddStep(const char* step_name) {
// Calling AddStep() after move is not allowed.
CHECK(id_.has_value());
- step_.reset();
- step_ = base::WrapUnique(new ScopedTrace(step_name, id_.value()));
+ if (step_name_.has_value()) {
+ TRACE_EVENT_NESTABLE_ASYNC_END0(kWebNNTraceCategory, *step_name_,
+ TRACE_ID_LOCAL(id_.value()));
+ }
+ step_name_ = step_name;
+ TRACE_EVENT_NESTABLE_ASYNC_BEGIN0(kWebNNTraceCategory, *step_name_,
+ TRACE_ID_LOCAL(id_.value()));
}
ScopedTrace::ScopedTrace(const char* name)
diff --git a/services/webnn/public/cpp/webnn_trace.h b/services/webnn/public/cpp/webnn_trace.h
index 5686d60..2005e989 100644
--- a/services/webnn/public/cpp/webnn_trace.h
+++ b/services/webnn/public/cpp/webnn_trace.h
@@ -47,7 +47,13 @@
// 'ScopedTrace' object, and stops 'this''s destruction from ending the
// trace.
std::optional<uint64_t> id_;
- std::unique_ptr<ScopedTrace> step_;
+
+  // The name of the current step, if any.
+  //
+  // `std::nullopt` means that either the trace has been transferred to
+  // another `ScopedTrace` object or there is no active sub-trace; in
+  // either case, `this`'s destruction must not end a sub-trace event.
+ std::optional<const char*> step_name_;
};
} // namespace webnn
diff --git a/services/webnn/tflite/graph_impl_tflite.cc b/services/webnn/tflite/graph_impl_tflite.cc
index 30df4ba..5b961922 100644
--- a/services/webnn/tflite/graph_impl_tflite.cc
+++ b/services/webnn/tflite/graph_impl_tflite.cc
@@ -19,6 +19,7 @@
#include "mojo/public/cpp/bindings/self_owned_associated_receiver.h"
#include "services/webnn/buildflags.h"
#include "services/webnn/error.h"
+#include "services/webnn/public/cpp/webnn_trace.h"
#include "services/webnn/public/mojom/webnn_context_provider.mojom.h"
#include "services/webnn/public/mojom/webnn_error.mojom.h"
#include "services/webnn/public/mojom/webnn_graph.mojom.h"
@@ -181,12 +182,13 @@
#endif
}
- void DoDispatch(base::flat_map<int, raw_ref<const BufferContent>> tensors) {
- TfLiteStatus status;
- bool needs_reallocate_tensors = false;
+ void DoDispatch(base::flat_map<int, raw_ref<const BufferContent>> tensors,
+ ScopedTrace scoped_trace) {
+    scoped_trace.AddStep("Set up interpreter");
// TODO: Detect when `tensors` hasn't changed since the last invocation and
// this step can be skipped.
+ bool needs_reallocate_tensors = false;
for (auto& [tensor_idx, buffer] : tensors) {
TfLiteTensor* tensor = interpreter_->tensor(tensor_idx);
if (tensor->allocation_type == kTfLitePersistentRo) {
@@ -196,7 +198,7 @@
}
base::span<uint8_t> data = buffer->AsSpan();
- status = interpreter_->SetCustomAllocationForTensor(
+ TfLiteStatus status = interpreter_->SetCustomAllocationForTensor(
tensor_idx, {data.data(), data.size()});
if (status != kTfLiteOk) {
LOG(ERROR) << "Unable set custom tensor allocation: "
@@ -207,7 +209,7 @@
}
if (needs_reallocate_tensors) {
- status = interpreter_->AllocateTensors();
+ TfLiteStatus status = interpreter_->AllocateTensors();
if (status != kTfLiteOk) {
LOG(ERROR) << "Unable to allocate tensors: "
<< TfLiteStatusToString(status);
@@ -215,10 +217,11 @@
}
}
+ scoped_trace.AddStep("Run inference");
#if BUILDFLAG(WEBNN_ENABLE_TFLITE_PROFILER)
profiler_.StartProfiling();
#endif
- status = interpreter_->Invoke();
+ TfLiteStatus status = interpreter_->Invoke();
#if BUILDFLAG(WEBNN_ENABLE_TFLITE_PROFILER)
profiler_.StopProfiling();
#endif
@@ -229,6 +232,7 @@
}
// Copy the outputs that weren't configured as custom allocations.
+ scoped_trace.AddStep("Process outputs");
for (int tensor_idx : interpreter_->outputs()) {
TfLiteTensor* tensor = interpreter_->tensor(tensor_idx);
if (tensor->allocation_type == kTfLitePersistentRo) {
@@ -323,6 +327,8 @@
void GraphImplTflite::DispatchImpl(
const base::flat_map<std::string_view, WebNNTensorImpl*>& named_inputs,
const base::flat_map<std::string_view, WebNNTensorImpl*>& named_outputs) {
+ ScopedTrace scoped_trace("GraphImplTflite::DispatchImpl");
+
std::vector<
std::pair<int, scoped_refptr<QueueableResourceState<BufferContent>>>>
input_buffer_states, output_buffer_states;
@@ -359,6 +365,7 @@
exclusive_resources.push_back(buffer_state);
}
+ scoped_trace.AddStep("Acquire resources");
auto task = base::MakeRefCounted<ResourceTask>(
std::move(shared_resources), std::move(exclusive_resources),
base::BindOnce(
@@ -370,7 +377,7 @@
base::flat_map<
int, scoped_refptr<QueueableResourceState<BufferContent>>>
output_buffer_states,
- base::OnceClosure completion_closure) {
+ ScopedTrace scoped_trace, base::OnceClosure completion_closure) {
ComputeResources* raw_compute_resources =
compute_resources_state->GetExclusivelyLockedResource();
@@ -388,12 +395,12 @@
// a `QueueableResourceState` corresponding to
// `raw_compute_resources` is held by the
// `ResourceTask` until `completion_closure` is run below.
- base::Unretained(raw_compute_resources),
- std::move(buffers)),
+ base::Unretained(raw_compute_resources), std::move(buffers),
+ std::move(scoped_trace)),
std::move(completion_closure));
},
compute_resources_state_, std::move(input_buffer_states),
- std::move(output_buffer_states)));
+ std::move(output_buffer_states), std::move(scoped_trace)));
task->Enqueue();
}
diff --git a/services/webnn/tflite/tensor_impl_tflite.cc b/services/webnn/tflite/tensor_impl_tflite.cc
index 95454fd..c470099 100644
--- a/services/webnn/tflite/tensor_impl_tflite.cc
+++ b/services/webnn/tflite/tensor_impl_tflite.cc
@@ -9,6 +9,7 @@
#include "base/compiler_specific.h"
#include "base/memory/ptr_util.h"
#include "base/memory/scoped_refptr.h"
+#include "services/webnn/public/cpp/webnn_trace.h"
#include "services/webnn/public/mojom/webnn_tensor.mojom.h"
#include "services/webnn/queueable_resource_state.h"
#include "services/webnn/queueable_resource_state_base.h"
@@ -59,10 +60,14 @@
void TensorImplTflite::ReadTensorImpl(ReadTensorCallback callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+
+ ScopedTrace scoped_trace("TensorImplTflite::ReadTensorImpl");
+
// Lock the buffer contents as shared/read-only.
std::vector<scoped_refptr<QueueableResourceStateBase>> shared_resources = {
buffer_state_};
+ scoped_trace.AddStep("Wait for tensor");
auto task = base::MakeRefCounted<ResourceTask>(
std::move(shared_resources),
/*exclusive_resources=*/
@@ -70,25 +75,32 @@
base::BindOnce(
[](scoped_refptr<QueueableResourceState<BufferContent>>
content_handle,
- ReadTensorCallback callback,
+ ReadTensorCallback callback, ScopedTrace scoped_trace,
base::OnceClosure completion_closure) {
+ scoped_trace.AddStep("Begin read");
// Memory copies are fast, avoid the overhead of posting a task
// to the thread pool and do the work synchronously.
std::move(callback).Run(
mojom::ReadTensorResult::NewBuffer(mojo_base::BigBuffer(
content_handle->GetSharedLockedResource().AsSpan())));
+
+ scoped_trace.AddStep("End read");
std::move(completion_closure).Run();
},
- buffer_state_, std::move(callback)));
+ buffer_state_, std::move(callback), std::move(scoped_trace)));
task->Enqueue();
}
void TensorImplTflite::WriteTensorImpl(mojo_base::BigBuffer src_buffer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
+
+ ScopedTrace scoped_trace("TensorImplTflite::WriteTensorImpl");
+
// Take an exclusive lock to the buffer contents while reading.
std::vector<scoped_refptr<QueueableResourceStateBase>> exclusive_resources = {
buffer_state_};
+ scoped_trace.AddStep("Wait for tensor");
auto task = base::MakeRefCounted<ResourceTask>(
/*shared_resources=*/std::vector<
scoped_refptr<QueueableResourceStateBase>>(),
@@ -96,16 +108,19 @@
base::BindOnce(
[](scoped_refptr<QueueableResourceState<BufferContent>>
content_handle,
- mojo_base::BigBuffer src_buffer,
+ mojo_base::BigBuffer src_buffer, ScopedTrace scoped_trace,
base::OnceClosure completion_closure) {
+ scoped_trace.AddStep("Begin write");
// Memory copies are fast, avoid the overhead of posting a task to
// the thread pool and do the work synchronously.
content_handle->GetExclusivelyLockedResource()
->AsSpan()
.copy_prefix_from(src_buffer);
+
+ scoped_trace.AddStep("End write");
std::move(completion_closure).Run();
},
- buffer_state_, std::move(src_buffer)));
+ buffer_state_, std::move(src_buffer), std::move(scoped_trace)));
task->Enqueue();
}