| /* |
| * Copyright 2024 The ChromiumOS Authors |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "delegate/sample/async_kernel.h" |
| |
| #include <algorithm> |
| #include <vector> |
| |
| #include "absl/cleanup/cleanup.h" |
| #include "absl/synchronization/mutex.h" |
| #include "common/log.h" |
| #include "tensorflow/lite/core/async/c/task.h" |
| #include "tensorflow/lite/core/c/c_api_opaque.h" |
| #include "tensorflow/lite/delegates/utils/async_type_helpers.h" |
| |
| namespace tflite::cros { |
| |
| namespace { |
| |
| using delegates::utils::ReadBufferAttrs; |
| using delegates::utils::ReadSyncAttrs; |
| using delegates::utils::ScopedTfLiteAttrMap; |
| using delegates::utils::SyncType; |
| using delegates::utils::WriteBufferAttrs; |
| |
| } // namespace |
| |
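// `core` must outlive this kernel, which holds a reference to it.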
| CrosSampleDelegateAsyncKernel::CrosSampleDelegateAsyncKernel( |
| CrosSampleDelegateCore* core) |
| : core_(*core) {} |
| |
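// Registers an AHardwareBuffer-backed TFLite buffer under `handle`. A
// reference to the AHardwareBuffer is acquired and kept until the handle is
// unregistered.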
| TfLiteStatus CrosSampleDelegateAsyncKernel::RegisterBuffer( |
| TfLiteOpaqueContext* context, |
| TfLiteIoType io_type, |
| const TfLiteBackendBuffer* buffer, |
| const TfLiteAttributeMap* attrs, |
| TfLiteBufferHandle handle) { |
| absl::MutexLock lock(&mutex_); |
| auto ahwb = static_cast<AHardwareBuffer*>(TfLiteBackendBufferGetPtr(buffer)); |
| if (ahwb == nullptr) { |
| LOGF(ERROR) << "Got null AHardwareBuffer"; |
| return kTfLiteError; |
| } |
  // Hold a reference for as long as the buffer stays registered.
  AHardwareBuffer_acquire(ahwb);
  const bool inserted = registered_buffers_.emplace(handle, ahwb).second;
  if (!inserted) {
    LOGF(ERROR) << "Buffer handle " << handle << " is already registered";
    AHardwareBuffer_release(ahwb);
    return kTfLiteError;
  }
| return kTfLiteOk; |
| } |
| |
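// Registers a buffer slice backed by an already-registered buffer pool.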
| TfLiteStatus CrosSampleDelegateAsyncKernel::RegisterBufferSlice( |
| TfLiteOpaqueContext* context, |
| TfLiteBufferHandle buffer_pool, |
| const TfLiteAttributeMap* attrs, |
| TfLiteBufferHandle handle) { |
| // TODO(shik): Not supported yet. |
| return kTfLiteError; |
| } |
| |
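// Releases the AHardwareBuffer reference taken in RegisterBuffer() and
// forgets the handle.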
| TfLiteStatus CrosSampleDelegateAsyncKernel::UnregisterBuffer( |
| TfLiteOpaqueContext* context, |
| TfLiteBufferHandle handle) { |
| absl::MutexLock lock(&mutex_); |
| auto it = registered_buffers_.find(handle); |
| if (it == registered_buffers_.end()) { |
| LOGF(ERROR) << "Unknown handle " << handle; |
| return kTfLiteError; |
| } |
| AHardwareBuffer_release(it->second); |
| registered_buffers_.erase(it); |
| return kTfLiteOk; |
| } |
| |
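// Returns the buffer object types this kernel accepts. The same list is used
// for both inputs and outputs, so `io_type` is ignored.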
| const std::vector<const char*>& |
| CrosSampleDelegateAsyncKernel::SupportedBufferTypes( |
| TfLiteIoType io_type) const { |
| return supported_buffer_types_; |
| } |
| |
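// Returns the synchronization object types this kernel accepts, likewise
// independent of `io_type`.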
| const std::vector<const char*>& |
| CrosSampleDelegateAsyncKernel::SupportedSynchronizations( |
| TfLiteIoType io_type) const { |
| return supported_sync_types_; |
| } |
| |
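// Reconciles the user-provided attributes with this kernel's requirements.
// For buffer attributes, the requested size is raised to at least the
// tensor's byte size and the result is written to `merged`. For sync
// attributes, only kNoSyncObj is accepted since sync fences are not supported
// yet. `conflict` is never populated.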
| bool CrosSampleDelegateAsyncKernel::ReconcileRestrictions( |
| const TfLiteOpaqueContext* context, |
| const TfLiteOpaqueNode* node, |
| int tensor_index, |
| const TfLiteAttributeMap* user_provided_attributes, |
| TfLiteAttributeMap* merged, |
| TfLiteAttributeMap* conflict) const { |
| if (TfLiteAttributeMapIsBufferAttributeMap(user_provided_attributes)) { |
| auto attrs = ReadBufferAttrs(user_provided_attributes); |
| size_t tensor_byte_size = TfLiteOpaqueTensorByteSize( |
| TfLiteOpaqueContextGetOpaqueTensor(context, tensor_index)); |
| attrs.size = std::max(attrs.size.value_or(0), tensor_byte_size); |
| WriteBufferAttrs(attrs, merged); |
| } else if (TfLiteAttributeMapIsSyncAttributeMap(user_provided_attributes)) { |
| auto attrs = ReadSyncAttrs(user_provided_attributes); |
| if (attrs.sync_type.value_or(SyncType::kNoSyncObj) != |
| SyncType::kNoSyncObj) { |
| return false; |
| } |
| } else { |
| return false; |
| } |
| return true; |
| } |
| |
| TfLiteStatus CrosSampleDelegateAsyncKernel::SetAttributes( |
| TfLiteOpaqueContext* context, |
| TfLiteOpaqueNode* node, |
| int tensor_index, |
| const TfLiteAttributeMap* attrs) { |
  // No-op for now. We will need to store the sync information once we
  // support sync fences.
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus CrosSampleDelegateAsyncKernel::SetBufferAttributes( |
| const TfLiteBackendBuffer* buffer, |
| const TfLiteAttributeMap* attrs) { |
  // TODO(b/348328994): Implement this. This is an unimplemented stub for now
  // to unblock the TensorFlow uprev.
| return kTfLiteDelegateError; |
| } |
| |
| TfLiteStatus CrosSampleDelegateAsyncKernel::GetBufferAttributes( |
| const TfLiteBackendBuffer* buffer, |
| TfLiteAttributeMap* attrs) { |
  // TODO(b/348328994): Implement this. This is an unimplemented stub for now
  // to unblock the TensorFlow uprev.
| return kTfLiteDelegateError; |
| } |
| |
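// Forwards graph preparation to the delegate core.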
| TfLiteStatus CrosSampleDelegateAsyncKernel::Prepare( |
| TfLiteOpaqueContext* context, |
| TfLiteOpaqueNode* node) { |
| absl::MutexLock lock(&mutex_); |
| return core_.Prepare(); |
| } |
| |
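// Executes the delegated subgraph synchronously: locks every input and output
// AHardwareBuffer for CPU access, points the core's tensors at the mapped
// addresses, runs the core, and records the status so Wait() can report it.
// The buffers are unlocked again before returning.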
| TfLiteStatus CrosSampleDelegateAsyncKernel::Eval(TfLiteOpaqueContext* context, |
| TfLiteOpaqueNode* node, |
| TfLiteExecutionTask* task) { |
| absl::MutexLock lock(&mutex_); |
| |
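  // Track the locked buffers so they are unlocked on every return path.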
| std::vector<AHardwareBuffer*> locked_buffers; |
| absl::Cleanup unlock_buffers = [&] { |
| for (auto* buffer : locked_buffers) { |
| AHardwareBuffer_unlock(buffer, nullptr); |
| } |
| }; |
| |
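  // Feed each input tensor the CPU address of its registered buffer.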
| int num_inputs = 0; |
| const int* inputs = nullptr; |
| if (TfLiteOpaqueNodeInputs(node, &inputs, &num_inputs) != kTfLiteOk) { |
| return kTfLiteError; |
| } |
| for (int i = 0; i < num_inputs; ++i) { |
| auto tensor = TfLiteOpaqueNodeGetInput(context, node, i); |
    TfLiteBufferHandle handle =
        TfLiteExecutionTaskGetBufferByIndex(task, inputs[i]);
    auto it = registered_buffers_.find(handle);
    if (it == registered_buffers_.end()) {
      LOGF(ERROR) << "Unknown buffer handle " << handle << " for input " << i;
      return kTfLiteError;
    }
    AHardwareBuffer* buffer = it->second;
| |
| void* addr = nullptr; |
| if (AHardwareBuffer_lock(buffer, |
| AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | |
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, |
| /*fence=*/-1, /*rect=*/nullptr, &addr) != 0) { |
| LOGF(ERROR) << "Failed to lock AHardwareBuffer " << buffer; |
| return kTfLiteError; |
| } |
| locked_buffers.push_back(buffer); |
| |
| core_.SetExternalTensorMemory(tensor, addr); |
| } |
| |
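  // Do the same for the output tensors.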
| int num_outputs = 0; |
| const int* outputs = nullptr; |
| if (TfLiteOpaqueNodeOutputs(node, &outputs, &num_outputs) != kTfLiteOk) { |
| return kTfLiteError; |
| } |
| for (int i = 0; i < num_outputs; ++i) { |
| auto tensor = TfLiteOpaqueNodeGetOutput(context, node, i); |
    TfLiteBufferHandle handle =
        TfLiteExecutionTaskGetBufferByIndex(task, outputs[i]);
    auto it = registered_buffers_.find(handle);
    if (it == registered_buffers_.end()) {
      LOGF(ERROR) << "Unknown buffer handle " << handle << " for output " << i;
      return kTfLiteError;
    }
    AHardwareBuffer* buffer = it->second;
| |
| void* addr = nullptr; |
| if (AHardwareBuffer_lock(buffer, |
| AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | |
| AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, |
| /*fence=*/-1, /*rect=*/nullptr, &addr) != 0) { |
| LOGF(ERROR) << "Failed to lock AHardwareBuffer " << buffer; |
| return kTfLiteError; |
| } |
| locked_buffers.push_back(buffer); |
| |
| core_.SetExternalTensorMemory(tensor, addr); |
| } |
| |
| TfLiteStatus status = core_.Eval(); |
| task_status_map_.insert_or_assign(task, status); |
| return status; |
| } |
| |
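// Eval() runs synchronously, so waiting only needs to report the status
// recorded for the task.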
| TfLiteStatus CrosSampleDelegateAsyncKernel::Wait(TfLiteOpaqueContext* context, |
| TfLiteExecutionTask* task) { |
| absl::MutexLock lock(&mutex_); |
| auto it = task_status_map_.find(task); |
| if (it == task_status_map_.end()) { |
| LOGF(ERROR) << "Unknown task " << task; |
| return kTfLiteError; |
| } |
| return it->second; |
| } |
| |
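// Drops the per-task bookkeeping once the client is done with the task.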
| TfLiteStatus CrosSampleDelegateAsyncKernel::Finish(TfLiteOpaqueContext* context, |
| TfLiteExecutionTask* task) { |
| absl::MutexLock lock(&mutex_); |
| size_t erased = task_status_map_.erase(task); |
| return erased == 1 ? kTfLiteOk : kTfLiteError; |
| } |
| |
| } // namespace tflite::cros |