// // Copyright © 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include #include #include #include "Activation.hpp" #include "ArgMinMax.hpp" #include "BatchMatMul.hpp" #include "BatchSpace.hpp" #include "Comparison.hpp" #include "Convolution.hpp" #include "Control.hpp" #include "ElementwiseBinary.hpp" #include "ElementwiseUnary.hpp" #include "Fill.hpp" #include "FullyConnected.hpp" #include "Gather.hpp" #include "GatherNd.hpp" #include "LogicalBinary.hpp" #include "Lstm.hpp" #include "Normalization.hpp" #include "Pack.hpp" #include "Pad.hpp" #include "Pooling.hpp" #include "Prelu.hpp" #include "Quantization.hpp" #include "Redefine.hpp" #include "Reduce.hpp" #include "Resize.hpp" #include "Round.hpp" #include "Shape.hpp" #include "Slice.hpp" #include "StridedSlice.hpp" #include "Softmax.hpp" #include "SpaceDepth.hpp" #include "Split.hpp" #include "Transpose.hpp" #include "UnidirectionalSequenceLstm.hpp" #include "Unpack.hpp" #include #include #include #include #include #include #include #include #include #include #include namespace armnnOpaqueDelegate { const TfLiteStableDelegate TFL_TheStableDelegate = { /*delegate_abi_version=*/ TFL_STABLE_DELEGATE_ABI_VERSION, /*delegate_name=*/ "ArmnnDelegatePlugin", /*delegate_version=*/ "1.0.0", /*delegate_plugin=*/ GetArmnnDelegatePluginApi() }; ArmnnOpaqueDelegate::ArmnnOpaqueDelegate(armnnDelegate::DelegateOptions options) : m_Options(std::move(options)) { // Configures logging for ARMNN if (m_Options.IsLoggingEnabled()) { armnn::ConfigureLogging(true, true, m_Options.GetLoggingSeverity()); } // Create/Get the static ArmNN Runtime. Note that the m_Runtime will be shared by all armnn_delegate // instances so the RuntimeOptions cannot be altered for different armnn_delegate instances. m_Runtime = GetRuntime(m_Options.GetRuntimeOptions()); std::vector backends; if (m_Runtime) { const armnn::BackendIdSet supportedDevices = m_Runtime->GetDeviceSpec().GetSupportedBackends(); for (auto& backend : m_Options.GetBackends()) { if (std::find(supportedDevices.cbegin(), supportedDevices.cend(), backend) == supportedDevices.cend()) { TFLITE_LOG_PROD(tflite::TFLITE_LOG_INFO, "TfLiteArmnnDelegate: Requested unknown backend %s", backend.Get().c_str()); } else { backends.push_back(backend); } } } if (backends.empty()) { // No known backend specified throw armnn::InvalidArgumentException("TfLiteArmnnOpaqueDelegate: No known backend specified."); } m_Options.SetBackends(backends); TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "TfLiteArmnnOpaqueDelegate: Created TfLite ArmNN delegate."); } TfLiteStatus DoPrepare(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueDelegate* tfLiteDelegate) { TfLiteIntArray* supportedOperators = static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*> (tfLiteDelegate->data_)->IdentifyOperatorsToDelegate(tfLiteContext); if(supportedOperators == nullptr) { return kTfLiteError; } // ArmNN Opaque Delegate Registration TfLiteRegistrationExternal* kernelRegistration = TfLiteRegistrationExternalCreate(kTfLiteBuiltinDelegate, "TfLiteArmNNOpaqueDelegate", /*version=*/1); if(kernelRegistration == nullptr) { return kTfLiteError; } TfLiteRegistrationExternalSetInit( kernelRegistration, [](TfLiteOpaqueContext* tfLiteContext, const char* buffer, size_t length) -> void* { armnn::IgnoreUnused(length); const TfLiteOpaqueDelegateParams* parameters = reinterpret_cast(buffer); if(parameters == nullptr) { TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get parameters."); return nullptr; } return static_cast( ArmnnSubgraph::Create(tfLiteContext, parameters, static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>( parameters->delegate->data_))); } ); TfLiteRegistrationExternalSetFree( kernelRegistration, [](TfLiteOpaqueContext* tfLiteContext, void* buffer) -> void { armnn::IgnoreUnused(tfLiteContext); if (buffer != nullptr) { delete static_cast(buffer); } } ); TfLiteRegistrationExternalSetPrepare( kernelRegistration, [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus { void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode); if (userData == nullptr) { return kTfLiteError; } return static_cast(userData)->Prepare(tfLiteContext); } ); TfLiteRegistrationExternalSetInvoke( kernelRegistration, [](TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) -> TfLiteStatus { void* userData = TfLiteOpaqueNodeGetUserData(tfLiteNode); if (userData == nullptr) { return kTfLiteError; } return static_cast(userData)->Invoke(tfLiteContext, tfLiteNode); } ); const TfLiteStatus status = TfLiteOpaqueContextReplaceNodeSubsetsWithDelegateKernels( tfLiteContext, kernelRegistration, supportedOperators, tfLiteDelegate); TfLiteIntArrayFree(supportedOperators); return status; } TfLiteOpaqueDelegate* TfLiteArmnnOpaqueDelegateCreate(const void* settings) { // This method will always create Opaque Delegate with default settings until // we have a DelegateOptions Constructor which can parse the void* settings armnn::IgnoreUnused(settings); auto options = TfLiteArmnnDelegateOptionsDefault(); auto* armnnDelegate = new ::armnnOpaqueDelegate::ArmnnOpaqueDelegate(options); return TfLiteOpaqueDelegateCreate(armnnDelegate->GetDelegateBuilder()); } ::armnnDelegate::DelegateOptions TfLiteArmnnDelegateOptionsDefault() { ::armnnDelegate::DelegateOptions options(armnn::Compute::CpuRef); return options; } void TfLiteArmnnOpaqueDelegateDelete(TfLiteOpaqueDelegate* tfLiteDelegate) { if (tfLiteDelegate != nullptr) { delete static_cast<::armnnOpaqueDelegate::ArmnnOpaqueDelegate*>(TfLiteOpaqueDelegateGetData(tfLiteDelegate)); TfLiteOpaqueDelegateDelete(tfLiteDelegate); } } const TfLiteOpaqueDelegatePlugin* GetArmnnDelegatePluginApi() { static constexpr TfLiteOpaqueDelegatePlugin armnnPlugin{ TfLiteArmnnOpaqueDelegateCreate, TfLiteArmnnOpaqueDelegateDelete, TfLiteArmnnOpaqueDelegateErrno}; return &armnnPlugin; } const std::string ArmnnOpaqueDelegate::GetVersion() { return OPAQUE_DELEGATE_VERSION; } TfLiteIntArray* ArmnnOpaqueDelegate::IdentifyOperatorsToDelegate(TfLiteOpaqueContext* tfLiteContext) { TfLiteIntArray* executionPlan = nullptr; if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk) { TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get graph execution plan."); return nullptr; } // Delegate data with null network DelegateData delegateData(m_Options.GetBackends()); TfLiteIntArray* nodesToDelegate = TfLiteIntArrayCreate(executionPlan->size); if (nodesToDelegate == nullptr) { TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to create int array from execution plan."); return nullptr; } nodesToDelegate->size = 0; std::set unsupportedOperators; for (int i = 0; i < executionPlan->size; ++i) { const int nodeIndex = executionPlan->data[i]; // If TfLiteOpaqueNodes can be delegated to ArmNN TfLiteOpaqueNode* tfLiteNode = nullptr; TfLiteRegistrationExternal* tfLiteRegistration = nullptr; if (TfLiteOpaqueContextGetNodeAndRegistration( tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk) { TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "TfLiteArmnnOpaqueDelegate: Unable to get node and registration for node %d.", nodeIndex); continue; } TfLiteStatus visitStatus; try { visitStatus = ArmnnSubgraph::VisitNode( delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex); } catch(std::exception& ex) { ARMNN_LOG(error) << "ArmNN Failed to visit node with error: " << ex.what(); visitStatus = kTfLiteError; } if (visitStatus != kTfLiteOk) { // node is not supported by ArmNN unsupportedOperators.insert(TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration)); continue; } nodesToDelegate->data[nodesToDelegate->size++] = nodeIndex; } for (std::set::iterator it=unsupportedOperators.begin(); it!=unsupportedOperators.end(); ++it) { TF_LITE_OPAQUE_KERNEL_LOG(tfLiteContext, "Operator %s [%d] is not supported by armnn_opaque_delegate.", tflite::EnumNameBuiltinOperator(tflite::BuiltinOperator(*it)), *it); } if (!unsupportedOperators.empty() && m_Options.TfLiteRuntimeFallbackDisabled()) { std::stringstream exMessage; exMessage << "TfLiteArmnnOpaqueDelegate: There are unsupported operators in the model. "; exMessage << "Not falling back to TfLite Runtime as fallback is disabled. "; exMessage << "This should only be disabled under test conditions."; throw armnn::Exception(exMessage.str()); } if (nodesToDelegate->size == 0) { ARMNN_LOG(info) << "No operators in this model are supported by the Arm NN TfLite delegate." << " The model will be executed entirely by TfLite runtime."; } std::sort(&nodesToDelegate->data[0], &nodesToDelegate->data[nodesToDelegate->size]); return nodesToDelegate; } TfLiteStatus ArmnnSubgraph::AddInputLayer(DelegateData& delegateData, TfLiteOpaqueContext* tfLiteContext, const TfLiteIntArray* inputs, std::vector& inputBindings) { const size_t numInputs = static_cast(inputs->size); for (unsigned int i = 0; i < numInputs; ++i) { const int32_t tensorId = inputs->data[i]; const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId); if(!tensor) { return kTfLiteError; } // Do not create bindings for constant inputs if (TfLiteOpaqueTensorGetAllocationType(tensor) == kTfLiteMmapRo) { continue; } auto bindingId = static_cast((tensorId)); armnn::IConnectableLayer* layer = delegateData.m_Network->AddInputLayer(bindingId); auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor); armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); outputSlot.SetTensorInfo(tensorInfo); // Store for creating connections delegateData.m_OutputSlotForNode[static_cast(tensorId)] = &outputSlot; inputBindings.push_back(std::make_pair(bindingId, tensorInfo)); } return kTfLiteOk; } TfLiteStatus ArmnnSubgraph::AddOutputLayer(DelegateData& delegateData, TfLiteOpaqueContext* tfLiteContext, const TfLiteIntArray* outputs, std::vector& outputBindings) { const size_t numOutputs = static_cast(outputs->size); for (unsigned int i = 0; i < numOutputs; ++i) { const int32_t tensorId = outputs->data[i]; const TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, tensorId); if(!tensor) { return kTfLiteError; } auto bindingId = static_cast((tensorId)); armnn::IConnectableLayer* layer = delegateData.m_Network->AddOutputLayer(bindingId); auto tensorInfo = GetTensorInfoForTfLiteOpaqueTensor(tensor); ARMNN_ASSERT(delegateData.m_OutputSlotForNode[static_cast(tensorId)] != nullptr); delegateData.m_OutputSlotForNode[static_cast(tensorId)]->Connect(layer->GetInputSlot(0)); outputBindings.push_back(std::make_pair(bindingId, tensorInfo)); } return kTfLiteOk; } ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteOpaqueContext* tfLiteContext, const TfLiteOpaqueDelegateParams* parameters, const ArmnnOpaqueDelegate* delegate) { const auto startTime = armnn::GetTimeNow(); ARMNN_LOG(info) << "ArmnnSubgraph creation"; TfLiteIntArray* executionPlan; if (TfLiteOpaqueContextGetExecutionPlan(tfLiteContext, &executionPlan) != kTfLiteOk) { return nullptr; } // Initialize DelegateData holds network and output slots information DelegateData delegateData(delegate->m_Options.GetBackends()); // Build ArmNN Network armnn::NetworkOptions networkOptions = delegate->m_Options.GetOptimizerOptions().m_ModelOptions; armnn::NetworkId networkId; delegateData.m_Network = armnn::INetwork::Create(networkOptions); delegateData.m_OutputSlotForNode = std::vector( TfLiteOpaqueContextGetNumTensors(tfLiteContext), nullptr); std::vector inputBindings; std::vector outputBindings; // Add input layer auto status = AddInputLayer(delegateData, tfLiteContext, parameters->input_tensors, inputBindings); if (status != kTfLiteOk) { throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Inputs to the network!"); } // Parse TfLite delegate nodes to ArmNN const auto parseStartTime = armnn::GetTimeNow(); for (int i = 0; i < parameters->nodes_to_replace->size; ++i) { const int nodeIndex = parameters->nodes_to_replace->data[i]; TfLiteOpaqueNode* tfLiteNode = nullptr; TfLiteRegistrationExternal* tfLiteRegistration = nullptr; if (TfLiteOpaqueContextGetNodeAndRegistration( tfLiteContext, nodeIndex, &tfLiteNode, &tfLiteRegistration) != kTfLiteOk) { throw armnn::Exception(&"TfLiteArmnnOpaqueDelegate: Unable to get node registration: " [ nodeIndex]); } if (VisitNode(delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk) { throw armnn::Exception(&"TfLiteArmnnOpaqueDelegate: Unable to parse node: " [ nodeIndex]); } } ARMNN_LOG(info) << "Parse nodes to ArmNN time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(parseStartTime).count() << " ms"; // Add Output layer status = AddOutputLayer(delegateData, tfLiteContext, parameters->output_tensors, outputBindings); if (status != kTfLiteOk) { throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to add Outputs to the network!"); } // Optimize ArmNN network armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr); try { const auto optimizeStartTime = armnn::GetTimeNow(); optNet = armnn::Optimize(*(delegateData.m_Network.get()), delegate->m_Options.GetBackends(), delegate->m_Runtime->GetDeviceSpec(), delegate->m_Options.GetOptimizerOptions()); ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms"; } catch (std::exception& ex) { std::stringstream exMessage; exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from optimize."; throw armnn::Exception(exMessage.str()); } if (!optNet) { // Optimize failed throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to optimize the network!"); } // If set, we will serialize the optimized model into a dot file. const std::string serializeToDotFile = delegate->m_Options.GetSerializeToDot(); if (!serializeToDotFile.empty()) { ARMNN_LOG(info) << "Writing graph to dot file: " << serializeToDotFile; fs::path filename = serializeToDotFile; std::fstream file(filename.c_str(), std::ios_base::out); optNet->SerializeToDot(file); } try { const auto loadStartTime = armnn::GetTimeNow(); // Load graph into runtime std::string errorMessage; armnn::Status loadingStatus; armnn::MemorySource inputSource = armnn::MemorySource::Undefined; armnn::MemorySource outputSource = armnn::MemorySource::Undefined; // There's a bit of an assumption here that the delegate will only support Malloc memory source. if (delegate->m_Options.GetOptimizerOptions().m_ImportEnabled) { inputSource = armnn::MemorySource::Malloc; } if (delegate->m_Options.GetOptimizerOptions().m_ExportEnabled) { outputSource = armnn::MemorySource::Malloc; } armnn::INetworkProperties networkProperties(false, inputSource, outputSource, delegate->m_Options.GetInternalProfilingState(), delegate->m_Options.GetInternalProfilingDetail()); loadingStatus = delegate->m_Runtime->LoadNetwork(networkId, std::move(optNet), errorMessage, networkProperties); if (loadingStatus != armnn::Status::Success) { // Network load failed. throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Network could not be loaded: " + errorMessage); } ARMNN_LOG(info) << "Load ArmnnSubgraph time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(loadStartTime).count() << " ms"; } catch (std::exception& ex) { std::stringstream exMessage; exMessage << "TfLiteArmnnOpaqueDelegate: Exception (" << ex.what() << ") caught from LoadNetwork."; throw armnn::Exception(exMessage.str()); } // Register debug callback function if (delegate->m_Options.GetDebugCallbackFunction().has_value()) { delegate->m_Runtime->RegisterDebugCallback(networkId, delegate->m_Options.GetDebugCallbackFunction().value()); } ARMNN_LOG(info) << "Overall ArmnnSubgraph creation time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms\n"; // Create a new SubGraph with networkId and runtime return new ArmnnSubgraph(networkId, delegate->m_Runtime, inputBindings, outputBindings); } TfLiteStatus ArmnnSubgraph::Prepare(TfLiteOpaqueContext* tfLiteContext) { armnn::IgnoreUnused(tfLiteContext); return kTfLiteOk; } TfLiteStatus ArmnnSubgraph::Invoke(TfLiteOpaqueContext* tfLiteContext, TfLiteOpaqueNode* tfLiteNode) { // Prepare inputs armnn::InputTensors inputTensors; size_t inputIndex = 0; const int* inputs; int numInputs; if(TfLiteOpaqueNodeInputs(tfLiteNode, &inputs, &numInputs) != kTfLiteOk) { throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph inputs!"); } for (int inputIdx = 0; inputIdx < numInputs; inputIdx++) { TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, inputs[inputIdx]); if(!tensor) { return kTfLiteError; } if (TfLiteOpaqueTensorGetAllocationType(tensor) != kTfLiteMmapRo) { const armnn::BindingPointInfo& inputBinding = m_InputBindings[inputIndex]; armnn::TensorInfo inputTensorInfo = inputBinding.second; inputTensorInfo.SetConstant(true); const armnn::ConstTensor inputTensor(inputTensorInfo, TfLiteOpaqueTensorData(tensor)); inputTensors.emplace_back(inputIdx, inputTensor); ++inputIndex; } } // Prepare outputs armnn::OutputTensors outputTensors; size_t outputIndex = 0; const int* outputs; int numOutputs; if(TfLiteOpaqueNodeOutputs(tfLiteNode, &outputs, &numOutputs) != kTfLiteOk) { throw armnn::Exception("TfLiteArmnnOpaqueDelegate: Unable to load subgraph outputs!"); } for (int outputIdx = 0; outputIdx < numOutputs; outputIdx++) { const armnn::BindingPointInfo& outputBinding = m_OutputBindings[outputIndex]; TfLiteOpaqueTensor* tensor = TfLiteOpaqueContextGetOpaqueTensor(tfLiteContext, outputs[outputIdx]); if(!tensor) { return kTfLiteError; } const armnn::Tensor outputTensor(outputBinding.second, TfLiteOpaqueTensorData(tensor)); outputTensors.emplace_back(outputIdx, outputTensor); ++outputIndex; } // Run graph auto status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); // The delegate holds its own Arm NN runtime so this is our last chance to print internal profiling data. std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); if (profiler && profiler->IsProfilingEnabled()) { profiler->Print(std::cout); } return (status == armnn::Status::Success) ? kTfLiteOk : kTfLiteError; } TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData, TfLiteOpaqueContext* tfLiteContext, TfLiteRegistrationExternal* tfLiteRegistration, TfLiteOpaqueNode* tfLiteNode, int nodeIndex) { switch (TfLiteRegistrationExternalGetBuiltInCode(tfLiteRegistration)) { default: return kTfLiteError; } } } // armnnOpaqueDelegate namespace