//
// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonFusedWorkload.hpp"
#include "NeonWorkloadUtils.hpp"

#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnn/backends/TensorHandle.hpp>

#include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h>

namespace armnn
{

using namespace armcomputetensorutils;

arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos,
                                              const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
                                              const FusedDescriptor& fusedDescriptor,
                                              const ActivationDescriptor* activationDescriptor)
{
    // Convert the ArmNN tensor infos into ACL tensor infos before validation.
    std::vector<arm_compute::TensorInfo> actInputInfos;
    actInputInfos.reserve(inputInfos.size());
    for (size_t i = 0u; i < inputInfos.size(); ++i)
    {
        actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
    }

    std::vector<arm_compute::TensorInfo> actOutputInfos;
    actOutputInfos.reserve(outputInfos.size());
    for (size_t i = 0u; i < outputInfos.size(); ++i)
    {
        actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
    }

    const arm_compute::ActivationLayerInfo activationInfo =
            ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);

    switch (fusedDescriptor.m_FusedKernelType)
    {
        case FusedKernelType::AddMulAdd:
            // With a single output the intermediate add result is not kept (add_output == nullptr).
            return arm_compute::NEAddMulAdd::validate(
                            &actInputInfos[0],
                            &actInputInfos[1],
                            &actInputInfos[2],                                                     // bn_mul
                            &actInputInfos[3],                                                     // bn_add
                            actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0],             // add_output
                            actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1],  // final_output
                            arm_compute::ConvertPolicy::SATURATE,
                            activationInfo);
        default:
            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                       "NeonFusedWorkloadValidate: no valid kernel type"};
    }
}

NeonFusedWorkload::NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info)
    : NeonBaseWorkload<FusedQueueDescriptor>(descriptor, info)
{
    m_Data.ValidateInputsOutputs("NeonFusedWorkload",
                                 static_cast<unsigned int>(info.m_InputTensorInfos.size()),
                                 static_cast<unsigned int>(info.m_OutputTensorInfos.size()));

    // Gather the underlying ACL tensors from the ArmNN tensor handles.
    std::vector<arm_compute::ITensor*> inputs;
    inputs.reserve(info.m_InputTensorInfos.size());
    for (auto input : m_Data.m_Inputs)
    {
        inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor());
    }

    std::vector<arm_compute::ITensor*> outputs;
    outputs.reserve(info.m_OutputTensorInfos.size());
    for (auto output : m_Data.m_Outputs)
    {
        outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor());
    }

    const arm_compute::ActivationLayerInfo activationInfo =
            ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);

    switch (descriptor.m_Parameters.m_FusedKernelType)
    {
        case FusedKernelType::AddMulAdd:
        {
            auto layer = std::make_unique<arm_compute::NEAddMulAdd>();
            layer->configure(inputs[0],
                             inputs[1],
                             inputs[2],                                       // bn_mul
                             inputs[3],                                       // bn_add
                             outputs.size() == 1 ? nullptr : outputs[0],      // add_output
                             outputs.size() == 1 ? outputs[0] : outputs[1],   // final_output
                             arm_compute::ConvertPolicy::SATURATE,
                             activationInfo);
            m_FusedLayer.reset(layer.release());
            break;
        }
        default:
            throw Exception("NeonFusedWorkload: no valid kernel type.");
    }
}

void NeonFusedWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFusedWorkload_Execute", this->GetGuid());
    m_FusedLayer->run();
}

} //namespace armnn