diff options
author | Teresa Charlin <teresa.charlinreyes@arm.com> | 2023-08-17 18:44:58 +0100 |
---|---|---|
committer | Teresa Charlin <teresa.charlinreyes@arm.com> | 2023-08-28 12:37:25 +0100 |
commit | 9145e38edf49fa4862008c163c34590141eecb14 (patch) | |
tree | 64706ef579f548b804d5b674b33f6b239c638d0f /src/backends/neon | |
parent | e40cc8359b02a7786908294300c45b672cf6b0e4 (diff) | |
download | armnn-9145e38edf49fa4862008c163c34590141eecb14.tar.gz |
IVGCVSW-7505 Create FusedLayer and NeonFusedWorkload for AddMulAdd Neon kernel
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ic778d35b001474b44fb1e433a6fe276e4ec9f565
Diffstat (limited to 'src/backends/neon')
-rw-r--r-- | src/backends/neon/NeonLayerSupport.cpp | 54 | ||||
-rw-r--r-- | src/backends/neon/NeonLayerSupport.hpp | 5 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.cpp | 5 | ||||
-rw-r--r-- | src/backends/neon/backend.mk | 1 | ||||
-rw-r--r-- | src/backends/neon/test/NeonLayerTests.cpp | 7 | ||||
-rw-r--r-- | src/backends/neon/workloads/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonFusedWorkload.cpp | 115 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonFusedWorkload.hpp | 35 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonWorkloads.hpp | 1 |
9 files changed, 212 insertions, 13 deletions
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index b491ba8493..ef1d21835a 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -4,7 +4,6 @@ // #include "NeonLayerSupport.hpp" -#include "NeonBackendId.hpp" #include "NeonBackendModelContext.hpp" #include <armnn/Exceptions.hpp> @@ -12,7 +11,6 @@ #include <armnn/Types.hpp> #include <armnn/BackendRegistry.hpp> -#include <InternalTypes.hpp> #include <LayerSupportCommon.hpp> #include <armnn/utility/IgnoreUnused.hpp> #include <armnn/utility/PolymorphicDowncast.hpp> @@ -39,8 +37,13 @@ #include "workloads/NeonDepthToSpaceWorkload.hpp" #include "workloads/NeonDepthwiseConvolutionWorkload.hpp" #include "workloads/NeonDequantizeWorkload.hpp" +#include "workloads/NeonDivisionWorkload.hpp" #include "workloads/NeonElementwiseBinaryWorkload.hpp" #include "workloads/NeonExpWorkload.hpp" +#include "workloads/NeonFullyConnectedWorkload.hpp" +#include "workloads/NeonFusedWorkload.hpp" +#include "workloads/NeonGatherWorkload.hpp" +#include "workloads/NeonGatherNdWorkload.hpp" #include "workloads/NeonInstanceNormalizationWorkload.hpp" #include "workloads/NeonL2NormalizationFloatWorkload.hpp" #include "workloads/NeonLogWorkload.hpp" @@ -53,12 +56,8 @@ #include "workloads/NeonMeanWorkload.hpp" #include "workloads/NeonMinimumWorkload.hpp" #include "workloads/NeonMultiplicationWorkload.hpp" -#include "workloads/NeonDivisionWorkload.hpp" #include "workloads/NeonNegWorkload.hpp" #include "workloads/NeonNormalizationFloatWorkload.hpp" -#include "workloads/NeonFullyConnectedWorkload.hpp" -#include "workloads/NeonGatherWorkload.hpp" -#include "workloads/NeonGatherNdWorkload.hpp" #include "workloads/NeonPadWorkload.hpp" #include "workloads/NeonPermuteWorkload.hpp" #include "workloads/NeonPooling2dWorkload.hpp" @@ -128,13 +127,13 @@ bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported, { return IsNeonBackendSupported(reasonIfUnsupported) && IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - floatFuncPtr, - floatFuncPtr, - uint8FuncPtr, - &FalseFunc<>, - &FalseFunc<>, - std::forward<Params>(params)...); + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + &FalseFunc<>, + &FalseFunc<>, + std::forward<Params>(params)...); } #if defined(ARMCOMPUTENEON_ENABLED) @@ -430,6 +429,22 @@ bool IsLayerTypeSupported(const LayerType& type, *(PolymorphicDowncast<const FullyConnectedDescriptor*>(&descriptor)), reasonIfUnsupported); + case LayerType::Fused: + { + auto fusedDescriptor = *(PolymorphicDowncast<const FusedDescriptor*>(&descriptor)); + if (fusedDescriptor.m_NumInputSlots + fusedDescriptor.m_NumOutputSlots != infos.size()) + { + throw InvalidArgumentException("Invalid number of FusedLayer TensorInfos."); + } + + std::vector<TensorInfo> inputInfos(infos.begin(), infos.begin() + fusedDescriptor.m_NumInputSlots); + std::vector<TensorInfo> outputInfos(infos.begin() + fusedDescriptor.m_NumInputSlots, infos.end()); + + return support.IsFusedSupported({inputInfos.begin(), inputInfos.end()}, + {outputInfos.begin(), outputInfos.end()}, + fusedDescriptor, + reasonIfUnsupported); + } case LayerType::Gather: return support.IsGatherSupported(infos[0], infos[1], @@ -1155,6 +1170,19 @@ bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input, nullptr); } +bool NeonLayerSupport::IsFusedSupported(const std::vector<std::reference_wrapper<TensorInfo>>& inputs, + const std::vector<std::reference_wrapper<TensorInfo>>& outputs, + const FusedDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFusedWorkloadValidate, + reasonIfUnsupported, + inputs, + outputs, + descriptor, + nullptr); +} + bool NeonLayerSupport::IsGatherSupported(const TensorInfo& input0, const TensorInfo& input1, const TensorInfo& output, diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp index 4bc96acd30..0295c2b3e2 100644 --- a/src/backends/neon/NeonLayerSupport.hpp +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -151,6 +151,11 @@ public: const FullyConnectedDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsFusedSupported(const std::vector<std::reference_wrapper<TensorInfo>>& inputs, + const std::vector<std::reference_wrapper<TensorInfo>>& outputs, + const FusedDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const; + bool IsGatherNdSupported(const TensorInfo& input0, const TensorInfo& input1, const TensorInfo& output, diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index e3411de254..4f131ac575 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -400,6 +400,11 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type, info, m_MemoryManager->GetIntraLayerManager()); } + case LayerType::Fused : + { + auto fusedQueueDescriptor = PolymorphicDowncast<const FusedQueueDescriptor*>(&descriptor); + return std::make_unique<NeonFusedWorkload>(*fusedQueueDescriptor, info); + } case LayerType::Gather : { auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor); diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 2c91d1491d..3961ed1e34 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -48,6 +48,7 @@ BACKEND_SOURCES := \ workloads/NeonFillWorkload.cpp \ workloads/NeonFloorFloatWorkload.cpp \ workloads/NeonFullyConnectedWorkload.cpp \ + workloads/NeonFusedWorkload.cpp \ workloads/NeonGatherWorkload.cpp \ workloads/NeonGatherNdWorkload.cpp \ workloads/NeonInstanceNormalizationWorkload.cpp \ diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp index 588c90be6d..c9dd1ff507 100644 --- a/src/backends/neon/test/NeonLayerTests.cpp +++ b/src/backends/neon/test/NeonLayerTests.cpp @@ -1724,6 +1724,13 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Te // Convert from Float32 to Float16 ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) +// AddMulAdd +ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsFloat32, AddMulAddTest<DataType::Float32>, true) +ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsUint8, AddMulAddTest<DataType::QAsymmU8>, true) + +ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputFloat32, AddMulAddTest<DataType::Float32>, false) +ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputUint8, AddMulAddTest<DataType::QAsymmU8>, false) + #if defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index 2cb2ccf385..f4438e4baa 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -54,6 +54,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonFloorFloatWorkload.hpp NeonFullyConnectedWorkload.cpp NeonFullyConnectedWorkload.hpp + NeonFusedWorkload.cpp + NeonFusedWorkload.hpp NeonGatherWorkload.cpp NeonGatherWorkload.hpp NeonGatherNdWorkload.cpp diff --git a/src/backends/neon/workloads/NeonFusedWorkload.cpp b/src/backends/neon/workloads/NeonFusedWorkload.cpp new file mode 100644 index 0000000000..f770f46c81 --- /dev/null +++ b/src/backends/neon/workloads/NeonFusedWorkload.cpp @@ -0,0 +1,115 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFusedWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <aclCommon/ArmComputeTensorUtils.hpp> +#include <aclCommon/ArmComputeUtils.hpp> + +#include <armnn/utility/PolymorphicDowncast.hpp> +#include <armnn/backends/TensorHandle.hpp> + +#include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h> + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos, + const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos, + const FusedDescriptor& fusedDescriptor, + const ActivationDescriptor* activationDescriptor) +{ + std::vector<arm_compute::TensorInfo> actInputInfos; + actInputInfos.reserve(inputInfos.size()); + for (size_t i = 0u; i < inputInfos.size(); ++i) + { + actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i])); + } + + std::vector<arm_compute::TensorInfo> actOutputInfos; + actOutputInfos.reserve(outputInfos.size()); + for (size_t i = 0u; i < outputInfos.size(); ++i) + { + actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i])); + } + + const arm_compute::ActivationLayerInfo activationInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor); + + switch (fusedDescriptor.m_FusedKernelType) + { + case FusedKernelType::AddMulAdd: + return arm_compute::NEAddMulAdd::validate( + &actInputInfos[0], + &actInputInfos[1], + &actInputInfos[2], // bn_mul + &actInputInfos[3], // bn_add + actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output + actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output + arm_compute::ConvertPolicy::SATURATE, + activationInfo); + default: + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "NeonFusedWorkloadValidate: no valid kernel type"}; + } +} + + +NeonFusedWorkload::NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info) + : NeonBaseWorkload<FusedQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFusedWorkload", + static_cast<unsigned int>(info.m_InputTensorInfos.size()), + static_cast<unsigned int>(info.m_OutputTensorInfos.size())); + + std::vector<arm_compute::ITensor*> inputs; + inputs.reserve(info.m_InputTensorInfos.size()); + for (auto input : m_Data.m_Inputs) + { + inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor()); + } + + std::vector<arm_compute::ITensor*> outputs; + outputs.reserve(info.m_OutputTensorInfos.size()); + for (auto output : m_Data.m_Outputs) + { + outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor()); + } + + const arm_compute::ActivationLayerInfo activationInfo = + ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + + switch (descriptor.m_Parameters.m_FusedKernelType) + { + case FusedKernelType::AddMulAdd: + { + auto layer = std::make_unique<arm_compute::NEAddMulAdd>(); + layer->configure(inputs[0], + inputs[1], + inputs[2], // bn_mul + inputs[3], // bn_add + outputs.size() == 1 ? nullptr : outputs[0], // add_output + outputs.size() == 1 ? outputs[0] : outputs[1], // final_output + arm_compute::ConvertPolicy::SATURATE, + activationInfo); + m_FusedLayer.reset(layer.release()); + break; + } + default: + throw Exception("NeonFusedWorkload: no valid kernel type."); + } +} + +void NeonFusedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFusedWorkload_Execute", this->GetGuid()); + m_FusedLayer->run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonFusedWorkload.hpp b/src/backends/neon/workloads/NeonFusedWorkload.hpp new file mode 100644 index 0000000000..aaabf61560 --- /dev/null +++ b/src/backends/neon/workloads/NeonFusedWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseWorkload.hpp" + +#include <arm_compute/core/Error.h> +#include <arm_compute/core/Types.h> +#include <arm_compute/runtime/IFunction.h> + +namespace armnn +{ + +arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos, + const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos, + const FusedDescriptor& fusedDescriptor, + const ActivationDescriptor* activationDescriptor = nullptr); + +class NeonFusedWorkload : public NeonBaseWorkload<FusedQueueDescriptor> +{ +public: + NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr<arm_compute::IFunction> m_FusedLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index b72f3bb703..615e5d87c8 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -30,6 +30,7 @@ #include "NeonFillWorkload.hpp" #include "NeonFloorFloatWorkload.hpp" #include "NeonFullyConnectedWorkload.hpp" +#include "NeonFusedWorkload.hpp" #include "NeonGatherWorkload.hpp" #include "NeonGatherNdWorkload.hpp" #include "NeonInstanceNormalizationWorkload.hpp" |