Diffstat (limited to 'src/backends/neon')
-rw-r--r--  src/backends/neon/NeonLayerSupport.cpp             |  54
-rw-r--r--  src/backends/neon/NeonLayerSupport.hpp             |   5
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.cpp          |   5
-rw-r--r--  src/backends/neon/backend.mk                       |   1
-rw-r--r--  src/backends/neon/test/NeonLayerTests.cpp          |   7
-rw-r--r--  src/backends/neon/workloads/CMakeLists.txt         |   2
-rw-r--r--  src/backends/neon/workloads/NeonFusedWorkload.cpp  | 115
-rw-r--r--  src/backends/neon/workloads/NeonFusedWorkload.hpp  |  35
-rw-r--r--  src/backends/neon/workloads/NeonWorkloads.hpp      |   1
9 files changed, 212 insertions(+), 13 deletions(-)
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index b491ba8493..ef1d21835a 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -4,7 +4,6 @@
//
#include "NeonLayerSupport.hpp"
-#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include <armnn/Exceptions.hpp>
@@ -12,7 +11,6 @@
#include <armnn/Types.hpp>
#include <armnn/BackendRegistry.hpp>
-#include <InternalTypes.hpp>
#include <LayerSupportCommon.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
@@ -39,8 +37,13 @@
#include "workloads/NeonDepthToSpaceWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonDequantizeWorkload.hpp"
+#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonElementwiseBinaryWorkload.hpp"
#include "workloads/NeonExpWorkload.hpp"
+#include "workloads/NeonFullyConnectedWorkload.hpp"
+#include "workloads/NeonFusedWorkload.hpp"
+#include "workloads/NeonGatherWorkload.hpp"
+#include "workloads/NeonGatherNdWorkload.hpp"
#include "workloads/NeonInstanceNormalizationWorkload.hpp"
#include "workloads/NeonL2NormalizationFloatWorkload.hpp"
#include "workloads/NeonLogWorkload.hpp"
@@ -53,12 +56,8 @@
#include "workloads/NeonMeanWorkload.hpp"
#include "workloads/NeonMinimumWorkload.hpp"
#include "workloads/NeonMultiplicationWorkload.hpp"
-#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonNegWorkload.hpp"
#include "workloads/NeonNormalizationFloatWorkload.hpp"
-#include "workloads/NeonFullyConnectedWorkload.hpp"
-#include "workloads/NeonGatherWorkload.hpp"
-#include "workloads/NeonGatherNdWorkload.hpp"
#include "workloads/NeonPadWorkload.hpp"
#include "workloads/NeonPermuteWorkload.hpp"
#include "workloads/NeonPooling2dWorkload.hpp"
@@ -128,13 +127,13 @@ bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported,
{
return IsNeonBackendSupported(reasonIfUnsupported) &&
IsSupportedForDataTypeGeneric(reasonIfUnsupported,
- dataType,
- floatFuncPtr,
- floatFuncPtr,
- uint8FuncPtr,
- &FalseFunc<>,
- &FalseFunc<>,
- std::forward<Params>(params)...);
+ dataType,
+ floatFuncPtr,
+ floatFuncPtr,
+ uint8FuncPtr,
+ &FalseFunc<>,
+ &FalseFunc<>,
+ std::forward<Params>(params)...);
}
#if defined(ARMCOMPUTENEON_ENABLED)
@@ -430,6 +429,22 @@ bool IsLayerTypeSupported(const LayerType& type,
*(PolymorphicDowncast<const FullyConnectedDescriptor*>(&descriptor)),
reasonIfUnsupported);
+ case LayerType::Fused:
+ {
+ auto fusedDescriptor = *(PolymorphicDowncast<const FusedDescriptor*>(&descriptor));
+ if (fusedDescriptor.m_NumInputSlots + fusedDescriptor.m_NumOutputSlots != infos.size())
+ {
+ throw InvalidArgumentException("Invalid number of FusedLayer TensorInfos.");
+ }
+
+ std::vector<TensorInfo> inputInfos(infos.begin(), infos.begin() + fusedDescriptor.m_NumInputSlots);
+ std::vector<TensorInfo> outputInfos(infos.begin() + fusedDescriptor.m_NumInputSlots, infos.end());
+
+ return support.IsFusedSupported({inputInfos.begin(), inputInfos.end()},
+ {outputInfos.begin(), outputInfos.end()},
+ fusedDescriptor,
+ reasonIfUnsupported);
+ }
case LayerType::Gather:
return support.IsGatherSupported(infos[0],
infos[1],
@@ -1155,6 +1170,19 @@ bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
nullptr);
}
+bool NeonLayerSupport::IsFusedSupported(const std::vector<std::reference_wrapper<TensorInfo>>& inputs,
+ const std::vector<std::reference_wrapper<TensorInfo>>& outputs,
+ const FusedDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFusedWorkloadValidate,
+ reasonIfUnsupported,
+ inputs,
+ outputs,
+ descriptor,
+ nullptr);
+}
+
bool NeonLayerSupport::IsGatherSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
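
The new LayerType::Fused case above splits the flat `infos` vector into input and output TensorInfos using the slot counts carried in the FusedDescriptor. A minimal sketch of querying this support for an AddMulAdd fusion, assuming four inputs (input0, input1, bn_mul, bn_add) and two outputs; all shapes and data types below are illustrative, not taken from this patch:

    armnn::FusedDescriptor desc;
    desc.m_NumInputSlots   = 4;
    desc.m_NumOutputSlots  = 2;
    desc.m_FusedKernelType = armnn::FusedKernelType::AddMulAdd;

    armnn::TensorInfo in0({2, 16}, armnn::DataType::Float32);
    armnn::TensorInfo in1({2, 16}, armnn::DataType::Float32);
    armnn::TensorInfo bnMul({16}, armnn::DataType::Float32);
    armnn::TensorInfo bnAdd({16}, armnn::DataType::Float32);
    armnn::TensorInfo addOut({2, 16}, armnn::DataType::Float32);   // intermediate add result
    armnn::TensorInfo finalOut({2, 16}, armnn::DataType::Float32); // final result

    std::vector<std::reference_wrapper<armnn::TensorInfo>> ins  = {in0, in1, bnMul, bnAdd};
    std::vector<std::reference_wrapper<armnn::TensorInfo>> outs = {addOut, finalOut};

    std::string reason;
    armnn::NeonLayerSupport support;
    bool ok = support.IsFusedSupported(ins, outs, desc, armnn::Optional<std::string&>(reason));
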
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 4bc96acd30..0295c2b3e2 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -151,6 +151,11 @@ public:
const FullyConnectedDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+ bool IsFusedSupported(const std::vector<std::reference_wrapper<TensorInfo>>& inputs,
+ const std::vector<std::reference_wrapper<TensorInfo>>& outputs,
+ const FusedDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
bool IsGatherNdSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index e3411de254..4f131ac575 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -400,6 +400,11 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type,
info,
m_MemoryManager->GetIntraLayerManager());
}
+ case LayerType::Fused :
+ {
+ auto fusedQueueDescriptor = PolymorphicDowncast<const FusedQueueDescriptor*>(&descriptor);
+ return std::make_unique<NeonFusedWorkload>(*fusedQueueDescriptor, info);
+ }
case LayerType::Gather :
{
auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
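
With this case in place, a FusedQueueDescriptor routed through the factory yields the new workload. A hedged sketch of the dispatch path (the factory, workloadInfo, and the ITensorHandle* population of m_Inputs/m_Outputs are assumed surrounding context, not part of this patch):

    armnn::FusedQueueDescriptor fusedDesc;
    fusedDesc.m_Parameters.m_FusedKernelType = armnn::FusedKernelType::AddMulAdd;
    // fusedDesc.m_Inputs / fusedDesc.m_Outputs would already hold the
    // ITensorHandle* for input0, input1, bn_mul, bn_add and the output(s).

    std::unique_ptr<armnn::IWorkload> workload =
        factory.CreateWorkload(armnn::LayerType::Fused, fusedDesc, workloadInfo);
    workload->Execute(); // runs the configured arm_compute::NEAddMulAdd
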
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 2c91d1491d..3961ed1e34 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -48,6 +48,7 @@ BACKEND_SOURCES := \
workloads/NeonFillWorkload.cpp \
workloads/NeonFloorFloatWorkload.cpp \
workloads/NeonFullyConnectedWorkload.cpp \
+ workloads/NeonFusedWorkload.cpp \
workloads/NeonGatherWorkload.cpp \
workloads/NeonGatherNdWorkload.cpp \
workloads/NeonInstanceNormalizationWorkload.cpp \
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 588c90be6d..c9dd1ff507 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1724,6 +1724,13 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Te
// Convert from Float32 to Float16
ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test)
+// AddMulAdd
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsFloat32, AddMulAddTest<DataType::Float32>, true)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd2OutputsUint8, AddMulAddTest<DataType::QAsymmU8>, true)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputFloat32, AddMulAddTest<DataType::Float32>, false)
+ARMNN_AUTO_TEST_CASE_WITH_THF(AddMulAdd1OutputUint8, AddMulAddTest<DataType::QAsymmU8>, false)
+
#if defined(ARMNNREF_ENABLED)
// The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
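
The trailing boolean in each test case selects the output count: true exercises the two-output form, where the intermediate add result is returned alongside the final result, and false the single-output form. As implied by the NEAddMulAdd operand names used in the workload below, the fused computation is roughly:

    // add_output   = input0 + input1                      (only emitted with two outputs)
    // final_output = (input0 + input1) * bn_mul + bn_add  (then the optional fused activation)
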
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index 2cb2ccf385..f4438e4baa 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -54,6 +54,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
NeonFloorFloatWorkload.hpp
NeonFullyConnectedWorkload.cpp
NeonFullyConnectedWorkload.hpp
+ NeonFusedWorkload.cpp
+ NeonFusedWorkload.hpp
NeonGatherWorkload.cpp
NeonGatherWorkload.hpp
NeonGatherNdWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonFusedWorkload.cpp b/src/backends/neon/workloads/NeonFusedWorkload.cpp
new file mode 100644
index 0000000000..f770f46c81
--- /dev/null
+++ b/src/backends/neon/workloads/NeonFusedWorkload.cpp
@@ -0,0 +1,115 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonFusedWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <armnn/backends/TensorHandle.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h>
+
+namespace armnn
+{
+
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos,
+ const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
+ const FusedDescriptor& fusedDescriptor,
+ const ActivationDescriptor* activationDescriptor)
+{
+ std::vector<arm_compute::TensorInfo> actInputInfos;
+ actInputInfos.reserve(inputInfos.size());
+ for (size_t i = 0u; i < inputInfos.size(); ++i)
+ {
+ actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
+ }
+
+ std::vector<arm_compute::TensorInfo> actOutputInfos;
+ actOutputInfos.reserve(outputInfos.size());
+ for (size_t i = 0u; i < outputInfos.size(); ++i)
+ {
+ actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
+ }
+
+ const arm_compute::ActivationLayerInfo activationInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
+
+ switch (fusedDescriptor.m_FusedKernelType)
+ {
+ case FusedKernelType::AddMulAdd:
+ return arm_compute::NEAddMulAdd::validate(
+ &actInputInfos[0],
+ &actInputInfos[1],
+ &actInputInfos[2], // bn_mul
+ &actInputInfos[3], // bn_add
+ actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output
+ actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
+ default:
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "NeonFusedWorkloadValidate: no valid kernel type"};
+ }
+}
+
+
+NeonFusedWorkload::NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : NeonBaseWorkload<FusedQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonFusedWorkload",
+ static_cast<unsigned int>(info.m_InputTensorInfos.size()),
+ static_cast<unsigned int>(info.m_OutputTensorInfos.size()));
+
+ std::vector<arm_compute::ITensor*> inputs;
+ inputs.reserve(info.m_InputTensorInfos.size());
+ for (auto input : m_Data.m_Inputs)
+ {
+ inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor());
+ }
+
+ std::vector<arm_compute::ITensor*> outputs;
+ outputs.reserve(info.m_OutputTensorInfos.size());
+ for (auto output : m_Data.m_Outputs)
+ {
+ outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor());
+ }
+
+ const arm_compute::ActivationLayerInfo activationInfo =
+ ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+ switch (descriptor.m_Parameters.m_FusedKernelType)
+ {
+ case FusedKernelType::AddMulAdd:
+ {
+ auto layer = std::make_unique<arm_compute::NEAddMulAdd>();
+ layer->configure(inputs[0],
+ inputs[1],
+ inputs[2], // bn_mul
+ inputs[3], // bn_add
+ outputs.size() == 1 ? nullptr : outputs[0], // add_output
+ outputs.size() == 1 ? outputs[0] : outputs[1], // final_output
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
+ m_FusedLayer.reset(layer.release());
+ break;
+ }
+ default:
+ throw Exception("NeonFusedWorkload: no valid kernel type.");
+ }
+}
+
+void NeonFusedWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFusedWorkload_Execute", this->GetGuid());
+ m_FusedLayer->run();
+}
+
+} //namespace armnn
+
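
Note the output-count convention shared by validate and configure: with one output the intermediate add_output is suppressed (nullptr) and slot 0 carries the final result; with two outputs slot 0 is the intermediate and slot 1 the final. A minimal sketch of the standalone validate call, reusing the ins/outs/desc values from the support-check sketch above:

    arm_compute::Status status =
        armnn::NeonFusedWorkloadValidate(ins, outs, desc, /*activationDescriptor=*/nullptr);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        std::cerr << status.error_description() << "\n"; // ACL's reason for rejecting the fusion
    }
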
diff --git a/src/backends/neon/workloads/NeonFusedWorkload.hpp b/src/backends/neon/workloads/NeonFusedWorkload.hpp
new file mode 100644
index 0000000000..aaabf61560
--- /dev/null
+++ b/src/backends/neon/workloads/NeonFusedWorkload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseWorkload.hpp"
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/IFunction.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos,
+ const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
+ const FusedDescriptor& fusedDescriptor,
+ const ActivationDescriptor* activationDescriptor = nullptr);
+
+class NeonFusedWorkload : public NeonBaseWorkload<FusedQueueDescriptor>
+{
+public:
+ NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ std::unique_ptr<arm_compute::IFunction> m_FusedLayer;
+};
+
+} //namespace armnn
+
+
+
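
Keeping m_FusedLayer as a std::unique_ptr<arm_compute::IFunction> rather than a concrete NEAddMulAdd member leaves room for further kernel types behind the same Execute() path. A purely hypothetical extension of the constructor's switch (no such kernel type or ACL function exists in this patch):

    // case FusedKernelType::SomeOtherFusion: // hypothetical
    // {
    //     auto layer = std::make_unique<arm_compute::NESomeOtherFusedFunction>(); // hypothetical
    //     layer->configure(/* inputs, outputs, ... */);
    //     m_FusedLayer.reset(layer.release());
    //     break;
    // }
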
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index b72f3bb703..615e5d87c8 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -30,6 +30,7 @@
#include "NeonFillWorkload.hpp"
#include "NeonFloorFloatWorkload.hpp"
#include "NeonFullyConnectedWorkload.hpp"
+#include "NeonFusedWorkload.hpp"
#include "NeonGatherWorkload.hpp"
#include "NeonGatherNdWorkload.hpp"
#include "NeonInstanceNormalizationWorkload.hpp"