Diffstat (limited to 'src/armnn/backends/NeonWorkloads')
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp | 34
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp | 24
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp | 42
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp | 28
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp | 32
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp | 25
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp | 72
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp | 25
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp | 26
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp | 45
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp | 32
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp | 20
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp | 20
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp | 88
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp | 31
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp | 36
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp | 25
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp | 91
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp | 31
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp | 91
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp | 30
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp | 54
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp | 30
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp | 30
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp | 26
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp | 20
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp | 20
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp | 41
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp | 54
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp | 54
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp | 42
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp | 47
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp | 37
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp | 26
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp | 24
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp | 26
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp | 25
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp | 32
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp | 29
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp | 30
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp | 31
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp | 38
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp | 27
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp | 20
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp | 17
-rw-r--r-- src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp | 20
57 files changed, 1895 insertions, 0 deletions
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp
new file mode 100644
index 0000000000..39e55d5761
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonActivationFloat32Workload.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+
+namespace armnn
+{
+NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonActivationFloat32Workload", 1, 1);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+}
+
+void NeonActivationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationFloat32Workload_Execute");
+ m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
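Every workload added in this patch follows the same construction pattern seen above: validate the queue descriptor, downcast the tensor handles to the Neon handle type, configure an Arm Compute Library (ACL) function once in the constructor, then run it from a const Execute(). A minimal standalone sketch of that configure-once, run-many idiom follows; the class and function names are illustrative stand-ins, not the Arm NN or ACL API.

#include <cassert>
#include <cstdio>
#include <vector>

// Stand-in for an ACL function object: configured once, then run per inference.
class FakeAclFunction
{
public:
    void configure(const std::vector<float>* input, std::vector<float>* output)
    {
        m_Input = input;
        m_Output = output;
    }
    void run() { m_Output->assign(m_Input->begin(), m_Input->end()); } // identity "kernel"
private:
    const std::vector<float>* m_Input = nullptr;
    std::vector<float>* m_Output = nullptr;
};

class SketchWorkload
{
public:
    SketchWorkload(const std::vector<float>& input, std::vector<float>& output)
    {
        assert(output.size() == input.size()); // analogue of ValidateInputsOutputs
        m_Function.configure(&input, &output); // expensive setup happens exactly once
    }
    void Execute() const { m_Function.run(); } // cheap and repeatable; run() is non-const, hence mutable below
private:
    mutable FakeAclFunction m_Function;
};

int main()
{
    std::vector<float> input{1.0f, 2.0f, 3.0f};
    std::vector<float> output(3);
    SketchWorkload workload(input, output);
    for (int i = 0; i < 3; ++i)
    {
        workload.Execute(); // one call per inference
    }
    std::printf("output[2] = %f\n", output[2]);
    return 0;
}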
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp
new file mode 100644
index 0000000000..6fa83ea2f6
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+class NeonActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+{
+public:
+ NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::NEActivationLayer m_ActivationLayer;
+};
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
new file mode 100644
index 0000000000..27c37e9425
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonActivationUint8Workload.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/NeonLayerSupport.hpp"
+
+namespace armnn
+{
+NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+
+ std::string reasonIfUnsupported;
+ if (!IsNeonActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters))
+ {
+ throw InvalidArgumentException(reasonIfUnsupported);
+ }
+
+ // Only BoundedReLu is supported (see IsNeonActivationUint8Supported)
+ arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+
+ m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1);
+
+ arm_compute::ITensor& input = static_cast<NeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = static_cast<NeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void NeonActivationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationUint8Workload_Execute");
+
+ m_ActivationLayer.run();
+}
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp
new file mode 100644
index 0000000000..af655db3d6
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+ NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp
new file mode 100644
index 0000000000..d1fb64093d
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonAdditionFloat32Workload.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<AdditionQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonAdditionFloat32Workload", 2, 1);
+
+ arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+}
+
+void NeonAdditionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonAdditionFloat32Workload_Execute");
+ m_AddLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp
new file mode 100644
index 0000000000..5b75b502a3
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+class NeonAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+{
+public:
+ NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEArithmeticAddition m_AddLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..247ebfc5dd
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
@@ -0,0 +1,72 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/ArmComputeTensorUtils.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/NeonTensorHandle.hpp>
+#include <backends/Workload.hpp>
+
+#include <boost/cast.hpp>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Constant layer common to all data types
+template <armnn::DataType DataFormat>
+class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormat>
+{
+public:
+ NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : TypedWorkload<ConstantQueueDescriptor, DataFormat>(descriptor, info)
+ , m_RanOnce(false)
+ {
+ }
+
+ virtual void Execute() const override
+ {
+ using namespace armcomputetensorutils;
+
+ // The intermediate tensor held by the corresponding layer output handler can be initialised with the
+ // given data on the first inference, then reused for subsequent inferences.
+ // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer
+ // may not have been configured at the time.
+ if (!m_RanOnce)
+ {
+ const ConstantQueueDescriptor& data = this->m_Data;
+
+ BOOST_ASSERT(data.m_LayerOutput != nullptr);
+ arm_compute::ITensor& output =
+ boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
+
+ switch (DataFormat)
+ {
+ case DataType::Float32:
+ {
+ CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output);
+ break;
+ }
+ case DataType::QuantisedAsymm8:
+ {
+ CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output);
+ break;
+ }
+ default:
+ {
+ BOOST_ASSERT_MSG(false, "Unknown data type");
+ break;
+ }
+ }
+
+ m_RanOnce = true;
+ }
+ }
+
+private:
+ mutable bool m_RanOnce;
+};
+
+} //namespace armnn
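The base class above defers copying the constant data until the first Execute(), because the downstream ACL kernel may not have allocated the output tensor when the workload is constructed. The mutable-flag idiom that makes this work inside a const method can be shown in isolation; this is a generic sketch, not Arm NN code.

#include <cstdio>

class OneShotInit
{
public:
    void Execute() const
    {
        if (!m_RanOnce) // only the first call pays the initialisation cost
        {
            std::puts("initialising output tensor data");
            m_RanOnce = true; // legal inside a const method because the flag is mutable
        }
        std::puts("running inference");
    }
private:
    mutable bool m_RanOnce = false;
};

int main()
{
    OneShotInit workload;
    workload.Execute(); // initialises, then runs
    workload.Execute(); // runs only
    return 0;
}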
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
new file mode 100644
index 0000000000..24640c7adb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+namespace armnn
+{
+// Base class template providing an implementation of the Merger layer common to all data types
+template <armnn::DataType DataType>
+class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType>
+{
+public:
+ using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload;
+
+ virtual void Execute() const override
+ {
+ // With subtensors, merger is a no-op
+ }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
new file mode 100644
index 0000000000..769905b48b
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Splitter layer common to all data types
+template <armnn::DataType DataType>
+class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType>
+{
+public:
+ using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload;
+
+ virtual void Execute() const override
+ {
+ // With subtensors, splitter is a no-op
+ }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..f107c8137f
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp
@@ -0,0 +1,45 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonBatchNormalizationFloat32Workload.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+NeonBatchNormalizationFloat32Workload::NeonBatchNormalizationFloat32Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo());
+ BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo());
+ BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+ BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+ m_Layer.configure(
+ &input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps);
+
+ InitialiseArmComputeTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>());
+ InitialiseArmComputeTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>());
+ InitialiseArmComputeTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>());
+ InitialiseArmComputeTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>());
+}
+
+void NeonBatchNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonBatchNormalizationFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..2050d42859
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ NeonBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEBatchNormalizationLayer m_Layer;
+
+ arm_compute::Tensor m_Mean;
+ arm_compute::Tensor m_Variance;
+ arm_compute::Tensor m_Gamma;
+ arm_compute::Tensor m_Beta;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp
new file mode 100644
index 0000000000..8b203fbf3a
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConstantFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void NeonConstantFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantFloat32Workload_Execute");
+ NeonBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
new file mode 100644
index 0000000000..4ea4dfe127
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonConstantFloat32Workload : public NeonBaseConstantWorkload<DataType::Float32>
+{
+public:
+ using NeonBaseConstantWorkload<DataType::Float32>::NeonBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
new file mode 100644
index 0000000000..f6dfaeb7a7
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConstantUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonConstantUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantUint8Workload_Execute");
+ NeonBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp
new file mode 100644
index 0000000000..729bb35499
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonConstantUint8Workload : public NeonBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using NeonBaseConstantWorkload<DataType::QuantisedAsymm8>::NeonBaseConstantWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
new file mode 100644
index 0000000000..5099965a24
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -0,0 +1,88 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/NeonLayerSupport.hpp"
+
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+template<armnn::DataType dataType>
+NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<Convolution2dQueueDescriptor, dataType>(descriptor, info)
+{
+ using arm_compute::NEDirectConvolutionLayer;
+ using namespace armcomputetensorutils;
+
+ ValidateData();
+
+ // todo: check tensor shapes match
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ BuildArmComputeTensor(m_KernelTensor, m_Data.m_Weight->GetTensorInfo());
+
+ arm_compute::Tensor* optionalBiasTensor = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBiasTensor = &m_BiasTensor;
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ const bool preferDirectConvolution =
+ IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(),
+ m_Data.m_Parameters);
+
+ if (preferDirectConvolution)
+ {
+ auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>();
+ directConvolutionLayer->configure(&input,
+ &m_KernelTensor,
+ optionalBiasTensor,
+ &output,
+ padStrideInfo);
+ m_ConvolutionLayer.reset(directConvolutionLayer.release());
+ }
+ else
+ {
+ auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>();
+ convolutionLayer->configure(&input,
+ &m_KernelTensor,
+ optionalBiasTensor,
+ &output,
+ padStrideInfo);
+ m_ConvolutionLayer.reset(convolutionLayer.release());
+ }
+ BOOST_ASSERT(m_ConvolutionLayer);
+
+ using Type = ResolveType<dataType>;
+
+ InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>());
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<Type>());
+ }
+}
+
+// Generate known implementations for linker
+template class NeonConvolution2dBaseWorkload<DataType::Float32>;
+template class NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
+
+
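The two "template class" lines above are explicit instantiations: the template's member definitions live in this .cpp file, so only the Float32 and QuantisedAsymm8 specialisations exist at link time, and they exist precisely because they are forced here. A self-contained sketch of the technique (in a real project the declaration and the definition would sit in separate header and source files):

#include <cstdio>

// Declaration that would normally live in a header, without the definition.
template <typename T>
class Doubler
{
public:
    T Twice(T value) const;
};

// Definition that would normally live in a .cpp file, hidden from users.
template <typename T>
T Doubler<T>::Twice(T value) const
{
    return value + value;
}

// Explicit instantiations: without these, code in other translation units
// calling Doubler<float>::Twice would fail to link with "undefined reference".
template class Doubler<float>;
template class Doubler<int>;

int main()
{
    std::printf("%f %d\n", Doubler<float>{}.Twice(2.5f), Doubler<int>{}.Twice(21));
    return 0;
}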
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
new file mode 100644
index 0000000000..37740511ba
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/NeonLayerSupport.hpp"
+
+namespace armnn
+{
+
+template<armnn::DataType dataType>
+class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataType>
+{
+public:
+ using TypedWorkload<Convolution2dQueueDescriptor, dataType>::m_Data;
+
+ NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ virtual void ValidateData() const {}
+
+protected:
+ std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
+ arm_compute::Tensor m_KernelTensor;
+ arm_compute::Tensor m_BiasTensor;
+};
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..b4650ac011
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvolution2dFloat32Workload.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/NeonLayerSupport.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : NeonConvolution2dBaseWorkload(descriptor, info)
+{}
+
+
+void NeonConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dFloat32Workload_Execute");
+ m_ConvolutionLayer->run();
+}
+
+void NeonConvolution2dFloat32Workload::ValidateData() const
+{
+ m_Data.ValidateInputsOutputs("NeonConvolution2dFloat32Workload", 1, 1);
+}
+
+
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..f4d95d623f
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+namespace armnn
+{
+class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload<DataType::Float32>
+{
+public:
+ NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+ void ValidateData() const override;
+};
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
new file mode 100644
index 0000000000..11e31c727a
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonDepthwiseConvolutionFloat32Workload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ std::string reasonIfUnsupported;
+ if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
+ {
+ throw UnimplementedException(reasonIfUnsupported);
+ }
+
+ BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ arm_compute::Tensor* optionalBias = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBias = &m_BiasTensor;
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3;
+ if (use3x3Optimisation)
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
+ static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>(
+ m_pDepthwiseConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+ else
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
+ static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
+ m_pDepthwiseConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+ if (optionalBias)
+ {
+ InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>());
+ }
+}
+
+void NeonDepthwiseConvolutionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonDepthwiseConvolutionFloat32Workload_Execute");
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
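When the kernel is 3x3 the constructor above picks NEDepthwiseConvolutionLayer3x3, otherwise the generic NEDepthwiseConvolutionLayer, and stores either behind a std::unique_ptr<arm_compute::IFunction> so that Execute() needs no branch. A standalone sketch of that dispatch-by-shape pattern, with invented stand-in types:

#include <cstdio>
#include <memory>

struct IFunction // minimal stand-in for arm_compute::IFunction
{
    virtual ~IFunction() = default;
    virtual void run() = 0;
};

struct GenericDepthwise : IFunction
{
    void run() override { std::puts("generic depthwise path"); }
};

struct Depthwise3x3 : IFunction
{
    void run() override { std::puts("optimised 3x3 depthwise path"); }
};

// Mirrors the use3x3Optimisation check on the weight tensor's W and H dimensions.
std::unique_ptr<IFunction> MakeDepthwise(unsigned kernelW, unsigned kernelH)
{
    if (kernelW == 3 && kernelH == 3)
    {
        return std::make_unique<Depthwise3x3>();
    }
    return std::make_unique<GenericDepthwise>();
}

int main()
{
    auto layer = MakeDepthwise(3, 3);
    layer->run(); // Execute() just forwards to run(); the choice was made once, up front
    return 0;
}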
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
new file mode 100644
index 0000000000..f9e295f568
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ NeonDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+ arm_compute::Tensor m_KernelTensor;
+ arm_compute::Tensor m_BiasTensor;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
new file mode 100644
index 0000000000..bd034c4f80
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonDepthwiseConvolutionUint8Workload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ std::string reasonIfUnsupported;
+ if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
+ {
+ throw UnimplementedException(reasonIfUnsupported);
+ }
+
+ BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ arm_compute::Tensor* optionalBias = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBias = &m_BiasTensor;
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1);
+
+ arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3;
+ if (use3x3Optimisation)
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
+ static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>(
+ m_pDepthwiseConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+ else
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
+ static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
+ m_pDepthwiseConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+
+ if (optionalBias)
+ {
+ InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>());
+ }
+}
+
+void NeonDepthwiseConvolutionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonDepthwiseConvolutionUint8Workload_Execute");
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000000..9cf272e9f5
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+ arm_compute::Tensor m_KernelTensor;
+ arm_compute::Tensor m_BiasTensor;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
new file mode 100644
index 0000000000..a5eec5cadb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonFloorFloat32Workload.hpp"
+
+namespace armnn
+{
+NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonFloorFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void NeonFloorFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFloorFloat32Workload_Execute");
+ m_Layer.run();
+}
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
new file mode 100644
index 0000000000..f876f1e1bb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+{
+public:
+ NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEFloor m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
new file mode 100644
index 0000000000..54c4e4333c
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonFullyConnectedFloat32Workload.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonFullyConnectedFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+
+ arm_compute::Tensor* optionalBiasTensor = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBiasTensor = &m_BiasesTensor;
+ }
+
+ // Construct
+ m_FullyConnectedLayer.configure(
+ &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix);
+
+ // Allocate
+ InitialiseArmComputeTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+ if (optionalBiasTensor)
+ {
+ InitialiseArmComputeTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>());
+ }
+}
+
+void NeonFullyConnectedFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFullyConnectedFloat32Workload_Execute");
+ m_FullyConnectedLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
new file mode 100644
index 0000000000..f9230f1d93
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
+{
+public:
+ NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer;
+ arm_compute::Tensor m_WeightsTensor;
+ arm_compute::Tensor m_BiasesTensor;
+};
+
+} //namespace armnn
+
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..085f58a219
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonL2NormalizationFloat32Workload.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+
+namespace armnn
+{
+
+NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonL2NormalizationFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0]));
+}
+
+void NeonL2NormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonL2NormalizationFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..6cab28366a
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+class NeonL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+{
+public:
+ NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ // Purposely not a NEL2Normalize function: the constructor configures a NENormalizationLayer to perform L2 normalization.
+ mutable arm_compute::NENormalizationLayer m_Layer;
+};
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
new file mode 100644
index 0000000000..7520e8768e
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonMergerFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMergerFloat32Workload_Execute");
+ NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
new file mode 100644
index 0000000000..5c889c2af0
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerFloat32Workload : public NeonBaseMergerWorkload<DataType::Float32>
+{
+public:
+ using NeonBaseMergerWorkload<DataType::Float32>::NeonBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
new file mode 100644
index 0000000000..51578e5bff
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonMergerUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMergerUint8Workload_Execute");
+ NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
new file mode 100644
index 0000000000..fd1e6b72b9
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
new file mode 100644
index 0000000000..58ce7b74ba
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonMultiplicationFloat32Workload.hpp"
+
+
+namespace armnn
+{
+
+NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonMultiplicationFloat32Workload", 2, 1);
+
+ arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+ // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+ // ignored for F32 tensors.
+ m_PixelWiseMultiplication.configure(&input1,
+ &input2,
+ &output,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+void NeonMultiplicationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMultiplicationFloat32Workload_Execute");
+ m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp
new file mode 100644
index 0000000000..ed5ead3700
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+{
+public:
+ NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..739390d5a1
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonNormalizationFloat32Workload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonNormalizationFloat32Workload", 1, 1);
+ std::string reasonIfUnsupported;
+ if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters))
+ {
+ throw UnimplementedException(reasonIfUnsupported);
+ }
+
+ // The input and output tensors must have the same shape
+ if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1]
+ || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0]
+ || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3]
+ || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2])
+ {
+ throw InvalidArgumentException("Normalization requires input and output tensors to have the same shape.");
+ }
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const arm_compute::NormType normType =
+ ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType);
+ arm_compute::NormalizationLayerInfo normalizationInfo(normType,
+ m_Data.m_Parameters.m_NormSize,
+ m_Data.m_Parameters.m_Alpha,
+ m_Data.m_Parameters.m_Beta,
+ m_Data.m_Parameters.m_K,
+ false);
+
+ m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+void NeonNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonNormalizationFloat32Workload_Execute");
+ m_NormalizationLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..12a0fa80b2
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+{
+public:
+ NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NENormalizationLayer m_NormalizationLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp
new file mode 100644
index 0000000000..e0a0457422
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonPermuteWorkload.hpp"
+#include "backends/NeonTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const PermuteDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
+
+ return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
+ armcomputetensorutils::BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType DataType>
+NeonPermuteWorkload<DataType>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+{
+ using armcomputetensorutils::BuildArmComputePermutationVector;
+
+ m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+ const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+ // Run the layer
+ m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType DataType>
+void NeonPermuteWorkload<DataType>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, GetName() + "_Execute");
+ m_PermuteFunction.run();
+}
+
+template class NeonPermuteWorkload<DataType::Float32>;
+template class NeonPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp
new file mode 100644
index 0000000000..56e8719d6c
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/NEON/functions/NEPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
+ const PermuteDescriptor& descriptor);
+
+template <armnn::DataType DataType>
+class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+ static const std::string& GetName()
+ {
+ static const std::string name = std::string("NeonPermute") + GetDataTypeName(DataType) + "Workload";
+ return name;
+ }
+
+ NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ mutable arm_compute::NEPermute m_PermuteFunction;
+};
+
+using NeonPermuteFloat32Workload = NeonPermuteWorkload<DataType::Float32>;
+using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
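NeonPermuteWorkloadValidate is the piece that layer-support code can call before committing a permute to CpuAcc. A minimal usage sketch, not part of this patch; the shapes and include path are illustrative, and it assumes the ArmNN convention that m_DimMappings[i] names the output dimension that input dimension i moves to:

    #include "backends/NeonWorkloads/NeonPermuteWorkload.hpp"
    #include <arm_compute/core/Error.h>

    bool CanPermuteOnNeon()
    {
        // 4-D permute swapping dimensions 1 and 3: (1, 3, 2, 4) -> (1, 4, 2, 3)
        armnn::TensorInfo inputInfo({ 1, 3, 2, 4 }, armnn::DataType::Float32);
        armnn::TensorInfo outputInfo({ 1, 4, 2, 3 }, armnn::DataType::Float32);

        armnn::PermuteDescriptor descriptor(armnn::PermutationVector({ 0, 3, 2, 1 }));

        const arm_compute::Status status =
            armnn::NeonPermuteWorkloadValidate(inputInfo, outputInfo, descriptor);
        return status.error_code() == arm_compute::ErrorCode::OK;
    }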
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..6d6a492155
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonPooling2dBaseWorkload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/NeonTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+ return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+template <armnn::DataType dataType>
+NeonPooling2dBaseWorkload<dataType>::NeonPooling2dBaseWorkload(
+ const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+ : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+ m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class NeonPooling2dBaseWorkload<DataType::Float32>;
+template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000000..9461982f86
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types
+template <armnn::DataType dataType>
+class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType>
+{
+public:
+ using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data;
+
+ NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ const std::string& name);
+
+protected:
+ mutable arm_compute::NEPoolingLayer m_PoolingLayer;
+};
+
+
+} //namespace armnn
+
+
+
+
+
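NeonPooling2dWorkloadValidate follows the same pattern as the permute validator: callers can probe backend support before constructing a workload. A minimal sketch, not part of this patch, with hypothetical NCHW shapes for a 2x2 max pool with stride 2:

    #include "backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp"
    #include <arm_compute/core/Error.h>

    bool CanMaxPoolOnNeon()
    {
        armnn::Pooling2dDescriptor descriptor;
        descriptor.m_PoolType   = armnn::PoolingAlgorithm::Max;
        descriptor.m_PoolWidth  = 2;
        descriptor.m_PoolHeight = 2;
        descriptor.m_StrideX    = 2;
        descriptor.m_StrideY    = 2;

        // NCHW: one batch, 16 channels, 8x8 spatial, pooled down to 4x4
        armnn::TensorInfo input({ 1, 16, 8, 8 }, armnn::DataType::Float32);
        armnn::TensorInfo output({ 1, 16, 4, 4 }, armnn::DataType::Float32);

        return armnn::NeonPooling2dWorkloadValidate(input, output, descriptor).error_code()
               == arm_compute::ErrorCode::OK;
    }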
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp
new file mode 100644
index 0000000000..ba2aa20924
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonPooling2dFloat32Workload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonPooling2dFloat32Workload::NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : NeonPooling2dBaseWorkload<armnn::DataType::Float32>(descriptor, info, "NeonPooling2dFloat32Workload")
+{
+}
+
+void NeonPooling2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dFloat32Workload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp
new file mode 100644
index 0000000000..6cfc9cc96f
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+#include "NeonPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload<armnn::DataType::Float32>
+{
+public:
+ NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..0778794081
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonPooling2dUint8Workload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload")
+{
+}
+
+void NeonPooling2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dUint8Workload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..fa8182125b
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Types.hpp>
+#include "NeonPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>
+{
+public:
+ NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp
new file mode 100644
index 0000000000..317d16f6bd
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonReshapeFloat32Workload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonReshapeFloat32Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void NeonReshapeFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
new file mode 100644
index 0000000000..27f4aea9e7
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+{
+public:
+ NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
+
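NEReshapeLayer copies the input buffer to the output tensor under the new shape; the element order is preserved, so the only structural requirement is that the two tensors carry the same number of elements. A small sketch with hypothetical shapes:

    #include <armnn/Tensor.hpp>
    #include <cassert>

    void ReshapeShapeContract()
    {
        armnn::TensorInfo before({ 1, 2, 3, 4 }, armnn::DataType::Float32); // 24 elements
        armnn::TensorInfo after({ 6, 4 }, armnn::DataType::Float32);        // also 24 elements

        assert(before.GetNumElements() == after.GetNumElements());
    }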
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..06f57c1e0f
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonReshapeUint8Workload.hpp"
+
+
+
+
+namespace armnn
+{
+NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void NeonReshapeUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeUint8Workload_Execute");
+ m_Layer.run();
+}
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..66b7d914b1
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+ NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NEReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
new file mode 100644
index 0000000000..229562ece2
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonSoftmaxFloat32Workload.hpp"
+
+namespace armnn
+{
+NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonSoftmaxFloat32Workload", 1, 1);
+
+    // The Arm Compute softmax layer operates on 2D data internally, collapsing the leading dimensions itself,
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void NeonSoftmaxFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxFloat32Workload_Execute");
+ m_SoftmaxLayer.run();
+}
+} //namespace armnn
+
+
+
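The m_Beta parameter forwarded to NESoftmaxLayer::configure scales the logits before exponentiation: softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j), so m_Beta = 1 gives the standard softmax. A scalar reference computation, for intuition only and not the NEON implementation:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    std::vector<float> SoftmaxReference(const std::vector<float>& x, float beta)
    {
        // Subtracting the max before exponentiating does not change the result
        // (the common factor exp(-beta * max) cancels) but avoids overflow.
        const float maxVal = *std::max_element(x.begin(), x.end());

        std::vector<float> out(x.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < x.size(); ++i)
        {
            out[i] = std::exp(beta * (x[i] - maxVal));
            sum += out[i];
        }
        for (float& v : out)
        {
            v /= sum;
        }
        return out;
    }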
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
new file mode 100644
index 0000000000..c466a0f9c6
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+{
+public:
+ NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..a66b0343ff
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonSoftmaxUint8Workload.hpp"
+
+
+
+namespace armnn
+{
+NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
+
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const auto outputQuantization = output.info()->quantization_info();
+
+ if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+ {
+ throw InvalidArgumentException(
+ "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+ }
+
+ m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void NeonSoftmaxUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClSoftmaxUint8Workload_Execute");
+
+ m_SoftmaxLayer.run();
+}
+} //namespace armnn
+
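The quantization check above reflects an Arm Compute requirement for QASYMM8 softmax: the outputs lie in [0, 1], and under the affine scheme real = scale * (q - offset), only scale = 1/256 with offset = 0 spreads the 256 unsigned 8-bit codes evenly across that range. A small sketch:

    #include <cstdint>

    // real = scale * (q - offset), using the single output quantization accepted above
    float DequantizeSoftmaxOutput(std::uint8_t q)
    {
        const float scale = 1.0f / 256.0f;
        const std::int32_t offset = 0;
        return scale * static_cast<float>(static_cast<std::int32_t>(q) - offset);
    }

    // DequantizeSoftmaxOutput(0)   == 0.0f
    // DequantizeSoftmaxOutput(255) == 0.99609375f, just under 1.0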
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..bccd82a850
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+ NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp
new file mode 100644
index 0000000000..13701d2ed3
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonSplitterFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void NeonSplitterFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterFloat32Workload_Execute");
+ NeonBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp
new file mode 100644
index 0000000000..432f5de4eb
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload<DataType::Float32>
+{
+public:
+ using NeonBaseSplitterWorkload<DataType::Float32>::NeonBaseSplitterWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..90d24d3ffd
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonSplitterUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonSplitterUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterUint8Workload_Execute");
+ NeonBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..1c97c74e02
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
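Both splitter specializations add only a named profiling scope around the base implementation; the using-declaration inherits the base class constructor so it does not have to be restated. The same pattern in miniature, with hypothetical names:

    struct BaseWorkload
    {
        explicit BaseWorkload(int descriptor) : m_Descriptor(descriptor) {}
        virtual ~BaseWorkload() = default;
        virtual void Execute() const {}
        int m_Descriptor;
    };

    struct DerivedWorkload : BaseWorkload
    {
        using BaseWorkload::BaseWorkload; // inherits BaseWorkload(int)

        void Execute() const override
        {
            // a profiling scope (ARMNN_SCOPED_PROFILING_EVENT) would open here
            BaseWorkload::Execute();
        }
    };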