author:    Francis Murtagh <francis.murtagh@arm.com>  2018-08-29 12:42:10 +0100
committer: Matthew Bentham <matthew.bentham@arm.com>  2018-09-17 17:21:23 +0100
commit:    e7a86a4a3363993fb41b1ea62f23b3643b8b0c78 (patch)
tree:      6d054cae92a13412129525e4f9ea441e7d8c6b73
parent:    a68241066c3e797dab70f515d2c55aaa74abf564 (diff)
download:  armnn-e7a86a4a3363993fb41b1ea62f23b3643b8b0c78.tar.gz
IVGCVSW-1200 Division layer

* IVGCVSW-1772 Create QueueDescriptors
* IVGCVSW-1773 Add a CL implementation of the DivisionWorkload
* IVGCVSW-1774 Add a Neon implementation of the DivisionWorkload
* IVGCVSW-1775 Add a Ref implementation of the DivisionWorkload
* IVGCVSW-1776 Add a Division Layer
* Added simple division unit tests with broadcasting

Change-Id: I05751fb7f868789f6c06f91e8d25e52b4f12ab5e
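As context for the diff below, here is a minimal sketch of how a caller might use the new public API once this patch is applied. The shapes, names, and binding ids are illustrative, not taken from the patch:

// Minimal sketch, assuming the armnn public headers as of this commit.
#include <armnn/ArmNN.hpp>

armnn::INetworkPtr BuildDivisionNetwork()
{
    armnn::INetworkPtr network = armnn::INetwork::Create();

    // Two inputs feeding the new division layer, one output.
    armnn::IConnectableLayer* input0 = network->AddInputLayer(0, "input0");
    armnn::IConnectableLayer* input1 = network->AddInputLayer(1, "input1");
    armnn::IConnectableLayer* div    = network->AddDivisionLayer("division");
    armnn::IConnectableLayer* output = network->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(div->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(div->GetInputSlot(1));
    div->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Illustrative 4D shape; broadcasting between the two inputs is also
    // allowed, as the unit tests in this patch exercise.
    armnn::TensorInfo info({ 1, 2, 2, 2 }, armnn::DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    div->GetOutputSlot(0).SetTensorInfo(info);

    return network;
}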
-rw-r--r-- Android.mk | 5
-rw-r--r-- CMakeLists.txt | 10
-rw-r--r-- include/armnn/INetwork.hpp | 5
-rw-r--r-- include/armnn/LayerSupport.hpp | 7
-rw-r--r-- src/armnn/InternalTypes.cpp | 1
-rw-r--r-- src/armnn/InternalTypes.hpp | 1
-rw-r--r-- src/armnn/LayerSupport.cpp | 10
-rw-r--r-- src/armnn/LayersFwd.hpp | 2
-rw-r--r-- src/armnn/Network.cpp | 5
-rw-r--r-- src/armnn/Network.hpp | 2
-rw-r--r-- src/armnn/backends/ClLayerSupport.cpp | 14
-rw-r--r-- src/armnn/backends/ClLayerSupport.hpp | 5
-rw-r--r-- src/armnn/backends/ClWorkloadFactory.cpp | 12
-rw-r--r-- src/armnn/backends/ClWorkloadFactory.hpp | 3
-rw-r--r-- src/armnn/backends/ClWorkloads.hpp | 1
-rw-r--r-- src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp | 49
-rw-r--r-- src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp | 30
-rw-r--r-- src/armnn/backends/NeonLayerSupport.cpp | 9
-rw-r--r-- src/armnn/backends/NeonLayerSupport.hpp | 5
-rw-r--r-- src/armnn/backends/NeonWorkloadFactory.cpp | 12
-rw-r--r-- src/armnn/backends/NeonWorkloadFactory.hpp | 3
-rw-r--r-- src/armnn/backends/RefLayerSupport.cpp | 13
-rw-r--r-- src/armnn/backends/RefLayerSupport.hpp | 5
-rw-r--r-- src/armnn/backends/RefWorkloadFactory.cpp | 6
-rw-r--r-- src/armnn/backends/RefWorkloadFactory.hpp | 3
-rw-r--r-- src/armnn/backends/RefWorkloads.hpp | 2
-rw-r--r-- src/armnn/backends/RefWorkloads/Division.cpp | 52
-rw-r--r-- src/armnn/backends/RefWorkloads/Division.hpp | 20
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp | 31
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp | 21
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp | 37
-rw-r--r-- src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp | 21
-rw-r--r-- src/armnn/backends/WorkloadData.cpp | 13
-rw-r--r-- src/armnn/backends/WorkloadData.hpp | 6
-rw-r--r-- src/armnn/backends/WorkloadFactory.cpp | 13
-rw-r--r-- src/armnn/backends/WorkloadFactory.hpp | 3
-rw-r--r-- src/armnn/backends/test/ArmComputeCl.cpp | 5
-rw-r--r-- src/armnn/backends/test/IsLayerSupportedTestImpl.hpp | 2
-rw-r--r-- src/armnn/backends/test/LayerTests.cpp | 122
-rw-r--r-- src/armnn/backends/test/LayerTests.hpp | 4
-rw-r--r-- src/armnn/backends/test/Reference.cpp | 5
-rw-r--r-- src/armnn/layers/DivisionLayer.cpp | 81
-rw-r--r-- src/armnn/layers/DivisionLayer.hpp | 28
-rw-r--r-- src/armnn/test/UnitTests.hpp | 1
44 files changed, 684 insertions(+), 1 deletion(-)
diff --git a/Android.mk b/Android.mk
index e83000414f..c81b99737d 100644
--- a/Android.mk
+++ b/Android.mk
@@ -60,6 +60,7 @@ LOCAL_SRC_FILES := \
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp \
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp \
+ src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp \
src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp \
src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp \
@@ -163,6 +164,9 @@ LOCAL_SRC_FILES := \
src/armnn/backends/RefWorkloads/RefPermuteWorkload.cpp \
src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp \
src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp \
+ src/armnn/backends/RefWorkloads/Division.cpp \
+ src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp \
+ src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp \
src/armnn/backends/MemCopyWorkload.cpp \
src/armnn/backends/WorkloadData.cpp \
src/armnn/backends/WorkloadFactory.cpp \
@@ -187,6 +191,7 @@ LOCAL_SRC_FILES := \
src/armnn/layers/OutputLayer.cpp \
src/armnn/layers/PermuteLayer.cpp \
src/armnn/layers/Pooling2dLayer.cpp \
+ src/armnn/layers/DivisionLayer.cpp \
src/armnn/layers/ReshapeLayer.cpp \
src/armnn/layers/ResizeBilinearLayer.cpp \
src/armnn/layers/SoftmaxLayer.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c06a869af5..b04bf6bd65 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -200,6 +200,8 @@ list(APPEND armnn_sources
src/armnn/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp
src/armnn/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp
src/armnn/backends/RefWorkloads/Multiplication.cpp
+ src/armnn/backends/RefWorkloads/Division.cpp
+ src/armnn/backends/RefWorkloads/Division.hpp
src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
src/armnn/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp
src/armnn/backends/RefWorkloads/Multiplication.hpp
@@ -279,6 +281,10 @@ list(APPEND armnn_sources
src/armnn/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp
src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp
src/armnn/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp
+ src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp
+ src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp
+ src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp
+ src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp
src/armnn/layers/LayerCloneBase.hpp
src/armnn/layers/LayerWithParameters.hpp
src/armnn/layers/ActivationLayer.hpp
@@ -323,6 +329,8 @@ list(APPEND armnn_sources
src/armnn/layers/PermuteLayer.cpp
src/armnn/layers/Pooling2dLayer.hpp
src/armnn/layers/Pooling2dLayer.cpp
+ src/armnn/layers/DivisionLayer.cpp
+ src/armnn/layers/DivisionLayer.hpp
src/armnn/layers/ReshapeLayer.hpp
src/armnn/layers/ReshapeLayer.cpp
src/armnn/layers/ResizeBilinearLayer.hpp
@@ -527,6 +535,8 @@ if(ARMCOMPUTECL)
src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
+ src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp
+ src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp
src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index cefcbfb06c..18e459bf0d 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -269,6 +269,11 @@ public:
const LstmInputParams& params,
const char* name = nullptr) = 0;
+ /// Adds a division layer to the network.
+ /// @param name - Optional name for the layer.
+ /// @return - Interface for configuring the layer.
+ virtual IConnectableLayer* AddDivisionLayer(const char* name = nullptr) = 0;
+
protected:
~INetwork() {}
};
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 26cbda47e2..cb4329727a 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -73,6 +73,13 @@ bool IsDepthwiseConvolutionSupported(Compute compute,
char* reasonIfUnsupported = nullptr,
size_t reasonIfUnsupportedMaxLength = 1024);
+bool IsDivisionSupported(Compute compute,
+ const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ char* reasonIfUnsupported = nullptr,
+ size_t reasonIfUnsupportedMaxLength = 1024);
+
bool IsInputSupported(Compute compute,
const TensorInfo& input,
char* reasonIfUnsupported = nullptr,
diff --git a/src/armnn/InternalTypes.cpp b/src/armnn/InternalTypes.cpp
index 3426da3d24..7ccef9e985 100644
--- a/src/armnn/InternalTypes.cpp
+++ b/src/armnn/InternalTypes.cpp
@@ -22,6 +22,7 @@ char const* GetLayerTypeAsCString(LayerType type)
case LayerType::ConvertFp32ToFp16: return "ConvertFp32ToFp16";
case LayerType::Convolution2d: return "Convolution2d";
case LayerType::DepthwiseConvolution2d: return "DepthwiseConvolution2d";
+ case LayerType::Division: return "Division";
case LayerType::FakeQuantization: return "FakeQuantization";
case LayerType::Floor: return "Floor";
case LayerType::FullyConnected: return "FullyConnected";
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index 0968e17b18..236b77cafd 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -22,6 +22,7 @@ enum class LayerType
ConvertFp32ToFp16,
Convolution2d,
DepthwiseConvolution2d,
+ Division,
FakeQuantization,
Floor,
FullyConnected,
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 8dcb0dc6ee..50c78cb26d 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -141,6 +141,16 @@ bool IsConvolution2dSupported(Compute compute,
FORWARD_LAYER_SUPPORT_FUNC(compute, IsConvolution2dSupported, input, output, descriptor, weights, biases);
}
+bool IsDivisionSupported(Compute compute,
+ const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ char* reasonIfUnsupported,
+ size_t reasonIfUnsupportedMaxLength)
+{
+ FORWARD_LAYER_SUPPORT_FUNC(compute, IsDivisionSupported, input0, input1, output);
+}
+
bool IsDepthwiseConvolutionSupported(Compute compute,
const TensorInfo& input,
const TensorInfo& output,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index e79149f28f..bf40f0ff37 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -14,6 +14,7 @@
#include "layers/ConvertFp32ToFp16Layer.hpp"
#include "layers/Convolution2dLayer.hpp"
#include "layers/DepthwiseConvolution2dLayer.hpp"
+#include "layers/DivisionLayer.hpp"
#include "layers/FakeQuantizationLayer.hpp"
#include "layers/FloorLayer.hpp"
#include "layers/FullyConnectedLayer.hpp"
@@ -67,6 +68,7 @@ DECLARE_LAYER(ConvertFp16ToFp32)
DECLARE_LAYER(ConvertFp32ToFp16)
DECLARE_LAYER(Convolution2d)
DECLARE_LAYER(DepthwiseConvolution2d)
+DECLARE_LAYER(Division)
DECLARE_LAYER(FakeQuantization)
DECLARE_LAYER(Floor)
DECLARE_LAYER(FullyConnected)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index f510207c06..76bf4f17ee 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -584,6 +584,11 @@ IConnectableLayer* Network::AddLstmLayer(const LstmDescriptor& descriptor,
return layer;
}
+IConnectableLayer* Network::AddDivisionLayer(const char* name)
+{
+ return m_Graph->AddLayer<DivisionLayer>(name);
+}
+
OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph)
: m_Graph(std::move(graph))
{
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 72100aae6c..e4dc0e3cd8 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -113,6 +113,8 @@ public:
const LstmInputParams& params,
const char* name = nullptr) override;
+ IConnectableLayer* AddDivisionLayer(const char* name = nullptr) override;
+
private:
IConnectableLayer* AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
const ConstTensor& weights,
diff --git a/src/armnn/backends/ClLayerSupport.cpp b/src/armnn/backends/ClLayerSupport.cpp
index b00a218a72..77e74f57c6 100644
--- a/src/armnn/backends/ClLayerSupport.cpp
+++ b/src/armnn/backends/ClLayerSupport.cpp
@@ -17,11 +17,11 @@
#include "ClWorkloads/ClAdditionFloat32Workload.hpp"
#include "ClWorkloads/ClActivationFloat32Workload.hpp"
#include "ClWorkloads/ClBatchNormalizationFloat32Workload.hpp"
-
#include "ClWorkloads/ClConvertFp16ToFp32Workload.hpp"
#include "ClWorkloads/ClConvertFp32ToFp16Workload.hpp"
#include "ClWorkloads/ClConvolution2dBaseWorkload.hpp"
#include "ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp"
+#include "ClWorkloads/ClDivisionFloatWorkload.hpp"
#include "ClWorkloads/ClL2NormalizationFloat32Workload.hpp"
#include "ClWorkloads/ClMultiplicationFloat32Workload.hpp"
#include "ClWorkloads/ClFullyConnectedFloat32Workload.hpp"
@@ -238,6 +238,18 @@ bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input,
biases);
}
+bool IsDivisionSupportedCl(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate,
+ reasonIfUnsupported,
+ input0,
+ input1,
+ output);
+}
+
bool IsFullyConnectedSupportedCl(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/ClLayerSupport.hpp b/src/armnn/backends/ClLayerSupport.hpp
index ae5f4b0c54..71bbe7c2db 100644
--- a/src/armnn/backends/ClLayerSupport.hpp
+++ b/src/armnn/backends/ClLayerSupport.hpp
@@ -54,6 +54,11 @@ bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input,
const boost::optional<TensorInfo>& biases,
std::string* reasonIfUnsupported = nullptr);
+bool IsDivisionSupportedCl(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported = nullptr);
+
bool IsFullyConnectedSupportedCl(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 354440c7bc..77959d1e64 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -163,6 +163,12 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMultiplication(
return MakeWorkload<ClMultiplicationFloat32Workload, NullWorkload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateDivision(
+ const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
+{
+ return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info);
+}
+
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateBatchNormalization(
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
@@ -423,6 +429,12 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
return nullptr;
}
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return nullptr;
+}
+
void ClWorkloadFactory::Finalize()
{
}
diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp
index d0786f3fba..ab8c9268d5 100644
--- a/src/armnn/backends/ClWorkloadFactory.hpp
+++ b/src/armnn/backends/ClWorkloadFactory.hpp
@@ -108,6 +108,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
virtual void Finalize() override;
virtual void Release() override;
diff --git a/src/armnn/backends/ClWorkloads.hpp b/src/armnn/backends/ClWorkloads.hpp
index 9f5622a491..6eb8adabe4 100644
--- a/src/armnn/backends/ClWorkloads.hpp
+++ b/src/armnn/backends/ClWorkloads.hpp
@@ -17,6 +17,7 @@
#include "backends/ClWorkloads/ClConvolution2dUint8Workload.hpp"
#include "backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp"
#include "backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp"
+#include "backends/ClWorkloads/ClDivisionFloatWorkload.hpp"
#include "backends/ClWorkloads/ClFloorFloat32Workload.hpp"
#include "backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp"
#include "backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp"
diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp
new file mode 100644
index 0000000000..07345c345c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDivisionFloatWorkload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+ // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+ // ignored for F32 tensors.
+ return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput);
+}
+
+
+ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<DivisionQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ // Configure the CL division function with the input and output tensors.
+ m_ArithmeticDivision.configure(&input0, &input1, &output);
+}
+
+void ClDivisionFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute");
+
+ // Executes the layer.
+ m_ArithmeticDivision.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp
new file mode 100644
index 0000000000..bd06d38e8b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
+
+class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
+{
+public:
+ ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
+ using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision;
+};
+
+} //namespace armnn
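As a hedged illustration, the validate hook declared above can be used to ask Compute Library whether a tensor configuration is viable before constructing the workload. The error-code check mirrors the pattern behind armnn's FORWARD_WORKLOAD_VALIDATE_FUNC macro; the wrapper function itself is hypothetical:

// Illustrative only; assumes the ClDivisionWorkloadValidate declaration above.
#include <armnn/Tensor.hpp>

bool ClDivisionIsSupported(const armnn::TensorInfo& in0,
                           const armnn::TensorInfo& in1,
                           const armnn::TensorInfo& out)
{
    // Ask Compute Library to validate before building a ClDivisionFloatWorkload.
    arm_compute::Status status = armnn::ClDivisionWorkloadValidate(in0, in1, out);
    return status.error_code() == arm_compute::ErrorCode::OK;
}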
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp
index 8f73b05460..48b3ccbfa0 100644
--- a/src/armnn/backends/NeonLayerSupport.cpp
+++ b/src/armnn/backends/NeonLayerSupport.cpp
@@ -225,6 +225,15 @@ bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
biases);
}
+bool IsDivisionSupportedNeon(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ ignore_unused(input0);
+ ignore_unused(input1);
+ ignore_unused(output);
+ ignore_unused(reasonIfUnsupported);
+ // Division is not supported on Neon at the moment.
+ return false;
+}
+
bool IsFullyConnectedSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/NeonLayerSupport.hpp b/src/armnn/backends/NeonLayerSupport.hpp
index 45032444a4..654d06be27 100644
--- a/src/armnn/backends/NeonLayerSupport.hpp
+++ b/src/armnn/backends/NeonLayerSupport.hpp
@@ -59,6 +59,11 @@ bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
const boost::optional<TensorInfo>& biases,
std::string* reasonIfUnsupported = nullptr);
+bool IsDivisionSupportedNeon(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported = nullptr);
+
bool IsFullyConnectedSupportedNeon(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp
index 6ea72f77cc..2332b8b845 100644
--- a/src/armnn/backends/NeonWorkloadFactory.cpp
+++ b/src/armnn/backends/NeonWorkloadFactory.cpp
@@ -156,6 +156,12 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
return MakeWorkload<NeonMultiplicationFloat32Workload, NullWorkload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
+ const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
+{
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+}
+
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
@@ -417,6 +423,12 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
return nullptr;
}
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return nullptr;
+}
+
void NeonWorkloadFactory::Finalize()
{}
diff --git a/src/armnn/backends/NeonWorkloadFactory.hpp b/src/armnn/backends/NeonWorkloadFactory.hpp
index 83e1f5e75f..f6ddb6d332 100644
--- a/src/armnn/backends/NeonWorkloadFactory.hpp
+++ b/src/armnn/backends/NeonWorkloadFactory.hpp
@@ -108,6 +108,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
virtual void Finalize() override;
virtual void Release() override;
diff --git a/src/armnn/backends/RefLayerSupport.cpp b/src/armnn/backends/RefLayerSupport.cpp
index dd89dd51b3..ff5809c6e8 100644
--- a/src/armnn/backends/RefLayerSupport.cpp
+++ b/src/armnn/backends/RefLayerSupport.cpp
@@ -117,6 +117,19 @@ bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input,
&TrueFunc<>);
}
+bool IsDivisionSupportedRef(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ ignore_unused(input1);
+ ignore_unused(output);
+ return IsSupportedForDataTypeRef(reasonIfUnsupported,
+ input0.GetDataType(),
+ &TrueFunc<>,
+ &TrueFunc<>);
+}
+
bool IsFullyConnectedSupportedRef(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/RefLayerSupport.hpp b/src/armnn/backends/RefLayerSupport.hpp
index fde09685ac..900cf6923a 100644
--- a/src/armnn/backends/RefLayerSupport.hpp
+++ b/src/armnn/backends/RefLayerSupport.hpp
@@ -51,6 +51,11 @@ bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input,
const boost::optional<TensorInfo>& biases,
std::string* reasonIfUnsupported = nullptr);
+bool IsDivisionSupportedRef(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported = nullptr);
+
bool IsFullyConnectedSupportedRef(const TensorInfo& input,
const TensorInfo& output,
const TensorInfo& weights,
diff --git a/src/armnn/backends/RefWorkloadFactory.cpp b/src/armnn/backends/RefWorkloadFactory.cpp
index 9294c5accc..b4e4cf9df2 100644
--- a/src/armnn/backends/RefWorkloadFactory.cpp
+++ b/src/armnn/backends/RefWorkloadFactory.cpp
@@ -221,4 +221,10 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToFp16(
return std::make_unique<RefConvertFp32ToFp16Workload>(descriptor, info);
}
+std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDivision(
+ const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
+{
+ return MakeWorkload<RefDivisionFloat32Workload, RefDivisionUint8Workload>(descriptor, info);
+}
+
} // namespace armnn
diff --git a/src/armnn/backends/RefWorkloadFactory.hpp b/src/armnn/backends/RefWorkloadFactory.hpp
index ee8639f8ed..9b9465ccfc 100644
--- a/src/armnn/backends/RefWorkloadFactory.hpp
+++ b/src/armnn/backends/RefWorkloadFactory.hpp
@@ -124,6 +124,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
private:
template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/armnn/backends/RefWorkloads.hpp b/src/armnn/backends/RefWorkloads.hpp
index 1defdbbe82..8ce21d490c 100644
--- a/src/armnn/backends/RefWorkloads.hpp
+++ b/src/armnn/backends/RefWorkloads.hpp
@@ -55,3 +55,5 @@
#include "backends/RefWorkloads/RefLstmFloat32Workload.hpp"
#include "backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp"
#include "backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp"
+#include "backends/RefWorkloads/RefDivisionFloat32Workload.hpp"
+#include "backends/RefWorkloads/RefDivisionUint8Workload.hpp"
diff --git a/src/armnn/backends/RefWorkloads/Division.cpp b/src/armnn/backends/RefWorkloads/Division.cpp
new file mode 100644
index 0000000000..9837fea6b4
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Division.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "Division.hpp"
+#include "Broadcast.hpp"
+
+#include <functional>
+
+namespace
+{
+
+void ElementwiseDivision(unsigned int numElements,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
+{
+ for (unsigned int i = 0; i < numElements; ++i)
+ {
+ // TODO: decide how division by zero should be handled; for now this follows IEEE 754 float semantics (inf or NaN).
+ outData[i] = inData0[i] / inData1[i];
+ }
+}
+
+} // namespace
+
+namespace armnn
+{
+
+void Division(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
+{
+ if (inShape0 == inShape1)
+ {
+ ElementwiseDivision(inShape0.GetNumElements(), inData0, inData1, outData);
+ }
+ else
+ {
+ BroadcastLoop(inShape0, inShape1, outShape).Unroll(std::divides<float>(),
+ 0,
+ inData0,
+ inData1,
+ outData);
+ }
+}
+
+} //namespace armnn
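The reference implementation above takes a fast path when the two shapes match and falls back to BroadcastLoop otherwise. As a self-contained illustration of the broadcast semantics (not armnn's actual BroadcastLoop helper), a one-dimensional version looks like this:

// Standalone sketch of the broadcast behaviour; illustrative only.
#include <cassert>
#include <vector>

// Divide a {N} tensor by a {1} or {N} tensor, mirroring the two paths in
// armnn::Division: plain elementwise when shapes match, broadcast otherwise.
std::vector<float> DivideWithBroadcast(const std::vector<float>& a,
                                       const std::vector<float>& b)
{
    assert(b.size() == 1 || b.size() == a.size());
    std::vector<float> out(a.size());
    for (size_t i = 0; i < a.size(); ++i)
    {
        // When b has a single element it is broadcast across all of a.
        out[i] = a[i] / b[b.size() == 1 ? 0 : i];
    }
    return out;
}

// Example: DivideWithBroadcast({2, 4, 6, 8}, {2}) yields {1, 2, 3, 4}.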
diff --git a/src/armnn/backends/RefWorkloads/Division.hpp b/src/armnn/backends/RefWorkloads/Division.hpp
new file mode 100644
index 0000000000..d4c7e8dc8f
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/Division.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+ void Division(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData);
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp
new file mode 100644
index 0000000000..7cbd1fae5b
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefDivisionFloat32Workload.hpp"
+
+#include "Division.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+void RefDivisionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDivisionFloat32Workload_Execute");
+
+ const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape();
+ const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape();
+ const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape();
+
+ float* outputData = GetOutputTensorDataFloat(0, m_Data);
+ const float* inputData0 = GetInputTensorDataFloat(0, m_Data);
+ const float* inputData1 = GetInputTensorDataFloat(1, m_Data);
+
+ Division(inShape0, inShape1, outShape, inputData0, inputData1, outputData);
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp
new file mode 100644
index 0000000000..e31c255cff
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDivisionFloat32Workload : public Float32Workload<DivisionQueueDescriptor>
+{
+public:
+ using Float32Workload<DivisionQueueDescriptor>::Float32Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp
new file mode 100644
index 0000000000..4354e70271
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "RefDivisionUint8Workload.hpp"
+
+#include "Division.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+#include <vector>
+
+namespace armnn
+{
+
+void RefDivisionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDivisionUint8Workload_Execute");
+
+ const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+ auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
+ auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
+
+ std::vector<float> results(outputInfo.GetNumElements());
+ Division(
+ inputInfo0.GetShape(), inputInfo1.GetShape(), outputInfo.GetShape(),
+ dequant0.data(), dequant1.data(), results.data());
+
+ Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
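The Uint8 workload above follows a dequantize, compute in float, requantize scheme. A standalone sketch of the affine quantization arithmetic this relies on (armnn's Dequantize and Quantize helpers implement the same mapping; the exact rounding and clamping details here are assumptions):

// Illustrative sketch of 8-bit affine (de)quantization.
#include <algorithm>
#include <cmath>
#include <cstdint>

float DequantizeValue(uint8_t q, float scale, int32_t offset)
{
    // real = scale * (quantized - zero point)
    return scale * (static_cast<int32_t>(q) - offset);
}

uint8_t QuantizeValue(float r, float scale, int32_t offset)
{
    // quantized = round(real / scale) + zero point, clamped to [0, 255]
    int32_t q = static_cast<int32_t>(std::round(r / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}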
diff --git a/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp
new file mode 100644
index 0000000000..d9e26ce3dd
--- /dev/null
+++ b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class RefDivisionUint8Workload : public Uint8Workload<DivisionQueueDescriptor>
+{
+public:
+ using Uint8Workload<DivisionQueueDescriptor>::Uint8Workload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp
index aa763801ce..626b1ebd7e 100644
--- a/src/armnn/backends/WorkloadData.cpp
+++ b/src/armnn/backends/WorkloadData.cpp
@@ -798,4 +798,17 @@ void ConvertFp16ToFp32QueueDescriptor::Validate(const WorkloadInfo& workloadInfo
"output");
}
+void DivisionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ ValidateTwoInputs(workloadInfo, "DivisionQueueDescriptor");
+ ValidateSingleOutput(workloadInfo, "DivisionQueueDescriptor");
+
+ ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
+ workloadInfo.m_InputTensorInfos[1],
+ workloadInfo.m_OutputTensorInfos[0],
+ "DivisionQueueDescriptor",
+ "first input",
+ "second input");
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/WorkloadData.hpp b/src/armnn/backends/WorkloadData.hpp
index db266e6df8..e7110a4a2d 100644
--- a/src/armnn/backends/WorkloadData.hpp
+++ b/src/armnn/backends/WorkloadData.hpp
@@ -184,6 +184,12 @@ struct MultiplicationQueueDescriptor : QueueDescriptor
void Validate(const WorkloadInfo& workloadInfo) const;
};
+// Division layer workload data.
+struct DivisionQueueDescriptor : QueueDescriptor
+{
+ void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
// Batch norm layer workload data.
struct BatchNormalizationQueueDescriptor : QueueDescriptorWithParameters<BatchNormalizationDescriptor>
{
diff --git a/src/armnn/backends/WorkloadFactory.cpp b/src/armnn/backends/WorkloadFactory.cpp
index 5708dc0b0c..bdfda2bed0 100644
--- a/src/armnn/backends/WorkloadFactory.cpp
+++ b/src/armnn/backends/WorkloadFactory.cpp
@@ -482,6 +482,19 @@ bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, boo
reasonCapacity);
break;
}
+ case LayerType::Division:
+ {
+ const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+ result = IsDivisionSupported(compute,
+ OverrideDataType(input0, dataType),
+ OverrideDataType(input1, dataType),
+ OverrideDataType(output, dataType),
+ reason,
+ reasonCapacity);
+ break;
+ }
case LayerType::Reshape:
{
const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
diff --git a/src/armnn/backends/WorkloadFactory.hpp b/src/armnn/backends/WorkloadFactory.hpp
index c211a290b3..960a71f891 100644
--- a/src/armnn/backends/WorkloadFactory.hpp
+++ b/src/armnn/backends/WorkloadFactory.hpp
@@ -120,6 +120,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const = 0;
+
+ virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const = 0;
};
} //namespace armnn
diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp
index d0cb7243c3..4f6abad4e9 100644
--- a/src/armnn/backends/test/ArmComputeCl.cpp
+++ b/src/armnn/backends/test/ArmComputeCl.cpp
@@ -140,6 +140,11 @@ ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Tes
ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
+// Div
+ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest)
+
// Mul
ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp
index eca3068822..406dddd934 100644
--- a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp
+++ b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp
@@ -340,6 +340,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Permute)
DECLARE_LAYER_POLICY_2_PARAM(Pooling2d)
+DECLARE_LAYER_POLICY_1_PARAM(Division)
+
DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear)
DECLARE_LAYER_POLICY_2_PARAM(Reshape)
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
index 8039ffb9b1..e916c05397 100644
--- a/src/armnn/backends/test/LayerTests.cpp
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -1070,6 +1070,128 @@ LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFa
}
namespace {
+ LayerTestResult<float,4> DivisionTestHelper(armnn::IWorkloadFactory& workloadFactory,
+ const unsigned int shape0[4],
+ const std::vector<float> & values0,
+ const unsigned int shape1[4],
+ const std::vector<float> & values1,
+ const unsigned int outShape[4],
+ const std::vector<float> & outValues)
+ {
+ const size_t dimensionCount = 4;
+ armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
+ armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
+ armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
+
+ auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
+ auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
+
+ LayerTestResult<float,4> ret(outputTensorInfo);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
+ std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::DivisionQueueDescriptor data;
+ armnn::WorkloadInfo info;
+ AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
+ AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDivision(data, info);
+
+ inputHandle0->Allocate();
+ inputHandle1->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
+ CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
+
+ workloadFactory.Finalize();
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+ ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
+ return ret;
+ }
+} // anonymous namespace
+
+LayerTestResult<float,4> DivisionTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ const unsigned int width = 2;
+ const unsigned int height = 2;
+ const unsigned int channelCount = 2;
+ const unsigned int batchSize = 2;
+
+ unsigned int shape[] = { batchSize, channelCount, height, width };
+
+ std::vector<float> input0({
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5 });
+
+ std::vector<float> input1({
+ 1, 1, 1, 1, 2, 2, 2, 2,
+ 4, 4, 4, 4, 4, 4, 4, 4 });
+
+ std::vector<float> output({
+ 2, 2, 2, 2, 1.5, 1.5, 1.5, 1.5,
+ 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.25 });
+
+ return DivisionTestHelper(workloadFactory,
+ shape,
+ input0,
+ shape,
+ input1,
+ shape,
+ output);
+}
+
+LayerTestResult<float, 4> DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ unsigned int shape0[] = { 1, 2, 2, 2 };
+ std::vector<float> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
+
+ unsigned int shape1[] = { 1, 1, 1, 1 };
+ std::vector<float> input1({ 2 });
+
+ std::vector<float> output({ 1, 2, 3, 4, 5, 6, 7, 8});
+
+ return DivisionTestHelper(workloadFactory,
+ shape0,
+ input0,
+ shape1,
+ input1,
+ shape0,
+ output);
+}
+
+LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ unsigned int shape0[] = { 1, 3, 3, 2 };
+ std::vector<float> input0({
+ 1, 4, 3, 8, 5, 12,
+ 7, 16, 9, 20, 11, 24,
+ 13, 28, 15, 32, 17, 36});
+
+ unsigned int shape1[] = { 1, 1, 1, 2 };
+ std::vector<float> input1({ 1, 2 });
+
+ std::vector<float> output({
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18});
+
+ return DivisionTestHelper(workloadFactory,
+ shape0,
+ input0,
+ shape1,
+ input1,
+ shape0,
+ output);
+}
+
+namespace {
LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory,
const unsigned int shape0[4],
const std::vector<float> & values0,
diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp
index 48f73e7693..a59ff05c90 100644
--- a/src/armnn/backends/test/LayerTests.hpp
+++ b/src/armnn/backends/test/LayerTests.hpp
@@ -192,6 +192,10 @@ LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& worklo
armnn::ActivationFunction f,
unsigned int batchSize);
+LayerTestResult<float, 4> DivisionTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory);
+
LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp
index dedeb50e33..b31723c3a3 100644
--- a/src/armnn/backends/test/Reference.cpp
+++ b/src/armnn/backends/test/Reference.cpp
@@ -146,6 +146,11 @@ ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test)
ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test)
ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test)
+// Div
+ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest)
+
// Mul
ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
diff --git a/src/armnn/layers/DivisionLayer.cpp b/src/armnn/layers/DivisionLayer.cpp
new file mode 100644
index 0000000000..bf09e14229
--- /dev/null
+++ b/src/armnn/layers/DivisionLayer.cpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "DivisionLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backends/WorkloadData.hpp>
+#include <backends/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+DivisionLayer::DivisionLayer(const char* name)
+ : Layer(2, 1, LayerType::Division, name)
+{
+}
+
+std::unique_ptr<IWorkload> DivisionLayer::CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const
+{
+ DivisionQueueDescriptor descriptor;
+
+ return factory.CreateDivision(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+DivisionLayer* DivisionLayer::Clone(Graph& graph) const
+{
+ return CloneBase<DivisionLayer>(graph, GetName());
+}
+
+std::vector<TensorShape> DivisionLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
+{
+ BOOST_ASSERT(inputShapes.size() == 2);
+ auto& input0 = inputShapes[0];
+ auto& input1 = inputShapes[1];
+
+ // Get the max of the inputs.
+ BOOST_ASSERT(input0.GetNumDimensions() == input1.GetNumDimensions());
+ unsigned int numDims = input0.GetNumDimensions();
+ std::vector<unsigned int> dims(numDims);
+
+ for (unsigned int i = 0; i < numDims; i++)
+ {
+ unsigned int dim0 = input0[i];
+ unsigned int dim1 = input1[i];
+
+ // Validates inputs are broadcast compatible.
+#if !NDEBUG
+ if (dim0 != dim1)
+ {
+ BOOST_ASSERT_MSG(dim0 == 1 || dim1 == 1, "Dimensions should either match or one should be of size 1.");
+ }
+#endif
+
+ dims[i] = std::max(dim0, dim1);
+ }
+
+ return std::vector<TensorShape>({ TensorShape(numDims, dims.data()) });
+}
+
+void DivisionLayer::ValidateTensorShapesFromInputs()
+{
+ VerifyLayerConnections(2, CHECK_LOCATION());
+
+ auto inferredShapes = InferOutputShapes({
+ GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(),
+ GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()
+ });
+
+ BOOST_ASSERT(inferredShapes.size() == 1);
+
+ ConditionalThrowIfNotEqual<LayerValidationException>(
+ "DivisionLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+ GetOutputSlot(0).GetTensorInfo().GetShape(),
+ inferredShapes[0]);
+}
+
+} // namespace armnn
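InferOutputShapes above applies the usual broadcast rule: per dimension, the two sizes must match or one must be 1, and the output takes the larger size, so {1,3,3,2} against {1,1,1,2} infers {1,3,3,2}. A standalone sketch of that rule:

// Illustrative reimplementation of the per-dimension broadcast rule.
#include <algorithm>
#include <cassert>
#include <vector>

std::vector<unsigned int> BroadcastShape(const std::vector<unsigned int>& s0,
                                         const std::vector<unsigned int>& s1)
{
    assert(s0.size() == s1.size());
    std::vector<unsigned int> out(s0.size());
    for (size_t i = 0; i < s0.size(); ++i)
    {
        // Dimensions must match or one must be of size 1.
        assert(s0[i] == s1[i] || s0[i] == 1 || s1[i] == 1);
        out[i] = std::max(s0[i], s1[i]);
    }
    return out;
}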
diff --git a/src/armnn/layers/DivisionLayer.hpp b/src/armnn/layers/DivisionLayer.hpp
new file mode 100644
index 0000000000..1bd69c4446
--- /dev/null
+++ b/src/armnn/layers/DivisionLayer.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+class DivisionLayer : public Layer
+{
+public:
+ virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph,
+ const IWorkloadFactory& factory) const override;
+
+ DivisionLayer* Clone(Graph& graph) const override;
+
+ void ValidateTensorShapesFromInputs() override;
+ std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
+
+protected:
+ DivisionLayer(const char* name);
+ ~DivisionLayer() = default;
+};
+
+} // namespace armnn
diff --git a/src/armnn/test/UnitTests.hpp b/src/armnn/test/UnitTests.hpp
index 8d5c7055e7..beb765f16a 100644
--- a/src/armnn/test/UnitTests.hpp
+++ b/src/armnn/test/UnitTests.hpp
@@ -8,6 +8,7 @@
#include "armnn/Utils.hpp"
#include "backends/RefWorkloadFactory.hpp"
#include "backends/test/LayerTests.hpp"
+#include "TensorHelpers.hpp"
#include <boost/test/unit_test.hpp>
inline void ConfigureLoggingTest()