From e7a86a4a3363993fb41b1ea62f23b3643b8b0c78 Mon Sep 17 00:00:00 2001 From: Francis Murtagh Date: Wed, 29 Aug 2018 12:42:10 +0100 Subject: IVGCVSW-1200 Division layer *IVGCVSW-1772 Create QueueDescriptors *IVGCVSW-1773 Add a CL implementation of the DivisionWorkload *IVGCVSW-1774 Add Neon implementation of the DivisionWorkload *IVGCVSW-1775 Add a Ref implementation of the DivisionWorkload *IVGCVSW-1776 Add a Division Layer * Added simple division unit tests with broadcasting Change-Id: I05751fb7f868789f6c06f91e8d25e52b4f12ab5e --- src/armnn/backends/ClLayerSupport.cpp | 14 ++- src/armnn/backends/ClLayerSupport.hpp | 5 + src/armnn/backends/ClWorkloadFactory.cpp | 12 ++ src/armnn/backends/ClWorkloadFactory.hpp | 3 + src/armnn/backends/ClWorkloads.hpp | 1 + .../ClWorkloads/ClDivisionFloatWorkload.cpp | 49 +++++++++ .../ClWorkloads/ClDivisionFloatWorkload.hpp | 30 +++++ src/armnn/backends/NeonLayerSupport.cpp | 9 ++ src/armnn/backends/NeonLayerSupport.hpp | 5 + src/armnn/backends/NeonWorkloadFactory.cpp | 12 ++ src/armnn/backends/NeonWorkloadFactory.hpp | 3 + src/armnn/backends/RefLayerSupport.cpp | 13 +++ src/armnn/backends/RefLayerSupport.hpp | 5 + src/armnn/backends/RefWorkloadFactory.cpp | 6 + src/armnn/backends/RefWorkloadFactory.hpp | 3 + src/armnn/backends/RefWorkloads.hpp | 2 + src/armnn/backends/RefWorkloads/Division.cpp | 52 +++++++++ src/armnn/backends/RefWorkloads/Division.hpp | 20 ++++ .../RefWorkloads/RefDivisionFloat32Workload.cpp | 31 ++++++ .../RefWorkloads/RefDivisionFloat32Workload.hpp | 21 ++++ .../RefWorkloads/RefDivisionUint8Workload.cpp | 37 +++++++ .../RefWorkloads/RefDivisionUint8Workload.hpp | 21 ++++ src/armnn/backends/WorkloadData.cpp | 13 +++ src/armnn/backends/WorkloadData.hpp | 6 + src/armnn/backends/WorkloadFactory.cpp | 13 +++ src/armnn/backends/WorkloadFactory.hpp | 3 + src/armnn/backends/test/ArmComputeCl.cpp | 5 + .../backends/test/IsLayerSupportedTestImpl.hpp | 2 + src/armnn/backends/test/LayerTests.cpp | 122 +++++++++++++++++++++ 
src/armnn/backends/test/LayerTests.hpp | 4 + src/armnn/backends/test/Reference.cpp | 5 + 31 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp create mode 100644 src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp create mode 100644 src/armnn/backends/RefWorkloads/Division.cpp create mode 100644 src/armnn/backends/RefWorkloads/Division.hpp create mode 100644 src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp create mode 100644 src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp create mode 100644 src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp create mode 100644 src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp (limited to 'src/armnn/backends') diff --git a/src/armnn/backends/ClLayerSupport.cpp b/src/armnn/backends/ClLayerSupport.cpp index b00a218a72..77e74f57c6 100644 --- a/src/armnn/backends/ClLayerSupport.cpp +++ b/src/armnn/backends/ClLayerSupport.cpp @@ -17,11 +17,11 @@ #include "ClWorkloads/ClAdditionFloat32Workload.hpp" #include "ClWorkloads/ClActivationFloat32Workload.hpp" #include "ClWorkloads/ClBatchNormalizationFloat32Workload.hpp" - #include "ClWorkloads/ClConvertFp16ToFp32Workload.hpp" #include "ClWorkloads/ClConvertFp32ToFp16Workload.hpp" #include "ClWorkloads/ClConvolution2dBaseWorkload.hpp" #include "ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp" +#include "ClWorkloads/ClDivisionFloatWorkload.hpp" #include "ClWorkloads/ClL2NormalizationFloat32Workload.hpp" #include "ClWorkloads/ClMultiplicationFloat32Workload.hpp" #include "ClWorkloads/ClFullyConnectedFloat32Workload.hpp" @@ -238,6 +238,18 @@ bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, biases); } +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + 
output); +} + bool IsFullyConnectedSupportedCl(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/ClLayerSupport.hpp b/src/armnn/backends/ClLayerSupport.hpp index ae5f4b0c54..71bbe7c2db 100644 --- a/src/armnn/backends/ClLayerSupport.hpp +++ b/src/armnn/backends/ClLayerSupport.hpp @@ -54,6 +54,11 @@ bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, const boost::optional& biases, std::string* reasonIfUnsupported = nullptr); +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + bool IsFullyConnectedSupportedCl(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp index 354440c7bc..77959d1e64 100644 --- a/src/armnn/backends/ClWorkloadFactory.cpp +++ b/src/armnn/backends/ClWorkloadFactory.cpp @@ -163,6 +163,12 @@ std::unique_ptr ClWorkloadFactory::CreateMultiplication( return MakeWorkload(descriptor, info); } +std::unique_ptr ClWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { @@ -423,6 +429,12 @@ std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( return nullptr; } +std::unique_ptr ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + void ClWorkloadFactory::Finalize() { } diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp index d0786f3fba..ab8c9268d5 100644 --- a/src/armnn/backends/ClWorkloadFactory.hpp +++ b/src/armnn/backends/ClWorkloadFactory.hpp @@ -108,6 +108,9 @@ public: virtual 
std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const override; + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + virtual void Finalize() override; virtual void Release() override; diff --git a/src/armnn/backends/ClWorkloads.hpp b/src/armnn/backends/ClWorkloads.hpp index 9f5622a491..6eb8adabe4 100644 --- a/src/armnn/backends/ClWorkloads.hpp +++ b/src/armnn/backends/ClWorkloads.hpp @@ -17,6 +17,7 @@ #include "backends/ClWorkloads/ClConvolution2dUint8Workload.hpp" #include "backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp" #include "backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp" +#include "backends/ClWorkloads/ClDivisionFloatWorkload.hpp" #include "backends/ClWorkloads/ClFloorFloat32Workload.hpp" #include "backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp" #include "backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp" diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp new file mode 100644 index 0000000000..07345c345c --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "ClDivisionFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // Validation is delegated to the Compute Library: the two input infos and the output info are + // forwarded unchanged to CLArithmeticDivision::validate() and its status is returned as-is. + // No scale or rounding policy is passed in this call. + return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); +} + + +ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Configure the division function with its two inputs and its output + m_ArithmeticDivision.configure(&input0, &input1, &output); +} + +void ClDivisionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); + + // Executes the layer. 
+ m_ArithmeticDivision.run(); +} + +} //namespace armnn diff --git a/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp new file mode 100644 index 0000000000..bd06d38e8b --- /dev/null +++ b/src/armnn/backends/ClWorkloads/ClDivisionFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClDivisionFloatWorkload : public FloatWorkload +{ +public: + ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const + WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp index 8f73b05460..48b3ccbfa0 100644 --- a/src/armnn/backends/NeonLayerSupport.cpp +++ b/src/armnn/backends/NeonLayerSupport.cpp @@ -225,6 +225,15 @@ bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, biases); } +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + // At the moment division is not supported + return false; +} + bool IsFullyConnectedSupportedNeon(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/NeonLayerSupport.hpp b/src/armnn/backends/NeonLayerSupport.hpp index 45032444a4..654d06be27 100644 --- a/src/armnn/backends/NeonLayerSupport.hpp +++ b/src/armnn/backends/NeonLayerSupport.hpp @@ -59,6 +59,11 @@ bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, const 
boost::optional& biases, std::string* reasonIfUnsupported = nullptr); +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + bool IsFullyConnectedSupportedNeon(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp index 6ea72f77cc..2332b8b845 100644 --- a/src/armnn/backends/NeonWorkloadFactory.cpp +++ b/src/armnn/backends/NeonWorkloadFactory.cpp @@ -156,6 +156,12 @@ std::unique_ptr NeonWorkloadFactory::CreateMultiplication( return MakeWorkload(descriptor, info); } +std::unique_ptr NeonWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + std::unique_ptr NeonWorkloadFactory::CreateBatchNormalization( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { @@ -417,6 +423,12 @@ std::unique_ptr NeonWorkloadFactory::CreateConvertFp32ToFp16( return nullptr; } +std::unique_ptr NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + void NeonWorkloadFactory::Finalize() {} diff --git a/src/armnn/backends/NeonWorkloadFactory.hpp b/src/armnn/backends/NeonWorkloadFactory.hpp index 83e1f5e75f..f6ddb6d332 100644 --- a/src/armnn/backends/NeonWorkloadFactory.hpp +++ b/src/armnn/backends/NeonWorkloadFactory.hpp @@ -108,6 +108,9 @@ public: virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const override; + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + virtual void Finalize() override; virtual void Release() override; diff --git a/src/armnn/backends/RefLayerSupport.cpp 
b/src/armnn/backends/RefLayerSupport.cpp index dd89dd51b3..ff5809c6e8 100644 --- a/src/armnn/backends/RefLayerSupport.cpp +++ b/src/armnn/backends/RefLayerSupport.cpp @@ -117,6 +117,19 @@ bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, &TrueFunc<>); } +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + bool IsFullyConnectedSupportedRef(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/RefLayerSupport.hpp b/src/armnn/backends/RefLayerSupport.hpp index fde09685ac..900cf6923a 100644 --- a/src/armnn/backends/RefLayerSupport.hpp +++ b/src/armnn/backends/RefLayerSupport.hpp @@ -51,6 +51,11 @@ bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, const boost::optional& biases, std::string* reasonIfUnsupported = nullptr); +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + bool IsFullyConnectedSupportedRef(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, diff --git a/src/armnn/backends/RefWorkloadFactory.cpp b/src/armnn/backends/RefWorkloadFactory.cpp index 9294c5accc..b4e4cf9df2 100644 --- a/src/armnn/backends/RefWorkloadFactory.cpp +++ b/src/armnn/backends/RefWorkloadFactory.cpp @@ -221,4 +221,10 @@ std::unique_ptr RefWorkloadFactory::CreateConvertFp32ToFp16( return std::make_unique(descriptor, info); } +std::unique_ptr RefWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + } // namespace armnn diff --git a/src/armnn/backends/RefWorkloadFactory.hpp 
b/src/armnn/backends/RefWorkloadFactory.hpp index ee8639f8ed..9b9465ccfc 100644 --- a/src/armnn/backends/RefWorkloadFactory.hpp +++ b/src/armnn/backends/RefWorkloadFactory.hpp @@ -124,6 +124,9 @@ public: virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const override; + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + private: template diff --git a/src/armnn/backends/RefWorkloads.hpp b/src/armnn/backends/RefWorkloads.hpp index 1defdbbe82..8ce21d490c 100644 --- a/src/armnn/backends/RefWorkloads.hpp +++ b/src/armnn/backends/RefWorkloads.hpp @@ -55,3 +55,5 @@ #include "backends/RefWorkloads/RefLstmFloat32Workload.hpp" #include "backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp" #include "backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp" +#include "backends/RefWorkloads/RefDivisionFloat32Workload.hpp" +#include "backends/RefWorkloads/RefDivisionUint8Workload.hpp" diff --git a/src/armnn/backends/RefWorkloads/Division.cpp b/src/armnn/backends/RefWorkloads/Division.cpp new file mode 100644 index 0000000000..9837fea6b4 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Division.cpp @@ -0,0 +1,52 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "Division.hpp" +#include "Broadcast.hpp" + +#include + +namespace +{ + +void ElementwiseDivision(unsigned int numElements, + const float* inData0, + const float* inData1, + float* outData) +{ + for (unsigned int i = 0; i < numElements; ++i) + { + // TODO: decide how a zero divisor should be handled; no check is made before dividing + outData[i] = inData0[i] / inData1[i]; + } +} + +} // namespace + +namespace armnn +{ + +void Division(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData) +{ + if (inShape0 == inShape1) + { + ElementwiseDivision(inShape0.GetNumElements(), inData0, inData1, outData); + } + else + { + BroadcastLoop(inShape0, inShape1, outShape).Unroll(std::divides(), + 0, + inData0, + inData1, + outData); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/Division.hpp b/src/armnn/backends/RefWorkloads/Division.hpp new file mode 100644 index 0000000000..d4c7e8dc8f --- /dev/null +++ b/src/armnn/backends/RefWorkloads/Division.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +namespace armnn +{ + + void Division(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData); + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp new file mode 100644 index 0000000000..7cbd1fae5b --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefDivisionFloat32Workload.hpp" + +#include "Division.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefDivisionFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDivisionFloat32Workload_Execute"); + + const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape(); + const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape(); + const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape(); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData0 = GetInputTensorDataFloat(0, m_Data); + const float* inputData1 = GetInputTensorDataFloat(1, m_Data); + + Division(inShape0, inShape1, outShape, inputData0, inputData1, outputData); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp new file mode 100644 index 0000000000..e31c255cff --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDivisionFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDivisionFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp new file mode 100644 index 0000000000..4354e70271 --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "RefDivisionUint8Workload.hpp" + +#include "Division.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefDivisionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDivisionUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(m_Data.m_Inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant0 = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1); + + std::vector results(outputInfo.GetNumElements()); + Division( + inputInfo0.GetShape(), inputInfo1.GetShape(), outputInfo.GetShape(), + dequant0.data(), dequant1.data(),results.data()); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp new file mode 100644 index 0000000000..d9e26ce3dd --- /dev/null +++ b/src/armnn/backends/RefWorkloads/RefDivisionUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDivisionUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp index aa763801ce..626b1ebd7e 100644 --- a/src/armnn/backends/WorkloadData.cpp +++ b/src/armnn/backends/WorkloadData.cpp @@ -798,4 +798,17 @@ void ConvertFp16ToFp32QueueDescriptor::Validate(const WorkloadInfo& workloadInfo "output"); } +void DivisionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "DivisionQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "DivisionQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "DivisionQueueDescriptor", + "first input", + "second input"); +} + } //namespace armnn diff --git a/src/armnn/backends/WorkloadData.hpp b/src/armnn/backends/WorkloadData.hpp index db266e6df8..e7110a4a2d 100644 --- a/src/armnn/backends/WorkloadData.hpp +++ b/src/armnn/backends/WorkloadData.hpp @@ -184,6 +184,12 @@ struct MultiplicationQueueDescriptor : QueueDescriptor void Validate(const WorkloadInfo& workloadInfo) const; }; +// Division layer workload data. +struct DivisionQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + // Batch norm layer workload data. 
struct BatchNormalizationQueueDescriptor : QueueDescriptorWithParameters { diff --git a/src/armnn/backends/WorkloadFactory.cpp b/src/armnn/backends/WorkloadFactory.cpp index 5708dc0b0c..bdfda2bed0 100644 --- a/src/armnn/backends/WorkloadFactory.cpp +++ b/src/armnn/backends/WorkloadFactory.cpp @@ -482,6 +482,19 @@ bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, boo reasonCapacity); break; } + case LayerType::Division: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsDivisionSupported(compute, + OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType), + reason, + reasonCapacity); + break; + } case LayerType::Reshape: { const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); diff --git a/src/armnn/backends/WorkloadFactory.hpp b/src/armnn/backends/WorkloadFactory.hpp index c211a290b3..960a71f891 100644 --- a/src/armnn/backends/WorkloadFactory.hpp +++ b/src/armnn/backends/WorkloadFactory.hpp @@ -120,6 +120,9 @@ public: virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; }; } //namespace armnn diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp index d0cb7243c3..4f6abad4e9 100644 --- a/src/armnn/backends/test/ArmComputeCl.cpp +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -140,6 +140,11 @@ ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Tes ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) +// Div 
+ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) + // Mul ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp index eca3068822..406dddd934 100644 --- a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp +++ b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp @@ -340,6 +340,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Permute) DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) +DECLARE_LAYER_POLICY_1_PARAM(Division) + DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) DECLARE_LAYER_POLICY_2_PARAM(Reshape) diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp index 8039ffb9b1..e916c05397 100644 --- a/src/armnn/backends/test/LayerTests.cpp +++ b/src/armnn/backends/test/LayerTests.cpp @@ -1069,6 +1069,128 @@ LayerTestResult CompareAdditionTest(armnn::IWorkloadFactory& workloadFa return ret; } +namespace { + LayerTestResult DivisionTestHelper(armnn::IWorkloadFactory& workloadFactory, + const unsigned int shape0[4], + const std::vector & values0, + const unsigned int shape1[4], + const std::vector & values1, + const unsigned int outShape[4], + const std::vector & outValues) + { + const size_t dimensionCount = 4; + armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32}; + armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32}; + armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32}; + + auto input0 = MakeTensor(inputTensorInfo0, values0); + auto input1 = MakeTensor(inputTensorInfo1, values1); + + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle0 = 
workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DivisionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateDivision(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor(outputTensorInfo, outValues); + return ret; + } +} // anonymous namespace + +LayerTestResult DivisionTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + unsigned int shape[] = { batchSize, channelCount, height, width }; + + std::vector input0({ + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 }); + + std::vector input1({ + 1, 1, 1, 1, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4 }); + + std::vector output({ + 2, 2, 2, 2, 1.5, 1.5, 1.5, 1.5, + 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.25 }); + + return DivisionTestHelper(workloadFactory, + shape, + input0, + shape, + input1, + shape, + output); +} + +LayerTestResult DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 2, 2, 2 }; + std::vector input0({ 2, 4, 6, 8, 10, 12, 14, 16}); + + unsigned int shape1[] = { 1, 1, 1, 1 }; + std::vector input1({ 2 }); + + std::vector output({ 1, 
2, 3, 4, 5, 6, 7, 8}); + + return DivisionTestHelper(workloadFactory, + shape0, + input0, + shape1, + input1, + shape0, + output); +} + +LayerTestResult DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 3, 3, 2 }; + std::vector input0({ + 1, 4, 3, 8, 5, 12, + 7, 16, 9, 20, 11, 24, + 13, 28, 15, 32, 17, 36}); + + unsigned int shape1[] = { 1, 1, 1, 2 }; + std::vector input1({ 1, 2 }); + + std::vector output({ + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18}); + + return DivisionTestHelper(workloadFactory, + shape0, + input0, + shape1, + input1, + shape0, + output); +} + namespace { LayerTestResult MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory, const unsigned int shape0[4], diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp index 48f73e7693..a59ff05c90 100644 --- a/src/armnn/backends/test/LayerTests.hpp +++ b/src/armnn/backends/test/LayerTests.hpp @@ -192,6 +192,10 @@ LayerTestResult CompareActivationTest(armnn::IWorkloadFactory& worklo armnn::ActivationFunction f, unsigned int batchSize); +LayerTestResult DivisionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); + LayerTestResult MultiplicationTest(armnn::IWorkloadFactory& workloadFactory); LayerTestResult MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); LayerTestResult MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp index dedeb50e33..b31723c3a3 100644 --- a/src/armnn/backends/test/Reference.cpp +++ b/src/armnn/backends/test/Reference.cpp @@ -146,6 +146,11 @@ ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, 
AdditionBroadcastUint8Test) ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) +// Div +ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) + // Mul ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) -- cgit v1.2.1