From bceff2fb3fc68bb0aa88b886900c34b77340c826 Mon Sep 17 00:00:00 2001
From: surmeh01
Date: Thu, 29 Mar 2018 16:29:27 +0100
Subject: Release 18.03

---
 src/armnn/backends/ArmComputeTensorUtils.cpp       |   7 +-
 src/armnn/backends/ClWorkloadFactory.cpp           |  67 ++++-
 src/armnn/backends/ClWorkloadFactory.hpp           |  11 +-
 src/armnn/backends/NeonLayerSupport.cpp            |  26 +-
 src/armnn/backends/NeonWorkloadFactory.cpp         |   2 +-
 src/armnn/backends/NeonWorkloads.hpp               |   2 +
 .../NeonConvolution2dBaseWorkload.cpp              |   4 -
 .../NeonConvolution2dBaseWorkload.hpp              |   2 +
 .../NeonConvolution2dFloat32Workload.cpp           |   7 +-
 .../NeonConvolution2dUint8Workload.cpp             |  33 +++
 .../NeonConvolution2dUint8Workload.hpp             |  27 ++
 src/armnn/backends/RefWorkloads/Addition.cpp       |   6 +-
 src/armnn/backends/RefWorkloads/Merger.hpp         |   1 +
 src/armnn/backends/RefWorkloads/Multiplication.cpp |  42 ++-
 src/armnn/backends/RefWorkloads/Multiplication.hpp |  12 +-
 src/armnn/backends/RefWorkloads/Pooling2d.cpp      |   4 +-
 .../RefMultiplicationFloat32Workload.cpp           |   7 +-
 .../RefMultiplicationUint8Workload.cpp             |   7 +-
 src/armnn/backends/RefWorkloads/Splitter.hpp       |   1 +
 src/armnn/backends/WorkloadData.cpp                |  17 +-
 src/armnn/backends/test/ArmComputeCl.cpp           |  13 +-
 src/armnn/backends/test/ArmComputeNeon.cpp         |  10 +
 src/armnn/backends/test/LayerTests.cpp             | 322 ++++++++++++++++-----
 src/armnn/backends/test/LayerTests.hpp             |   9 +
 src/armnn/backends/test/PermuteTestImpl.hpp        | 104 +++++++
 src/armnn/backends/test/Pooling2dTestImpl.hpp      |  77 +++++
 src/armnn/backends/test/Reference.cpp              |  11 +
 27 files changed, 705 insertions(+), 126 deletions(-)
 create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
 create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp

diff --git a/src/armnn/backends/ArmComputeTensorUtils.cpp b/src/armnn/backends/ArmComputeTensorUtils.cpp
index 9f21c41a2f..f88ed2b4c3 100644
--- a/src/armnn/backends/ArmComputeTensorUtils.cpp
+++ b/src/armnn/backends/ArmComputeTensorUtils.cpp
@@ -78,6 +78,7 @@ arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDes
     using arm_compute::DimensionRoundingType;
     using arm_compute::PadStrideInfo;
     using arm_compute::PoolingLayerInfo;
+    using arm_compute::Size2D;
 
     // Resolve ARM Compute layer parameters
     const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
@@ -94,7 +95,9 @@ arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDes
 
     const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
 
-    return arm_compute::PoolingLayerInfo(poolingType, descriptor.m_PoolWidth, padStrideInfo, excludePadding);
+    const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
+
+    return arm_compute::PoolingLayerInfo(poolingType, poolSize, padStrideInfo, excludePadding);
 }
 
 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
@@ -114,7 +117,7 @@ arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::Per
     arm_compute::PermutationVector aclPerm;
 
     unsigned int start = 0;
-    while ((start == perm[start]) && (start < perm.GetSize()))
+    while ((start < perm.GetSize()) && (start == perm[start]))
     {
         ++start;
     }
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 4e565a05d7..6af657b6b4 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -35,24 +35,62 @@ bool
ClWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType,
 
 #ifdef ARMCOMPUTECL_ENABLED
 
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters):
+    m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
 {
-    ClTunedParameters* clTunedParametersImpl = boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters);
+    try
+    {
+        std::vector<cl::Platform> platforms;
+        cl::Platform::get(&platforms);
+
+        // Select default platform as the first element
+        cl::Platform::setDefault(platforms[0]);
+
+        std::vector<cl::Device> devices;
+        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+        // Select default device as the first element
+        cl::Device::setDefault(devices[0]);
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(boost::str(boost::format(
+            "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+        ) % clError.what() % clError.err()));
+    }
+
+    // Remove the use of global CL context
+    cl::Context::setDefault(cl::Context{});
+    BOOST_ASSERT(cl::Context::getDefault()() == NULL);
 
-    cl::Device device;
+    // Remove the use of global CL command queue
+    cl::CommandQueue::setDefault(cl::CommandQueue{});
+    BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
+{
+    cl::Device device = cl::Device::getDefault();
     cl::Context context;
     cl::CommandQueue commandQueue;
 
     try
     {
-        device = cl::Device::getDefault();
-        context = cl::Context::getDefault();
+        arm_compute::CLKernelLibrary::get().clear_programs_cache();
+        arm_compute::CLScheduler::get().init(context, commandQueue, device);
+        arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+        context = cl::Context(device);
 
         bool enableProfiling = false;
 #if ARMNN_PROFILING_ENABLED
         enableProfiling = true;
 #endif
-        if (clTunedParametersImpl && clTunedParametersImpl->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
+        if (m_clTunedParameters && m_clTunedParameters->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
         {
             enableProfiling = true; // Needed for the CLTuner to work.
        }
@@ -65,7 +103,7 @@ void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
         else
         {
             // Use default queue
-            commandQueue = cl::CommandQueue::getDefault();
+            commandQueue = cl::CommandQueue(context, device);
         }
     }
     catch (const cl::Error& clError)
@@ -79,9 +117,9 @@ void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
     arm_compute::CLKernelLibrary::get().init(".", context, device);
 
     arm_compute::ICLTuner* tuner = nullptr;
-    if (clTunedParameters)
+    if (m_clTunedParameters)
     {
-        tuner = &clTunedParametersImpl->m_Tuner;
+        tuner = &m_clTunedParameters->m_Tuner;
     }
     arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner);
 }
@@ -266,7 +304,16 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
 
 #else // #if ARMCOMPUTECL_ENABLED
 
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters)
+{
+    // No CL support
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
 {
     // No CL support
 }
diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp
index 2477e23eeb..e1e66c050b 100644
--- a/src/armnn/backends/ClWorkloadFactory.hpp
+++ b/src/armnn/backends/ClWorkloadFactory.hpp
@@ -23,18 +23,22 @@ namespace armnn
 {
 
 class IClTunedParameters;
+class ClTunedParameters;
 
 // ARM Compute OpenCL workload factory
 class ClWorkloadFactory : public IWorkloadFactory
 {
 public:
-    virtual ~ClWorkloadFactory(){};
+
+    ClWorkloadFactory(IClTunedParameters* clTunedParameters = nullptr);
+
+    virtual ~ClWorkloadFactory();
 
     virtual Compute GetCompute() const override { return Compute::GpuAcc; }
 
     static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported);
 
-    void LoadOpenClRuntime(IClTunedParameters* clTunedParameters = nullptr);
+    void LoadOpenClRuntime();
 
     virtual bool SupportsSubTensors() const override { return true; }
 
@@ -109,6 +113,9 @@ public:
 
     virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
+
+private:
+    ClTunedParameters* m_clTunedParameters;
 };
 
 class ClTunedParameters : public IClTunedParameters
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp
index 382b15e277..d8a3366775 100644
--- a/src/armnn/backends/NeonLayerSupport.cpp
+++ b/src/armnn/backends/NeonLayerSupport.cpp
@@ -71,6 +71,22 @@ bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convol
     return preferDirectConvolution;
 }
 
+bool IsNeonMultiplicationParamsSupported(std::string* reasonIfUnsupported,
+                                         const TensorInfo& info0,
+                                         const TensorInfo& info1)
+{
+    if (info0.GetShape() == info1.GetShape())
+    {
+        return true;
+    }
+
+    if (reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = "Multiplication on Neon does not support implicit broadcast.";
+    }
+    return false;
+}
+
 bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters)
 {
     if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness)
@@ -233,7 +249,7 @@ bool IsConvolution2dSupportedNeon(const TensorInfo& input,
     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                       input.GetDataType(),
                                       &TrueFunc<>,
-                                      &FalseFuncU8<>);
+                                      &TrueFunc<>);
 }
 
 bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
@@ -293,11 +309,13 @@ bool IsMultiplicationSupportedNeon(const TensorInfo& input0,
                                    const TensorInfo&
input1,
                                    std::string* reasonIfUnsupported)
 {
-    ignore_unused(input1);
     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                       input0.GetDataType(),
-                                      &TrueFunc<>,
-                                      &FalseFuncU8<>);
+                                      &IsNeonMultiplicationParamsSupported,
+                                      &FalseFuncU8<const TensorInfo&, const TensorInfo&>,
+                                      input0,
+                                      input1
+                                      );
 }
 
 bool IsNormalizationSupportedNeon(const TensorInfo& input,
diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp
index 384284114f..0f65a3dcd7 100644
--- a/src/armnn/backends/NeonWorkloadFactory.cpp
+++ b/src/armnn/backends/NeonWorkloadFactory.cpp
@@ -112,7 +112,7 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Poo
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<NeonConvolution2dFloat32Workload, NullWorkload>(descriptor, info);
+    return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info);
 }
 
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
diff --git a/src/armnn/backends/NeonWorkloads.hpp b/src/armnn/backends/NeonWorkloads.hpp
index 7e9e885adc..83a3e9fd9b 100644
--- a/src/armnn/backends/NeonWorkloads.hpp
+++ b/src/armnn/backends/NeonWorkloads.hpp
@@ -13,7 +13,9 @@
 #include "backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonConstantFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp"
 #include "backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp"
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp"
 #include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp"
 #include "backends/NeonWorkloads/NeonFloorFloat32Workload.hpp"
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
index 5099965a24..10c96d82a6 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -73,10 +73,6 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
     using Type = ResolveType<dataType>;
 
     InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>());
-    if (m_Data.m_Parameters.m_BiasEnabled)
-    {
-        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<Type>());
-    }
 }
 
 // Generate known implementations for linker
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
index 37740511ba..98d075a5ea 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -3,6 +3,8 @@
 // See LICENSE file in the project root for full license information.
 //
+#pragma once
+
 #include <backends/Workload.hpp>
 #include <backends/NeonWorkloadUtils.hpp>
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
index b4650ac011..a8c5c63683 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -15,7 +15,12 @@ using namespace armcomputetensorutils;
 NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
     const WorkloadInfo& info)
     : NeonConvolution2dBaseWorkload(descriptor, info)
-{}
+{
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<float>());
+    }
+}
 
 void NeonConvolution2dFloat32Workload::Execute() const
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..ae20522361
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvolution2dUint8Workload.hpp"
+
+
+namespace armnn
+{
+NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info)
+    : NeonConvolution2dBaseWorkload(descriptor, info)
+{
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
+    }
+}
+
+
+void NeonConvolution2dUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, NeonConvolution2dUint8Workload_Execute);
+    m_ConvolutionLayer->run();
+}
+
+void NeonConvolution2dUint8Workload::ValidateData() const
+{
+    m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1);
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..319d574b1e
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    virtual void ValidateData() const override;
+    virtual void Execute() const override;
+private:
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/RefWorkloads/Addition.cpp b/src/armnn/backends/RefWorkloads/Addition.cpp
index c26f82ecc2..6d53a702e4 100644
--- a/src/armnn/backends/RefWorkloads/Addition.cpp
+++ b/src/armnn/backends/RefWorkloads/Addition.cpp
@@ -8,9 +8,6 @@
 
 #include <functional>
 
-namespace armnn
-{
-
 namespace
 {
 
@@ -24,6 +21,9 @@ void ElementwiseAddition(unsigned int numElements, const float* inData0, const f
 
 } // namespace
 
+namespace armnn
+{
+
 void Addition(const TensorShape& inShape0,
               const TensorShape& inShape1,
               const TensorShape& outShape,
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
index 9695e457e2..476ced76be 100644
--- a/src/armnn/backends/RefWorkloads/Merger.hpp
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -39,6 +39,7 @@ void Merger(const MergerQueueDescriptor& data)
 
             //split view extents are defined by the size of (the corresponding) input tensor
             const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+            BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
 
             // check all dimensions to see if this element is inside the given input view
             bool insideView = true;
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.cpp b/src/armnn/backends/RefWorkloads/Multiplication.cpp
index 7f558d83c5..47c0f1cef1 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.cpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.cpp
@@ -4,18 +4,48 @@
 //
 
 #include "Multiplication.hpp"
+#include "Broadcast.hpp"
 
-namespace armnn
+#include <functional>
+
+namespace
 {
 
-void Multiplication(const float* in0,
-                    const float* in1,
-                    unsigned int numElements,
-                    float* out)
+void ElementwiseMultiplication(unsigned int numElements,
+                               const float* inData0,
+                               const float* inData1,
+                               float* outData)
 {
     for (unsigned int i = 0; i < numElements; ++i)
     {
-        out[i] = in0[i] * in1[i];
+        outData[i] = inData0[i] * inData1[i];
+    }
+}
+
+} // namespace
+
+namespace armnn
+{
+
+void Multiplication(const TensorShape& inShape0,
+                    const TensorShape& inShape1,
+                    const TensorShape& outShape,
+                    const float* inData0,
+                    const float* inData1,
+                    float* outData)
+{
+    if (inShape0 == inShape1)
+    {
+        ElementwiseMultiplication(inShape0.GetNumElements(), inData0, inData1, outData);
+    }
+    else
+    {
+        BroadcastLoop(inShape0, inShape1, outShape).Unroll(
+            std::multiplies<float>(),
+            0,
+            inData0,
+            inData1,
+            outData);
     }
 }
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.hpp b/src/armnn/backends/RefWorkloads/Multiplication.hpp
index d0b033e7ec..54fcac51c1 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.hpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.hpp
@@ -5,12 +5,16 @@
 
 #pragma once
 
+#include <armnn/Tensor.hpp>
+
 namespace armnn
 {
 
-void Multiplication(const float* in0,
-                    const float* in1,
-                    unsigned int numElements,
-                    float* out);
+void Multiplication(const TensorShape& inShape0,
+                    const TensorShape& inShape1,
+                    const TensorShape& outShape,
+                    const float* inData0,
+                    const float* inData1,
+                    float* outData);
 
 } //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
index
6d15d8a436..a643e67690 100644
--- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
@@ -186,8 +186,8 @@ void Pooling2d(const float* in,
 
             // Clamp the pooling region inside the valid input area (which includes the padding).
             // This is necessary because the final pooling in a row may overlap beyond the padding.
-            hend = std::min(hend, heightInput + padRight);
-            wend = std::min(wend, widthInput + padBottom);
+            hend = std::min(hend, heightInput + padBottom);
+            wend = std::min(wend, widthInput + padRight);
 
             float result = defaultInitializer;
             float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
index ed68b1f6db..d7c54d9aad 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
@@ -17,12 +17,15 @@ void RefMultiplicationFloat32Workload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationFloat32Workload_Execute");
 
-    const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape();
+    const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape();
+    const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape();
 
     float* outputData = GetOutputTensorDataFloat(0, m_Data);
     const float* inputData0 = GetInputTensorDataFloat(0, m_Data);
     const float* inputData1 = GetInputTensorDataFloat(1, m_Data);
-    Multiplication(inputData0, inputData1, inputInfo0.GetNumElements(), outputData);
+
+    Multiplication(inShape0, inShape1, outShape, inputData0, inputData1, outputData);
 }
 
 } //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
index 2e6f0e6c8b..d5c4afd87c 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
@@ -27,10 +27,9 @@ void RefMultiplicationUint8Workload::Execute() const
     auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
 
     std::vector<float> results(outputInfo.GetNumElements());
-    Multiplication(dequant0.data(),
-                   dequant1.data(),
-                   inputInfo0.GetNumElements(),
-                   results.data());
+    Multiplication(
+        inputInfo0.GetShape(), inputInfo1.GetShape(), outputInfo.GetShape(),
+        dequant0.data(), dequant1.data(), results.data());
 
     Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
 }
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
index 67f6c100f9..74c4cb4e18 100644
--- a/src/armnn/backends/RefWorkloads/Splitter.hpp
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -41,6 +41,7 @@ void Splitter(const SplitterQueueDescriptor& data)
 
             //split view extents are defined by the size of (the corresponding) input tensor
             const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+            BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
 
             // check all dimensions to see if this element is inside the given input view
             bool insideView = true;
diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp
index 96a37802f1..c951fc5d8d 100644
--- a/src/armnn/backends/WorkloadData.cpp
+++ b/src/armnn/backends/WorkloadData.cpp
@@
-502,16 +502,13 @@ void MultiplicationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c { ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor"); ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_InputTensorInfos[1], - "MultiplicationQueueDescriptor", - "first input", - "second input"); - ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], - workloadInfo.m_OutputTensorInfos[0], - "MultiplicationQueueDescriptor", - "input", - "output"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "MultiplicationQueueDescriptor", + "first input", + "second input"); } void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp index 5933cebc80..c45a82db63 100644 --- a/src/armnn/backends/test/ArmComputeCl.cpp +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -103,7 +103,7 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAve ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, - IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) @@ -114,6 +114,12 @@ ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddi ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, + IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, + false) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, + IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, + true) ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) @@ -136,6 +142,8 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) // Mul ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) // Batch Norm ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) @@ -194,6 +202,9 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) // Permute ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) // ============================================================================ // COMPARE tests diff --git 
a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp
index dd8a668940..a81b7cdcd7 100644
--- a/src/armnn/backends/test/ArmComputeNeon.cpp
+++ b/src/armnn/backends/test/ArmComputeNeon.cpp
@@ -141,6 +141,7 @@ ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3
 ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
 
@@ -170,6 +171,11 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
                      IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
 ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+                     true)
 
 ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
 ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
@@ -281,6 +287,10 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
+
 // ============================================================================
 // COMPARE tests
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
index 76681f9a93..9eed2dbf78 100644
--- a/src/armnn/backends/test/LayerTests.cpp
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -1005,31 +1005,22 @@ LayerTestResult<float, 4> CompareAdditionTest(armnn::IWorkloadFa
     return ret;
 }
 
-LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
-{
-    const unsigned int width = 2;
-    const unsigned int height = 2;
-    const unsigned int channelCount = 2;
-    const unsigned int batchSize = 2;
-
-    armnn::TensorInfo inputTensorInfo0;
-    armnn::TensorInfo inputTensorInfo1;
-    armnn::TensorInfo outputTensorInfo;
-
-    constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
-    constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value;
-
-    inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-    inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-    outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-
-    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({
-        1, 1, 1, 1, 2, 2, 2, 2,
-        3, 3, 3, 3, 4, 4, 4, 4 }));
-
-    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({
-        2, 2, 2, 2, 3, 3, 3, 3,
-        4, 4, 4, 4, 5, 5, 5, 5 }));
+namespace {
+LayerTestResult<float, 4> MultiplicationTestHelper(armnn::IWorkloadFactory&
workloadFactory,
+                                                   const unsigned int shape0[4],
+                                                   const std::vector<float>& values0,
+                                                   const unsigned int shape1[4],
+                                                   const std::vector<float>& values1,
+                                                   const unsigned int outShape[4],
+                                                   const std::vector<float>& outValues)
+{
+    const size_t dimensionCount = 4;
+    armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
+    armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
+    armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
+
+    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
+    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
 
     LayerTestResult<float, 4> ret(outputTensorInfo);
 
@@ -1056,11 +1047,84 @@ LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFac
 
     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
 
-    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
+    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
+    return ret;
+}
+} // anonymous namespace
+
+
+LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int width = 2;
+    const unsigned int height = 2;
+    const unsigned int channelCount = 2;
+    const unsigned int batchSize = 2;
+
+    unsigned int shape[] = { batchSize, channelCount, height, width };
+
+    std::vector<float> input0({
+        1, 1, 1, 1, 2, 2, 2, 2,
+        3, 3, 3, 3, 4, 4, 4, 4 });
+
+    std::vector<float> input1({
+        2, 2, 2, 2, 3, 3, 3, 3,
+        4, 4, 4, 4, 5, 5, 5, 5 });
+
+    std::vector<float> output({
         2, 2, 2, 2, 6, 6, 6, 6,
-        12, 12, 12, 12, 20, 20, 20, 20 }));
+        12, 12, 12, 12, 20, 20, 20, 20 });
 
-    return ret;
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape,
+                                    input0,
+                                    shape,
+                                    input1,
+                                    shape,
+                                    output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int shape0[] = { 1, 2, 2, 2 };
+    std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
+
+    unsigned int shape1[] = { 1, 1, 1, 1 };
+    std::vector<float> input1({ 2 });
+
+    std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
+
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape0,
+                                    input0,
+                                    shape1,
+                                    input1,
+                                    shape0,
+                                    output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int shape0[] = { 1, 3, 3, 2 };
+    std::vector<float> input0({
+        1,   2,   3,   4,   5,   6,
+        7,   8,   9,  10,  11,  12,
+        13,  14,  15,  16,  17,  18});
+
+    unsigned int shape1[] = { 1, 1, 1, 2 };
+    std::vector<float> input1({ 1, 2 });
+
+    std::vector<float> output({
+        1,   4,   3,   8,   5,  12,
+        7,  16,   9,  20,  11,  24,
+        13, 28,  15,  32,  17,  36});
+
+    return MultiplicationTestHelper(workloadFactory,
+                                    shape0,
+                                    input0,
+                                    shape1,
+                                    input1,
+                                    shape0,
+                                    output);
 }
 
 LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
@@ -3253,69 +3317,59 @@ LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadF
     return result;
 }
 
-LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+namespace
 {
-    unsigned int batchSize = 1;
-    unsigned int channels = 2;
-    unsigned int height = 2;
-    unsigned int width = 3;
+LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory,
+                                                          const unsigned int shape0[4],
+                                                          const std::vector<uint8_t>& values0,
+                                                          float scale0,
+                                                          int32_t offset0,
+                                                          const unsigned int shape1[4],
+                                                          const std::vector<uint8_t>& values1,
+                                                          float scale1,
+                                                          int32_t offset1,
+                                                          const unsigned int outShape[4],
+                                                          const std::vector<uint8_t>& outValues,
+                                                          float outScale,
+                                                          int32_t outOffset)
+{
+    armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8);
+
armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8);
+    armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8);
 
-    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
-    armnn::TensorInfo outputTensorInfo;
+    inputTensorInfo0.SetQuantizationScale(scale0);
+    inputTensorInfo0.SetQuantizationOffset(offset0);
 
-    const unsigned int shape[] = { batchSize, channels, height, width };
-    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    inputTensorInfo1.SetQuantizationScale(4.0f);
-    inputTensorInfo1.SetQuantizationOffset(1);
+    inputTensorInfo1.SetQuantizationScale(scale1);
+    inputTensorInfo1.SetQuantizationOffset(offset1);
 
-    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    inputTensorInfo2.SetQuantizationScale(3.0f);
-    inputTensorInfo2.SetQuantizationOffset(-2);
+    outputTensorInfo.SetQuantizationScale(outScale);
+    outputTensorInfo.SetQuantizationOffset(outOffset);
 
-    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
-    outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen to have output values out of range
-    outputTensorInfo.SetQuantizationOffset(-5);
+    auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0);
+    auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1);
 
-    // See dequantized values to the right
-    auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-         62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
-        188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
-    }));
-
-    // See dequantized values to the right
-    auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
-    {
-        126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
-         48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
-    }));
-
-    // See dequantized values to the right
     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
-    result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
-    {
-         64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
-         77,  15,  92,  16,  10,  21, // 112200,  26676,       132192,          29160,  21120,  35640
-    }));
+    result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues);
 
+    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
-    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
 
     armnn::MultiplicationQueueDescriptor data;
     armnn::WorkloadInfo info;
-    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
-    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
 
     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
 
+    inputHandle0->Allocate();
     inputHandle1->Allocate();
-    inputHandle2->Allocate();
     outputHandle->Allocate();
 
+    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
-    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
 
     workload->Execute();
 
@@ -3323,6 +3377,113 @@ LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& wor
 
     return result;
 }
+} // anonymous namespace
+
+LayerTestResult<uint8_t, 4>
MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    unsigned int batchSize = 1;
+    unsigned int channels = 2;
+    unsigned int height = 2;
+    unsigned int width = 3;
+    const unsigned int shape[] = { batchSize, channels, height, width };
+
+    // See dequantized values to the right
+    std::vector<uint8_t> input0({
+         62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
+        188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
+    });
+
+    // See dequantized values to the right
+    std::vector<uint8_t> input1({
+        126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
+         48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
+    });
+
+    // See dequantized values to the right
+    std::vector<uint8_t> output(
+    {
+         64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
+         77,  15,  92,  16,  10,  21, // 112200,  26676,       132192,          29160,  21120,  35640
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape,
+                                         input0,
+                                         4.0f,
+                                         1,
+                                         shape,
+                                         input1,
+                                         3.0f,
+                                         -2,
+                                         shape,
+                                         output,
+                                         1366.255f, // Scale/offset chosen to have output values out of range
+                                         -5);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> input0({
+        1, 2, 3,  4,  5,  6,
+        7, 8, 9, 10, 11, 12
+    });
+
+    std::vector<uint8_t> input1({2});
+
+    std::vector<uint8_t> output({
+         2,  4,  6,  8, 10, 12,
+        14, 16, 18, 20, 22, 24
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape0,
+                                         input0,
+                                         1.0f,
+                                         0,
+                                         shape1,
+                                         input1,
+                                         1.0f,
+                                         0,
+                                         shape0,
+                                         output,
+                                         1.0f,
+                                         0);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<uint8_t> input0({
+        1, 2, 3,  4,  5,  6,
+        7, 8, 9, 10, 11, 12
+    });
+
+    std::vector<uint8_t> input1({1, 2, 3});
+
+    std::vector<uint8_t> output({
+        1,  4,  9,  4, 10, 18,
+        7, 16, 27, 10, 22, 36
+    });
+
+    return MultiplicationUint8TestHelper(workloadFactory,
+                                         shape0,
+                                         input0,
+                                         1.0f,
+                                         0,
+                                         shape1,
+                                         input1,
+                                         1.0f,
+                                         0,
+                                         shape0,
+                                         output,
+                                         1.0f,
+                                         0);
+}
 
 LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
 {
@@ -3702,6 +3863,12 @@ LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFact
     return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
 }
 
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+                                                                            bool forceNoPadding)
+{
+    return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
+}
+
 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
 {
     return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
@@ -3882,3 +4049,18 @@ LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& work
 {
     return SimplePermuteUint8TestCommon(workloadFactory);
 };
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet1TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet2TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return PermuteFloat32ValueSet3TestCommon(workloadFactory);
+};
diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp
index
fc0c9c7b14..36e73e461c 100644
--- a/src/armnn/backends/test/LayerTests.hpp
+++ b/src/armnn/backends/test/LayerTests.hpp
@@ -82,6 +82,8 @@ LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWork
 LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+                                                                            bool forceNoPadding);
 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory);
@@ -187,6 +189,8 @@ LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& worklo
                                                 unsigned int batchSize);
 
 LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
                                                     armnn::IWorkloadFactory& refWorkloadFactory);
 
@@ -260,6 +264,8 @@ LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& wor
                                                     float beta);
 
 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                             bool biasEnabled);
@@ -303,3 +309,6 @@ LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkl
 
 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp
index 4eafa1a211..4ecffedc91 100644
--- a/src/armnn/backends/test/PermuteTestImpl.hpp
+++ b/src/armnn/backends/test/PermuteTestImpl.hpp
@@ -119,3 +119,107 @@ LayerTestResult<uint8_t, 4> SimplePermuteUint8TestCommon(armnn::IWorkloadFactory
     return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo,
                                  outputTensorInfo, input, outputExpected);
 }
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet1TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 2, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 2 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f,   2.0f,  3.0f,
+        11.0f, 12.0f, 13.0f,
+        21.0f, 22.0f, 23.0f,
+        31.0f, 32.0f, 33.0f,
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 11.0f, 21.0f, 31.0f,
+        2.0f,
12.0f, 22.0f, 32.0f,
+        3.0f, 13.0f, 23.0f, 33.0f,
+    });
+
+    return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo,
+                                 outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet2TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 3, 2, 2 };
+    unsigned int outputShape[] = { 1, 2, 2, 3 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 3U, 1U, 2U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 11.0f, 21.0f, 31.0f,
+        2.0f, 12.0f, 22.0f, 32.0f,
+        3.0f, 13.0f, 23.0f, 33.0f,
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f,   2.0f,  3.0f,
+        11.0f, 12.0f, 13.0f,
+        21.0f, 22.0f, 23.0f,
+        31.0f, 32.0f, 33.0f,
+    });
+
+    return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo,
+                                 outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet3TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 3, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 3 };
+
+    armnn::PermuteDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f,   2.0f,  3.0f,
+        11.0f, 12.0f, 13.0f,
+        21.0f, 22.0f, 23.0f,
+        31.0f, 32.0f, 33.0f,
+        41.0f, 42.0f, 43.0f,
+        51.0f, 52.0f, 53.0f,
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 11.0f, 21.0f, 31.0f, 41.0f, 51.0f,
+        2.0f, 12.0f, 22.0f, 32.0f, 42.0f, 52.0f,
+        3.0f, 13.0f, 23.0f, 33.0f, 43.0f, 53.0f,
+    });
+
+    return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo,
+                                 outputTensorInfo, input, outputExpected);
+}
diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp b/src/armnn/backends/test/Pooling2dTestImpl.hpp
index fc84ddb2ca..ab9fd6d6fb 100644
--- a/src/armnn/backends/test/Pooling2dTestImpl.hpp
+++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp
@@ -720,6 +720,83 @@ LayerTestResult<T, 4> SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkl
     return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
 }
 
+//
+// Tests average pooling with the following parameters:
+//
+//   Pooling size: 3x2
+//   Stride:       (2,2)
+//   input size:   3x2
+//   channels:     1
+//   batch size:   1
+//
+template<typename T>
+LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        bool forceNoPadding,
+        float qScale = 1.0f,
+        int32_t qOffset = 0)
+{
+    armnn::Pooling2dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
+    descriptor.m_PoolWidth = 3;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_StrideX = 2;
+    descriptor.m_StrideY = 2;
+    descriptor.m_PadLeft = (forceNoPadding) ?
0 : 1;
+    descriptor.m_PadRight = descriptor.m_PadLeft;
+    descriptor.m_PadTop = 0;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+
+    unsigned int inputWidth = 3;
+    unsigned int inputHeight = 2;
+    unsigned int outputWidth =
+        (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) /
+        descriptor.m_StrideX;
+    unsigned int outputHeight =
+        (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) /
+        descriptor.m_StrideY;
+    unsigned int channels = 1;
+    unsigned int batchSize = 1;
+
+    std::vector<float> inputData = {
+        3.0f, 6.0f, 9.0f,
+        12.0f, 15.0f, 18.0f,
+    };
+
+    std::vector<float> expectedOutputDataWithPadding = {
+        6.0f, 8.0f,
+    };
+
+    std::vector<float> expectedOutputDataNoPadding = {
+        10.5f,
+    };
+
+    armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>());
+
+    // Scale and offset should match input - we're just calculating average values.
+    armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>());
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    auto outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+        forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) :
+                         QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding));
+
+    return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
+}
+
+
 template<typename T>
 LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                                 float qScale = 1.0f,
diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp
index 87d82f1781..89e5db8e43 100644
--- a/src/armnn/backends/test/Reference.cpp
+++ b/src/armnn/backends/test/Reference.cpp
@@ -76,6 +76,10 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+                     IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, true)
 
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
 ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
 
@@ -158,7 +162,11 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Te
 
 // Mul
 ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
 ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
 
 // Batch Norm
 ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -227,5 +235,8 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
 // Permute
 ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
 ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
 
 BOOST_AUTO_TEST_SUITE_END()
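
Note on the broadcast semantics this patch introduces: the reference Multiplication workload now accepts inputs of different shapes as long as every mismatched dimension of one input is 1, in which case that input's single element is reused across the other input's extent (the rule the MultiplicationBroadcast1Element and MultiplicationBroadcast1DVector tests exercise). The sketch below is a self-contained illustration of that indexing rule, not ArmNN code: the real logic lives in BroadcastLoop (src/armnn/backends/RefWorkloads/Broadcast.hpp) and is structured differently, and the names Shape4, At and BroadcastMultiply are invented here for clarity.

#include <array>
#include <cassert>
#include <cstddef>
#include <vector>

using Shape4 = std::array<unsigned int, 4>;

// Read one element from a row-major buffer; a size-1 dimension is treated as
// broadcast, i.e. its index collapses to 0 so the same element is reused.
static float At(const std::vector<float>& data, const Shape4& shape, const Shape4& idx)
{
    std::size_t flat = 0;
    for (int d = 0; d < 4; ++d)
    {
        unsigned int i = (shape[d] == 1) ? 0 : idx[d]; // broadcast dimension
        flat = flat * shape[d] + i;                    // row-major flattening
    }
    return data[flat];
}

std::vector<float> BroadcastMultiply(const Shape4& s0, const std::vector<float>& in0,
                                     const Shape4& s1, const std::vector<float>& in1,
                                     const Shape4& so)
{
    std::vector<float> out(so[0] * so[1] * so[2] * so[3]);
    std::size_t n = 0;
    Shape4 idx{};
    // Walk the full output shape; each input is indexed with broadcasting.
    for (idx[0] = 0; idx[0] < so[0]; ++idx[0])
    for (idx[1] = 0; idx[1] < so[1]; ++idx[1])
    for (idx[2] = 0; idx[2] < so[2]; ++idx[2])
    for (idx[3] = 0; idx[3] < so[3]; ++idx[3])
    {
        out[n++] = At(in0, s0, idx) * At(in1, s1, idx);
    }
    return out;
}

int main()
{
    // Mirrors MultiplicationBroadcast1DVectorTest above: {1,3,3,2} * {1,1,1,2}.
    Shape4 s0{1, 3, 3, 2}, s1{1, 1, 1, 2};
    std::vector<float> in0{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
    std::vector<float> in1{1, 2};
    auto out = BroadcastMultiply(s0, in0, s1, in1, s0);
    assert(out[1] == 4.0f && out[5] == 12.0f); // matches the test's expected output
    return 0;
}

The same shape-compatibility rule is what ValidateBroadcastTensorShapesMatch in WorkloadData.cpp now enforces in place of the old exact-shape check, while the Neon backend (IsNeonMultiplicationParamsSupported) still rejects mismatched shapes rather than broadcasting implicitly.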