Diffstat (limited to 'src/armnn/backends')
 src/armnn/backends/ArmComputeTensorUtils.cpp                          |   7
 src/armnn/backends/ClWorkloadFactory.cpp                              |  67
 src/armnn/backends/ClWorkloadFactory.hpp                              |  11
 src/armnn/backends/NeonLayerSupport.cpp                               |  26
 src/armnn/backends/NeonWorkloadFactory.cpp                            |   2
 src/armnn/backends/NeonWorkloads.hpp                                  |   2
 src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp    |   4
 src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp    |   2
 src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp |   7
 src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp   |  33
 src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp   |  27
 src/armnn/backends/RefWorkloads/Addition.cpp                          |   6
 src/armnn/backends/RefWorkloads/Merger.hpp                            |   1
 src/armnn/backends/RefWorkloads/Multiplication.cpp                    |  42
 src/armnn/backends/RefWorkloads/Multiplication.hpp                    |  12
 src/armnn/backends/RefWorkloads/Pooling2d.cpp                         |   4
 src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp  |   7
 src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp    |   7
 src/armnn/backends/RefWorkloads/Splitter.hpp                          |   1
 src/armnn/backends/WorkloadData.cpp                                   |  17
 src/armnn/backends/test/ArmComputeCl.cpp                              |  13
 src/armnn/backends/test/ArmComputeNeon.cpp                            |  10
 src/armnn/backends/test/LayerTests.cpp                                | 322
 src/armnn/backends/test/LayerTests.hpp                                |   9
 src/armnn/backends/test/PermuteTestImpl.hpp                           | 104
 src/armnn/backends/test/Pooling2dTestImpl.hpp                         |  77
 src/armnn/backends/test/Reference.cpp                                 |  11
 27 files changed, 705 insertions, 126 deletions
diff --git a/src/armnn/backends/ArmComputeTensorUtils.cpp b/src/armnn/backends/ArmComputeTensorUtils.cpp
index 9f21c41a2f..f88ed2b4c3 100644
--- a/src/armnn/backends/ArmComputeTensorUtils.cpp
+++ b/src/armnn/backends/ArmComputeTensorUtils.cpp
@@ -78,6 +78,7 @@ arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDes
using arm_compute::DimensionRoundingType;
using arm_compute::PadStrideInfo;
using arm_compute::PoolingLayerInfo;
+ using arm_compute::Size2D;
// Resolve ARM Compute layer parameters
const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);
@@ -94,7 +95,9 @@ arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDes
const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);
- return arm_compute::PoolingLayerInfo(poolingType, descriptor.m_PoolWidth, padStrideInfo, excludePadding);
+ const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);
+
+ return arm_compute::PoolingLayerInfo(poolingType, poolSize, padStrideInfo, excludePadding);
}
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
@@ -114,7 +117,7 @@ arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::Per
arm_compute::PermutationVector aclPerm;
unsigned int start = 0;
- while ((start == perm[start]) && (start < perm.GetSize()))
+ while ((start < perm.GetSize()) && (start == perm[start]))
{
++start;
}
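Note: the reordering above is a bounds fix, not a cosmetic change. && evaluates left to right and short-circuits, so with the old operand order an identity permutation would increment start all the way to perm.GetSize() and then read perm[start] out of bounds before the size check ran. A minimal sketch of the corrected loop (SimpleVec is a hypothetical stand-in for armnn::PermutationVector, not the real type):

    struct SimpleVec // hypothetical stand-in for armnn::PermutationVector
    {
        unsigned int data[4] = {0, 1, 2, 3}; // identity permutation
        unsigned int GetSize() const { return 4; }
        unsigned int operator[](unsigned int i) const { return data[i]; }
    };

    unsigned int CountLeadingFixedPoints(const SimpleVec& perm)
    {
        unsigned int start = 0;
        // Bound check first: on an identity permutation start reaches
        // GetSize(), && short-circuits, and perm[start] is never read.
        while ((start < perm.GetSize()) && (start == perm[start]))
        {
            ++start;
        }
        return start; // 4 for the identity permutation, with no OOB access
    }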
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 4e565a05d7..6af657b6b4 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -35,24 +35,62 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType,
#ifdef ARMCOMPUTECL_ENABLED
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters):
+ m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
{
- ClTunedParameters* clTunedParametersImpl = boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters);
+ try
+ {
+ std::vector<cl::Platform> platforms;
+ cl::Platform::get(&platforms);
+
+ // Select default platform as the first element
+ cl::Platform::setDefault(platforms[0]);
+
+ std::vector<cl::Device> devices;
+ platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+ // Select default device as the first element
+ cl::Device::setDefault(devices[0]);
+ }
+ catch (const cl::Error& clError)
+ {
+ throw ClRuntimeUnavailableException(boost::str(boost::format(
+ "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+ ) % clError.what() % clError.err()));
+ }
+
+ // Remove the use of global CL context
+ cl::Context::setDefault(cl::Context{});
+ BOOST_ASSERT(cl::Context::getDefault()() == NULL);
- cl::Device device;
+ // Remove the use of global CL command queue
+ cl::CommandQueue::setDefault(cl::CommandQueue{});
+ BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
+{
+ cl::Device device = cl::Device::getDefault();
cl::Context context;
cl::CommandQueue commandQueue;
try
{
- device = cl::Device::getDefault();
- context = cl::Context::getDefault();
+ arm_compute::CLKernelLibrary::get().clear_programs_cache();
+ arm_compute::CLScheduler::get().init(context, commandQueue, device);
+ arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+ context = cl::Context(device);
bool enableProfiling = false;
#if ARMNN_PROFILING_ENABLED
enableProfiling = true;
#endif
- if (clTunedParametersImpl && clTunedParametersImpl->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
+ if (m_clTunedParameters && m_clTunedParameters->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
{
enableProfiling = true; // Needed for the CLTuner to work.
}
@@ -65,7 +103,7 @@ void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
else
{
// Use default queue
- commandQueue = cl::CommandQueue::getDefault();
+ commandQueue = cl::CommandQueue(context, device);
}
}
catch (const cl::Error& clError)
@@ -79,9 +117,9 @@ void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
arm_compute::CLKernelLibrary::get().init(".", context, device);
arm_compute::ICLTuner* tuner = nullptr;
- if (clTunedParameters)
+ if (m_clTunedParameters)
{
- tuner = &clTunedParametersImpl->m_Tuner;
+ tuner = &m_clTunedParameters->m_Tuner;
}
arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner);
}
@@ -266,7 +304,16 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
#else // #if ARMCOMPUTECL_ENABLED
-void ClWorkloadFactory::LoadOpenClRuntime(IClTunedParameters* clTunedParameters)
+ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters)
+{
+ // No CL support
+}
+
+ClWorkloadFactory::~ClWorkloadFactory()
+{
+}
+
+void ClWorkloadFactory::LoadOpenClRuntime()
{
// No CL support
}
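Note: with this change the CL platform/device discovery moves into the constructor, and the global CL context and command queue are explicitly cleared, so a factory carries its own tuned parameters and is usable as soon as it is constructed. A usage sketch, assuming only the signatures introduced above (the caller itself is hypothetical):

    #include "backends/ClWorkloadFactory.hpp"

    void PrepareClBackend(armnn::IClTunedParameters* tunedParams)
    {
        // The constructor selects the first CL platform and GPU device and
        // throws ClRuntimeUnavailableException if the CL runtime is missing.
        armnn::ClWorkloadFactory factory(tunedParams);

        // LoadOpenClRuntime() no longer takes the tuned parameters; the
        // factory holds them in m_clTunedParameters instead.
        factory.LoadOpenClRuntime();
    }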
diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp
index 2477e23eeb..e1e66c050b 100644
--- a/src/armnn/backends/ClWorkloadFactory.hpp
+++ b/src/armnn/backends/ClWorkloadFactory.hpp
@@ -23,18 +23,22 @@ namespace armnn
{
class IClTunedParameters;
+class ClTunedParameters;
// ARM Compute OpenCL workload factory
class ClWorkloadFactory : public IWorkloadFactory
{
public:
- virtual ~ClWorkloadFactory(){};
+
+ ClWorkloadFactory(IClTunedParameters* clTunedParameters = nullptr);
+
+ virtual ~ClWorkloadFactory();
virtual Compute GetCompute() const override { return Compute::GpuAcc; }
static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported);
- void LoadOpenClRuntime(IClTunedParameters* clTunedParameters = nullptr);
+ void LoadOpenClRuntime();
virtual bool SupportsSubTensors() const override { return true; }
@@ -109,6 +113,9 @@ public:
virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+
+private:
+ ClTunedParameters* m_clTunedParameters;
};
class ClTunedParameters : public IClTunedParameters
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp
index 382b15e277..d8a3366775 100644
--- a/src/armnn/backends/NeonLayerSupport.cpp
+++ b/src/armnn/backends/NeonLayerSupport.cpp
@@ -71,6 +71,22 @@ bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convol
return preferDirectConvolution;
}
+bool IsNeonMultiplicationParamsSupported(std::string* reasonIfUnsupported,
+ const TensorInfo& info0,
+ const TensorInfo& info1)
+{
+ if (info0.GetShape() == info1.GetShape())
+ {
+ return true;
+ }
+
+ if (reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = "Multiplication on Neon does not support implicit broadcast.";
+ }
+ return false;
+}
+
bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters)
{
if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness)
@@ -233,7 +249,7 @@ bool IsConvolution2dSupportedNeon(const TensorInfo& input,
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input.GetDataType(),
&TrueFunc<>,
- &FalseFuncU8<>);
+ &TrueFunc<>);
}
bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
@@ -293,11 +309,13 @@ bool IsMultiplicationSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
std::string* reasonIfUnsupported)
{
- ignore_unused(input1);
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input0.GetDataType(),
- &TrueFunc<>,
- &FalseFuncU8<>);
+ &IsNeonMultiplicationParamsSupported,
+ &FalseFuncU8<const TensorInfo&, const TensorInfo&>,
+ input0,
+ input1
+ );
}
bool IsNormalizationSupportedNeon(const TensorInfo& input,
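Note: the net effect is that float32 multiplication on Neon is reported as supported only when the two input shapes match exactly, while uint8 inputs still fall through FalseFuncU8. A small sketch of the expected answers, using the array-based TensorInfo constructor seen in the tests in this change:

    const unsigned int shapeA[] = {1, 2, 2, 2};
    const unsigned int shapeB[] = {1, 1, 1, 1};
    armnn::TensorInfo a(4, shapeA, armnn::DataType::Float32);
    armnn::TensorInfo b(4, shapeB, armnn::DataType::Float32);

    std::string reason;
    bool same      = armnn::IsMultiplicationSupportedNeon(a, a, &reason); // true
    bool broadcast = armnn::IsMultiplicationSupportedNeon(a, b, &reason); // false;
    // reason == "Multiplication on Neon does not support implicit broadcast."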
diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp
index 384284114f..0f65a3dcd7 100644
--- a/src/armnn/backends/NeonWorkloadFactory.cpp
+++ b/src/armnn/backends/NeonWorkloadFactory.cpp
@@ -112,7 +112,7 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Poo
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<NeonConvolution2dFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info);
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
diff --git a/src/armnn/backends/NeonWorkloads.hpp b/src/armnn/backends/NeonWorkloads.hpp
index 7e9e885adc..83a3e9fd9b 100644
--- a/src/armnn/backends/NeonWorkloads.hpp
+++ b/src/armnn/backends/NeonWorkloads.hpp
@@ -13,7 +13,9 @@
#include "backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp"
#include "backends/NeonWorkloads/NeonConstantFloat32Workload.hpp"
#include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp"
#include "backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp"
+#include "backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp"
#include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp"
#include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp"
#include "backends/NeonWorkloads/NeonFloorFloat32Workload.hpp"
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
index 5099965a24..10c96d82a6 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -73,10 +73,6 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
using Type = ResolveType<dataType>;
InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>());
- if (m_Data.m_Parameters.m_BiasEnabled)
- {
- InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<Type>());
- }
}
// Generate known implementations for linker
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
index 37740511ba..98d075a5ea 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -3,6 +3,8 @@
// See LICENSE file in the project root for full license information.
//
+#pragma once
+
#include <backends/Workload.hpp>
#include <backends/NeonWorkloadUtils.hpp>
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
index b4650ac011..a8c5c63683 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -15,7 +15,12 @@ using namespace armcomputetensorutils;
NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
: NeonConvolution2dBaseWorkload(descriptor, info)
-{}
+{
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<float>());
+ }
+}
void NeonConvolution2dFloat32Workload::Execute() const
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..ae20522361
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvolution2dUint8Workload.hpp"
+
+
+namespace armnn
+{
+NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : NeonConvolution2dBaseWorkload(descriptor, info)
+{
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
+ }
+}
+
+
+void NeonConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, NeonConvolution2dUint8Workload_Execute);
+ m_ConvolutionLayer->run();
+}
+
+void NeonConvolution2dUint8Workload::ValidateData() const
+{
+ m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1);
+}
+
+} //namespace armnn
\ No newline at end of file
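Note: unlike the float32 workload, which reads its bias as float, this workload initialises m_BiasTensor from int32_t data. Quantized convolutions conventionally keep biases as 32-bit integers with scale inputScale * weightScale and zero offset; that convention is an assumption here, since the diff itself only shows the int32_t read. An illustrative helper:

    #include <cmath>
    #include <cstdint>

    // Illustrative only: how a float bias would land in the int32 domain
    // under the assumed biasScale = inputScale * weightScale rule.
    int32_t QuantizeBias(float bias, float inputScale, float weightScale)
    {
        const float biasScale = inputScale * weightScale;
        return static_cast<int32_t>(std::round(bias / biasScale));
    }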
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..319d574b1e
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ virtual void ValidateData() const override;
+ virtual void Execute() const override;
+private:
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/RefWorkloads/Addition.cpp b/src/armnn/backends/RefWorkloads/Addition.cpp
index c26f82ecc2..6d53a702e4 100644
--- a/src/armnn/backends/RefWorkloads/Addition.cpp
+++ b/src/armnn/backends/RefWorkloads/Addition.cpp
@@ -8,9 +8,6 @@
#include <functional>
-namespace armnn
-{
-
namespace
{
@@ -24,6 +21,9 @@ void ElementwiseAddition(unsigned int numElements, const float* inData0, const f
} // namespace
+namespace armnn
+{
+
void Addition(const TensorShape& inShape0,
const TensorShape& inShape1,
const TensorShape& outShape,
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
index 9695e457e2..476ced76be 100644
--- a/src/armnn/backends/RefWorkloads/Merger.hpp
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -39,6 +39,7 @@ void Merger(const MergerQueueDescriptor& data)
//split view extents are defined by the size of (the corresponding) input tensor
const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[viewIdx]);
+ BOOST_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
// check all dimensions to see if this element is inside the given input view
bool insideView = true;
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.cpp b/src/armnn/backends/RefWorkloads/Multiplication.cpp
index 7f558d83c5..47c0f1cef1 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.cpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.cpp
@@ -4,18 +4,48 @@
//
#include "Multiplication.hpp"
+#include "Broadcast.hpp"
-namespace armnn
+#include <functional>
+
+namespace
{
-void Multiplication(const float* in0,
- const float* in1,
- unsigned int numElements,
- float* out)
+void ElementwiseMultiplication(unsigned int numElements,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
{
for (unsigned int i = 0; i < numElements; ++i)
{
- out[i] = in0[i] * in1[i];
+ outData[i] = inData0[i] * inData1[i];
+ }
+}
+
+} // namespace
+
+namespace armnn
+{
+
+void Multiplication(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData)
+{
+ if (inShape0 == inShape1)
+ {
+ ElementwiseMultiplication(inShape0.GetNumElements(), inData0, inData1, outData);
+ }
+ else
+ {
+ BroadcastLoop(inShape0, inShape1, outShape).Unroll(
+ std::multiplies<float>(),
+ 0,
+ inData0,
+ inData1,
+ outData);
}
}
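Note: when the shapes differ, the new code delegates to BroadcastLoop from Broadcast.hpp. A self-contained sketch of the same idea for 4-D tensors, assuming NumPy-style semantics in which a dimension of size 1 gets stride 0 so its single element is reused along that axis (BroadcastLoop generalises this recursively; this is not the armnn implementation):

    void BroadcastMultiply4d(const unsigned int inShape0[4], const float* in0,
                             const unsigned int inShape1[4], const float* in1,
                             const unsigned int outShape[4], float* out)
    {
        // Row-major strides, with stride 0 on broadcast (size-1) dimensions.
        unsigned int stride0[4], stride1[4];
        unsigned int acc0 = 1, acc1 = 1;
        for (int d = 3; d >= 0; --d)
        {
            stride0[d] = (inShape0[d] == 1) ? 0 : acc0;
            stride1[d] = (inShape1[d] == 1) ? 0 : acc1;
            acc0 *= inShape0[d];
            acc1 *= inShape1[d];
        }

        unsigned int o = 0;
        for (unsigned int n = 0; n < outShape[0]; ++n)
        for (unsigned int c = 0; c < outShape[1]; ++c)
        for (unsigned int h = 0; h < outShape[2]; ++h)
        for (unsigned int w = 0; w < outShape[3]; ++w)
        {
            out[o++] = in0[n*stride0[0] + c*stride0[1] + h*stride0[2] + w*stride0[3]]
                     * in1[n*stride1[0] + c*stride1[1] + h*stride1[2] + w*stride1[3]];
        }
    }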
diff --git a/src/armnn/backends/RefWorkloads/Multiplication.hpp b/src/armnn/backends/RefWorkloads/Multiplication.hpp
index d0b033e7ec..54fcac51c1 100644
--- a/src/armnn/backends/RefWorkloads/Multiplication.hpp
+++ b/src/armnn/backends/RefWorkloads/Multiplication.hpp
@@ -5,12 +5,16 @@
#pragma once
+#include <armnn/Tensor.hpp>
+
namespace armnn
{
-void Multiplication(const float* in0,
- const float* in1,
- unsigned int numElements,
- float* out);
+void Multiplication(const TensorShape& inShape0,
+ const TensorShape& inShape1,
+ const TensorShape& outShape,
+ const float* inData0,
+ const float* inData1,
+ float* outData);
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/Pooling2d.cpp b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
index 6d15d8a436..a643e67690 100644
--- a/src/armnn/backends/RefWorkloads/Pooling2d.cpp
+++ b/src/armnn/backends/RefWorkloads/Pooling2d.cpp
@@ -186,8 +186,8 @@ void Pooling2d(const float* in,
// Clamp the pooling region inside the valid input area (which includes the padding).
// This is necessary because the final pooling in a row may overlap beyond the padding.
- hend = std::min(hend, heightInput + padRight);
- wend = std::min(wend, widthInput + padBottom);
+ hend = std::min(hend, heightInput + padBottom);
+ wend = std::min(wend, widthInput + padRight);
float result = defaultInitializer;
float poolAreaSize = boost::numeric_cast<float>((hend - hstart) * (wend - wstart));
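Note: the old code clamped the vertical window end against the right padding and the horizontal one against the bottom padding; the fix pairs height with padBottom and width with padRight. A worked example with illustrative values showing what the swap changes:

    #include <algorithm>

    int ClampedRows()
    {
        const int heightInput = 2, padBottom = 1, padRight = 0;
        const int hend = 4; // pooling window ends past the padded bottom edge
        const int before = std::min(hend, heightInput + padRight);  // 2: window truncated
        const int after  = std::min(hend, heightInput + padBottom); // 3: correct extent
        return after - before; // one row of the pooling region was being lost
    }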
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
index ed68b1f6db..d7c54d9aad 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationFloat32Workload.cpp
@@ -17,12 +17,15 @@ void RefMultiplicationFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMultiplicationFloat32Workload_Execute");
- const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
+ const TensorShape& inShape0 = GetTensorInfo(m_Data.m_Inputs[0]).GetShape();
+ const TensorShape& inShape1 = GetTensorInfo(m_Data.m_Inputs[1]).GetShape();
+ const TensorShape& outShape = GetTensorInfo(m_Data.m_Outputs[0]).GetShape();
float* outputData = GetOutputTensorDataFloat(0, m_Data);
const float* inputData0 = GetInputTensorDataFloat(0, m_Data);
const float* inputData1 = GetInputTensorDataFloat(1, m_Data);
- Multiplication(inputData0, inputData1, inputInfo0.GetNumElements(), outputData);
+
+ Multiplication(inShape0, inShape1, outShape, inputData0, inputData1, outputData);
}
} //namespace armnn
diff --git a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
index 2e6f0e6c8b..d5c4afd87c 100644
--- a/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
+++ b/src/armnn/backends/RefWorkloads/RefMultiplicationUint8Workload.cpp
@@ -27,10 +27,9 @@ void RefMultiplicationUint8Workload::Execute() const
auto dequant1 = Dequantize(GetInputTensorDataU8(1, m_Data), inputInfo1);
std::vector<float> results(outputInfo.GetNumElements());
- Multiplication(dequant0.data(),
- dequant1.data(),
- inputInfo0.GetNumElements(),
- results.data());
+ Multiplication(
+ inputInfo0.GetShape(), inputInfo1.GetShape(), outputInfo.GetShape(),
+ dequant0.data(), dequant1.data(), results.data());
Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
}
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
index 67f6c100f9..74c4cb4e18 100644
--- a/src/armnn/backends/RefWorkloads/Splitter.hpp
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -41,6 +41,7 @@ void Splitter(const SplitterQueueDescriptor& data)
//split view extents are defined by the size of (the corresponding) input tensor
const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[viewIdx]);
+ BOOST_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
// check all dimensions to see if this element is inside the given input view
bool insideView = true;
diff --git a/src/armnn/backends/WorkloadData.cpp b/src/armnn/backends/WorkloadData.cpp
index 96a37802f1..c951fc5d8d 100644
--- a/src/armnn/backends/WorkloadData.cpp
+++ b/src/armnn/backends/WorkloadData.cpp
@@ -502,16 +502,13 @@ void MultiplicationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c
{
ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor");
ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor");
- ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
- workloadInfo.m_InputTensorInfos[1],
- "MultiplicationQueueDescriptor",
- "first input",
- "second input");
- ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
- workloadInfo.m_OutputTensorInfos[0],
- "MultiplicationQueueDescriptor",
- "input",
- "output");
+
+ ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
+ workloadInfo.m_InputTensorInfos[1],
+ workloadInfo.m_OutputTensorInfos[0],
+ "MultiplicationQueueDescriptor",
+ "first input",
+ "second input");
}
void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
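Note: ValidateBroadcastTensorShapesMatch replaces the two exact-shape checks. Its name suggests the usual broadcast rule (each dimension pair must be equal or contain a 1, and the output takes the larger size); the sketch below encodes that assumed rule and is not the armnn implementation:

    #include <algorithm>
    #include <stdexcept>

    void CheckBroadcast4d(const unsigned int in0[4], const unsigned int in1[4],
                          const unsigned int out[4])
    {
        for (int d = 0; d < 4; ++d)
        {
            if (in0[d] != in1[d] && in0[d] != 1 && in1[d] != 1)
            {
                throw std::invalid_argument("inputs are not broadcast-compatible");
            }
            if (out[d] != std::max(in0[d], in1[d]))
            {
                throw std::invalid_argument("output shape does not match the broadcast");
            }
        }
    }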
diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp
index 5933cebc80..c45a82db63 100644
--- a/src/armnn/backends/test/ArmComputeCl.cpp
+++ b/src/armnn/backends/test/ArmComputeCl.cpp
@@ -103,7 +103,7 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAve
ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test)
ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest)
ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
- IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
+ IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
@@ -114,6 +114,12 @@ ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddi
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+ false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+ true)
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
@@ -136,6 +142,8 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
// Mul
ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
// Batch Norm
ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -194,6 +202,9 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
// Permute
ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
// ============================================================================
// COMPARE tests
diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp
index dd8a668940..a81b7cdcd7 100644
--- a/src/armnn/backends/test/ArmComputeNeon.cpp
+++ b/src/armnn/backends/test/ArmComputeNeon.cpp
@@ -141,6 +141,7 @@ ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3
ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true)
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
@@ -170,6 +171,11 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+ true)
ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
@@ -281,6 +287,10 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
// Permute
ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
+
// ============================================================================
// COMPARE tests
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
index 76681f9a93..9eed2dbf78 100644
--- a/src/armnn/backends/test/LayerTests.cpp
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -1005,31 +1005,22 @@ LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFa
return ret;
}
-LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
-{
- const unsigned int width = 2;
- const unsigned int height = 2;
- const unsigned int channelCount = 2;
- const unsigned int batchSize = 2;
-
- armnn::TensorInfo inputTensorInfo0;
- armnn::TensorInfo inputTensorInfo1;
- armnn::TensorInfo outputTensorInfo;
-
- constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
- constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value;
-
- inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
- inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
- outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
-
- auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({
- 1, 1, 1, 1, 2, 2, 2, 2,
- 3, 3, 3, 3, 4, 4, 4, 4 }));
-
- auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({
- 2, 2, 2, 2, 3, 3, 3, 3,
- 4, 4, 4, 4, 5, 5, 5, 5 }));
+namespace {
+LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory,
+ const unsigned int shape0[4],
+ const std::vector<float> & values0,
+ const unsigned int shape1[4],
+ const std::vector<float> & values1,
+ const unsigned int outShape[4],
+ const std::vector<float> & outValues)
+{
+ const size_t dimensionCount = 4;
+ armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32};
+ armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32};
+ armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32};
+
+ auto input0 = MakeTensor<float, 4>(inputTensorInfo0, values0);
+ auto input1 = MakeTensor<float, 4>(inputTensorInfo1, values1);
LayerTestResult<float,4> ret(outputTensorInfo);
@@ -1056,11 +1047,84 @@ LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFac
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
- ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
+ ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outValues);
+ return ret;
+}
+} // anonymous namespace
+
+
+LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ const unsigned int width = 2;
+ const unsigned int height = 2;
+ const unsigned int channelCount = 2;
+ const unsigned int batchSize = 2;
+
+ unsigned int shape[] = { batchSize, channelCount, height, width };
+
+ std::vector<float> input0({
+ 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4 });
+
+ std::vector<float> input1({
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 4, 4, 5, 5, 5, 5 });
+
+ std::vector<float> output({
2, 2, 2, 2, 6, 6, 6, 6,
- 12, 12, 12, 12, 20, 20, 20, 20 }));
+ 12, 12, 12, 12, 20, 20, 20, 20 });
- return ret;
+ return MultiplicationTestHelper(workloadFactory,
+ shape,
+ input0,
+ shape,
+ input1,
+ shape,
+ output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ unsigned int shape0[] = { 1, 2, 2, 2 };
+ std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
+
+ unsigned int shape1[] = { 1, 1, 1, 1 };
+ std::vector<float> input1({ 2 });
+
+ std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16});
+
+ return MultiplicationTestHelper(workloadFactory,
+ shape0,
+ input0,
+ shape1,
+ input1,
+ shape0,
+ output);
+}
+
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ unsigned int shape0[] = { 1, 3, 3, 2 };
+ std::vector<float> input0({
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18});
+
+ unsigned int shape1[] = { 1, 1, 1, 2 };
+ std::vector<float> input1({ 1, 2 });
+
+ std::vector<float> output({
+ 1, 4, 3, 8, 5, 12,
+ 7, 16, 9, 20, 11, 24,
+ 13, 28, 15, 32, 17, 36});
+
+ return MultiplicationTestHelper(workloadFactory,
+ shape0,
+ input0,
+ shape1,
+ input1,
+ shape0,
+ output);
}
LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
@@ -3253,69 +3317,59 @@ LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadF
return result;
}
-LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+namespace
{
- unsigned int batchSize = 1;
- unsigned int channels = 2;
- unsigned int height = 2;
- unsigned int width = 3;
+LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory,
+ const unsigned int shape0[4],
+ const std::vector<uint8_t> & values0,
+ float scale0,
+ int32_t offset0,
+ const unsigned int shape1[4],
+ const std::vector<uint8_t> & values1,
+ float scale1,
+ int32_t offset1,
+ const unsigned int outShape[4],
+ const std::vector<uint8_t> & outValues,
+ float outScale,
+ int32_t outOffset)
+{
+ armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8);
+ armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8);
+ armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
- armnn::TensorInfo outputTensorInfo;
+ inputTensorInfo0.SetQuantizationScale(scale0);
+ inputTensorInfo0.SetQuantizationOffset(offset0);
- const unsigned int shape[] = { batchSize, channels, height, width };
- inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
- inputTensorInfo1.SetQuantizationScale(4.0f);
- inputTensorInfo1.SetQuantizationOffset(1);
+ inputTensorInfo1.SetQuantizationScale(scale1);
+ inputTensorInfo1.SetQuantizationOffset(offset1);
- inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
- inputTensorInfo2.SetQuantizationScale(3.0f);
- inputTensorInfo2.SetQuantizationOffset(-2);
+ outputTensorInfo.SetQuantizationScale(outScale);
+ outputTensorInfo.SetQuantizationOffset(outOffset);
- outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
- outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen to have output values out of range
- outputTensorInfo.SetQuantizationOffset(-5);
+ auto input0 = MakeTensor<uint8_t, 4>(inputTensorInfo0, values0);
+ auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, values1);
- // See dequantized values to the right
- auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
- {
- 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440,
- 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120
- }));
-
- // See dequantized values to the right
- auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
- {
- 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
- 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297
- }));
-
- // See dequantized values to the right
LayerTestResult<uint8_t, 4> result(outputTensorInfo);
- result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
- {
- 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
- 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640
- }));
+ result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, outValues);
+ std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
- std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
armnn::MultiplicationQueueDescriptor data;
armnn::WorkloadInfo info;
- AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
- AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
+ AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
+ AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
+ inputHandle0->Allocate();
inputHandle1->Allocate();
- inputHandle2->Allocate();
outputHandle->Allocate();
+ CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
- CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
workload->Execute();
@@ -3323,6 +3377,113 @@ LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& wor
return result;
}
+} // anonymous namespace
+
+LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ unsigned int batchSize = 1;
+ unsigned int channels = 2;
+ unsigned int height = 2;
+ unsigned int width = 3;
+ const unsigned int shape[] = { batchSize, channels, height, width };
+
+ // See dequantized values to the right
+ std::vector<uint8_t> input0({
+ 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440,
+ 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120
+ });
+
+ // See dequantized values to the right
+ std::vector<uint8_t> input1({
+ 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
+ 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297
+ });
+
+ // See dequantized values to the right
+ std::vector<uint8_t> output(
+ {
+ 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
+ 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640
+ });
+
+ return MultiplicationUint8TestHelper(workloadFactory,
+ shape,
+ input0,
+ 4.0f,
+ 1,
+ shape,
+ input1,
+ 3.0f,
+ -2,
+ shape,
+ output,
+ 1366.255f, // Scale/offset chosen to have output values out of range
+ -5);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ const unsigned int shape0[] = { 1, 2, 2, 3 };
+ const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+ std::vector<uint8_t> input0({
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ });
+
+ std::vector<uint8_t> input1({2});
+
+ std::vector<uint8_t> output({
+ 2, 4, 6, 8, 10, 12,
+ 14, 16, 18, 20, 22, 24
+ });
+
+ return MultiplicationUint8TestHelper(workloadFactory,
+ shape0,
+ input0,
+ 1.0f,
+ 0,
+ shape1,
+ input1,
+ 1.0f,
+ 0,
+ shape0,
+ output,
+ 1.0f,
+ 0);
+}
+
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ const unsigned int shape0[] = { 1, 2, 2, 3 };
+ const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+ std::vector<uint8_t> input0({
+ 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12
+ });
+
+ std::vector<uint8_t> input1({1, 2, 3});
+
+ std::vector<uint8_t> output({
+ 1, 4, 9, 4, 10, 18,
+ 7, 16, 27, 10, 22, 36
+ });
+
+ return MultiplicationUint8TestHelper(workloadFactory,
+ shape0,
+ input0,
+ 1.0f,
+ 0,
+ shape1,
+ input1,
+ 1.0f,
+ 0,
+ shape0,
+ output,
+ 1.0f,
+ 0);
+}
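Note: the "see dequantized values" comments in these tests follow the affine mapping real = scale * (quantized - offset); for example input0's 62 with scale 4.0 and offset 1 dequantizes to 4.0 * (62 - 1) = 244, and the out-of-range product 379620 requantizes to round(379620 / 1366.255) - 5 = 273, which clamps to 255. A sketch of both directions:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Affine quantization used by the tests above: real = scale * (q - offset).
    float Dequantize(uint8_t q, float scale, int32_t offset)
    {
        return scale * (static_cast<int32_t>(q) - offset);
    }

    // Requantization rounds and clamps to [0, 255], which is where the
    // "(clamped)" expected outputs come from.
    uint8_t Quantize(float r, float scale, int32_t offset)
    {
        const int32_t q = static_cast<int32_t>(std::round(r / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }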
LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
@@ -3702,6 +3863,12 @@ LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFact
return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
}
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+ bool forceNoPadding)
+{
+ return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
+}
+
LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
{
return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
@@ -3882,3 +4049,18 @@ LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& work
{
return SimplePermuteUint8TestCommon(workloadFactory);
};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return PermuteFloat32ValueSet1TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return PermuteFloat32ValueSet2TestCommon(workloadFactory);
+};
+
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return PermuteFloat32ValueSet3TestCommon(workloadFactory);
+};
diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp
index fc0c9c7b14..36e73e461c 100644
--- a/src/armnn/backends/test/LayerTests.hpp
+++ b/src/armnn/backends/test/LayerTests.hpp
@@ -82,6 +82,8 @@ LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWork
LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+ bool forceNoPadding);
LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory);
@@ -187,6 +189,8 @@ LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& worklo
unsigned int batchSize);
LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
armnn::IWorkloadFactory& refWorkloadFactory);
@@ -260,6 +264,8 @@ LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& wor
float beta);
LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
bool biasEnabled);
@@ -303,3 +309,6 @@ LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workl
LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory);
LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp
index 4eafa1a211..4ecffedc91 100644
--- a/src/armnn/backends/test/PermuteTestImpl.hpp
+++ b/src/armnn/backends/test/PermuteTestImpl.hpp
@@ -119,3 +119,107 @@ LayerTestResult<uint8_t, 4> SimplePermuteUint8TestCommon(armnn::IWorkloadFactory
return SimplePermuteTestImpl<uint8_t>(workloadFactory, descriptor, inputTensorInfo,
outputTensorInfo, input, outputExpected);
}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet1TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+ armnn::TensorInfo inputTensorInfo;
+ armnn::TensorInfo outputTensorInfo;
+
+ unsigned int inputShape[] = { 1, 2, 2, 3 };
+ unsigned int outputShape[] = { 1, 3, 2, 2 };
+
+ armnn::PermuteDescriptor descriptor;
+ descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+ inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+ outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+ std::vector<float> input = std::vector<float>(
+ {
+ 1.0f, 2.0f, 3.0f,
+ 11.0f, 12.0f, 13.0f,
+ 21.0f, 22.0f, 23.0f,
+ 31.0f, 32.0f, 33.0f,
+ });
+
+ std::vector<float> outputExpected = std::vector<float>(
+ {
+ 1.0f, 11.0f, 21.0f, 31.0f,
+ 2.0f, 12.0f, 22.0f, 32.0f,
+ 3.0f, 13.0f, 23.0f, 33.0f,
+ });
+
+ return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+ outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet2TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+ armnn::TensorInfo inputTensorInfo;
+ armnn::TensorInfo outputTensorInfo;
+
+ unsigned int inputShape[] = { 1, 3, 2, 2 };
+ unsigned int outputShape[] = { 1, 2, 2, 3 };
+
+ armnn::PermuteDescriptor descriptor;
+ descriptor.m_DimMappings = {0U, 3U, 1U, 2U};
+
+ inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+ outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+ std::vector<float> input = std::vector<float>(
+ {
+ 1.0f, 11.0f, 21.0f, 31.0f,
+ 2.0f, 12.0f, 22.0f, 32.0f,
+ 3.0f, 13.0f, 23.0f, 33.0f,
+ });
+
+ std::vector<float> outputExpected = std::vector<float>(
+ {
+ 1.0f, 2.0f, 3.0f,
+ 11.0f, 12.0f, 13.0f,
+ 21.0f, 22.0f, 23.0f,
+ 31.0f, 32.0f, 33.0f,
+ });
+
+ return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+ outputTensorInfo, input, outputExpected);
+}
+
+LayerTestResult<float, 4>
+PermuteFloat32ValueSet3TestCommon(armnn::IWorkloadFactory& workloadFactory)
+{
+ armnn::TensorInfo inputTensorInfo;
+ armnn::TensorInfo outputTensorInfo;
+
+ unsigned int inputShape[] = { 1, 2, 3, 3 };
+ unsigned int outputShape[] = { 1, 3, 2, 3 };
+
+ armnn::PermuteDescriptor descriptor;
+ descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+ inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+ outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+ std::vector<float> input = std::vector<float>(
+ {
+ 1.0f, 2.0f, 3.0f,
+ 11.0f, 12.0f, 13.0f,
+ 21.0f, 22.0f, 23.0f,
+ 31.0f, 32.0f, 33.0f,
+ 41.0f, 42.0f, 43.0f,
+ 51.0f, 52.0f, 53.0f,
+ });
+
+ std::vector<float> outputExpected = std::vector<float>(
+ {
+ 1.0f, 11.0f, 21.0f, 31.0f, 41.0f, 51.0f,
+ 2.0f, 12.0f, 22.0f, 32.0f, 42.0f, 52.0f,
+ 3.0f, 13.0f, 23.0f, 33.0f, 43.0f, 53.0f,
+ });
+
+ return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo,
+ outputTensorInfo, input, outputExpected);
+}
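Note: in these tests m_DimMappings reads as "source dimension i moves to destination dimension mappings[i]", so the destination shape satisfies dstShape[mappings[i]] == srcShape[i]. A small sketch reproducing the shapes used above:

    // Shape effect of a permute, as exercised by the value-set tests.
    void PermutedShape(const unsigned int srcShape[4],
                       const unsigned int mappings[4],
                       unsigned int dstShape[4])
    {
        for (unsigned int i = 0; i < 4; ++i)
        {
            dstShape[mappings[i]] = srcShape[i];
        }
    }

    // ValueSet1: srcShape {1,2,2,3}, mappings {0,2,3,1} -> dstShape {1,3,2,2}
    // ValueSet2: srcShape {1,3,2,2}, mappings {0,3,1,2} -> dstShape {1,2,2,3}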
diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp b/src/armnn/backends/test/Pooling2dTestImpl.hpp
index fc84ddb2ca..ab9fd6d6fb 100644
--- a/src/armnn/backends/test/Pooling2dTestImpl.hpp
+++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp
@@ -720,6 +720,83 @@ LayerTestResult<T, 4> SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkl
return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
}
+//
+// Tests max pooling with the following parameters:
+//
+// Pooling size: 3x2
+// Stride: (2,2)
+// input size: 3x2
+// channels: 1
+// batch size: 1
+//
+template<typename T>
+LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon(
+ armnn::IWorkloadFactory& workloadFactory,
+ bool forceNoPadding,
+ float qScale = 1.0f,
+ int32_t qOffset = 0)
+{
+ armnn::Pooling2dDescriptor descriptor;
+ descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
+ descriptor.m_PoolWidth = 3;
+ descriptor.m_PoolHeight = 2;
+ descriptor.m_StrideX = 2;
+ descriptor.m_StrideY = 2;
+ descriptor.m_PadLeft = (forceNoPadding) ? 0 : 1;
+ descriptor.m_PadRight = descriptor.m_PadLeft;
+ descriptor.m_PadTop = 0;
+ descriptor.m_PadBottom = 0;
+ descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+ descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+
+ unsigned int inputWidth = 3;
+ unsigned int inputHeight = 2;
+ unsigned int outputWidth =
+ (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) /
+ descriptor.m_StrideX;
+ unsigned int outputHeight =
+ (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) /
+ descriptor.m_StrideY;
+ unsigned int channels = 1;
+ unsigned int batchSize = 1;
+
+ std::vector<float> inputData = {
+ 3.0f, 6.0f, 9.0f,
+ 12.0f, 15.0f, 18.0f,
+ };
+
+ std::vector<float> expectedOutputDataWithPadding = {
+ 6.0f, 8.0f,
+ };
+
+ std::vector<float> expectedOutputDataNoPadding = {
+ 10.5f,
+ };
+
+ armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>());
+
+ // Scale and offset should match input - we're just calculating average values.
+ armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>());
+
+ // Set quantization parameters if the requested type is a quantized type.
+ if(armnn::IsQuantizedType<T>())
+ {
+ inputTensorInfo.SetQuantizationScale(qScale);
+ inputTensorInfo.SetQuantizationOffset(qOffset);
+ outputTensorInfo.SetQuantizationScale(qScale);
+ outputTensorInfo.SetQuantizationOffset(qOffset);
+ }
+
+ auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+ auto outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+ forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) :
+ QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding));
+
+ return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
+}
+
+
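Note: plugging the test's numbers into the output-extent expression above (with Floor rounding) reproduces both expected output shapes; a worked check follows.

    // out = (in + padLeft + padRight + stride - pool) / stride, Floor rounding
    unsigned int PooledExtent(unsigned int in, unsigned int padL, unsigned int padR,
                              unsigned int stride, unsigned int pool)
    {
        return (in + padL + padR + stride - pool) / stride;
    }

    // With padding:    PooledExtent(3, 1, 1, 2, 3) == 2 -> outputs {6.0f, 8.0f}
    // forceNoPadding:  PooledExtent(3, 0, 0, 2, 3) == 1 -> output  {10.5f}
    // Height in both:  PooledExtent(2, 0, 0, 2, 2) == 1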
template<typename T>
LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory,
float qScale = 1.0f,
diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp
index 87d82f1781..89e5db8e43 100644
--- a/src/armnn/backends/test/Reference.cpp
+++ b/src/armnn/backends/test/Reference.cpp
@@ -76,6 +76,10 @@ ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, true)
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
@@ -158,7 +162,11 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Te
// Mul
ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
// Batch Norm
ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
@@ -227,5 +235,8 @@ ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test)
// Permute
ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test)
ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
+ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
BOOST_AUTO_TEST_SUITE_END()