author | telsoa01 <telmo.soares@arm.com> | 2018-03-09 14:13:49 +0000
committer | telsoa01 <telmo.soares@arm.com> | 2018-03-09 14:13:49 +0000
commit | 4fcda0101ec3d110c1d6d7bee5c83416b645528a (patch)
tree | c9a70aeb2887006160c1b3d265c27efadb7bdbae /src/armnn/backends/test
download | armnn-4fcda0101ec3d110c1d6d7bee5c83416b645528a.tar.gz
Release 18.02
Change-Id: Id3c11dc5ee94ef664374a988fcc6901e9a232fa6
Diffstat (limited to 'src/armnn/backends/test')
27 files changed, 11417 insertions, 0 deletions
diff --git a/src/armnn/backends/test/ActivationFixture.hpp b/src/armnn/backends/test/ActivationFixture.hpp
new file mode 100644
index 0000000000..a67a110354
--- /dev/null
+++ b/src/armnn/backends/test/ActivationFixture.hpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "TensorCopyUtils.hpp"
+#include "WorkloadTestUtils.hpp"
+
+struct ActivationFixture
+{
+    ActivationFixture()
+    {
+        auto boostArrayExtents = boost::extents
+            [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)]
+            [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)]
+            [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)]
+            [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)];
+        output.resize(boostArrayExtents);
+        outputExpected.resize(boostArrayExtents);
+        input.resize(boostArrayExtents);
+
+        unsigned int inputShape[] = { batchSize, channels, height, width };
+        unsigned int outputShape[] = { batchSize, channels, height, width };
+
+        inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+        outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+        input = MakeRandomTensor<float, 4>(inputTensorInfo, 21453);
+    }
+
+    unsigned int width = 17;
+    unsigned int height = 29;
+    unsigned int channels = 2;
+    unsigned int batchSize = 5;
+
+    boost::multi_array<float, 4> output;
+    boost::multi_array<float, 4> outputExpected;
+    boost::multi_array<float, 4> input;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    // parameters used by some of the activation functions
+    float a = 0.234f;
+    float b = -12.345f;
+};
+
+
+struct PositiveActivationFixture : public ActivationFixture
+{
+    PositiveActivationFixture()
+    {
+        input = MakeRandomTensor<float, 4>(inputTensorInfo, 2342423, 0.0f, 1.0f);
+    }
+};
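The "calculated manually" expected values in the quantized activation tests added below (for example BoundedReLuUint8UpperAndLowerBoundTest in ActivationTestImpl.hpp) follow from dequantize, clamp, requantize arithmetic. A minimal standalone sketch reproducing one of those values; the main() harness here is illustrative and not part of this commit:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    // BoundedReLuUint8UpperAndLowerBoundTest uses scale 0.0125 and offset 112
    // for both input and output, with bounds [-1.0, 1.0].
    const float   scale      = 0.0125f;
    const int32_t offset     = 112;
    const float   lowerBound = -1.0f;
    const float   upperBound = 1.0f;

    const uint8_t quantizedInput = 230;

    // Dequantize: real = scale * (quantized - offset) = 0.0125 * 118 = 1.475
    const float real = scale * (static_cast<float>(quantizedInput) - static_cast<float>(offset));

    // Clamp to the bounded ReLu range: min(1.0, max(-1.0, 1.475)) = 1.0
    const float clamped = std::min(upperBound, std::max(lowerBound, real));

    // Requantize: round(1.0 / 0.0125) + 112 = 80 + 112 = 192, which matches the
    // expected output for input value 230 in that test.
    const auto quantizedOutput = static_cast<uint8_t>(std::lround(clamped / scale) + offset);

    std::cout << static_cast<int>(quantizedOutput) << std::endl; // prints 192
}
```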
\ No newline at end of file diff --git a/src/armnn/backends/test/ActivationTestImpl.hpp b/src/armnn/backends/test/ActivationTestImpl.hpp new file mode 100644 index 0000000000..255a00ef0b --- /dev/null +++ b/src/armnn/backends/test/ActivationTestImpl.hpp @@ -0,0 +1,559 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" +#include "ActivationFixture.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 4> BoundedReLuTestCommon(armnn::IWorkloadFactory& workloadFactory, + float upperBound, float lowerBound, + float inputScale, int32_t inputOffset, float outputScale, int32_t outputOffset, + const std::vector<T>& inputData, const std::vector<T>& outputExpectedData, + unsigned int inputWidth, unsigned int inputHeight, + unsigned int inputChannels, unsigned int inputBatchSize) +{ + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(inputScale); + inputTensorInfo.SetQuantizationOffset(inputOffset); + + outputTensorInfo.SetQuantizationScale(outputScale); + outputTensorInfo.SetQuantizationOffset(outputOffset); + } + + LayerTestResult<T, 4> result(inputTensorInfo); + + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + return result; +} + +LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<float> input = std::vector<float>{ + -2.0f, 0.1f, 0.5f, 1.25f, + 0.786f, 0.9875f, -1.5f, 0.384f, + 1.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 2.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + 
+ // Calculated manually + std::vector<float> output = std::vector<float>{ + -1.0f, 0.1f, 0.5f, 1.0f, + 0.786f, 0.9875f, -1.0f, 0.384f, + 1.0f, 1.0f, 1.0f, 0.896f, + 1.0f, 1.0f, 0.3f, 0.15f, + 0.999f, 1.0f, 0.89f, 1.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<float> input = std::vector<float>{ + -1.0f, 0.1f, 0.5f, 6.25f, + 0.786f, 5.9875f, -0.5f, 0.384f, + 6.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 12.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + + // Calculated manually + std::vector<float> output = std::vector<float>{ + 0.0f, 0.1f, 0.5f, 6.0f, + 0.786f, 5.9875f, 0.0f, 0.384f, + 6.0f, 3.5f, 6.0f, 0.896f, + 2.126f, 6.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<uint8_t> input = std::vector<uint8_t>{ + 51, 124, 28, + 251, 8, 92 + }; + + // Calculated manually + std::vector<uint8_t> output = std::vector<uint8_t>{ + 0, 122, 0, + 255, 0, 58 + }; + + float inputScale = 12.0f / 255.0f; + int32_t inputOffset = 63; + float outputScale = 6.0f / 255.0f; + int32_t outputOffset = 0; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, + inputScale, inputOffset, outputScale, outputOffset, + input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector<uint8_t> input = std::vector<uint8_t>{ + 51, 230, 28, + 251, 8, 92 + }; + + // Calculated manually + std::vector<uint8_t> output = std::vector<uint8_t>{ + 51, 192, 32, + 192, 32, 92 + }; + + int32_t inputOffset = 112; + float inputScale = 0.0125f; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, + inputScale, inputOffset, inputScale, inputOffset, // input/output scale & offset same + input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +namespace +{ + +struct BoundedReLuRandomInputTestTraits +{ + constexpr static unsigned int inputHeight = 31u; + constexpr static unsigned int inputWidth = 19u; + constexpr static unsigned int inputChannels = 4u; + constexpr static unsigned int inputBatchSize = 2; + + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + static armnn::TensorInfo GetInputTensorInfo() + { + return armnn::TensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + } + + static armnn::TensorInfo GetOutputTensorInfo() + { + return armnn::TensorInfo({ outputBatchSize, outputChannels, outputHeight, 
outputWidth }, + armnn::DataType::Float32); + } +}; + +boost::multi_array<float, 4> BoundedReLuRandomInputTest(armnn::IWorkloadFactory& workloadFactory, + float lowerBound, + float upperBound, + const armnn::ActivationDescriptor& activationDescriptor) +{ + const armnn::TensorInfo inputTensorInfo = BoundedReLuRandomInputTestTraits::GetInputTensorInfo(); + const armnn::TensorInfo outputTensorInfo = BoundedReLuRandomInputTestTraits::GetOutputTensorInfo(); + + boost::multi_array<float, 4> output(GetTensorShapeAsArray<4>(outputTensorInfo)); + + // min/max random values passed to MakeRandomTensor are purposely outside of the ReLu range [lowerBound, upperBound] + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 4605828, lowerBound - 5.0f, upperBound * 2.0f); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + descriptor.m_Parameters = activationDescriptor; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&output[0][0][0][0], outputHandle.get()); + + return output; +} + +} // namespace + +LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& otherWorkloadFactory, + float upperBound, + float lowerBound) +{ + LayerTestResult<float, 4> result(BoundedReLuRandomInputTestTraits::GetOutputTensorInfo()); + + armnn::ActivationDescriptor activationDescriptor; + activationDescriptor.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDescriptor.m_A = upperBound; + activationDescriptor.m_B = lowerBound; + + result.output = BoundedReLuRandomInputTest(workloadFactory, 0.0f, upperBound, activationDescriptor); + result.outputExpected = BoundedReLuRandomInputTest(otherWorkloadFactory, 0.0f, upperBound, activationDescriptor); + + return result; +} + +template<typename T> +LayerTestResult<T,4> ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputHeight = 20; + unsigned int inputWidth = 17; + unsigned int inputChannels = 3; + unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, inputChannels, inputHeight, inputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Do linear activation that should leave tensor unchanged + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = 1.0f; + data.m_Parameters.m_B = 0.0f; + data.m_Parameters.m_Function = armnn::ActivationFunction::Linear; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 7123561); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + // Ensure output equals input + ret.outputExpected = input; + + return ret; +} + +LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon<uint8_t>(workloadFactory, 4.0f, 3); +} + +template<typename T> +LayerTestResult<T, 4> SimpleActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::ActivationFunction activationFunction, + float activationParameterA, + float activationParameterB, + float qScale, + int32_t qOffset, + const std::vector<float>& inputData, + const std::vector<float>& outputExpectedData) +{ + constexpr static unsigned int inputWidth = 16u; + constexpr static unsigned int inputHeight = 1u; + constexpr static unsigned int inputChannels = 1u; + constexpr static unsigned int inputBatchSize = 1u; + + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    LayerTestResult<T, 4> result(inputTensorInfo);
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    // Set up the activation descriptor and workload
+    armnn::ActivationQueueDescriptor descriptor;
+    armnn::WorkloadInfo workloadInfo;
+    AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get());
+
+    descriptor.m_Parameters.m_Function = activationFunction;
+    descriptor.m_Parameters.m_A = activationParameterA;
+    descriptor.m_Parameters.m_B = activationParameterB;
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(descriptor, workloadInfo);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    // Calculated manually
+    result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, outputExpectedData));
+
+    return result;
+}
+
+template<typename T>
+LayerTestResult<T, 4> SimpleSigmoidTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
+{
+    std::vector<float> inputData = {
+        -0.1f, -0.2f, -0.3f, -0.4f,
+         0.1f,  0.2f,  0.3f,  0.4f,
+        -1.0f, -2.0f, -3.0f, -4.0f,
+         1.0f,  2.0f,  3.0f,  4.0f
+    };
+
+    // Calculate output values for input
+    auto f = [](float value)
+    {
+        return 1.0f / (1.0f + std::exp(-value));
+    };
+    std::vector<float> outputExpectedData(inputData.size());
+    std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f);
+
+    return SimpleActivationTest<T>(workloadFactory,
+                                   armnn::ActivationFunction::Sigmoid,
+                                   0.f,
+                                   0.f,
+                                   qScale,
+                                   qOffset,
+                                   inputData,
+                                   outputExpectedData);
+}
+
+LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    return SimpleSigmoidTestCommon<float>(workloadFactory, 0.0f, 0);
+}
+
+LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+    return SimpleSigmoidTestCommon<uint8_t>(workloadFactory, 0.1f, 50);
+}
+
+template<typename T>
+LayerTestResult<T,4> CompareActivationTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                                               armnn::IWorkloadFactory& refWorkloadFactory,
+                                               armnn::ActivationFunction f,
+                                               unsigned int batchSize = 5,
+                                               float qScale = 0.0f,
+                                               int32_t qOffset = 0)
+{
+    unsigned int width = 17;
+    unsigned int height = 29;
+    unsigned int channels = 2;
+
+    float a = 0.234f;
+    float b = -12.345f;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int shape[] = {batchSize, channels, height, width};
+
+    inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType<T>());
+
+    // Set quantization parameters if the requested type is a quantized type.
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + float minVal = -10.f; + if (f == armnn::ActivationFunction::Sqrt) + { + minVal = 0.f; + } + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 21453, minVal, 10.f); + + + LayerTestResult<T,4> ret(outputTensorInfo); + auto boostArrayExtents = boost::extents + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(batchSize)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(channels)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(height)] + [boost::numeric_cast<boost::multi_array_types::extent_gen::index>(width)]; + ret.output.resize(boostArrayExtents); + ret.outputExpected.resize(boostArrayExtents); + + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = a; + data.m_Parameters.m_B = b; + data.m_Parameters.m_Function = f; + + armnn::ActivationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateActivation(data, info); + BOOST_ASSERT(workload != nullptr); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateActivation(refData, refInfo); + BOOST_ASSERT(workloadRef != nullptr); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +LayerTestResult<float,4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize) +{ + return CompareActivationTestImpl<float>(workloadFactory, refWorkloadFactory, f, batchSize); +} + +LayerTestResult<uint8_t,4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f) +{ + return CompareActivationTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, f, 5, 0.1f, 50); +} diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp new file mode 100644 index 0000000000..5933cebc80 --- /dev/null +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -0,0 +1,269 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClWorkloadFactory.hpp" +#include "backends/ClWorkloadUtils.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/ClLayerSupport.hpp" +#include "ActivationFixture.hpp" + +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <arm_compute/runtime/CL/CLScheduler.h> +#include <string> +#include <iostream> + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_ArmComputeCl) +using FactoryType = armnn::ClWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) + +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +// Splitter +BOOST_AUTO_TEST_CASE(SimpleSplitter) +{ + armnn::ClWorkloadFactory workloadFactory; + auto testResult = SplitterTest(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) +{ + armnn::ClWorkloadFactory workloadFactory; + auto testResult = SplitterUint8Test(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, 
CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) 
+ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) + +// Resize Bilinear +ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concat +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) + +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, 
CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8, CompareSoftmaxUint8Test, 1.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRefUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithRef, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp new file mode 100644 index 0000000000..dd8a668940 --- /dev/null +++ b/src/armnn/backends/test/ArmComputeNeon.cpp @@ -0,0 +1,360 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/test/TensorCopyUtils.hpp" +#include "ActivationFixture.hpp" + +#include "WorkloadTestUtils.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_ArmComputeNeon) +using FactoryType = armnn::NeonWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +namespace +{ + +armnn::Convolution2dDescriptor MakeConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t padLeft = 0, uint32_t padRight = 0, uint32_t padTop = 0, uint32_t padBottom = 0) +{ + armnn::Convolution2dDescriptor result; + result.m_StrideX = strideX; + result.m_StrideY = strideY; + result.m_PadLeft = padLeft; + result.m_PadRight = padRight; + result.m_PadTop = padTop; + result.m_PadBottom = padBottom; + result.m_BiasEnabled = true; + return result; +} + +} + +BOOST_AUTO_TEST_CASE(Conv2dUtils) +{ + // the only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3} + armnn::TensorShape shape1x1({ 1,1,1,1 }); + armnn::TensorInfo info1x1(shape1x1, armnn::DataType::Float32); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 3))); + + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 5))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 6))); + + // non zero padding is not preferred for direct convolution + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 0))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 0, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 1))); + + // 2x2 filter not preferred for direct convolution + armnn::TensorShape shape2x2({ 1,1,2,2 }); + armnn::TensorInfo info2x2(shape2x2, armnn::DataType::Float32); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info2x2, MakeConv2dDesc(1, 1))); +} + 
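Taken together, the Conv2dUtils assertions above imply a predicate roughly like the following. This is a sketch inferred from the test expectations alone, not the actual armnn::IsNeonDirectConvolutionPreferred implementation in NeonLayerSupport:

```cpp
#include <cstdint>

// Inferred from the Conv2dUtils assertions, not the real Arm NN code:
// direct convolution is preferred only for a 1x1 kernel with zero padding
// and strides of at most 3 in each dimension.
bool IsNeonDirectConvPreferredSketch(uint32_t kernelW, uint32_t kernelH,
                                     uint32_t strideX, uint32_t strideY,
                                     uint32_t padLeft, uint32_t padRight,
                                     uint32_t padTop, uint32_t padBottom)
{
    const bool kernelIs1x1 = (kernelW == 1 && kernelH == 1);
    const bool zeroPadding = (padLeft == 0 && padRight == 0 &&
                              padTop == 0 && padBottom == 0);
    const bool strideOk    = (strideX >= 1 && strideX <= 3 &&
                              strideY >= 1 && strideY <= 3);
    return kernelIs1x1 && zeroPadding && strideOk;
}
```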
+// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +namespace +{ + +armnn::DepthwiseConvolution2dDescriptor MakeDepthwiseConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t depthMultiplier = 1, uint32_t padLeft = 0, uint32_t padRight = 0, + uint32_t padTop = 0, uint32_t padBottom = 0) +{ + armnn::DepthwiseConvolution2dDescriptor desc; + desc.m_PadLeft = padLeft; + desc.m_PadRight = padRight; + desc.m_PadTop = padTop; + desc.m_PadBottom = padBottom; + desc.m_StrideX = strideX; + desc.m_StrideY = strideY; + desc.m_BiasEnabled = true; + return desc; +} + +} + +BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) +{ + armnn::TensorInfo inputInfo({ 1, 1, 10, 10 }, armnn::DataType::Float32); + armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, armnn::DataType::Float32); + + // Strides supported: 1,2,3 + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 3), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 3), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 1), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 2), weightsInfo3x3)); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 3), weightsInfo3x3)); + + // Unsupported stride + BOOST_TEST(!armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(4, 1), weightsInfo3x3)); + + // Supported weights shape 1x1 + armnn::TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, armnn::DataType::Float32); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo1x1)); + + // Supported shape 2x2 + armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo2x2)); +} + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, 
SimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) + +// Ignore padding values for pooling but count padding fields into the divisor +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Splitter +BOOST_AUTO_TEST_CASE(SimpleSplitter) +{ + armnn::NeonWorkloadFactory workloadFactory; + auto testResult = SplitterTest(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) +{ + armnn::NeonWorkloadFactory workloadFactory; + auto testResult = SplitterUint8Test(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, 
CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concatenation +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// L2 Normalization +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest); + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, 
SimplePermuteUint8Test) +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest<float>) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest<uint8_t>) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReference, ComparePooling2dTest, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(UNSUPPORTED_CompareL2Pooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta1WithReference, CompareSoftmaxUint8Test, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta2WithReference, CompareSoftmaxUint8Test, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithReference, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + 
+ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 1u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/BatchNormTestImpl.hpp b/src/armnn/backends/test/BatchNormTestImpl.hpp new file mode 100644 index 0000000000..861ef6b053 --- /dev/null +++ b/src/armnn/backends/test/BatchNormTestImpl.hpp @@ -0,0 +1,112 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template<typename T> +LayerTestResult<T,4> BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + + armnn::TensorInfo inputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({num, channels, height, width}, armnn::GetDataType<T>()); + armnn::TensorInfo tensorInfo({channels}, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+        tensorInfo.SetQuantizationScale(qScale);
+        tensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor<T, 4>(inputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 4.f,
+            4.f, 2.f,
+            1.f, 6.f,
+
+            1.f, 1.f,
+            4.f, 1.f,
+            -2.f, 4.f
+        }));
+    // these values are per-channel of the input
+    auto mean = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, -2}));
+    auto variance = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {4, 9}));
+    auto beta = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {3, 2}));
+    auto gamma = MakeTensor<T, 1>(tensorInfo, QuantizedVector<T>(qScale, qOffset, {2, 1}));
+    LayerTestResult<T,4> ret(outputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchNormalizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
+    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);
+
+    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
+    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
+    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
+    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);
+
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Mean = &meanTensor;
+    data.m_Variance = &varianceTensor;
+    data.m_Beta = &betaTensor;
+    data.m_Gamma = &gammaTensor;
+    data.m_Parameters.m_Eps = 0.0f;
+
+    // for each channel:
+    // subtract mean, divide by standard deviation (with an epsilon to avoid div by 0),
+    // multiply by gamma and add beta
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo,
+        QuantizedVector<T>(qScale, qOffset,
+        {
+            1.f, 4.f,
+            4.f, 2.f,
+            1.f, 6.f,
+
+            3.f, 3.f,
+            4.f, 3.f,
+            2.f, 4.f
+        }));
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
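The expected values in BatchNormTestImpl can be checked by hand against the per-channel formula y = gamma * (x - mean) / sqrt(variance + eps) + beta. A minimal standalone sketch (illustrative harness, not part of this commit) verifying one element:

```cpp
#include <cmath>
#include <iostream>

// Reference batch normalization for a single element.
float BatchNormRef(float x, float mean, float variance,
                   float beta, float gamma, float eps)
{
    return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
}

int main()
{
    // Second channel of BatchNormTestImpl: mean = -2, variance = 9,
    // beta = 2, gamma = 1, eps = 0. Input 1.f -> (1 + 2) / 3 * 1 + 2 = 3.f,
    // matching the first expected value of that channel.
    std::cout << BatchNormRef(1.f, -2.f, 9.f, 2.f, 1.f, 0.f) << std::endl; // prints 3
}
```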
\ No newline at end of file diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp new file mode 100644 index 0000000000..0c0511b234 --- /dev/null +++ b/src/armnn/backends/test/Conv2dTestImpl.hpp @@ -0,0 +1,802 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +// Mapping from input type to bias type for fully connected layers. +// float => float, uint8_t => int32_t +template<typename T> +struct FullyConnectedBiasTypeForInputType; + +template<> +struct FullyConnectedBiasTypeForInputType<float> +{ + using Type = float; +}; + +template<> +struct FullyConnectedBiasTypeForInputType<uint8_t> +{ + using Type = int32_t; +}; + +// Modifies a std::vector in-place using a specified bias +template<typename T, typename B> +void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset, + const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) +{ + BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()), + "Invalid type and parameter combination."); + BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()), + "Invalid type and parameter combination."); + + // Note we need to dequantize and re-quantize the image value and the bias + for (uint32_t i = 0; i < bias.size(); ++i) + { + float dBias = SelectiveDequantize(bias[i], bScale, bOffset); + for (uint32_t y = 0; y < h; ++y) + { + for (uint32_t x = 0; x < w; ++x) + { + uint32_t offset = (i * h + y) * w + x; + BOOST_ASSERT(offset < v.size()); + T& outRef = v[offset]; + float dOutput = SelectiveDequantize(outRef, vScale, vOffset); + outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset); + } + } + } +} + + + +template<typename T, typename B> +LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& kernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& outputExpected, + float qScale, + int32_t qOffset, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + + bool biasEnabled = bias.size() > 0; + + // This function currently assumes 1 batch of input/output (and duplicates this into 2 
batches) + BOOST_ASSERT(inputNum == 1); + BOOST_ASSERT(outputNum == 1); + + // If a bias is used, its size must equal the number of output channels + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + + // Note these tensors will use two (identical) batches + armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({outputChannels, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + // Construct input data - Two batches of the same input image + std::vector<T> inputImage; + inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth); + std::vector<T> inputData; + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<T> outputImage; + outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + + // Apply bias to output image if enabled + if(biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + // Construct expected output data - two identical images + std::vector<T> outputData; + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + // todo: nontrivial padding and strides + uint32_t strideX = 1; + uint32_t strideY = 1; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + if(biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled - can be a source of bugs + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; 
+ data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<typename T, typename B> +LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int inputHeight = 3; + unsigned int inputWidth = 3; + unsigned int inputChannels = 2; + unsigned int inputNum = 1; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int kernelChannels = inputChannels; + + unsigned int outputHeight = 1; + unsigned int outputWidth = 1; + unsigned int outputChannels = kernelChannels; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo({ inputNum, inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputNum, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({ 1, outputChannels, kernelHeight, kernelWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + }))); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + }))); + + // manually calculated + std::vector<T> outputImage( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + {0.f, 0.f}) + ); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = 
workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 0; + data.m_Parameters.m_PadBottom = 0; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<typename T, typename B> +LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int depthMultiplier = 2; + + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 2; + unsigned int inputBatchSize = 1; + + unsigned int kernelHeight = 5; + unsigned int kernelWidth = 3; + + unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2; + unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2; + unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({inputBatchSize, inputChannels, inputHeight, inputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({outputBatchSize, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType<T>()); + armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>()); + armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. 
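+    // (Note: biasDesc below is assumed to take quantization scale qScale * qScale - the product of
+    // the input and weight scales - with offset 0, which is the convention the quantized
+    // convolution implementations are believed to expect for their int32 bias values.)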
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2, 1, -1})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + auto kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + 0, 0, 0, + 0, -1, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 0, 0, 0, + 0, 0, 0, + 0, 1, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // manually calculated + std::vector<T> outputImage = std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, + 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 
0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f + })); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Parameters.m_StrideX = 2; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 1; + data.m_Parameters.m_PadBottom = 1; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + + + +template<typename T> +LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + using B = typename FullyConnectedBiasTypeForInputType<T>::Type; + + // until we have a specialist 1D convolution layer, we can fake one using + // 2D convolution with the final dimension set to 1. + // I don't anticipate this being particularly slow, given that convolution is implemented + // as a matrix multiplication, at which point dimension doesn't matter. + + unsigned int batchSize = 1; + unsigned int inputChannels = 2; + unsigned int outputChannels = 3; + unsigned int inputSize = 5; // the 1D size (could view as 'width' or 'height') + unsigned int kernelSize = 3; + unsigned int padSize = 2; + unsigned int stride = 1; + unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride + + armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo biasInfo({outputChannels}, armnn::GetDataType<B>()); + + // Set quantization parameters if the requested type is a quantized type. 
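+    // (For illustration: with padSize = 2, only the last kernel tap overlaps the input at output
+    // position 0, so - assuming the usual cross-correlation convention - the first expected value
+    // in outputData below is -1.5f * -3.0f = 4.5f; the remaining entries can be audited the same way.)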
+ if(armnn::IsQuantizedType<T>()) + { + inputInfo.SetQuantizationScale(qScale); + inputInfo.SetQuantizationOffset(qOffset); + outputInfo.SetQuantizationScale(qScale); + outputInfo.SetQuantizationOffset(qOffset); + kernelInfo.SetQuantizationScale(qScale); + kernelInfo.SetQuantizationOffset(qOffset); + biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale()); + biasInfo.SetQuantizationOffset(0); + } + + std::vector<T> inputData( + QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), { + 5.0f, -2.0f, 2.5f, 0.0f, 1.0f, + -3.0f, 3.2f, 5.0f, 2.0f, 3.0f, + })); + + std::vector<T> kernelData( + QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f, + 0.0f, 2.0f, -1.5f, + + 0.0f, 0.0f, 0.0f, + 0.2f, 0.2f, 0.2f, + + 0.5f, 0.0f, 0.5f, + 0.0f, -1.0f, 0.0f + })); + + std::vector<B> biasData( + QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f + })); + + std::vector<T> outputData( + QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), { + 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f, + -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f, + 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f + })); + + // Optionally apply bias to output image + if(biasEnabled) + { + ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), + biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), + 1, outputSize); + } + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo); + armnn::ScopedCpuTensorHandle biasTensor(biasInfo); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); + + AddInputToWorkload(data, info, inputInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = stride; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = padSize; + data.m_Parameters.m_PadBottom = padSize; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + + workload->Execute(); + + // output + LayerTestResult<T,4> ret(outputInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData); + return ret; +} + + + +template<typename T> +LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + + 
unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = 2; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; + unsigned int biasShape[] = {outputChannels}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>()); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); + + LayerTestResult<T,4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234); + auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::Convolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + 
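+// (A note on the comparison tests above and below: the output extents follow the usual
+// floor-division formula, e.g. outputHeight = (inputHeight + 2*padY - kernelHeight + strideY) / strideY
+// = (8 + 2 - 3 + 3) / 3 = 3, and outputWidth = (16 + 2 - 3 + 2) / 2 = 8, using integer division.)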
+template<typename T> +LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int channelMultiplier = 1; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = inputChannels * channelMultiplier; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + unsigned int kernelShape[] = { channelMultiplier, inputChannels, kernelHeight, kernelWidth }; + unsigned int biasShape[] = { outputChannels }; + + float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0; + float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0; + int32_t qOffset = 0; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>(), inputsQScale, qOffset); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>(), outputQScale, qOffset); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType<T>(), inputsQScale, qOffset); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetBiasDataType(armnn::GetDataType<T>()), inputsQScale, qOffset); + + LayerTestResult<T, 4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f); + auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>(biasDesc, 1028, 0.0f, 255.0f); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, 
refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp new file mode 100644 index 0000000000..3f320d80e9 --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadCl.cpp @@ -0,0 +1,356 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "backends/ClWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/ClWorkloadUtils.hpp" +#include "backends/ClWorkloads.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, + std::initializer_list<unsigned int> expectedDimensions) +{ + return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions); +} + +BOOST_AUTO_TEST_SUITE(CreateWorkloadCl) + +BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateActivationWorkloadTest<ClActivationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1})); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateAdditionWorkloadTest<ClAdditionFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + AdditionQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = 
CreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 1, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); +} + +template <typename Convolution2dWorkloadType> +static void Convolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 8, 16})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 2, 10})); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat32Workload) +{ + Convolution2dWorkloadTest<ClConvolution2dFloat32Workload>(); +} + + +template <typename Convolution2dWorkloadType> +static void DirectConvolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateDirectConvolution2dWorkloadTest<Convolution2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6})); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat32Workload) +{ + DirectConvolution2dWorkloadTest<ClConvolution2dFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) +{ + DirectConvolution2dWorkloadTest<ClConvolution2dUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = + CreateFullyConnectedWorkloadTest<ClFullyConnectedFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = + CreateMultiplicationWorkloadTest<ClMultiplicationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we 
expect them (see definition of CreateMultiplicationWorkloadTest) + MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateNormalizationWorkloadTest<ClNormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 5, 5, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1})); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreatePooling2dWorkloadTest<ClPooling2dFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 5, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 2, 4})); +} + +template <typename ReshapeWorkloadType> +static void ClCreateReshapeWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL. 
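+    // (The workload's output tensor itself is presumably {1, 4}, as in the corresponding Neon
+    // test; CL reports it as a single dimension of four elements once the leading 1 is collapsed.)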
+}
+
+BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload)
+{
+    ClCreateReshapeWorkloadTest<ClReshapeFloat32Workload>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
+{
+    ClCreateReshapeWorkloadTest<ClReshapeUint8Workload>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload)
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workload = CreateSoftmaxWorkloadTest<ClSoftmaxFloat32Workload>(factory, graph);
+
+    // check that inputs/outputs are as we expect them (see definition of ClSoftmaxFloat32Workload)
+    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workload = CreateSplitterWorkloadTest<ClSplitterFloat32Workload>(factory, graph);
+
+    // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest)
+    SplitterQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {7}));
+    auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {4}));
+    auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {1}));
+    auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2}));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
+{
+    // Test that it is possible to decide which output of the splitter layer
+    // should be linked to which input of the merger layer.
+    // We test that it is possible to specify the 0th output
+    // of the splitter to be the 1st input to the merger, and the 1st output of the splitter
+    // to be the 0th input of the merger.
+
+    Graph graph;
+    ClWorkloadFactory factory;
+    factory.LoadOpenClRuntime();
+
+    auto workloads =
+        CreateSplitterMergerWorkloadTest<ClSplitterFloat32Workload, ClMergerFloat32Workload>(factory, graph);
+
+    auto wlSplitter = std::move(workloads.first);
+    auto wlMerger = std::move(workloads.second);
+
+    // check that the index of inputs/outputs matches what we declared on InputDescriptor construction.
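+    // (The dynamic_casts are deliberate: if the splitter/merger endpoints were not created as
+    // sub-tensor views, they would return null, which the BOOST_TEST checks below would catch.)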
+    armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
+    armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
+    armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
+    armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
+
+    BOOST_TEST(sOut0);
+    BOOST_TEST(sOut1);
+    BOOST_TEST(mIn0);
+    BOOST_TEST(mIn1);
+
+    // flipped order of inputs/outputs
+    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
+    BOOST_TEST(validDataPointers);
+
+    // also make sure that the inputs are sub-tensors of one tensor and the outputs are sub-tensors of another tensor
+    bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
+                                  && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
+
+    BOOST_TEST(validSubTensorParents);
+}
+
+BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
+{
+    // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
+    // We create a splitter with two outputs, and each of those outputs is then used by two different
+    // activation layers.
+
+    Graph graph;
+    ClWorkloadFactory factory;
+    std::unique_ptr<ClSplitterFloat32Workload> wlSplitter;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv0_0;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv0_1;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv1_0;
+    std::unique_ptr<ClActivationFloat32Workload> wlActiv1_1;
+
+    CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterFloat32Workload,
+        ClActivationFloat32Workload>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1);
+
+    // check that the index of inputs/outputs matches what we declared on InputDescriptor construction.
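+    // (Each splitter output is expected to hand the same sub-tensor handle to both of the
+    // activation layers consuming it; the pointer comparisons below assert exactly that.)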
+ armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) +{ + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + CreateMemCopyWorkloads<CopyFromCpuToClWorkload,CopyFromClToCpuWorkload,IClTensorHandle>(factory); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + factory.LoadOpenClRuntime(); + + auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 5, 20, 50, 67 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 })); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp new file mode 100644 index 0000000000..807937ba2b --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadNeon.cpp @@ -0,0 +1,302 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/NeonWorkloadUtils.hpp" +#include "backends/NeonWorkloads.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/NeonTensorHandle.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon) + +namespace +{ + +bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo) +{ + using namespace armnn::armcomputetensorutils; + + const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info(); + const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo); + + if (handleInfo->data_type() != expectedAclInfo.data_type()) + { + return false; + } + + if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions()) + { + return false; + } + + if (handleInfo->quantization_info() != expectedAclInfo.quantization_info()) + { + return false; + } + + for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d) + { + if (handleInfo->dimension(d) != expectedAclInfo.dimension(d)) + { + return false; + } + } + + return true; +} + +} // namespace + +BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest<NeonActivationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateAdditionWorkloadTest<NeonAdditionFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + AdditionQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); + 
BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest<NeonFullyConnectedFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateMultiplicationWorkloadTest<NeonMultiplicationFloat32Workload>(factory, graph); + + // check that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest<NeonNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = 
CreatePooling2dWorkloadTest<NeonPooling2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType::Float32))); +} + +template <typename ReshapeWorkloadType> +static void NeonCreateReshapeWorkloadTest(DataType dataType) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, dataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, dataType))); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) +{ + NeonCreateReshapeWorkloadTest<NeonReshapeFloat32Workload>(DataType::Float32); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + NeonCreateReshapeWorkloadTest<NeonReshapeUint8Workload>(DataType::QuantisedAsymm8); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest<NeonSoftmaxFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest<NeonSplitterFloat32Workload>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 7}, DataType::Float32))); + auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 4}, DataType::Float32))); + auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({1, 1}, DataType::Float32))); + auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({1, 2}, 
DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMerger) +{ + // Test that it is possible to decide which output of the splitter layer + // should be linked to which input of the merger layer. + // We test that it is possible to specify the 0th output + // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be the 0th input + // of the merger. + + Graph graph; + NeonWorkloadFactory factory; + + auto workloads = + CreateSplitterMergerWorkloadTest<NeonSplitterFloat32Workload, NeonMergerFloat32Workload>(factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + // Check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::INeonTensorHandle* mIn0 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* mIn1 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) +{ + // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We create a splitter with two outputs, and check that each of those outputs is used by two different activation layers. + + Graph graph; + NeonWorkloadFactory factory; + std::unique_ptr<NeonSplitterFloat32Workload> wlSplitter; + std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_0; + std::unique_ptr<NeonActivationFloat32Workload> wlActiv0_1; + std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_0; + std::unique_ptr<NeonActivationFloat32Workload> wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterFloat32Workload, + NeonActivationFloat32Workload>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + + armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::INeonTensorHandle* activ0_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ0_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ1_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ1_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon) +{ + NeonWorkloadFactory factory; + CreateMemCopyWorkloads<CopyFromCpuToNeonWorkload,CopyFromNeonToCpuWorkload,INeonTensorHandle>(factory); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp 
b/src/armnn/backends/test/CreateWorkloadRef.cpp new file mode 100644 index 0000000000..e0eacebe1a --- /dev/null +++ b/src/armnn/backends/test/CreateWorkloadRef.cpp @@ -0,0 +1,414 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include "backends/RefWorkloadFactory.hpp" +#include "backends/RefWorkloads.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "test/CreateWorkload.hpp" + +namespace +{ + +template<typename Workload> +void CheckInputOutput(std::unique_ptr<Workload> workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} + +template <typename Workload> +void CheckInputsOutput(std::unique_ptr<Workload> workload, + const TensorInfo& inputInfo0, + const TensorInfo& inputInfo1, + const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle0 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + auto inputHandle1 = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0)); + BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} +} + +BOOST_AUTO_TEST_SUITE(CreateWorkloadRef) + +template <typename ActivationWorkloadType> +static void RefCreateActivationWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest<ActivationWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateActivationWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType), + TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) +{ + RefCreateActivationWorkloadTest<RefActivationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateActivationUint8Workload) +{ + RefCreateActivationWorkloadTest<RefActivationUint8Workload>(); +} + +template <typename AdditionWorkloadType> +static void RefCreateAdditionWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateAdditionWorkloadTest<AdditionWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + CheckInputsOutput(std::move(workload), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) +{ + RefCreateAdditionWorkloadTest<RefAdditionFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload) +{ + RefCreateAdditionWorkloadTest<RefAdditionUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest<RefBatchNormalizationFloat32Workload>(factory, graph); + + // check that 
outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + CheckInputOutput( + std::move(workload), TensorInfo({2, 3, 1, 1}, DataType::Float32), TensorInfo({2, 3, 1, 1}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest<RefConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 2, 2, 10}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = + CreateDepthwiseConvolution2dWorkloadTest<RefDepthwiseConvolution2dFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 9, 2, 10}, DataType::Float32)); +} + +template <typename FullyConnectedWorkloadType> +static void RefCreateFullyConnectedWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + float inputsQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0f; + float outputQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0f; + CheckInputOutput(std::move(workload), + TensorInfo({ 3, 1, 4, 5 }, FullyConnectedWorkloadType::ms_DataType, inputsQScale), + TensorInfo({ 3, 7 }, FullyConnectedWorkloadType::ms_DataType, outputQScale)); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) +{ + RefCreateFullyConnectedWorkloadTest<RefFullyConnectedFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedUint8Workload) +{ + RefCreateFullyConnectedWorkloadTest<RefFullyConnectedUint8Workload>(); +} + +template <typename MultiplicationWorkloadType> +static void RefCreateMultiplicationWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateMultiplicationWorkloadTest<MultiplicationWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + CheckInputsOutput(std::move(workload), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), + TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) +{ + RefCreateMultiplicationWorkloadTest<RefMultiplicationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload) +{ + RefCreateMultiplicationWorkloadTest<RefMultiplicationUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest<RefNormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + CheckInputOutput(std::move(workload), + TensorInfo({3, 5, 5, 1}, DataType::Float32), + TensorInfo({3, 5, 5, 1}, DataType::Float32)); +} + +template 
<typename Pooling2dWorkloadType> +static void RefCreatePooling2dWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreatePooling2dWorkloadTest<Pooling2dWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({3, 2, 5, 5}, Pooling2dWorkloadType::ms_DataType), + TensorInfo({3, 2, 2, 4}, Pooling2dWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) +{ + RefCreatePooling2dWorkloadTest<RefPooling2dFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) +{ + RefCreatePooling2dWorkloadTest<RefPooling2dUint8Workload>(); +} + +template <typename SoftmaxWorkloadType> +static void RefCreateSoftmaxWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType), + TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) +{ + RefCreateSoftmaxWorkloadTest<RefSoftmaxFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxUint8Workload) +{ + RefCreateSoftmaxWorkloadTest<RefSoftmaxUint8Workload>(); +} + +template <typename SplitterWorkloadType> +static void RefCreateSplitterWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest<SplitterWorkloadType>(factory, graph); + + // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 1, 7 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle0 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]); + BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 4 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle1 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[1]); + BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 1, 1 }, SplitterWorkloadType::ms_DataType))); + auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[2]); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 1, 2 }, SplitterWorkloadType::ms_DataType))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) +{ + RefCreateSplitterWorkloadTest<RefSplitterFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterUint8Workload) +{ + RefCreateSplitterWorkloadTest<RefSplitterUint8Workload>(); +} + +template <typename SplitterWorkloadType, typename MergerWorkloadType> +static void RefCreateSplitterMergerWorkloadTest() +{ + // Test that it is possible to decide which output of the splitter layer + // should be linked to which input of the merger layer. + // We test that it is possible to specify the 0th output + // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be the 0th input + // of the merger. 
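// (Illustrative sketch only, not executed by this test: at network-construction
// time the cross-over wiring described above would look roughly like the
// following, reusing the slot API that appears elsewhere in these tests; the
// helper CreateSplitterMergerWorkloadTest is assumed to set up the equivalent
// connections internally.)
//
//     splitter->GetOutputSlot(0).Connect(merger->GetInputSlot(1));
//     splitter->GetOutputSlot(1).Connect(merger->GetInputSlot(0));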
+ + Graph graph; + RefWorkloadFactory factory; + auto workloads = CreateSplitterMergerWorkloadTest<SplitterWorkloadType, MergerWorkloadType>(factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + // Check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* mIn0 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* mIn1 = dynamic_cast<armnn::CpuTensorHandle*>(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32) +{ + RefCreateSplitterMergerWorkloadTest<RefSplitterFloat32Workload, RefMergerFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerUint8) +{ + RefCreateSplitterMergerWorkloadTest<RefSplitterUint8Workload, RefMergerUint8Workload>(); +} + +template <typename SplitterWorkloadType, typename ActivationWorkloadType> +static void RefCreateSingleOutputMultipleInputsTest() +{ + // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We create a splitter with two outputs, and check that each of those outputs is used by two different activation layers. + + Graph graph; + RefWorkloadFactory factory; + std::unique_ptr<SplitterWorkloadType> wlSplitter; + std::unique_ptr<ActivationWorkloadType> wlActiv0_0; + std::unique_ptr<ActivationWorkloadType> wlActiv0_1; + std::unique_ptr<ActivationWorkloadType> wlActiv1_0; + std::unique_ptr<ActivationWorkloadType> wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest<SplitterWorkloadType, + ActivationWorkloadType>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + + armnn::CpuTensorHandle* sOut0 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast<armnn::CpuTensorHandle*>(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* activ0_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ0_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_0Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_1Im = dynamic_cast<armnn::CpuTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsFloat32) +{ + RefCreateSingleOutputMultipleInputsTest<RefSplitterFloat32Workload, RefActivationFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsUint8) +{ + RefCreateSingleOutputMultipleInputsTest<RefSplitterUint8Workload, RefActivationUint8Workload>(); +} + +template <typename ResizeBilinearWorkloadType> +static void 
RefCreateResizeBilinearTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkloadType::ms_DataType), + TensorInfo({ 2, 3, 2, 2 }, ResizeBilinearWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32) +{ + RefCreateResizeBilinearTest<RefResizeBilinearFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearUint8) +{ + RefCreateResizeBilinearTest<RefResizeBilinearUint8Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat32) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateL2NormalizationWorkloadTest<RefL2NormalizationFloat32Workload>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType), + TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType)); +} + +template <typename ReshapeWorkloadType> +static void RefCreateReshapeWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph); + + // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + CheckInputOutput( + std::move(workload), + TensorInfo({ 4, 1 }, ReshapeWorkloadType::ms_DataType), + TensorInfo({ 1, 4 }, ReshapeWorkloadType::ms_DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) +{ + RefCreateReshapeWorkloadTest<RefReshapeFloat32Workload>(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + RefCreateReshapeWorkloadTest<RefReshapeUint8Workload>(); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp new file mode 100644 index 0000000000..479da3fabc --- /dev/null +++ b/src/armnn/backends/test/FullyConnectedTestImpl.hpp @@ -0,0 +1,286 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +template<typename T, typename B> +LayerTestResult<T, 2> SimpleFullyConnectedTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + armnn::TensorInfo weightsDesc, + armnn::TensorInfo biasesDesc, + boost::multi_array<T, 2> weights, + boost::multi_array<B, 1> bias, + boost::multi_array<T, 4> input, + bool biasEnabled, + bool transposeWeights) +{ + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FullyConnectedQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasesDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFullyConnected(data, info); + LayerTestResult<T, 2> result(outputTensorInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled, + bool transposeWeights) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputChannels = 3; + unsigned int outputNum = 2; + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32); + + LayerTestResult<float, 2> result(outputTensorInfo); + + boost::multi_array<float, 4> input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }) + ); + + boost::multi_array<float, 2> weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( + { + .5f, 2.f, .5f, + .5f, 2.f, 1.f, + .5f, 2.f, 2.f, + .5f, 2.f, 3.f, + .5f, 2.f, 4.f + })); + + if (transposeWeights) + { + weights = MakeTensor<float, 2>(weightsDesc, std::vector<float>( + { + .5f, .5f, .5f, .5f, .5f, + 2.f, 2.f, 2.f, 2.f, 2.f, + .5f, 1.f, 2.f, 3.f, 4.f + })); + } + + + std::vector<float> biasValues({0.f, 0.f, 0.f}); + if (biasEnabled) + { + 
biasValues = std::vector<float>({10.f, 20.f, 30.f}); + } + boost::multi_array<float, 1> bias = MakeTensor<float, 1>(biasesDesc, biasValues); + + result = SimpleFullyConnectedTestImpl<float>( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + biasEnabled, transposeWeights + ); + + result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, std::vector<float>( + { + 0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0], + 2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1], + 0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2], + + 2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0], + 10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1], + 2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2] + }) + ); + + return result; +} + +LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + constexpr static unsigned int inputWidth = 3u; + constexpr static unsigned int inputHeight = 2u; + constexpr static unsigned int inputChannels = 1u; + + constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels; + + constexpr static unsigned int outputChannels = 2u; + + armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.1f); + inputTensorInfo.SetQuantizationOffset(63); + + armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(5.f); + outputTensorInfo.SetQuantizationOffset(biasEnabled ? -50 : 10); + + armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, armnn::DataType::QuantisedAsymm8); + weightsDesc.SetQuantizationScale(0.2f); + weightsDesc.SetQuantizationOffset(93); + + armnn::TensorInfo biasesDesc({ outputChannels }, armnn::DataType::Signed32); + biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale()); + biasesDesc.SetQuantizationOffset(0); + + LayerTestResult<uint8_t, 2> result(outputTensorInfo); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>{51, 124, 28, + 251, 8, 92}); + + auto weights = MakeTensor<uint8_t, 2>(weightsDesc, std::vector<uint8_t>{51, 193, 42, 53, 175, 34, + 210, 145, 23, 74, 34, 150}); + + // scale = 0.02 + // offset = 0 + auto bias = MakeTensor<int32_t, 1>(biasesDesc, std::vector<int32_t>{9250, 67500}); + + result = SimpleFullyConnectedTestImpl<uint8_t>( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + biasEnabled, true + ); + + // Manually calculated. + // Note that one of these values has been clamped to 0. + if (biasEnabled) + { + result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 242}); + } + else + { + result.outputExpected = MakeTensor<uint8_t, 2>(outputTensorInfo, std::vector<uint8_t>{0, 32}); + } + + return result; +} + + + +// +// ArmNN variant of the AndroidNN fully_connected_float_large test. +// +// Tests the fully connected layer with large values, optionally transposing weights. +// Note this is templated for consistency, but the nature of this test makes it unlikely to be useful in Uint8 mode. 
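// As a quick hand check of the expected value asserted at the end of this
// function (the input, weights and bias are defined in the body below):
//   1*2 + 10*3 + 100*4 + 1000*5 + 10000*6 = 65432
//   65432 + 900000 (bias) = 965432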
+// +template<typename T> +LayerTestResult<T, 2> FullyConnectedLargeTestCommon(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 1; + + unsigned int outputChannels = 1; + unsigned int outputNum = 1; + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>()); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::GetDataType<T>()); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 2> result(outputTensorInfo); + + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, + }) + ); + + boost::multi_array<T, 2> weights = MakeTensor<T, 2>(weightsDesc, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 3.0f, 4.0f, 5.0f, 6.0f + }) + ); + + std::vector<T> biasValues({900000.f}); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasesDesc, biasValues); + + result = SimpleFullyConnectedTestImpl<T>( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + true, transposeWeights + ); + + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 965432.0f, + }) + ); + + return result; +} diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp new file mode 100644 index 0000000000..4b4c9f6099 --- /dev/null +++ b/src/armnn/backends/test/IsLayerSupportedTest.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include <Layers.hpp> + +#include <string> +#include <iostream> +#include <backends/ClWorkloadFactory.hpp> +#include <backends/NeonWorkloadFactory.hpp> + +#include "IsLayerSupportedTestImpl.hpp" + + +BOOST_AUTO_TEST_SUITE(IsLayerSupported) + +BOOST_AUTO_TEST_CASE(IsLayerSupportedLayerTypeMatches) +{ + LayerTypeMatchesTest(); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests<armnn::RefWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} + +#ifdef ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests<armnn::NeonWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} +#endif //#ifdef ARMCOMPUTENEON_ENABLED + + +#ifdef ARMCOMPUTECL_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Cl) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::Float32>(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Cl) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests<armnn::ClWorkloadFactory, armnn::DataType::QuantisedAsymm8>(&factory); +} +#endif //#ifdef ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_SUITE_END()
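The guarded cases above are compiled only when the corresponding Compute Library backend is enabled at build time. An additional backend would register its cases in the same shape; a minimal sketch follows, where the guard macro and factory type are hypothetical placeholders, not real Arm NN names:

#ifdef ARMCOMPUTEXYZ_ENABLED // hypothetical build guard for an extra backend
BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Xyz)
{
    armnn::XyzWorkloadFactory factory; // hypothetical factory type
    IsLayerSupportedTests<armnn::XyzWorkloadFactory, armnn::DataType::Float32>(&factory);
}
#endif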
\ No newline at end of file diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp new file mode 100644 index 0000000000..abc9806737 --- /dev/null +++ b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp @@ -0,0 +1,440 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "Graph.hpp" + +#include <boost/core/ignore_unused.hpp> + +namespace +{ +armnn::Graph dummyGraph; + +// Make a dummy TensorInfo object +template<armnn::DataType DataType> +armnn::TensorInfo MakeDummyTensorInfo() +{ + return armnn::TensorInfo({2,2,2,2}, DataType); +} + + +// Make a dummy WorkloadInfo using a dummy TensorInfo. +template<armnn::DataType DataType> +armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int numOutputs) +{ + armnn::WorkloadInfo info; + for (unsigned int i=0; i < numInputs; i++) + { + info.m_InputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); + } + for (unsigned int o=0; o < numOutputs; o++) + { + info.m_OutputTensorInfos.push_back(MakeDummyTensorInfo<DataType>()); + } + return info; +} + +// template class to create a dummy layer (2 parameters) +template<typename LayerType, typename DescType = typename LayerType::DescriptorType> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<LayerType>(DescType(), ""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +// template class to create a dummy layer (1 parameter) +template<typename LayerType> +struct DummyLayer<LayerType, void> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<LayerType>(""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +template<> +struct DummyLayer<armnn::ConstantLayer, void> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::ConstantLayer>(std::shared_ptr<armnn::ScopedCpuTensorHandle>(), ""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::ConstantLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::InputLayer, armnn::LayerBindingId> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::InputLayer>(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::InputLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::MergerLayer> +{ + DummyLayer() + { + armnn::OriginsDescriptor desc(2); + m_Layer = dummyGraph.AddLayer<armnn::MergerLayer>(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::MergerLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::OutputLayer, armnn::LayerBindingId> +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer<armnn::OutputLayer>(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::OutputLayer* m_Layer; +}; + +template<> +struct DummyLayer<armnn::SplitterLayer> +{ + DummyLayer() + { + armnn::ViewsDescriptor desc(1); + m_Layer = dummyGraph.AddLayer<armnn::SplitterLayer>(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::SplitterLayer* m_Layer; +}; + +template <typename ConvolutionLayerType> +struct DummyConvolutionLayer +{ + DummyConvolutionLayer() + { + typename ConvolutionLayerType::DescriptorType desc; + m_Layer = dummyGraph.AddLayer<ConvolutionLayerType>(desc, ""); + m_Layer->m_Weight = std::make_unique<armnn::ScopedCpuTensorHandle>( + 
armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Bias = std::make_unique<armnn::ScopedCpuTensorHandle>( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyConvolutionLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + ConvolutionLayerType* m_Layer; +}; + +template<> +struct DummyLayer<armnn::Convolution2dLayer> + : public DummyConvolutionLayer<armnn::Convolution2dLayer> +{ +}; + +template<> +struct DummyLayer<armnn::DepthwiseConvolution2dLayer> + : public DummyConvolutionLayer<armnn::DepthwiseConvolution2dLayer> +{ +}; + +// Tag for giving LayerType entries a unique strong type each. +template<armnn::LayerType> +struct Tag{}; + +#define DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, descType) \ +template<armnn::DataType DataType> \ +struct LayerTypePolicy<armnn::LayerType::name, DataType> \ +{ \ + using Type = armnn::name##Layer; \ + using Desc = descType; \ + using QueueDesc = armnn::name##QueueDescriptor; \ + constexpr static const char* NameStr = #name; \ + \ + static std::unique_ptr<armnn::IWorkload> MakeDummyWorkload(armnn::IWorkloadFactory *factory, \ + unsigned int nIn, unsigned int nOut) \ + { \ + QueueDesc desc; \ + armnn::WorkloadInfo info = MakeDummyWorkloadInfo<DataType>(nIn, nOut); \ + return factory->Create##name(desc, info); \ + } \ +}; + +// define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 1 parameter(name). +#define DECLARE_LAYER_POLICY_1_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, void) + +// define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 2 parameters(descriptor and name). +#define DECLARE_LAYER_POLICY_2_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, armnn::name##Descriptor) + +// Layer policy template +template<armnn::LayerType Type, armnn::DataType DataType> +struct LayerTypePolicy; + +// Every entry in the armnn::LayerType enum must be accounted for below. 
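// For reference, the first invocation below, DECLARE_LAYER_POLICY_2_PARAM(Activation),
// expands to (roughly) the following specialization:
//
//     template<armnn::DataType DataType>
//     struct LayerTypePolicy<armnn::LayerType::Activation, DataType>
//     {
//         using Type = armnn::ActivationLayer;
//         using Desc = armnn::ActivationDescriptor;
//         using QueueDesc = armnn::ActivationQueueDescriptor;
//         constexpr static const char* NameStr = "Activation";
//
//         static std::unique_ptr<armnn::IWorkload> MakeDummyWorkload(armnn::IWorkloadFactory* factory,
//                                                                    unsigned int nIn, unsigned int nOut)
//         {
//             QueueDesc desc;
//             armnn::WorkloadInfo info = MakeDummyWorkloadInfo<DataType>(nIn, nOut);
//             return factory->CreateActivation(desc, info);
//         }
//     };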
+DECLARE_LAYER_POLICY_2_PARAM(Activation) + +DECLARE_LAYER_POLICY_1_PARAM(Addition) + +DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) + +DECLARE_LAYER_POLICY_1_PARAM(Constant) + +DECLARE_LAYER_POLICY_2_PARAM(Convolution2d) + +DECLARE_LAYER_POLICY_1_PARAM(MemCopy) + +DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d) + +DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization) + +DECLARE_LAYER_POLICY_1_PARAM(Floor) + +DECLARE_LAYER_POLICY_2_PARAM(FullyConnected) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_1_PARAM(L2Normalization) + +DECLARE_LAYER_POLICY_2_PARAM(Merger) + +DECLARE_LAYER_POLICY_1_PARAM(Multiplication) + +DECLARE_LAYER_POLICY_2_PARAM(Normalization) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Output, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_2_PARAM(Permute) + +DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) + +DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) + +DECLARE_LAYER_POLICY_2_PARAM(Softmax) + +DECLARE_LAYER_POLICY_2_PARAM(Splitter) + +DECLARE_LAYER_POLICY_2_PARAM(Reshape) + + +// Generic implementation to get the number of input slots for a given layer type; +template<armnn::LayerType Type> +unsigned int GetNumInputs(const armnn::Layer& layer) +{ + return layer.GetNumInputSlots(); +} + +// Generic implementation to get the number of output slots for a given layer type; +template<armnn::LayerType Type> +unsigned int GetNumOutputs(const armnn::Layer& layer) +{ + return layer.GetNumOutputSlots(); +} + +template<> +unsigned int GetNumInputs<armnn::LayerType::Merger>(const armnn::Layer& layer) +{ + boost::ignore_unused(layer); + return 2; +} + +// Test that the IsLayerSupported() function returns the correct value. +// We determine the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. +// Returns true if expectations are met, otherwise returns false. 
+template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> +bool IsLayerSupportedTest(FactoryType *factory, Tag<Type>) +{ + using LayerPolicy = LayerTypePolicy<Type, DataType>; + using LayerType = typename LayerPolicy::Type; + using LayerDesc = typename LayerPolicy::Desc; + DummyLayer<LayerType, LayerDesc> layer; + + unsigned int numIn = GetNumInputs<Type>(*layer.m_Layer); + unsigned int numOut = GetNumOutputs<Type>(*layer.m_Layer); + + // Make another dummy layer just to make IsLayerSupported have valid inputs + DummyLayer<armnn::ConstantLayer, void> previousLayer; + // Set output of previous layer to a dummy tensor + armnn::TensorInfo output = MakeDummyTensorInfo<DataType>(); + previousLayer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + // Connect all outputs of previous layer to inputs of tested layer + for (unsigned int i = 0; i < numIn; i++) + { + armnn::IOutputSlot& previousLayerOutputSlot = previousLayer.m_Layer->GetOutputSlot(0); + armnn::IInputSlot& layerInputSlot = layer.m_Layer->GetInputSlot(i); + previousLayerOutputSlot.Connect(layerInputSlot); + } + // Set outputs of tested layer to a dummy tensor + for (unsigned int i = 0; i < numOut; i++) + { + layer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + } + + std::string layerName = LayerPolicy::NameStr; + std::string reasonIfUnsupported; + if (FactoryType::IsLayerSupported(*layer.m_Layer, DataType, reasonIfUnsupported)) + { + std::string errorMsg = " layer expected support but found none."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() != nullptr; + BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + catch (const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + // This is ok since we throw InvalidArgumentException when creating the dummy workload. + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch (...) + { + errorMsg = "Unexpected error while testing support for "; + BOOST_TEST_ERROR(errorMsg << layerName); + return false; + } + } + else + { + std::string errorMsg = "layer expected no support (giving reason: " + reasonIfUnsupported + ") but found some."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() == nullptr; + BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + // These two exceptions are ok: For workloads that are partially supported, attempting to instantiate them + // using parameters that make IsLayerSupported() return false should throw an + // InvalidArgumentException or UnimplementedException + catch(const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + return true; + } + catch (const armnn::UnimplementedException& e) + { + boost::ignore_unused(e); + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch (...) 
+ { + errorMsg = "Unexpected error while testing support for "; + BOOST_TEST_ERROR(errorMsg << layerName); + return false; + } + } +} + +// Helper function to compute the next type in the LayerType enum +constexpr armnn::LayerType NextType(armnn::LayerType type) +{ + return static_cast<armnn::LayerType>(static_cast<int>(type)+1); +} + +// Termination function for determining the end of the LayerType enumeration +template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> +bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<armnn::LayerType::LastLayer>) +{ + return IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>()); +}; + +// Recursive function to test an entry in the LayerType enum and then iterate on to the next entry. +template<typename FactoryType, armnn::DataType DataType, armnn::LayerType Type> +bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag<Type>) +{ + bool v = IsLayerSupportedTest<FactoryType, DataType, Type>(factory, Tag<Type>()); + + return v && + IsLayerSupportedTestsImpl<FactoryType, DataType, NextType(Type)> + (factory, Tag<NextType(Type)>()); +}; + +// Helper function to pass through to the test framework. +template<typename FactoryType, armnn::DataType DataType> +bool IsLayerSupportedTests(FactoryType *factory) +{ + return IsLayerSupportedTestsImpl<FactoryType, DataType>(factory, Tag<armnn::LayerType::FirstLayer>()); +}; + +template<armnn::LayerType Type> +bool TestLayerTypeMatches() +{ + using LayerPolicy = LayerTypePolicy<Type, armnn::DataType::Float32>; + using LayerType = typename LayerPolicy::Type; + using LayerDesc = typename LayerPolicy::Desc; + DummyLayer<LayerType, LayerDesc> layer; + + std::stringstream ss; + ss << LayerPolicy::NameStr << " layer type mismatches expected layer type value."; + bool v = Type == layer.m_Layer->GetType(); + BOOST_CHECK_MESSAGE(v, ss.str()); + return v; +}; + +template<armnn::LayerType Type> +bool LayerTypeMatchesTestImpl(Tag<armnn::LayerType::LastLayer>) +{ + return TestLayerTypeMatches<Type>(); +}; + +template<armnn::LayerType Type> +bool LayerTypeMatchesTestImpl(Tag<Type>) +{ + return TestLayerTypeMatches<Type>() && + LayerTypeMatchesTestImpl<NextType(Type)>(Tag<NextType(Type)>()); +}; + +bool LayerTypeMatchesTest() +{ + return LayerTypeMatchesTestImpl<armnn::LayerType::FirstLayer>(Tag<armnn::LayerType::FirstLayer>()); +}; + +} //namespace diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp new file mode 100644 index 0000000000..76681f9a93 --- /dev/null +++ b/src/armnn/backends/test/LayerTests.cpp @@ -0,0 +1,3884 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include "LayerTests.hpp" + +#include "test/TensorHelpers.hpp" +#include "TensorCopyUtils.hpp" + +#include <boost/test/unit_test.hpp> + +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#endif + +#include <algorithm> +#include <boost/cast.hpp> + +#include "WorkloadTestUtils.hpp" +#include "Conv2dTestImpl.hpp" +#include "BatchNormTestImpl.hpp" +#include "ActivationTestImpl.hpp" +#include "Pooling2dTestImpl.hpp" +#include "ReshapeTestImpl.hpp" +#include "FullyConnectedTestImpl.hpp" +#include "SplitterTestImpl.hpp" +#include "SoftmaxTestImpl.hpp" +#include "NormTestImpl.hpp" +#include "PermuteTestImpl.hpp" + +// 3-channel 16x8 image used as common input data for a number of Conv2d tests +static std::vector<float> ConvInput3x8x16({ + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}); + +// 2-channel bias used by a number of Conv2d tests +static std::vector<float> Bias2({0, 2}); + +// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled +template<typename T> +boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>()); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2)); + return bias; + } + else + { + return boost::multi_array<T, 1>(); + } +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use common single-batch 3-channel 16x8 image + armnn::TensorInfo inputDesc({1, 3, 8, 16}, 
armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch with 3-channel 3x5 kernels + armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x4 image + armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, + -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path + + // Use common single-batch 3-channel 16x8 image + armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch of 3-channel 3x3 kernels + armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x6 image + armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + 
-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +template<typename T> +LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( + armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 3x3 image as input + armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31, + 12,22,32, + 13,23,33 + }))); + + // Use 1 batch of a 1-channel 2x2 kernel + armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21, + -12,-22, + }))); + +// Expected output is 1 batch of a 1-channel 6x8 image +// Manually calculated like this: +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] +//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..] +//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..] +//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..] 
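// Spot check of the rows above against the expectedOutput values below; the
// first two non-zero entries of the second output row work out as:
//   -11*0 - 21*0 - 12*0  - 22*11 = -242
//   -11*0 - 21*0 - 12*11 - 22*21 = -594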
+ armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>()); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 0, 0, 0, 0, 0, 0, + -242, -594, -934, -372, 0, 0, + -495, -1190, -1850, -725, 0, 0, + -538, -1256, -1916, -748, 0, 0, + -273, -626, -946, -363, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 2, // padding top + 3, // padding right + 4); // padding bottom +} + +template<typename T> +LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 5x5 image as input + armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31,41,51, + 12,22,32,42,52, + 13,23,33,43,53, + 14,24,34,44,54, + 15,25,35,45,55, + }))); + + // Use 1 batch of a 1-channel 4x4 kernel + armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21,-31,-41, + -12,-22,-32,-42, + -13,-23,-33,-43, + -14,-24,-34,-44, + }))); + + // Expected output is 1 batch of a 1-channel 5x5 image + armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>()); + std::vector<T> myVec(outputDesc.GetNumElements(), 0); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -4723, -7044, -9324, -6253, -3542, + -7140, -10580, -13940, -9300, -5230, + -9590, -14120, -18520, -12290, -6860, + -9980, -14560, -18960, -12560, -7000, + -7518, -10904, -14144, -9318, -5152, + }))); + + return SimpleConvolution2dTestImpl<T>(workloadFactory, + input, + kernel, + GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // padding left + 2, // padding top + 2, // padding right + 1); // padding bottom +} + +LayerTestResult<float, 4> +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<uint8_t, 4> 
DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled); +} + +LayerTestResult<float,4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory); +} + +template<typename T> +LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory); +} + +template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); +template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); + +LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Across; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Within; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl<float>(workloadFactory, beta); +} + +LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta); +} + +LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod); +} + +LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta); +} + +LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta); +} + +std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon<float>(workloadFactory); +} + +std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0); +} + 
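+// Editor's note: the softmax tests above take a 'beta' (temperature) parameter. As a
+// minimal sketch of the function being exercised - illustrative only, not referenced by
+// the test suite, and assuming <cmath>, <vector> and <algorithm> are available:
+//
+//   softmax(x_i) = exp(beta * x_i) / sum_j exp(beta * x_j)
+//
+namespace editors_note
+{
+inline std::vector<float> SoftmaxReference(const std::vector<float>& x, float beta)
+{
+    // Subtract the maximum before exponentiating so large inputs cannot overflow.
+    const float xMax = *std::max_element(x.begin(), x.end());
+    std::vector<float> y;
+    y.reserve(x.size());
+    float sum = 0.0f;
+    for (float v : x)
+    {
+        const float e = std::exp(beta * (v - xMax));
+        y.push_back(e);
+        sum += e;
+    }
+    for (float& v : y)
+    {
+        v /= sum; // normalise so the outputs sum to 1
+    }
+    return y;
+}
+} // namespace editors_note
+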
+LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0); +} + +LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 5; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 2; + unsigned int inputHeight1 = 2; + unsigned int inputChannels1 = 3; + + unsigned int inputWidth2 = 2; + unsigned int inputHeight2 = 4; + unsigned int inputChannels2 = 3; + + unsigned int inputWidth3 = 3; + unsigned int inputHeight3 = 6; + unsigned int inputChannels3 = 2; + + unsigned int inputWidth4 = 3; + unsigned int inputHeight4 = 6; + unsigned int inputChannels4 = 1; + + // Define the tensor descriptors + armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32); + + LayerTestResult<float,3> ret(outputTensorInfo); + + + ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + + }) + ); + + + auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>( + { + 1.0f, 2.0f, + 6.0f, 7.0f, + + 31.0f, 32.0f, + 36.0f, 37.0f, + + 61.0f, 62.0f, + 66.0f, 67.0f, + }) + ); + + auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>( + { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + ); + + auto input3 = MakeTensor<float, 3>(inputTensorInfo3, std::vector<float>( + { + 3.0f, 4.0f, 5.0f, + 8.0f, 9.0f, 10.0f, + 13.0f, 14.0f, 15.0f, + 18.0f, 19.0f, 20.0f, + 23.0f, 24.0f, 25.0f, + 28.0f, 29.0f, 30.0f, + + 33.0f, 34.0f, 35.0f, + 38.0f, 39.0f, 40.0f, + 43.0f, 44.0f, 45.0f, + 48.0f, 49.0f, 50.0f, + 53.0f, 54.0f, 55.0f, + 58.0f, 59.0f, 60.0f, + }) + ); + + + auto input4 = MakeTensor<float, 3>(inputTensorInfo4, std::vector<float>( + { + 63.0f, 64.0f, 65.0f, + 68.0f, 69.0f, 70.0f, + 73.0f, 74.0f, 75.0f, + 78.0f, 79.0f, 80.0f, + 83.0f, 84.0f, 85.0f, + 88.0f, 89.0f, 90.0f, + }) + ); + + std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0] + armnn::MergerQueueDescriptor::ViewOrigin 
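+    // Editor's note (not part of the original source): each ViewOrigin gives the
+    // {channel, height, width} offset at which that input is placed inside the
+    // 3x6x5 output; the window extent is implied by the input's own shape.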
window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of input[1] + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of input[2] + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of input[3] + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> inputHandle3 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> inputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo4); + + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); + AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + inputHandle3->Allocate(); + inputHandle4->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); + CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 2; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + + auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>( + { + 0.0f, 
2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 1.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 1.0f, 2.0f, + + 0.0f, 0.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + })); + + auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>( + { + 1.0f, 2.0f, 1.0f, + 0.0f, 1.0f, 2.0f, + + 1.0f, 2.0f, -2.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 0.0f, -3.0f, + + 0.0f, 0.0f, 1.0f, + 0.7f, 1.0f, 5.0f, + })); + + LayerTestResult<float,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>( + { + 1.0f, 4.0f, 2.0f, + 0.2f, 2.0f, 4.0f, + + 2.0f, 4.0f, -1.0f, + 0.4f, 2.0f, 4.0f, + + 0.0f, 4.0f, 2.0f, + 8.4f, 1.0f, -1.0f, + + 0.0f, 0.0f, 2.0f, + 0.9f, 2.0f, 7.0f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template <typename T> +LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, + { + 0.0f, + 1.0f, + + 2.0f, + 3.0f, + + 4.0f, + 5.0f, + })); + + auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + })); + + LayerTestResult<T,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 4.5f, 5.5f, 6.5f, + + 2.5f, 3.5f, 4.5f, + 6.5f, 7.5f, 8.5f, + + 4.5f, 5.5f, 6.5f, + 8.5f, 9.5f, 10.5f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + 
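+    // Editor's note (not part of the original source): this exercises NumPy-style
+    // broadcasting - axes of extent 1 are stretched to match the other operand, so
+    // {1,3,2,1} + {1,1,2,3} -> {1,3,2,3}. Worked element: output[0][0][1][0]
+    //   = input1[0][0][1][0] + input2[0][0][1][0] = 1.0f + 3.5f = 4.5f,
+    // as in outputExpected above.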
AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template <typename T> +LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>()); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + })); + + auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, + })); + + LayerTestResult<T,4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + 6.5f, 7.5f, 8.5f, + 9.5f, 10.5f, 11.5f, + 12.5f, 13.5f, 14.5f, + 15.5f, 16.5f, 17.5f, + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0); +} + +LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& 
workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128); +} + +LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int batchSize = 4; + unsigned int channels = 1; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232); + auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::AdditionQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + inputHandle1Ref->Allocate(); + inputHandle2Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + 
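+    // Editor's note (not part of the original source): multiplication is elementwise,
+    // so outputExpected below is simply input0 * input1 term by term:
+    // 1*2 = 2, 2*3 = 6, 3*4 = 12 and 4*5 = 20 for the four constant blocks.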
armnn::TensorInfo inputTensorInfo0; + armnn::TensorInfo inputTensorInfo1; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; + constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value; + + inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32); + + auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({ + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4 })); + + auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({ + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 })); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 2, 2, 2, 2, 6, 6, 6, 6, + 12, 12, 12, 12, 20, 20, 20, 20 })); + + return ret; +} + +LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 16; + const unsigned int height = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo0; + armnn::TensorInfo inputTensorInfo1; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; + + inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + LayerTestResult<float,4> comparisonResult(outputTensorInfo); + + auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992); + auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = 
refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::MultiplicationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + inputHandle0Ref->Allocate(); + inputHandle1Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 5; + const unsigned int batchSize = 3; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo tensorInfo; + + constexpr unsigned int shape[] = {batchSize, channels, height, width}; + constexpr unsigned int tensorShape[] = {channels}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32); + + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312); + + auto mean = MakeRandomTensor<float, 1>(tensorInfo, 123); + auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f); + auto beta = MakeRandomTensor<float, 1>(tensorInfo, 123); + auto gamma = MakeRandomTensor<float, 1>(tensorInfo, 345); + + LayerTestResult<float,4> ret(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::BatchNormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); + armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); + armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); + 
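+    // Editor's note (not part of the original source): both backends should compute
+    //   out = gamma * (in - mean) / sqrt(variance + eps) + beta
+    // per channel; m_Eps below supplies the eps term that guards the square root.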
armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.01f; + + armnn::BatchNormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +void Concatenate(armnn::IWorkloadFactory& workloadFactory, + std::initializer_list<const armnn::TensorInfo> inputTensorInfos, + std::initializer_list<void*> inputs, + const armnn::TensorInfo& outputTensorInfo, + void* output, + unsigned int concatDim) +{ + armnn::MergerQueueDescriptor queueDescriptor; + + std::vector<armnn::TensorShape> shapes; + shapes.reserve(inputTensorInfos.size()); + for (const armnn::TensorInfo& it: inputTensorInfos) + { + shapes.push_back(it.GetShape()); + } + armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), + shapes.end(), concatDim); + + queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); + for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) + { + queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i), + viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); + } + + const size_t inputCount = inputTensorInfos.size(); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles; + inputHandles.reserve(inputCount); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + for (unsigned int i = 0; i < inputCount; ++i) + { + const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i]; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ? 
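+            // Editor's note (not part of the original source): when the backend supports
+            // sub-tensors, each input handle is just a window into the output tensor at
+            // its view origin, so the merge happens implicitly as the inputs are written.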
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), + queueDescriptor.m_ViewOrigins[i].m_Origin.data()) + : workloadFactory.CreateTensorHandle(inputTensorInfo); + + inputHandles.emplace_back(std::move(inputHandle)); + } + + armnn::WorkloadInfo workloadInfo; + + for (unsigned int i = 0; i < inputCount; ++i) + { + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get()); + } + + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo); + + for (auto& inputHandle : inputHandles) + { + inputHandle->Allocate(); + } + + outputHandle->Allocate(); + + unsigned int nextInputId = 0; + for (auto& inputHandle : inputHandles) + { + CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++)); + } + + workload->Execute(); + + CopyDataFromITensorHandle(output, outputHandle.get()); +} + +template <typename T> +LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f })); + auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f })); + auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f })); + + armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 1> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 1>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f + })); + + return result; +} + +LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + const float qScale, + const int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + })); + + auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 7.0f, 8.0f, 9.0f, + + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + result.output = MakeTensor<T, 2>(outputTensorInfo, 
output); + return result; +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + + // Batch 0 + 7.0f, 8.0f, 9.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>()); + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 2>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return 
Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 9.0f, + + // Batch 1 + 18.0f + })); + + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>()); + LayerTestResult<T, 2> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor<T, 2>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + + auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 1, Channel 0 + 25.0f, 26.0f, + + // Batch 1, Channel 1 + 27.0f, 28.0f, + + // Batch 1, Channel 2 + 29.0f, 30.0f + })); + + auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f + })); + + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + 
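+    // Editor's note (not part of the original source): concatenating three {2,3,2}
+    // inputs changes only the chosen extent, giving {6,3,2}, {2,9,2} or {2,3,6} for
+    // dimension 0, 1 or 2; these match the outputTensorInfo shapes passed in by the
+    // Dim0/Dim1/Dim2 wrappers below.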
result.output = MakeTensor<T, 3>(outputTensorInfo, output); + return result; +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0, + qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 13.0f, 14.0f, + + // Batch 0, Channel 7 + 15.0f, 16.0f, + + // Batch 0, Channel 8 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 4 + 27.0f, 28.0f, + + // Batch 1, Channel 5 + 29.0f, 30.0f, + + // Batch 1, Channel 6 + 31.0f, 32.0f, + + // Batch 1, Channel 7 + 33.0f, 34.0f, + + // Batch 1, Channel 8 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); + + LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 
25.0f, 26.0f, 31.0f, 32.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 25.0f, 26.0f, + + // Batch 0, Channel 1 + 27.0f, 28.0f, + + // Batch 0, Channel 2 + 29.0f, 30.0f, + + // Batch 1, Channel 0 + 13.0f, 14.0f, + + // Batch 1, Channel 1 + 15.0f, 16.0f, + + // Batch 1, Channel 2 + 17.0f, 18.0f, + + // Batch 2, Channel 0 + 31.0f, 32.0f, + + // Batch 2, Channel 1 + 33.0f, 34.0f, + + // Batch 2, Channel 2 + 35.0f, 36.0f + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, 
armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 0, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 0 + 27.0f, 28.0f, + + // Batch 1, Channel 1 + 29.0f, 30.0f, + + // Batch 1, Channel 2 + 13.0f, 14.0f, + + // Batch 1, Channel 3 + 15.0f, 16.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 25.0f, 26.0f, + + // Batch 0, Channel 7 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 27.0f, 28.0f, + + // Batch 1, Channel 4 + 29.0f, 30.0f, + + // Batch 1, Channel 5 + 13.0f, 14.0f, + + // Batch 1, Channel 6 + 15.0f, 16.0f, + + // Batch 1, Channel 7 + 31.0f, 32.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +template <typename T> +LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>()); + auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>()); + auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, + + // Batch 0, Channel 1 + 9.0f, + + // Batch 0, Channel 2 + 11.0f, + + // Batch 1, Channel 0 + 25.0f, + + // Batch 1, Channel 1 + 27.0f, + + // Batch 1, Channel 2 + 29.0f + })); + + armnn::TensorInfo 
input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>()); + auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f, 55.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>()); + LayerTestResult<T, 3> result(outputTensorInfo); + + std::vector<T> output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 2); + + result.output = MakeTensor<T, 3>(outputTensorInfo, output); + result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f, + })); + + return result; +} + +LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& 
workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 255.0f, + 200.0f, 250.f, + })); + + // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.0f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.f, 3.f, + 3.f, 5.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = 
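+    // Editor's note (worked check, not part of the original source): for the 4x4 -> 2x2
+    // minification the scale factor is 2 and output texel (y, x) samples input (2y, 2x),
+    // so the corners pick out input(0,0)=1, input(0,2)=3, input(2,0)=3 and input(2,2)=5,
+    // matching outputExpected above.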
workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 3; + constexpr unsigned int outputHeight = 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 5.0f, 8.0f, + 13.0f, 21.0f, 34.0f, 55.0f, 89.0f, + 144.0f, 233.0f, 377.0f, 610.0f, 987.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({ + 1.0f, 2.6666f, 6.0f, + 78.5f, 179.3333f, 401.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, + 13.0f, 21.0f, + 144.0f, 233.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 
4>(outputTensorInfo, std::vector<float>({ + 1.0f, 1.4f, 1.8f, 2.f, 2.f, + 13.f, 16.2f, 19.4f, 21.f, 21.f, + 144.f, 179.6f, 215.2f, 233.f, 233.f + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int width = 2; + constexpr unsigned int height = 3; + + const armnn::TensorInfo tensorInfo({height, width }, + armnn::DataType::Float32); + auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ + -10.0f, -5.0f, + 0.0f, 5.0f, + 10.0f, 10.0f + })); + + LayerTestResult<float, 2> ret(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::FakeQuantizationQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + float min = -10.f; + float max = 10.f; + + data.m_Parameters.m_Min = min; + data.m_Parameters.m_Max = max; + + armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]); + armnn::FakeQuantizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({ + 0.0f, 63.0f, + 128.0f, 191.0f, + 255.0f, 255.0f + })); + return ret; +} + +LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 1; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 10; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f + })); + + const float 
approxInvL2Norm = 0.050964719f; + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f * approxInvL2Norm, + 2.0f * approxInvL2Norm, + 3.0f * approxInvL2Norm, + 4.0f * approxInvL2Norm, + 5.0f * approxInvL2Norm, + 6.0f * approxInvL2Norm, + 7.0f * approxInvL2Norm, + 8.0f * approxInvL2Norm, + 9.0f * approxInvL2Norm, + 10.0f * approxInvL2Norm + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +namespace +{ + +float CalcInvL2Norm(std::initializer_list<float> elements) +{ + const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f, + [](float acc, float element) { return acc + element * element; }); + return 1.0f / sqrtf(reduction); +} + +} + +LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f, 3.0f, 5.0f, 7.0f, 9.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 1.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 3.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 5.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 9.0f * CalcInvL2Norm({ 9.0f, 10.0f }), + + 2.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 4.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 6.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 8.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 10.0f * CalcInvL2Norm({ 9.0f, 10.0f }) + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + 
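// For reference: the expected values above scale each input element across the
+ // channel dimension by CalcInvL2Norm of the values sharing its (h, w) position,
+ // e.g. the first column holds { 1.0f, 2.0f }, so both of its outputs are scaled
+ // by 1 / sqrt(1*1 + 2*2) = 1 / sqrt(5) ~= 0.4472.
+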
inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Channel 0 + 119.0f, 21.0f, 150.0f, + 149.0f, 32.0f, 179.0f, + 15.0f, 227.0f, 141.0f, + 147.0f, 199.0f, 220.0f, + + // Channel 1 + 110.0f, 140.0f, 73.0f, + 211.0f, 212.0f, 89.0f, + 24.0f, 138.0f, 188.0f, + 162.0f, 12.0f, 161.0f, + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + 119.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 21.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 150.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 149.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 32.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 179.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 15.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 227.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 141.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 147.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 199.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 220.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + + 110.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 140.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 73.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 211.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 212.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 89.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 24.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 138.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 188.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 162.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 12.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 161.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr 
unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + })); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + + // Batch 0, Channel 0 + 235.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 46.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 178.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 100.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 123.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 19.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 172.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 74.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 6.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 195.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 80.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 1 + 113.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 95.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 202.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 77.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 114.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 71.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 122.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 246.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 82.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 28.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 37.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 2 + 56.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 170.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 162.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 194.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 89.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 254.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 12.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 209.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 1.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 64.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 54.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 1, Channel 0 + 67.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f 
}), + 90.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 49.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 18.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 25.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 117.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 103.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 247.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 59.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 189.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 1 + 239.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 104.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 199.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 17.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 153.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 222.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 217.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 75.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 32.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 126.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 21.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 2 + 97.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 145.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 215.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 115.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 238.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 226.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 16.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 132.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 92.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 125.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 88.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +template <typename T> +LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type 
is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + }))); + + LayerTestResult<T, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo); + AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]); + + armnn::ConstantQueueDescriptor descriptor; + descriptor.m_LayerOutput = &constantTensor; + + armnn::WorkloadInfo info; + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info); + + outputHandle->Allocate(); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl<float>(workloadFactory, 0.0f, 0); +} + +LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0); +} + +LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 5; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 2; + unsigned int inputHeight1 = 2; + unsigned int inputChannels1 = 3; + + unsigned int inputWidth2 = 2; + unsigned int inputHeight2 = 4; + unsigned int inputChannels2 = 3; + + unsigned int inputWidth3 = 3; + unsigned int inputHeight3 = 6; + unsigned int inputChannels3 = 2; + + unsigned int inputWidth4 = 3; + unsigned int inputHeight4 = 6; + unsigned int inputChannels4 = 1; + + // Define the tensor descriptors + armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8); + + // Arbitrary scale and offsets. 
They don't really matter as the merger operator doesn't dequantize/quantize + const float scale = 0.13497836f; + const int32_t offset = -7; + + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + inputTensorInfo3.SetQuantizationScale(scale); + inputTensorInfo3.SetQuantizationOffset(offset); + inputTensorInfo4.SetQuantizationScale(scale); + inputTensorInfo4.SetQuantizationOffset(offset); + + LayerTestResult<uint8_t, 3> ret(outputTensorInfo); + + ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>( + { + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, + + 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, + 86, 87, 88, 89, 90, + }) + ); + + + auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>( + { + 1, 2, + 6, 7, + + 31, 32, + 36, 37, + + 61, 62, + 66, 67, + }) + ); + + auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>( + { + 11, 12, + 16, 17, + 21, 22, + 26, 27, + + 41, 42, + 46, 47, + 51, 52, + 56, 57, + + 71, 72, + 76, 77, + 81, 82, + 86, 87, + }) + ); + + auto input3 = MakeTensor<uint8_t, 3>(inputTensorInfo3, std::vector<uint8_t>( + { + 3, 4, 5, + 8, 9, 10, + 13, 14, 15, + 18, 19, 20, + 23, 24, 25, + 28, 29, 30, + + 33, 34, 35, + 38, 39, 40, + 43, 44, 45, + 48, 49, 50, + 53, 54, 55, + 58, 59, 60, + }) + ); + + + auto input4 = MakeTensor<uint8_t, 3>(inputTensorInfo4, std::vector<uint8_t>( + { + 63, 64, 65, + 68, 69, 70, + 73, 74, 75, + 78, 79, 80, + 83, 84, 85, + 88, 89, 90, + }) + ); + + std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0] + armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = { 0, 2, 0 }; //extent of the window is defined by size of input[1] + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = { 0, 0, 2 }; //extent of the window is defined by size of input[2] + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = { 2, 0, 2 }; //extent of the window is defined by size of input[3] + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + + + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> inputHandle3 = + subTensorsSupported ? 
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> inputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo4); + + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get()); + AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + inputHandle3->Allocate(); + inputHandle4->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]); + CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + const float scale = 7.0f; + const int32_t offset = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + const unsigned int shape[] = { batchSize, channels, height, width }; + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + + // See dequantized values to the right + auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 63, 35, 77, 70, 56, 112, // 420, 224, 518, 469, 371, 763 + 203, 28, 252, 168, 245, 91 // 1400, 175, 1743, 1155, 1694, 616 + })); + + // See dequantized values to the right + auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 21, 7, 175, 231, 175, 210, // 126, 28, 1204, 1596, 1204, 1449 + 126, 161, 63, 21, 105, 126 // 861, 1106, 420, 126, 714, 861 + })); + + // See dequantized values to the right + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>( + { + 81, 39, 249, 255, 228, 255, // 546, 252, 1722, 2065(clamped), 1575, 2212(clamped) + 255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = 
workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + const unsigned int shape[] = { batchSize, channels, height, width }; + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo1.SetQuantizationScale(4.0f); + inputTensorInfo1.SetQuantizationOffset(1); + + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo2.SetQuantizationScale(3.0f); + inputTensorInfo2.SetQuantizationOffset(-2); + + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen to have output values out of range + outputTensorInfo.SetQuantizationOffset(-5); + + // See dequantized values to the right + auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440, + 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120 + })); + + // See dequantized values to the right + auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>( + { + 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747, + 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297 + })); + + // See dequantized values to the right + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>( + { + 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680, + 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = 
workloadFactory.CreateMultiplication(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-3); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.1567f); + inputTensorInfo.SetQuantizationOffset(1); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.1567f); + outputTensorInfo.SetQuantizationOffset(1); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 255, + 200, 250 + })); + + // The 'resize bilinear' operation projects the top-left corner of output 
texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(3.141592f); + inputTensorInfo.SetQuantizationOffset(3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(3.141592f); + outputTensorInfo.SetQuantizationOffset(3); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1, 3, + 3, 5 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned 
int inputWidth = 3; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 2; + constexpr unsigned int outputHeight = 1; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-1); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-1); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 1, 2, 3, // 3.0, 4.5, 6.0 + 5, 8, 13 // 9.0, 13.5, 21.0 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 1, 3 // 3.0, 5.25 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.010765f); + inputTensorInfo.SetQuantizationOffset(7); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.010132f); + outputTensorInfo.SetQuantizationOffset(-18); + + auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({ + 24, 228, // 0.183005, 2.379065, + 105, 128, // 1.05497, 1.302565 + 230, 71 // 2.400595, 0.68896 + })); + + LayerTestResult<uint8_t, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({ + 0, 87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504 + 86, 96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498 + 219, 151, 84, 50, 50 // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002 + })); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = 
workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::ResizeBilinearQueueDescriptor descriptor;
+ armnn::WorkloadInfo info;
+ AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+ CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+ return result;
+}
+
+LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory)
+{
+ auto ret = BatchNormTestImpl<float>(workloadFactory, 0.f, 0);
+ return ret;
+}
+
+LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ auto ret = BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50);
+ return ret;
+}
+
+LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1);
+}
+
+LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
+{
+ return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
+}
+
+LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
+ bool forceNoPadding)
+{
+ return
SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding); +} + +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5); +} + +LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding); +} + +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128); +} + +LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); +} + +LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1); +} + +LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize9TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory) 
+{ + return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType); +} + +LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128); +} + +LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights) +{ + return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5); +} + +LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& 
workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory); +} + +LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory); +} + +LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteFloat32TestCommon(workloadFactory); +}; + +LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteUint8TestCommon(workloadFactory); +}; diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp new file mode 100644 index 0000000000..fc0c9c7b14 --- /dev/null +++ b/src/armnn/backends/test/LayerTests.hpp @@ -0,0 +1,305 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "armnn/ArmNN.hpp" +#include "armnn/Tensor.hpp" + +#include <boost/multi_array.hpp> +#include <boost/assert.hpp> +#include <array> + +// Layer callables + +namespace armnn +{ +class IWorkloadFactory; +} + +template <std::size_t n> +boost::array<unsigned int, n> GetTensorShapeAsArray(const armnn::TensorInfo& tensorInfo) +{ + BOOST_ASSERT_MSG(n == tensorInfo.GetNumDimensions(), + "Attempting to construct a shape array of mismatching size"); + + boost::array<unsigned int, n> shape; + for (unsigned int i = 0; i < n; i++) + { + shape[i] = tensorInfo.GetShape()[i]; + } + return shape; +} + +template <typename T, std::size_t n> +struct LayerTestResult +{ + LayerTestResult(const armnn::TensorInfo& outputInfo) + { + auto shape( GetTensorShapeAsArray<n>(outputInfo) ); + output.resize(shape); + outputExpected.resize(shape); + supported = true; + } + + boost::multi_array<T, n> output; + boost::multi_array<T, n> outputExpected; + bool supported; +}; + +LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); +LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult<uint8_t, 4> 
SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding ); +LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> 
ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); +LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); + +LayerTestResult<float, 4> ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta); +LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta); + +LayerTestResult<float, 4> SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +template<typename T> +LayerTestResult<T, 4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult<float, 4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod); + +LayerTestResult<float, 2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, float beta); + +LayerTestResult<float, 2> FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled, + bool transposeWeights); + +std::vector<LayerTestResult<float, 3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<float, 3> MergerTest(armnn::IWorkloadFactory& workloadFactory); + 
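+// A minimal usage sketch (illustrative only, not part of this header): a
+// backend test case typically invokes one of these entry points with a
+// concrete workload factory and compares the tensors carried by the result,
+// along the lines of:
+//
+//     armnn::RefWorkloadFactory factory;
+//     LayerTestResult<float, 4> result = AdditionTest(factory);
+//     BOOST_TEST(CompareTensors(result.output, result.outputExpected));
+//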
+LayerTestResult<float, 4> AdditionTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory,
+                                              armnn::IWorkloadFactory& refWorkloadFactory);
+
+LayerTestResult<float, 4> CompareActivationTest(armnn::IWorkloadFactory& workloadFactory,
+                                                armnn::IWorkloadFactory& refWorkloadFactory,
+                                                armnn::ActivationFunction f,
+                                                unsigned int batchSize);
+
+LayerTestResult<float, 4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
+                                                    armnn::IWorkloadFactory& refWorkloadFactory);
+
+LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory,
+                                               armnn::IWorkloadFactory& refWorkloadFactory);
+
+LayerTestResult<float, 4> BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<uint8_t, 4> BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory,
+                                                 armnn::IWorkloadFactory& refWorkloadFactory,
+                                                 float upperBound,
+                                                 float lowerBound);
+
+// Tests that the output is identical to the input when the output dimensions match the input ones
+LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory);
+
+// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image
+LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory);
+
+// Tests resize bilinear for minification of a square input matrix (also: input dimensions are a
+// multiple of output dimensions)
+LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory);
+
+// Tests resize bilinear for minification (output dimensions smaller than input dimensions)
+LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory);
+
+// Tests resize bilinear for magnification (output dimensions bigger than input dimensions)
+LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<uint8_t, 4> BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, float upperBound);
+LayerTestResult<uint8_t, 4>
BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, + float upperBound, + float lowerBound); + +LayerTestResult<uint8_t, 2> FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +std::vector<LayerTestResult<uint8_t, 3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f); + +LayerTestResult<uint8_t, 2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta); + +LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult<uint8_t, 4> ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> 
Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights); +LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory); + diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp new file mode 100644 index 0000000000..8e4dae35f2 --- /dev/null +++ b/src/armnn/backends/test/MemCopyTests.cpp @@ -0,0 +1,156 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> +#include <boost/multi_array.hpp> + +#include "armnn/ArmNN.hpp" +#include "backends/RefWorkloadFactory.hpp" +#if ARMCOMPUTECL_ENABLED +#include "backends/ClWorkloadFactory.hpp" +#endif +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonWorkloadFactory.hpp" +#endif +#include "backends/CpuTensorHandle.hpp" +#include "test/TensorHelpers.hpp" + +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) + +void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) +{ + const std::array<unsigned int, 4> shapeData = { 1u, 1u, 6u, 5u }; + const armnn::TensorShape tensorShape(4, shapeData.data()); + const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); + boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + ); + + boost::multi_array<float, 4> outputData(shapeData); + + auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); + auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); + + AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); + outputTensorHandle->Allocate(); + + armnn::MemCopyQueueDescriptor memCopyQueueDesc; + armnn::WorkloadInfo workloadInfo; + + const unsigned int origin[4] = {}; + + auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) + ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) + : std::move(inputTensorHandle); + auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) + ? 
dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) + : std::move(outputTensorHandle); + + AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); + AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); + + dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); + + CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); + + BOOST_TEST(CompareTensors(inputData, outputData)); +} + +template <typename SrcWorkloadFactory, typename DstWorkloadFactory> +void MemCopyTest(bool withSubtensors) +{ + SrcWorkloadFactory srcWorkloadFactory; + DstWorkloadFactory dstWorkloadFactory; + MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); +} + +#if ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true); +} + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) +{ + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(true); +} + +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) +{ + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) +{ + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(true); +} + +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp new file mode 100644 index 0000000000..1f6aadc9df --- /dev/null +++ b/src/armnn/backends/test/NormTestImpl.hpp @@ -0,0 +1,238 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "armnn/Exceptions.hpp" +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +LayerTestResult<float,4> SimpleNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + const unsigned int inputHeight = 2; + const unsigned int inputWidth = 2; + const unsigned int inputChannels = 1; + const unsigned int inputNum = 2; + + unsigned int outputHeight = inputHeight; + unsigned int outputWidth = inputWidth; + unsigned int outputChannels = inputChannels; + unsigned int outputNum = inputNum; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + auto inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + auto outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult<float,4> ret(outputTensorInfo); + + auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({ + // Batch #0 + 1.0f, 2.0f, + 3.0f, 4.0f, + // Batch #1 + 5.0f, 6.0f, + 7.0f, 8.0f + })); + + float alpha = 1.f; + float beta = 1.f; + float kappa = 1.f; + uint32_t normSize = 3; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]); + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, &refHandle); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + switch (normMethod) + { + case armnn::NormalizationAlgorithmMethod::LocalBrightness: + { + switch (normChannel) + { + case armnn::NormalizationAlgorithmChannel::Within: + { + // When normalising within channels, the 3x3 kernel covers the entire 2x2 input at every index. 
+ // Therefore, all output values should equal the inputs, but divided by: + // pow((kappa + (accumulatedScale * alpha)), beta) + // ...where accumulatedScale is the sum of every element squared + float divisor[inputNum]; + for(int i = 0; i < boost::numeric_cast<int>(inputNum); i++) + { + float accumulatedScale = input[i][0][0][0]*input[i][0][0][0] + + input[i][0][0][1]*input[i][0][0][1] + + input[i][0][1][0]*input[i][0][1][0] + + input[i][0][1][1]*input[i][0][1][1]; + divisor[i] = powf((kappa + accumulatedScale * alpha), beta); + } + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, + std::vector<float>({input[0][0][0][0]/divisor[0], + input[0][0][0][1]/divisor[0], + input[0][0][1][0]/divisor[0], + input[0][0][1][1]/divisor[0], + input[1][0][0][0]/divisor[1], + input[1][0][0][1]/divisor[1], + input[1][0][1][0]/divisor[1], + input[1][0][1][1]/divisor[1]})); + break; + } + case armnn::NormalizationAlgorithmChannel::Across: + { + // When normalising across channels, all output values should equal the inputs, but multiplied by: + // pow((kappa + (accumulatedScale * alpha)), -beta) + // ...where accumulatedScale is the sum of the inputs for adjacent channels for this element squared + // ...where adjacent channels means within half the normSize for the channel + // The test data has only one channel, so this is simplified below. + std::vector<float> outputVector; + for (int n = 0; n < boost::numeric_cast<int>(inputNum); ++n) + { + for (int h = 0; h < boost::numeric_cast<int>(inputHeight); ++h) + { + for (int w = 0; w < boost::numeric_cast<int>(inputWidth); ++w) + { + float accumulatedScale = input[n][0][h][w]*input[n][0][h][w]; + float scale = powf((kappa + accumulatedScale * alpha), -beta); + outputVector.push_back(input[n][0][h][w] * scale); + } + } + } + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputVector); + break; + } + default: + { + throw armnn::UnimplementedException("Unsupported normalisation channel type, " + "only Across and Within are supported"); + } + } + break; + } + case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough + default: + { + throw armnn::UnimplementedException("Unsupported normalisation method type, " + "only LocalBrightness is supported"); + } + } + + return ret; +} + +LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + constexpr unsigned int inputNum = 5; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputWidth = inputWidth; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult<float,4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 111234); + + constexpr float alpha = 1.f; + constexpr float beta = 1.f; + constexpr float kappa = 1.f; + 
constexpr uint32_t normSize = 5; + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + // Don't execute if Normalization is not supported for the method and channel types, as an exception will be raised. + armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + ret.supported = armnn::IsNormalizationSupported(compute, inputTensorInfo, outputTensorInfo, data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!ret.supported) + { + return ret; + } + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateNormalization(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + diff --git a/src/armnn/backends/test/PermuteTestImpl.hpp b/src/armnn/backends/test/PermuteTestImpl.hpp new file mode 100644 index 0000000000..4eafa1a211 --- /dev/null +++ b/src/armnn/backends/test/PermuteTestImpl.hpp @@ -0,0 +1,121 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template<typename T> +LayerTestResult<T, 4> SimplePermuteTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::PermuteDescriptor descriptor, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector<T>& inputData, + const std::vector<T>& outputExpectedData) +{ + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor data; + data.m_Parameters = descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> SimplePermuteFloat32TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector<float> input = std::vector<float>( + { + 1.0f, 2.0f, + 3.0f, 4.0f, + + 5.0f, 6.0f, + 7.0f, 8.0f + }); + + std::vector<float> outputExpected = std::vector<float>( + { + 1.0f, 5.0f, 2.0f, 6.0f, + 3.0f, 7.0f, 4.0f, 8.0f + }); + + return SimplePermuteTestImpl<float>(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult<uint8_t, 4> SimplePermuteUint8TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector<uint8_t> input = std::vector<uint8_t>( + { + 1, 2, + 3, 4, + + 5, 6, + 7, 8 + }); + + std::vector<uint8_t> outputExpected = std::vector<uint8_t>( + { + 1, 5, 2, 6, + 3, 7, 4, 8 + }); + + return SimplePermuteTestImpl<uint8_t>(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp 
b/src/armnn/backends/test/Pooling2dTestImpl.hpp new file mode 100644 index 0000000000..fc84ddb2ca --- /dev/null +++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp @@ -0,0 +1,1039 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 4> SimplePooling2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::Pooling2dDescriptor descriptor, + float qScale, + int32_t qOffset, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& outputExpected) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputBatchSize = boost::numeric_cast<unsigned int>(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputBatchSize = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T, 4> result(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
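+    // (If the query below reports the configuration as unsupported, the test
+    // returns early with result.supported set to false, so the caller can skip
+    // the output comparison instead of failing on a backend exception.)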
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + result.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + queueDescriptor.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!result.supported) + { + return result; + } + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = outputExpected; + + return result; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 3x3 +// Stride: (2,4) +// input size: 8x13 +// channels: 2 +// batch size: 2 +// +template<typename T> +LayerTestResult<T, 4> SimpleMaxPooling2dSize3x3Stride2x4TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 4; + // forceNoPadding is mainly used for compatibility with ARM Compute. + // As of 16/05/2017, it errors if padX or padY are equal to or greater than the pool size. + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 8; + unsigned int inputHeight = 13; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 2; + unsigned int batchSize = 2; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
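+    // (Worked example of the output shape arithmetic above, using integer
+    // division: without padding, outputWidth = (8 + 2 - 3) / 2 = 3 and
+    // outputHeight = (13 + 4 - 3) / 4 = 3; with the 3-element left/right
+    // padding, outputWidth = (8 + 3 + 3 + 2 - 3) / 2 = 6. Max pooling only
+    // selects values already present in the input, so the same scale and
+    // offset can be reused for the output.)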
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector<float> singleChannelData({ + 0.0f, 4.0f, 8.0f, 1.0f, 6.0f, 4.0f, 5.0f, 8.0f, + 1.0f, 1.0f, 6.0f, 0.0f, 3.0f, 7.0f, 4.0f, 7.0f, + 8.0f, 5.0f, 0.0f, 0.0f, 8.0f, 3.0f, 4.0f, 3.0f, + 8.0f, 2.0f, 5.0f, 4.0f, 1.0f, 9.0f, 2.0f, 0.0f, + 5.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 7.0f, 2.0f, + 1.0f, 2.0f, 6.0f, 2.0f, 7.0f, 9.0f, 5.0f, 2.0f, + 9.0f, 7.0f, 3.0f, 1.0f, 3.0f, 4.0f, 8.0f, 3.0f, + 1.0f, 0.0f, 0.0f, 5.0f, 5.0f, 4.0f, 2.0f, 0.0f, + 6.0f, 4.0f, 3.0f, 6.0f, 9.0f, 5.0f, 5.0f, 6.0f, + 8.0f, 7.0f, 9.0f, 6.0f, 1.0f, 4.0f, 1.0f, 9.0f, + 7.0f, 1.0f, 9.0f, 2.0f, 9.0f, 9.0f, 8.0f, 1.0f, + 4.0f, 4.0f, 5.0f, 9.0f, 2.0f, 6.0f, 6.0f, 4.0f, + 3.0f, 5.0f, 4.0f, 0.0f, 1.0f, 5.0f, 9.0f, 7.0f, + }); + + // Construct input data + std::vector<float> inputData; + auto negator = [](float f) { return -f; }; + + // First image (two channels where the second channel is the negative of the first one) + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + // Second image (same as first image) + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); + + // these were calculated manually + auto shape(GetTensorShapeAsArray<4>(outputTensorInfo)); + boost::multi_array<T, 4> outputExpected(shape); + if (forceNoPadding) + { + outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f, + + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f + })); + } + else + { + outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f, + + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f + })); + } + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> SimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo 
outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> LargeTensorsAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 100; + descriptor.m_StrideX = descriptor.m_StrideY = 5; + descriptor.m_PadLeft = 50; + descriptor.m_PadRight = 50; + descriptor.m_PadTop = 50; + descriptor.m_PadBottom = 50; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 5, 3, 52, 60 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 5, 3, 11, 13 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector<T> inputVec; + + for (unsigned int i = 0 ; i < inputTensorInfo.GetShape().GetNumElements(); ++i) + { + inputVec.push_back(1); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, inputVec); + + std::vector<T> outputVec; + + for (unsigned int i = 0 ; i < outputTensorInfo.GetShape().GetNumElements(); ++i) + { + outputVec.push_back(1); + } + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputVec); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> SimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 5.0f, 5.0f, + 5.0f, 5.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> 
L2Pooling2dSize3Stride1TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, + 1.0f, 2.0f, 2.0f, 1.0f, + 5.0f, 4.0f, 1.0f, 5.0f, + 2.0f, 1.0f, 5.0f, 2.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize3Stride3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 3; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize3Stride4TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 4; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + 
QuantizedVector<T>(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize7TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 7; + descriptor.m_StrideX = descriptor.m_StrideY = 7; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 7.0f, 0.0f, + 8.0f, 0.0f, 9.0f, 0.0f, 10.0f, 0.0f, 5.0f, + 0.0f, 5.0f, 0.0f, 2.0f, 0.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> L2Pooling2dSize9TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 9; + descriptor.m_StrideX = descriptor.m_StrideY = 9; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType<T>()); + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> AsymmetricNonSquarePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = 2; + descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 2; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 2; + descriptor.m_OutputShapeRounding = 
armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + // Construct input data + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 3.0f, 4.0f, + })); + + // these were calculated manually + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 0.0f, 3.0f, 0.0f, 3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> ComparePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + const unsigned int inputWidth = 16; + const unsigned int inputHeight = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + const unsigned int poolSize = 3; + const unsigned int strideX = 2; + const unsigned int strideY = 4; + const unsigned int padX = 0; + const unsigned int padY = 0; + + const unsigned int outputWidth = (inputWidth + 2 * padX + strideX - poolSize) / strideX; + const unsigned int outputHeight = (inputHeight + 2 * padY + strideY - poolSize) / strideY; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channelCount, inputHeight, inputWidth }; + unsigned int outputShape[] = { batchSize, channelCount, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + boost::multi_array<T, 4> input = MakeRandomTensor<T, 4>(inputTensorInfo, 81715); + + LayerTestResult<T, 4> comparisonResult(outputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_PoolType = poolingType; + data.m_Parameters.m_PoolWidth = poolSize; + data.m_Parameters.m_PoolHeight = poolSize; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
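+    // (Only the main workloadFactory is queried for support here; the reference
+    // factory's output is later copied into outputExpected, so the final check
+    // compares the backend under test against the reference implementation
+    // rather than against hand-computed data.)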
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + comparisonResult.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!comparisonResult.supported) + { + return comparisonResult; + } + + armnn::Pooling2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreatePooling2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 2x2 +// Stride: (2,2) +// input size: 4x4 +// channels: 1 +// batch size: 1 +// +template<typename T> +LayerTestResult<T, 4> SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 2; + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 4; + unsigned int inputHeight = 4; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 1; + unsigned int batchSize = 1; + + std::vector<float> inputData = { + 510.0f, 222.0f, 780.0f, 654.0f, + 141.0f, 276.0f, 15.0f, 546.0f, + 303.0f, 618.0f, 582.0f, 339.0f, + 438.0f, 564.0f, 573.0f, 402.0f + }; + + // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here + std::vector<float> expectedOutputDataWithPadding = { + 0.0f, 510.0f, 780.0f, 654.0f, 0.0f, + 0.0f, 438.0f, 618.0f, 402.0f, 0.0f + }; + + std::vector<float> expectedOutputDataNoPadding = { + 510.0f, 780.0f, + 618.0f, 582.0f + }; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + + // Scale and offset should match input - we're just calculating maximum values. 
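+    // (The shape arithmetic above gives a 2x2 output without padding,
+    // (4 + 2 - 2) / 2 = 2 per dimension, and a 2x5 output with the 3-element
+    // left/right padding, (4 + 3 + 3 + 2 - 2) / 2 = 5 columns, matching the
+    // expected data vectors above.)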
+ armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData)); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + forceNoPadding ? QuantizedVector<T>(qScale, qOffset, expectedOutputDataNoPadding) : + QuantizedVector<T>(qScale, qOffset, expectedOutputDataWithPadding)); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, + 1.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingMaxPooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
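+ // (These helpers default to qScale = 1.0f and qOffset = 0; callers can pass other values to exercise different quantization ranges.)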
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 2.0f, 2.0f, -3.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 13.0f, 10.0f, + 6.0f, 26.0f, 20.0f, + 3.0f, 13.0f, 10.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 0; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 0; + descriptor.m_PadBottom = 0; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Ceiling; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4}, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
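+ // (The average over a pooling window can never exceed the largest input value, so reusing the input's quantization parameters for the output is safe here.)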
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 3.5f, + 2.0f, 3.5f + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingAveragePooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 9.0f, 27.0f, 18.0f, 36.0f, + 18.0f, 9.0f, 18.0f, 9.0f, + 27.0f, 18.0f, 9.0f, 27.0f, + 9.0f, 27.0f, 9.0f, 18.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 7.0f, 11.0f, 13.0f, 9.0f, + 12.0f, 17.0f, 19.0f, 13.0f, + 12.0f, 16.0f, 16.0f, 10.0f, + 9.0f, 11.0f, 12.0f, 7.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingSimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
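+ // (Likewise for L2 pooling: the square root of the mean of squares is bounded by the largest input magnitude, so input and output can share quantization parameters.)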
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 2.0f, 4.0f, 8.0f, 16.0f, + 4.0f, 2.0f, 2.0f, 4.0f, + 8.0f, 2.0f, 4.0f, 2.0f, + 16.0f, 2.0f, 2.0f, 8.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 4.4721f, 8.0f, + 4.4721f, 2.6457f, 2.236f, + 8.0f, 1.4142f, 4.0f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template<typename T> +LayerTestResult<T, 4> IgnorePaddingL2Pooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor<T, 4>(inputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor<T, 4>(outputTensorInfo, + QuantizedVector<T>(qScale, qOffset, { + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + })); + + return SimplePooling2dTestImpl<T>(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} diff --git a/src/armnn/backends/test/QuantizeHelper.hpp b/src/armnn/backends/test/QuantizeHelper.hpp new file mode 100644 index 0000000000..bfaf9342f0 --- /dev/null +++ b/src/armnn/backends/test/QuantizeHelper.hpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/TypesUtils.hpp> + +#include <initializer_list> +#include <iterator> +#include <vector> +#include <boost/core/ignore_unused.hpp> + +template<typename T, bool DoQuantize=true> +struct SelectiveQuantizer +{ + static T Quantize(float value, float scale, int32_t offset) + { + return armnn::Quantize<T>(value, scale, offset); + } + + static float Dequantize(T value, float scale, int32_t offset) + { + return armnn::Dequantize(value, scale, offset); + } +}; + +template<typename T> +struct SelectiveQuantizer<T, false> +{ + static T Quantize(float value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } + + static float Dequantize(T value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } +}; + +template<typename T> +T SelectiveQuantize(float value, float scale, int32_t offset) +{ + return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Quantize(value, scale, offset); +}; + +template<typename T> +float SelectiveDequantize(T value, float scale, int32_t offset) +{ + return SelectiveQuantizer<T, armnn::IsQuantizedType<T>()>::Dequantize(value, scale, offset); +}; + +template<typename ItType> +struct IsFloatingPointIterator +{ + static constexpr bool value=std::is_floating_point<typename std::iterator_traits<ItType>::value_type>::value; +}; + +template <typename T, typename FloatIt, +typename std::enable_if<IsFloatingPointIterator<FloatIt>::value, int>::type=0 // Make sure valid fp iterator +> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, FloatIt first, FloatIt last) +{ + std::vector<T> quantized; + quantized.reserve(boost::numeric_cast<size_t>(std::distance(first, last))); + + for (auto it = first; it != last; ++it) + { + auto f = *it; + T q =SelectiveQuantize<T>(f, qScale, qOffset); + quantized.push_back(q); + } + + return quantized; +} + +template<typename T> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, const std::vector<float>& array) +{ + return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); +} + +template<typename T> +std::vector<T> QuantizedVector(float qScale, int32_t qOffset, std::initializer_list<float> array) +{ + return QuantizedVector<T>(qScale, qOffset, array.begin(), array.end()); +} diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp new file mode 100644 index 0000000000..87d82f1781 --- /dev/null +++ b/src/armnn/backends/test/Reference.cpp @@ -0,0 +1,231 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#include <boost/test/unit_test.hpp> + +#include "LayerTests.hpp" +#include "test/TensorHelpers.hpp" + +#include "backends/RefWorkloadFactory.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_Reference) +using FactoryType = armnn::RefWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution1dUint8, Convolution1dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPaddingLargerThanHalfKernelSize, + Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) 
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) + +ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2d, AsymmetricNonSquarePooling2dTest) +ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2dUint8, AsymmetricNonSquarePooling2dUint8Test) + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) +ARMNN_AUTO_TEST_CASE(ConstantLinearActivationUint8, ConstantLinearActivationUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest) +ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(SimpleSigmoid, SimpleSigmoidTest) +ARMNN_AUTO_TEST_CASE(SimpleSigmoidUint8, SimpleSigmoidUint8Test) + +ARMNN_AUTO_TEST_CASE(ReLu1, BoundedReLuUpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6, BoundedReLuUpperBoundOnlyTest) +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) + +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Splitter +BOOST_AUTO_TEST_CASE(SimpleSplitter) +{ + armnn::RefWorkloadFactory workloadFactory; + auto testResult = SplitterTest(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +BOOST_AUTO_TEST_CASE(SplitterUint8) +{ + armnn::RefWorkloadFactory workloadFactory; + auto testResult = SplitterUint8Test(workloadFactory); + for (unsigned int i = 0; i < testResult.size(); ++i) + { + 
BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); + } +} + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) + +ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) +ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test) +ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) +ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) +ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test) + +// Resize Bilinear +ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) +ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8, SimpleResizeBilinearUint8Test) +ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8, ResizeBilinearNopUint8Test) +ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8, ResizeBilinearSqMinUint8Test) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8, ResizeBilinearMinUint8Test) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMagUint8, ResizeBilinearMagUint8Test) + +// Fake Quantization +ARMNN_AUTO_TEST_CASE(FakeQuantization, FakeQuantizationTest) + +// L2 Normalization +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8Test) + +// Concat +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, 
Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/ReshapeTestImpl.hpp b/src/armnn/backends/test/ReshapeTestImpl.hpp new file mode 100644 index 0000000000..1a31aa3bce --- /dev/null +++ b/src/armnn/backends/test/ReshapeTestImpl.hpp @@ -0,0 +1,177 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template<typename T> +LayerTestResult<T, 4> SimpleReshapeTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector<T>& inputData, + const std::vector<T>& outputExpectedData) +{ + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ReshapeQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReshape(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<float, 4> SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector<float> input = std::vector<float>( + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, + 30.0f, 31.0f, 
32.0f, + 33.0f, 34.0f, 35.0f, + }); + + std::vector<float> outputExpected = std::vector<float>( + { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + }); + + return SimpleReshapeTestImpl<float>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} + +LayerTestResult<float, 4> SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory) +{ + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo(inputTensorInfo); + + auto input = MakeTensor<float, 4>(inputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + LayerTestResult<float, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, + { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f }); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FloorQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFloor(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult<uint8_t, 4> SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector<uint8_t> input = std::vector<uint8_t>( + { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + + 9, 10, 11, + 12, 13, 14, + 15, 16, 17, + + 18, 19, 20, + 21, 22, 23, + 24, 25, 26, + + 27, 28, 29, + 30, 31, 32, + 33, 34, 35, + }); + + std::vector<uint8_t> outputExpected = std::vector<uint8_t>( + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, + + 9, 10, 11, 12, 13, 14, 15, 16, 17, + + 18, 19, 20, 21, 22, 23, 24, 25, 26, + + 27, 28, 29, 30, 31, 32, 33, 34, 35, + }); + + return SimpleReshapeTestImpl<uint8_t>(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp new file mode 100644 index 0000000000..5aa74f9618 --- /dev/null +++ b/src/armnn/backends/test/SoftmaxTestImpl.hpp @@ -0,0 +1,150 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <armnn/TypesUtils.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include <algorithm> + +template<typename T> +LayerTestResult<T, 2> SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + using std::exp; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 4 }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + LayerTestResult<T, 2> ret(outputTensorInfo); + + // Each row is independently softmax'd + auto input = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, 0, { + 0.f, 1.f, 0.f, 0.f, + .5f, 0.f, 0.f, 0.f, + }))); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta), + exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) }; + float sum0 = x0[0] + x0[1] + x0[2] + x0[3]; + float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta), + exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) }; + float sum1 = x1[0] + x1[1] + x1[2] + x1[3]; + + ret.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0, + x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 + }))); + + return ret; +} + +template<typename T> +LayerTestResult<T, 2> CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + + const int batchSize = 20; + const int channels = 30; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channels }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + + LayerTestResult<T, 2> ret(outputTensorInfo); + auto input = MakeRandomTensor<T, 2>(inputTensorInfo, 0xF00D, 0.0f, 1.0f); + + 
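// The same random input is fed both to the backend under test and to the reference workload, so any mismatch points at the backend implementation. + 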
std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + + armnn::SoftmaxQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSoftmax(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateSoftmax(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get()); + + return ret; +}
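+ +// A minimal sketch of how these helpers are typically driven (an assumption based on the Reference.cpp suite above, where the SimpleSoftmaxTest wrapper in LayerTests.hpp is expected to forward here): +// +// BOOST_AUTO_TEST_CASE(SimpleSoftmaxBeta1) +// { +//     armnn::RefWorkloadFactory workloadFactory; +//     auto result = SimpleSoftmaxTestImpl<float>(workloadFactory, 1.0f); +//     BOOST_TEST(CompareTensors(result.output, result.outputExpected)); +// }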
\ No newline at end of file diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp new file mode 100644 index 0000000000..b72046e4bc --- /dev/null +++ b/src/armnn/backends/test/SplitterTestImpl.hpp @@ -0,0 +1,328 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include <armnn/ArmNN.hpp> +#include <armnn/Tensor.hpp> +#include <backends/WorkloadInfo.hpp> + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template<typename T> +std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 5; + unsigned int inputHeight = 6; + unsigned int inputChannels = 3; + + unsigned int outputWidth1 = 2; + unsigned int outputHeight1 = 2; + unsigned int outputChannels1 = 3; + + unsigned int outputWidth2 = 2; + unsigned int outputHeight2 = 4; + unsigned int outputChannels2 = 3; + + unsigned int outputWidth3 = 3; + unsigned int outputHeight3 = 6; + unsigned int outputChannels3 = 2; + + unsigned int outputWidth4 = 3; + unsigned int outputHeight4 = 6; + unsigned int outputChannels4 = 1; + + + // Define the tensor descriptors + armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo3({ outputChannels3, outputHeight3, outputWidth3 }, armnn::GetDataType<T>()); + armnn::TensorInfo outputTensorInfo4({ outputChannels4, outputHeight4, outputWidth4 }, armnn::GetDataType<T>()); + // note that output 5 should match output 2 + armnn::TensorInfo outputTensorInfo5({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>()); + + // Set quantization parameters if the requested type is a quantized type. 
+ // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo1.SetQuantizationScale(qScale); + outputTensorInfo1.SetQuantizationOffset(qOffset); + outputTensorInfo2.SetQuantizationScale(qScale); + outputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo3.SetQuantizationScale(qScale); + outputTensorInfo3.SetQuantizationOffset(qOffset); + outputTensorInfo4.SetQuantizationScale(qScale); + outputTensorInfo4.SetQuantizationOffset(qOffset); + outputTensorInfo5.SetQuantizationScale(qScale); + outputTensorInfo5.SetQuantizationOffset(qOffset); + } + + LayerTestResult<T,3> ret1(outputTensorInfo1); + LayerTestResult<T,3> ret2(outputTensorInfo2); + LayerTestResult<T,3> ret3(outputTensorInfo3); + LayerTestResult<T,3> ret4(outputTensorInfo4); + LayerTestResult<T,3> ret5(outputTensorInfo5); + + auto input = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + }) + )); + + + ret1.outputExpected = MakeTensor<T, 3>(outputTensorInfo1, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1.0f, 2.0f, + 6.0f, 7.0f, + + 31.0f, 32.0f, + 36.0f, 37.0f, + + 61.0f, 62.0f, + 66.0f, 67.0f, + }) + )); + + ret2.outputExpected = MakeTensor<T, 3>(outputTensorInfo2, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + )); + + ret3.outputExpected = MakeTensor<T, 3>(outputTensorInfo3, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 3.0f, 4.0f, 5.0f, + 8.0f, 9.0f, 10.0f, + 13.0f, 14.0f, 15.0f, + 18.0f, 19.0f, 20.0f, + 23.0f, 24.0f, 25.0f, + 28.0f, 29.0f, 30.0f, + + 33.0f, 34.0f, 35.0f, + 38.0f, 39.0f, 40.0f, + 43.0f, 44.0f, 45.0f, + 48.0f, 49.0f, 50.0f, + 53.0f, 54.0f, 55.0f, + 58.0f, 59.0f, 60.0f, + }) + )); + + ret4.outputExpected = MakeTensor<T, 3>(outputTensorInfo4, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 63.0f, 64.0f, 65.0f, + 68.0f, 69.0f, 70.0f, + 73.0f, 74.0f, 75.0f, + 78.0f, 79.0f, 80.0f, + 83.0f, 84.0f, 85.0f, + 88.0f, 89.0f, 90.0f, + }) + )); + + + ret5.outputExpected = MakeTensor<T, 3>(outputTensorInfo5, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11.0f, 12.0f, + 16.0f, 17.0f, + 21.0f, 22.0f, + 26.0f, 27.0f, + + 41.0f, 42.0f, + 46.0f, 47.0f, + 51.0f, 52.0f, + 56.0f, 57.0f, + + 71.0f, 72.0f, + 76.0f, 77.0f, + 81.0f, 82.0f, + 86.0f, 87.0f, + }) + )); + + std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of output[0] + armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the 
window is defined by size of output[1] + armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of output[2] + armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of output[3] + armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo1); + + std::unique_ptr<armnn::ITensorHandle> outputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo2); + + std::unique_ptr<armnn::ITensorHandle> outputHandle3 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo3); + + std::unique_ptr<armnn::ITensorHandle> outputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo4); + + std::unique_ptr<armnn::ITensorHandle> outputHandle5 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo5.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo5); + + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo3, outputHandle3.get()); + AddOutputToWorkload(data, info, outputTensorInfo4, outputHandle4.get()); + AddOutputToWorkload(data, info, outputTensorInfo5, outputHandle5.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + data.m_ViewOrigins.push_back(window3); + data.m_ViewOrigins.push_back(window4); + //add window2 again (to have an overlapping split) + data.m_ViewOrigins.push_back(window2); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle1->Allocate(); + outputHandle2->Allocate(); + outputHandle3->Allocate(); + outputHandle4->Allocate(); + outputHandle5->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); + CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); + CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get()); + CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get()); + CopyDataFromITensorHandle(&ret5.output[0][0][0], outputHandle5.get()); + + std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4, ret5}; + + return ret; +} + + +template <typename T> +LayerTestResult<T, 3> CopyViaSplitterTestImpl(armnn::IWorkloadFactory& workloadFactory, float 
qScale, int32_t qOffset) +{ + const armnn::TensorInfo tensorInfo({ 3, 6, 5 }, armnn::GetDataType<T>()); + auto input = MakeTensor<T, 3>(tensorInfo, QuantizedVector<T>(qScale, qOffset, + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + })); + + std::vector<unsigned int> origin = { 0, 0, 0 }; + armnn::SplitterQueueDescriptor::ViewOrigin window(origin); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr<armnn::ITensorHandle> outputHandle = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, tensorInfo.GetShape(), origin.data()) : + workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + LayerTestResult<T, 3> ret(tensorInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + ret.outputExpected = input; + + return ret; +} diff --git a/src/armnn/backends/test/TensorCopyUtils.cpp b/src/armnn/backends/test/TensorCopyUtils.cpp new file mode 100644 index 0000000000..e15c12a76f --- /dev/null +++ b/src/armnn/backends/test/TensorCopyUtils.cpp @@ -0,0 +1,152 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include <algorithm> +#include <cstring> +#include <boost/cast.hpp> + +#include "TensorCopyUtils.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#endif + +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonTensorHandle.hpp" +#endif + +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED +#include "backends/ArmComputeTensorUtils.hpp" +#endif + +#include "backends/CpuTensorHandle.hpp" + +void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) +{ + switch (tensorHandle->GetType()) + { + case armnn::ITensorHandle::Cpu: + { + auto handle = boost::polymorphic_downcast<armnn::ScopedCpuTensorHandle*>(tensorHandle); + memcpy(handle->GetTensor<void>(), mem, handle->GetTensorInfo().GetNumBytes()); + break; + } +#ifdef ARMCOMPUTECL_ENABLED + case armnn::ITensorHandle::CL: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast<armnn::IClTensorHandle*>(tensorHandle); + handle->Map(true); + switch(handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor()); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + handle->UnMap(); + break; + } +#endif +#if ARMCOMPUTENEON_ENABLED + case armnn::ITensorHandle::Neon: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast<armnn::INeonTensorHandle*>(tensorHandle); + switch (handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(static_cast<const float*>(mem), handle->GetTensor()); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(static_cast<const uint8_t*>(mem), handle->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + break; + } +#endif + default: + { + throw armnn::UnimplementedException(); + } + } +} + +void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle) +{ + switch (tensorHandle->GetType()) + { + case armnn::ITensorHandle::Cpu: + { + auto handle = boost::polymorphic_downcast<const armnn::ScopedCpuTensorHandle*>(tensorHandle); + memcpy(mem, handle->GetTensor<void>(), handle->GetTensorInfo().GetNumBytes()); + break; + } +#ifdef ARMCOMPUTECL_ENABLED + case armnn::ITensorHandle::CL: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast<const armnn::IClTensorHandle*>(tensorHandle); + const_cast<armnn::IClTensorHandle*>(handle)->Map(true); + switch(handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem)); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast<armnn::IClTensorHandle*>(handle)->UnMap(); + break; + } +#endif +#if ARMCOMPUTENEON_ENABLED + case armnn::ITensorHandle::Neon: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast<const armnn::INeonTensorHandle*>(tensorHandle); + switch (handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(handle->GetTensor(), static_cast<float*>(mem)); + break; + case arm_compute::DataType::QASYMM8: + 
CopyArmComputeITensorData(handle->GetTensor(), static_cast<uint8_t*>(mem)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + break; + } +#endif + default: + { + throw armnn::UnimplementedException(); + } + } +} + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) +{ + tensorHandle->Allocate(); + CopyDataToITensorHandle(tensorHandle, mem); +} diff --git a/src/armnn/backends/test/TensorCopyUtils.hpp b/src/armnn/backends/test/TensorCopyUtils.hpp new file mode 100644 index 0000000000..360eec61df --- /dev/null +++ b/src/armnn/backends/test/TensorCopyUtils.hpp @@ -0,0 +1,14 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include "armnn/Tensor.hpp" +#include "backends/ITensorHandle.hpp" + +void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem); + +void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle); + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem);
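+ +// Typical usage, as seen throughout these tests (a sketch only; 'factory' stands for any armnn::IWorkloadFactory and 'info' for a matching armnn::TensorInfo): +// +//   std::unique_ptr<armnn::ITensorHandle> handle = factory.CreateTensorHandle(info); +//   AllocateAndCopyDataToITensorHandle(handle.get(), inputData.data()); +//   // ... execute a workload that reads or writes the handle ... +//   CopyDataFromITensorHandle(outputData.data(), handle.get());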
\ No newline at end of file diff --git a/src/armnn/backends/test/WorkloadDataValidation.cpp b/src/armnn/backends/test/WorkloadDataValidation.cpp new file mode 100644 index 0000000000..c3a9d40116 --- /dev/null +++ b/src/armnn/backends/test/WorkloadDataValidation.cpp @@ -0,0 +1,450 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#include <boost/test/unit_test.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/Workload.hpp> +#include <backends/RefWorkloads.hpp> +#include <backends/RefWorkloadFactory.hpp> + +#include <armnn/Exceptions.hpp> + +#include "WorkloadTestUtils.hpp" + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(WorkloadInfoValidation) + + + +BOOST_AUTO_TEST_CASE(QueueDescriptor_Validate_WrongNumOfInputsOutputs) +{ + InputQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + //invalid argument exception is expected, because no inputs and no outputs were defined + BOOST_CHECK_THROW(RefWorkloadFactory().CreateInput(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {2, 3, 4}; // <- invalid - input tensor has to be 4D + unsigned int outputShape[] = {2, 3, 4, 5}; + + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::DataType::Float32); + + Pooling2dQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + + // invalid argument exception is expected, input tensor has to be 4D + BOOST_CHECK_THROW(RefPooling2dFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) +{ + unsigned int inputHeight = 1; + unsigned int inputWidth = 1; + unsigned int inputChannels = 4; + unsigned int inputNum = 2; + + unsigned int outputChannels = inputChannels; + unsigned int outputHeight = inputHeight + 1; //makes data invalid - Softmax expects height and width to be 1 + unsigned int outputWidth = inputWidth; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + SoftmaxQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + //invalid argument exception is expected, because height != 1 + BOOST_CHECK_THROW(RefSoftmaxFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputWidth = 1; + unsigned int outputHeight = 1; + unsigned int outputChannels = 3; + unsigned int outputNum 
+
+BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing)
+{
+    unsigned int inputWidth = 1;
+    unsigned int inputHeight = 1;
+    unsigned int inputChannels = 5;
+    unsigned int inputNum = 2;
+
+    unsigned int outputWidth = 1;
+    unsigned int outputHeight = 1;
+    unsigned int outputChannels = 3;
+    unsigned int outputNum = 2;
+
+    // Define the tensor descriptors.
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+    armnn::TensorInfo weightsDesc;
+    armnn::TensorInfo biasesDesc;
+
+    unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth };
+    unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth };
+    unsigned int weightsShape[] = { 1, 1, inputChannels, outputChannels };
+    unsigned int biasShape[] = { 1, outputChannels, outputHeight, outputWidth };
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+    weightsDesc = armnn::TensorInfo(4, weightsShape, armnn::DataType::Float32);
+    biasesDesc = armnn::TensorInfo(4, biasShape, armnn::DataType::Float32);
+
+    FullyConnectedQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    ScopedCpuTensorHandle weightTensor(weightsDesc);
+    ScopedCpuTensorHandle biasTensor(biasesDesc);
+
+    AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+    invalidData.m_Weight = &weightTensor;
+    invalidData.m_Bias = &biasTensor;
+    invalidData.m_Parameters.m_BiasEnabled = true;
+    invalidData.m_Parameters.m_TransposeWeightMatrix = false;
+
+    // An InvalidArgumentException is expected, because not all of the required fields have been provided.
+    // In particular inputsData[0], outputsData[0] and weightsData cannot be null.
+    BOOST_CHECK_THROW(RefFullyConnectedFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight)
+{
+    constexpr unsigned int inputNum = 5;
+    constexpr unsigned int inputHeight = 32;
+    constexpr unsigned int inputWidth = 24;
+    constexpr unsigned int inputChannels = 3;
+
+    constexpr unsigned int outputNum = inputNum;
+    constexpr unsigned int outputChannels = inputChannels;
+    constexpr unsigned int outputHeight = inputHeight + 1; // Makes the data invalid: normalization requires
+                                                           // the input and output to have the same dimensions.
+    constexpr unsigned int outputWidth = inputWidth;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
+    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    armnn::NormalizationAlgorithmMethod normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
+    armnn::NormalizationAlgorithmChannel normChannel = armnn::NormalizationAlgorithmChannel::Across;
+    float alpha = 1.f;
+    float beta = 1.f;
+    float kappa = 1.f;
+    uint32_t normSize = 5;
+
+    NormalizationQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+    invalidData.m_Parameters.m_NormChannelType = normChannel;
+    invalidData.m_Parameters.m_NormMethodType = normMethod;
+    invalidData.m_Parameters.m_NormSize = normSize;
+    invalidData.m_Parameters.m_Alpha = alpha;
+    invalidData.m_Parameters.m_Beta = beta;
+    invalidData.m_Parameters.m_K = kappa;
+
+    // An InvalidArgumentException is expected, because input height != output height.
+    BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
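+
+// The two window tests that follow exercise the view-window bounds rule:
+// for every dimension i, wOrigin[i] + viewShape[i] must not exceed the shape
+// of the tensor the views are carved from. With inputHeight 32 and
+// outputHeight 18 below, an origin of {0, 0, 15, 0} fails because
+// 15 + 18 = 33 > 32.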
+
+BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow)
+{
+    constexpr unsigned int inputNum = 1;
+    constexpr unsigned int inputHeight = 32;
+    constexpr unsigned int inputWidth = 24;
+    constexpr unsigned int inputChannels = 3;
+
+    constexpr unsigned int outputNum = inputNum;
+    constexpr unsigned int outputChannels = inputChannels;
+    constexpr unsigned int outputHeight = 18;
+    constexpr unsigned int outputWidth = inputWidth;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
+    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    SplitterQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+    // Invalid, since the window has only 3 dimensions while the input tensor is 4D.
+    std::vector<unsigned int> wOrigin = {0, 0, 0};
+    armnn::SplitterQueueDescriptor::ViewOrigin window(wOrigin);
+    invalidData.m_ViewOrigins.push_back(window);
+
+    BOOST_TEST_INFO("Invalid argument exception is expected, because the split window dimensionality does not "
+                    "match the input.");
+    BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+
+    // Invalid, since the window extends past the boundary of the input tensor.
+    std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0};
+    armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3);
+    invalidData.m_ViewOrigins[0] = window3;
+    BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2] + outputHeight > inputHeight).");
+    BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+
+    std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0};
+    armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4);
+    invalidData.m_ViewOrigins[0] = window4;
+
+    std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2};
+    armnn::SplitterQueueDescriptor::ViewOrigin window5(wOrigin5);
+    invalidData.m_ViewOrigins.push_back(window5);
+
+    BOOST_TEST_INFO("Invalid argument exception is expected, because the number of split windows does not match "
+                    "the number of outputs.");
+    BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
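+
+// A minimal passing sketch for the splitter (an assumption based on the bounds
+// rule exercised above, not on any additional checks the workload may perform):
+// one 4D window per output, fully inside the input, e.g. height 0 + 18 <= 32.
+BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_ValidWindowSketch)
+{
+    unsigned int inputShape[] = {1, 3, 32, 24};
+    unsigned int outputShape[] = {1, 3, 18, 24};
+
+    armnn::TensorInfo inputTensorInfo(4, inputShape, armnn::DataType::Float32);
+    armnn::TensorInfo outputTensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    SplitterQueueDescriptor validData;
+    WorkloadInfo validInfo;
+
+    AddInputToWorkload(validData, validInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(validData, validInfo, outputTensorInfo, nullptr);
+
+    std::vector<unsigned int> origin = {0, 0, 0, 0};
+    validData.m_ViewOrigins.push_back(armnn::SplitterQueueDescriptor::ViewOrigin(origin));
+
+    BOOST_CHECK_NO_THROW(RefSplitterFloat32Workload(validData, validInfo));
+}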
+
+BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow)
+{
+    constexpr unsigned int inputNum = 1;
+    constexpr unsigned int inputChannels = 3;
+    constexpr unsigned int inputHeight = 32;
+    constexpr unsigned int inputWidth = 24;
+
+    constexpr unsigned int outputNum = 1;
+    constexpr unsigned int outputChannels = 3;
+    constexpr unsigned int outputHeight = 32;
+    constexpr unsigned int outputWidth = 24;
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth};
+    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    MergerQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+    // Invalid, since the window has only 3 dimensions while the input tensor is 4D.
+    std::vector<unsigned int> wOrigin = {0, 0, 0};
+    armnn::MergerQueueDescriptor::ViewOrigin window(wOrigin);
+    invalidData.m_ViewOrigins.push_back(window);
+
+    BOOST_TEST_INFO("Invalid argument exception is expected, because the merge window dimensionality does not "
+                    "match the input.");
+    BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+
+    // Invalid, since the window extends past the boundary of the output tensor.
+    std::vector<unsigned int> wOrigin3 = {0, 0, 15, 0};
+    armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);
+    invalidData.m_ViewOrigins[0] = window3;
+    BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2] + inputHeight > outputHeight).");
+    BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+
+    std::vector<unsigned int> wOrigin4 = {0, 0, 0, 0};
+    armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);
+    invalidData.m_ViewOrigins[0] = window4;
+
+    std::vector<unsigned int> wOrigin5 = {1, 16, 20, 2};
+    armnn::MergerQueueDescriptor::ViewOrigin window5(wOrigin5);
+    invalidData.m_ViewOrigins.push_back(window5);
+
+    BOOST_TEST_INFO("Invalid argument exception is expected, because the number of merge windows does not match "
+                    "the number of inputs.");
+    BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputNumbers)
+{
+    armnn::TensorInfo input1TensorInfo;
+    armnn::TensorInfo input2TensorInfo;
+    armnn::TensorInfo input3TensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int shape[] = {1, 1, 1, 1};
+
+    input1TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+    input2TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+    input3TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
+
+    AdditionQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+    // Too few inputs.
+    BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+
+    AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
+
+    // Correct: addition takes exactly two inputs.
+    BOOST_CHECK_NO_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo));
+
+    AddInputToWorkload(invalidData, invalidInfo, input3TensorInfo, nullptr);
+
+    // Too many inputs.
+    BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
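+
+// The next test relies on the usual broadcasting rule: two shapes are
+// compatible only if, in every dimension, the sizes match or one of them is 1.
+// {1, 1, 2, 1} and {1, 1, 3, 2} fail in dimension 2, where 2 != 3 and neither
+// is 1 (dimension 3, with sizes 1 and 2, would broadcast fine).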
+
+BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes)
+{
+    armnn::TensorInfo input1TensorInfo;
+    armnn::TensorInfo input2TensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int shape1[] = {1, 1, 2, 1};
+    unsigned int shape2[] = {1, 1, 3, 2};
+
+    // Incompatible shapes, even with broadcasting.
+    {
+        input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
+        input2TensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32);
+        outputTensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
+
+        AdditionQueueDescriptor invalidData;
+        WorkloadInfo invalidInfo;
+
+        AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
+        AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+        BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+    }
+
+    // Output size not compatible with the input sizes.
+    {
+        input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
+        input2TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32);
+        outputTensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32);
+
+        AdditionQueueDescriptor invalidData;
+        WorkloadInfo invalidInfo;
+
+        AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr);
+        AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+        // The output shape differs from the (matching) input shapes.
+        BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+    }
+}
+
+BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimensionMismatch)
+{
+    armnn::TensorInfo input0TensorInfo;
+    armnn::TensorInfo input1TensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    constexpr unsigned int input0Shape[] = { 2, 2, 4, 4 };
+    constexpr std::size_t dimensionCount = std::extent<decltype(input0Shape)>::value;
+
+    // Check dimension consistency for the input tensors.
+    for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex)
+    {
+        unsigned int input1Shape[dimensionCount];
+        for (unsigned int i = 0; i < dimensionCount; ++i)
+        {
+            input1Shape[i] = input0Shape[i];
+        }
+
+        ++input1Shape[dimIndex];
+
+        input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
+        input1TensorInfo = armnn::TensorInfo(dimensionCount, input1Shape, armnn::DataType::Float32);
+        outputTensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
+
+        MultiplicationQueueDescriptor invalidData;
+        WorkloadInfo invalidInfo;
+
+        AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
+
+        BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+    }
+
+    // Check dimension consistency between the input and output tensors.
+    for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex)
+    {
+        unsigned int outputShape[dimensionCount];
+        for (unsigned int i = 0; i < dimensionCount; ++i)
+        {
+            outputShape[i] = input0Shape[i];
+        }
+
+        ++outputShape[dimIndex];
+
+        input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
+        input1TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32);
+        outputTensorInfo = armnn::TensorInfo(dimensionCount, outputShape, armnn::DataType::Float32);
+
+        MultiplicationQueueDescriptor invalidData;
+        WorkloadInfo invalidInfo;
+
+        AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr);
+        AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr);
+
+        BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+    }
+}
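+
+// Reshape only rearranges data, so the element counts must match exactly:
+// below, 1 * 1 * 2 * 3 = 6 input elements cannot be reshaped into
+// 1 * 1 * 1 * 2 = 2 output elements.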
+
+BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    // The input and output shapes should have the same number of elements, but these don't.
+    unsigned int inputShape[] = { 1, 1, 2, 3 };
+    unsigned int outputShape[] = { 1, 1, 1, 2 };
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    ReshapeQueueDescriptor invalidData;
+    WorkloadInfo invalidInfo;
+
+    AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr);
+    AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr);
+
+    // An InvalidArgumentException is expected, because the numbers of elements don't match.
+    BOOST_CHECK_THROW(RefReshapeFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/backends/test/WorkloadTestUtils.hpp b/src/armnn/backends/test/WorkloadTestUtils.hpp
new file mode 100644
index 0000000000..bac958f57c
--- /dev/null
+++ b/src/armnn/backends/test/WorkloadTestUtils.hpp
@@ -0,0 +1,55 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <backends/WorkloadInfo.hpp>
+
+namespace armnn
+{
+class ITensorHandle;
+}
+
+// Appends a tensor handle and its TensorInfo as a new input of the workload.
+template <typename QueueDescriptor>
+void AddInputToWorkload(QueueDescriptor& descriptor,
+                        armnn::WorkloadInfo& info,
+                        const armnn::TensorInfo& tensorInfo,
+                        armnn::ITensorHandle* tensorHandle)
+{
+    descriptor.m_Inputs.push_back(tensorHandle);
+    info.m_InputTensorInfos.push_back(tensorInfo);
+}
+
+// Appends a tensor handle and its TensorInfo as a new output of the workload.
+template <typename QueueDescriptor>
+void AddOutputToWorkload(QueueDescriptor& descriptor,
+                         armnn::WorkloadInfo& info,
+                         const armnn::TensorInfo& tensorInfo,
+                         armnn::ITensorHandle* tensorHandle)
+{
+    descriptor.m_Outputs.push_back(tensorHandle);
+    info.m_OutputTensorInfos.push_back(tensorInfo);
+}
+
+// Replaces the tensor handle and TensorInfo of an existing input.
+template <typename QueueDescriptor>
+void SetWorkloadInput(QueueDescriptor& descriptor,
+                      armnn::WorkloadInfo& info,
+                      unsigned int index,
+                      const armnn::TensorInfo& tensorInfo,
+                      armnn::ITensorHandle* tensorHandle)
+{
+    descriptor.m_Inputs[index] = tensorHandle;
+    info.m_InputTensorInfos[index] = tensorInfo;
+}
+
+// Replaces the tensor handle and TensorInfo of an existing output.
+template <typename QueueDescriptor>
+void SetWorkloadOutput(QueueDescriptor& descriptor,
+                       armnn::WorkloadInfo& info,
+                       unsigned int index,
+                       const armnn::TensorInfo& tensorInfo,
+                       armnn::ITensorHandle* tensorHandle)
+{
+    descriptor.m_Outputs[index] = tensorHandle;
+    info.m_OutputTensorInfos[index] = tensorInfo;
+}
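+
+// Typical usage (a sketch; the descriptor type and handles stand in for
+// whatever the calling test actually uses):
+//
+//   Pooling2dQueueDescriptor data;
+//   armnn::WorkloadInfo info;
+//   AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+//   AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+//   // Later, swap input 0 for a different handle without touching the rest:
+//   SetWorkloadInput(data, info, 0, inputTensorInfo, otherHandle.get());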
\ No newline at end of file