From c577f2c6a3b4ddb6ba87a882723c53a248afbeba Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 31 Aug 2018 09:22:23 +0100 Subject: Release 18.08 --- src/armnn/backends/test/ActivationFixture.hpp | 2 +- src/armnn/backends/test/ActivationTestImpl.hpp | 27 +- src/armnn/backends/test/ArmComputeCl.cpp | 48 +- src/armnn/backends/test/ArmComputeNeon.cpp | 156 ++- src/armnn/backends/test/BatchNormTestImpl.hpp | 6 +- .../backends/test/ClContextControlFixture.hpp | 21 + src/armnn/backends/test/Conv2dTestImpl.hpp | 52 +- .../backends/test/ConvertFp16ToFp32TestImpl.hpp | 55 + .../backends/test/ConvertFp32ToFp16TestImpl.hpp | 55 + src/armnn/backends/test/CreateWorkloadCl.cpp | 340 ++++-- src/armnn/backends/test/CreateWorkloadNeon.cpp | 270 +++-- src/armnn/backends/test/CreateWorkloadRef.cpp | 219 ++-- src/armnn/backends/test/FullyConnectedTestImpl.hpp | 8 +- src/armnn/backends/test/IsLayerSupportedTest.cpp | 178 ++- .../backends/test/IsLayerSupportedTestImpl.hpp | 167 ++- .../backends/test/LayerReleaseConstantDataTest.cpp | 212 ++++ src/armnn/backends/test/LayerTests.cpp | 166 ++- src/armnn/backends/test/LayerTests.hpp | 25 +- src/armnn/backends/test/LstmTestImpl.hpp | 1150 ++++++++++++++++++++ src/armnn/backends/test/MemCopyTests.cpp | 24 + src/armnn/backends/test/NormTestImpl.hpp | 4 +- src/armnn/backends/test/Pooling2dTestImpl.hpp | 14 +- src/armnn/backends/test/QuantizeHelper.hpp | 2 +- src/armnn/backends/test/Reference.cpp | 26 +- src/armnn/backends/test/SoftmaxTestImpl.hpp | 2 +- src/armnn/backends/test/SplitterTestImpl.hpp | 40 +- src/armnn/backends/test/TensorCopyUtils.cpp | 11 +- src/armnn/backends/test/WorkloadDataValidation.cpp | 71 +- 28 files changed, 2841 insertions(+), 510 deletions(-) create mode 100644 src/armnn/backends/test/ClContextControlFixture.hpp create mode 100644 src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp create mode 100644 src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp create mode 100644 src/armnn/backends/test/LayerReleaseConstantDataTest.cpp create mode 100644 src/armnn/backends/test/LstmTestImpl.hpp (limited to 'src/armnn/backends/test') diff --git a/src/armnn/backends/test/ActivationFixture.hpp b/src/armnn/backends/test/ActivationFixture.hpp index a67a110354..69f3c8be05 100644 --- a/src/armnn/backends/test/ActivationFixture.hpp +++ b/src/armnn/backends/test/ActivationFixture.hpp @@ -41,7 +41,7 @@ struct ActivationFixture armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; - // parameters used by some of the activation functions + // Parameters used by some of the activation functions. float a = 0.234f; float b = -12.345f; }; diff --git a/src/armnn/backends/test/ActivationTestImpl.hpp b/src/armnn/backends/test/ActivationTestImpl.hpp index 255a00ef0b..e699b2289b 100644 --- a/src/armnn/backends/test/ActivationTestImpl.hpp +++ b/src/armnn/backends/test/ActivationTestImpl.hpp @@ -53,7 +53,7 @@ LayerTestResult BoundedReLuTestCommon(armnn::IWorkloadFactory& workloadFac std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - // Setup bounded ReLu + // Setup bounded ReLu. armnn::ActivationQueueDescriptor descriptor; armnn::WorkloadInfo workloadInfo; AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); @@ -94,7 +94,7 @@ LayerTestResult BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFact 0.999f, 1.2f, 0.89f, 6.1f, }; - // Calculated manually + // Calculated manually. std::vector output = std::vector{ -1.0f, 0.1f, 0.5f, 1.0f, 0.786f, 0.9875f, -1.0f, 0.384f, @@ -122,7 +122,7 @@ LayerTestResult BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& 0.999f, 1.2f, 0.89f, 6.1f, }; - // Calculated manually + // Calculated manually. std::vector output = std::vector{ 0.0f, 0.1f, 0.5f, 6.0f, 0.786f, 5.9875f, 0.0f, 0.384f, @@ -147,7 +147,7 @@ LayerTestResult BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadF 251, 8, 92 }; - // Calculated manually + // Calculated manually. std::vector output = std::vector{ 0, 122, 0, 255, 0, 58 @@ -176,7 +176,7 @@ LayerTestResult BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkl 251, 8, 92 }; - // Calculated manually + // Calculated manually. std::vector output = std::vector{ 51, 192, 32, 192, 32, 92 @@ -186,7 +186,7 @@ LayerTestResult BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkl float inputScale = 0.0125f; return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, - inputScale, inputOffset, inputScale, inputOffset, // input/output scale & offset same + inputScale, inputOffset, inputScale, inputOffset, // Input/output scale & offset same. input, output, inputWidth, inputHeight, inputChannels, inputBatchSize); } @@ -229,13 +229,14 @@ boost::multi_array BoundedReLuRandomInputTest(armnn::IWorkloadFactory& boost::multi_array output(GetTensorShapeAsArray<4>(outputTensorInfo)); - // min/max random values passed to MakeRandomTensor are purposely outside of the ReLu range [lowerBound, upperBound] + // Min/max random values passed to MakeRandomTensor are purposely outside of the ReLu + // range [lowerBound, upperBound]. auto input = MakeRandomTensor(inputTensorInfo, 4605828, lowerBound - 5.0f, upperBound * 2.0f); std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - // Setup bounded ReLu + // Set up bounded ReLu. armnn::ActivationQueueDescriptor descriptor; armnn::WorkloadInfo workloadInfo; AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); @@ -308,7 +309,7 @@ LayerTestResult ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - // Do linear activation that should leave tensor unchanged + // Do linear activation that should leave the tensor unchanged. armnn::ActivationQueueDescriptor data; armnn::WorkloadInfo info; AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); @@ -329,7 +330,7 @@ LayerTestResult ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); - // Ensure output equals input + // Ensure output equals input. ret.outputExpected = input; return ret; @@ -386,7 +387,7 @@ LayerTestResult SimpleActivationTest(armnn::IWorkloadFactory& workloadFact std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); - // Setup bounded ReLu + // Setup bounded ReLu. armnn::ActivationQueueDescriptor descriptor; armnn::WorkloadInfo workloadInfo; AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); @@ -407,7 +408,7 @@ LayerTestResult SimpleActivationTest(armnn::IWorkloadFactory& workloadFact CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); - // Calculated manually + // Calculated manually. result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, outputExpectedData)); return result; @@ -423,7 +424,7 @@ LayerTestResult SimpleSigmoidTestCommon(armnn::IWorkloadFactory& workloadF 1.0f, 2.0f, 3.0f, 4.0f }; - // Calculate output values for input + // Calculate output values for input. auto f = [](float value) { return 1.0f / (1.0f + std::exp(-value)); diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp index ae42d03ee3..d0cb7243c3 100644 --- a/src/armnn/backends/test/ArmComputeCl.cpp +++ b/src/armnn/backends/test/ArmComputeCl.cpp @@ -3,7 +3,6 @@ // See LICENSE file in the project root for full license information. // #include - #include "test/TensorHelpers.hpp" #include "LayerTests.hpp" @@ -13,6 +12,7 @@ #include "backends/RefWorkloadFactory.hpp" #include "backends/ClLayerSupport.hpp" #include "ActivationFixture.hpp" +#include "ClContextControlFixture.hpp" #include #include @@ -21,7 +21,7 @@ #include "test/UnitTests.hpp" -BOOST_AUTO_TEST_SUITE(Compute_ArmComputeCl) +BOOST_FIXTURE_TEST_SUITE(Compute_ArmComputeCl, ClContextControlFixture) using FactoryType = armnn::ClWorkloadFactory; // ============================================================================ @@ -65,27 +65,24 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConv ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) -// Splitter -BOOST_AUTO_TEST_CASE(SimpleSplitter) +// Softmax +BOOST_AUTO_TEST_CASE(Softmax4dSupport) { - armnn::ClWorkloadFactory workloadFactory; - auto testResult = SplitterTest(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } -} + const unsigned int numDimensions = 4u; + std::array dimensionSizes; + dimensionSizes.fill(1u); -BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) -{ - armnn::ClWorkloadFactory workloadFactory; - auto testResult = SplitterUint8Test(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } + const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + const armnn::TensorInfo outputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + + // 4D Softmax should be reported as unsupported on the CL backend + BOOST_TEST(!armnn::IsSoftmaxSupportedCl(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); } +// Splitter +ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) +ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) + ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) @@ -209,6 +206,19 @@ ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) +// Lstm +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection, + LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest) +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, + LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest) +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection, + LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest) + +// Convert from Float16 to Float32 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) +// Convert from Float32 to Float16 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) + // ============================================================================ // COMPARE tests diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp index 0a78b75e2e..12947ca77a 100644 --- a/src/armnn/backends/test/ArmComputeNeon.cpp +++ b/src/armnn/backends/test/ArmComputeNeon.cpp @@ -54,7 +54,7 @@ armnn::Convolution2dDescriptor MakeConv2dDesc(uint32_t strideX, uint32_t strideY BOOST_AUTO_TEST_CASE(Conv2dUtils) { - // the only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3} + // The only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3}. armnn::TensorShape shape1x1({ 1,1,1,1 }); armnn::TensorInfo info1x1(shape1x1, armnn::DataType::Float32); BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1))); @@ -98,49 +98,133 @@ armnn::DepthwiseConvolution2dDescriptor MakeDepthwiseConv2dDesc(uint32_t strideX uint32_t depthMultiplier = 1, uint32_t padLeft = 0, uint32_t padRight = 0, uint32_t padTop = 0, uint32_t padBottom = 0) { + boost::ignore_unused(depthMultiplier); + armnn::DepthwiseConvolution2dDescriptor desc; + desc.m_PadLeft = padLeft; desc.m_PadRight = padRight; + desc.m_PadTop = padTop; desc.m_PadBottom = padBottom; desc.m_StrideX = strideX; desc.m_StrideY = strideY; - desc.m_BiasEnabled = true; + desc.m_BiasEnabled = false; + return desc; } +armnn::TensorInfo CreateOutputTensorInfo(const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& weightsInfo, + const armnn::DepthwiseConvolution2dDescriptor& descriptor, + armnn::DataType dataType) +{ + const armnn::TensorShape& inputShape = inputInfo.GetShape(); + const armnn::TensorShape& filterShape = weightsInfo.GetShape(); + + unsigned int inWidth = inputShape[3]; + unsigned int inHeight = inputShape[2]; + unsigned int inBatchSize = inputShape[0]; + + unsigned int filterWidth = filterShape[3]; + unsigned int readWidth = (inWidth + descriptor.m_PadLeft + descriptor.m_PadRight) - (filterWidth); + unsigned int outWidth = 1u + (readWidth / descriptor.m_StrideX); + + unsigned int filterHeight = filterShape[2]; + unsigned int readHeight = (inHeight + descriptor.m_PadTop + descriptor.m_PadBottom) - (filterHeight); + unsigned int outHeight = 1u + (readHeight / descriptor.m_StrideY); + unsigned int depthMultiplier = filterShape[0]; + + unsigned int outChannels = filterShape[1] * depthMultiplier; + unsigned int outBatchSize = inBatchSize; + + armnn::TensorShape outputShape({outBatchSize, outChannels, outHeight, outWidth}); + return armnn::TensorInfo(outputShape, dataType); +} } BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) { - armnn::TensorInfo inputInfo({ 1, 1, 10, 10 }, armnn::DataType::Float32); - armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, armnn::DataType::Float32); + const armnn::DataType dataType = armnn::DataType::Float32; + + armnn::TensorInfo inputInfo({1, 1, 10, 10 }, dataType); + armnn::TensorInfo outputInfo; + armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, dataType); + armnn::TensorInfo biasesInfo; + + armnn::DepthwiseConvolution2dDescriptor descriptor; // Strides supported: 1,2,3 - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 2), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 3), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 1), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 2), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(2, 3), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 1), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 2), weightsInfo3x3)); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(3, 3), weightsInfo3x3)); - - // Unsupported stride - BOOST_TEST(!armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(4, 1), weightsInfo3x3)); + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(1, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(1, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + // Supported stride 4 + descriptor = MakeDepthwiseConv2dDesc(4, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); // Supported weights shape 1x1 armnn::TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, armnn::DataType::Float32); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo1x1)); + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo1x1, biasesInfo)); // Supported shape 2x2 armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo2x2)); + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo2x2, biasesInfo)); // Asymmetric padding - BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2), - weightsInfo3x3)); + descriptor = MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); } // Pooling @@ -201,27 +285,24 @@ ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) -// Splitter -BOOST_AUTO_TEST_CASE(SimpleSplitter) +// Softmax +BOOST_AUTO_TEST_CASE(Softmax4dSupport) { - armnn::NeonWorkloadFactory workloadFactory; - auto testResult = SplitterTest(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } -} + const unsigned int numDimensions = 4u; + std::array dimensionSizes; + dimensionSizes.fill(1u); -BOOST_AUTO_TEST_CASE(SimpleSplitterUint8) -{ - armnn::NeonWorkloadFactory workloadFactory; - auto testResult = SplitterUint8Test(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } + const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + const armnn::TensorInfo outputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + + // 4D Softmax should be reported as unsupported on the NEON backend + BOOST_TEST(!armnn::IsSoftmaxSupportedNeon(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); } +// Splitter +ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) +ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) + ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) @@ -375,5 +456,4 @@ ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, Positive ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, CompareActivationTest, armnn::ActivationFunction::Square, 5u) - BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/BatchNormTestImpl.hpp b/src/armnn/backends/test/BatchNormTestImpl.hpp index 861ef6b053..82e6e86747 100644 --- a/src/armnn/backends/test/BatchNormTestImpl.hpp +++ b/src/armnn/backends/test/BatchNormTestImpl.hpp @@ -52,7 +52,7 @@ LayerTestResult BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, 4.f, 1.f, -2.f, 4.f })); - // these values are per-channel of the input + // These values are per-channel of the input. auto mean = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, -2})); auto variance = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {4, 9})); auto beta = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, 2})); @@ -82,8 +82,8 @@ LayerTestResult BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, data.m_Gamma = &gammaTensor; data.m_Parameters.m_Eps = 0.0f; - // for each channel: - // substract mean, divide by standard deviation (with an epsilon to avoid div by 0) + // For each channel: + // substract mean, divide by standard deviation (with an epsilon to avoid div by 0), // multiply by gamma and add beta ret.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, diff --git a/src/armnn/backends/test/ClContextControlFixture.hpp b/src/armnn/backends/test/ClContextControlFixture.hpp new file mode 100644 index 0000000000..13c061f818 --- /dev/null +++ b/src/armnn/backends/test/ClContextControlFixture.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include "backends/ClContextControl.hpp" + +template +struct ClContextControlFixtureBase +{ + // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case + ClContextControlFixtureBase() : m_ClContextControl(nullptr, ProfilingEnabled) {} + ~ClContextControlFixtureBase() {} + + armnn::ClContextControl m_ClContextControl; +}; + +using ClContextControlFixture = ClContextControlFixtureBase; +using ClProfilingContextControlFixture = ClContextControlFixtureBase; diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp index 0c34beaa33..43297880f8 100644 --- a/src/armnn/backends/test/Conv2dTestImpl.hpp +++ b/src/armnn/backends/test/Conv2dTestImpl.hpp @@ -32,7 +32,7 @@ struct FullyConnectedBiasTypeForInputType using Type = int32_t; }; -// Modifies a std::vector in-place using a specified bias +// Modifies a std::vector in-place using a specified bias. template void ApplyBias(std::vector& v, float vScale, int32_t vOffset, const std::vector& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) @@ -42,7 +42,7 @@ void ApplyBias(std::vector& v, float vScale, int32_t vOffset, BOOST_ASSERT_MSG((armnn::IsQuantizedType() && bScale != 0.0f) || (!armnn::IsQuantizedType()), "Invalid type and parameter combination."); - // Note we need to dequantize and re-quantize the image value and the bias + // Note we need to dequantize and re-quantize the image value and the bias. for (uint32_t i = 0; i < bias.size(); ++i) { float dBias = SelectiveDequantize(bias[i], bScale, bOffset); @@ -90,15 +90,15 @@ LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl bool biasEnabled = bias.size() > 0; - // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches) + // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). BOOST_ASSERT(inputNum == 1); BOOST_ASSERT(outputNum == 1); - // If a bias is used, its size must equal the number of output channels + // If a bias is used, its size must equal the number of output channels. BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); - // Note these tensors will use two (identical) batches + // Note these tensors will use two (identical) batches. armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType()); armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, armnn::GetDataType()); @@ -120,7 +120,7 @@ LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl LayerTestResult ret(outputTensorInfo); - // Construct input data - Two batches of the same input image + // Construct input data - two batches of the same input image. std::vector inputImage; inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth); std::vector inputData; @@ -131,7 +131,7 @@ LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl std::vector outputImage; outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); - // Apply bias to output image if enabled + // Apply bias to output image if it is enabled. if(biasEnabled) { std::vector biasV; @@ -141,14 +141,14 @@ LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl outputWidth, outputHeight); } - // Construct expected output data - two identical images + // Construct expected output data - two identical images. std::vector outputData; outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); ret.outputExpected = MakeTensor(outputTensorInfo, outputData); - // todo: nontrivial padding and strides + // Todo: nontrivial padding and strides. uint32_t strideX = 1; uint32_t strideY = 1; @@ -171,7 +171,7 @@ LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // still set this whether or not bias is enabled - can be a source of bugs + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. data.m_Parameters.m_StrideX = strideX; data.m_Parameters.m_StrideY = strideY; data.m_Parameters.m_PadLeft = padLeft; @@ -222,11 +222,11 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadF unsigned int outputHeight = boost::numeric_cast(outputExpected.shape()[2]); unsigned int outputWidth = boost::numeric_cast(outputExpected.shape()[3]); - // If a bias is used, its size must equal the number of output channels + // If a bias is used, its size must equal the number of output channels. bool biasEnabled = bias.size() > 0; BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); - // create the tensors + // Creates the tensors. armnn::TensorInfo inputTensorInfo({inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType()); armnn::TensorInfo outputTensorInfo({outputNum, outputChannels, outputHeight, outputWidth}, armnn::GetDataType()); @@ -246,12 +246,12 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadF biasDesc.SetQuantizationOffset(0); } - // Construct the input data + // Construct the input data. std::vector inputData; inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth); auto batchedInput = MakeTensor(inputTensorInfo, inputData); - // Construct the output data, with bias applied, as appropriate + // Construct the output data, with bias applied, as appropriate. std::vector outputData; outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); if (biasEnabled) @@ -280,7 +280,7 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadF armnn::DepthwiseConvolution2dQueueDescriptor data; data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // still set this whether or not bias is enabled - can be a source of bugs + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs. data.m_Parameters.m_StrideX = strideX; data.m_Parameters.m_StrideY = strideY; data.m_Parameters.m_PadLeft = padLeft; @@ -372,14 +372,14 @@ LayerTestResult DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFa -1.f, 0.f, -1.f, }))); - // manually calculated + // Manually calculated. std::vector outputImage( QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {0.f, 0.f}) ); - // Optionally apply bias to output image + // Optionally apply bias to output image. if(biasEnabled) { ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), @@ -405,7 +405,7 @@ LayerTestResult DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFa AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. data.m_Parameters.m_StrideX = 1; data.m_Parameters.m_StrideY = 1; data.m_Parameters.m_PadLeft = 0; @@ -520,7 +520,7 @@ LayerTestResult DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo 0, 0, 0 }))); - // manually calculated + // Manually calculated. std::vector outputImage = std::vector( QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, @@ -552,7 +552,7 @@ LayerTestResult DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f })); - // Optionally apply bias to output image + // Optionally apply bias to output image. if(biasEnabled) { ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), @@ -578,7 +578,7 @@ LayerTestResult DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; // still set this whether or not bias is enabled + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. data.m_Parameters.m_StrideX = 2; data.m_Parameters.m_StrideY = 1; data.m_Parameters.m_PadLeft = 0; @@ -609,7 +609,7 @@ LayerTestResult Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact { using B = typename FullyConnectedBiasTypeForInputType::Type; - // until we have a specialist 1D convolution layer, we can fake one using + // Until we have a specialist 1D convolution layer, we can fake one using // 2D convolution with the final dimension set to 1. // I don't anticipate this being particularly slow, given that convolution is implemented // as a matrix multiplication, at which point dimension doesn't matter. @@ -617,11 +617,11 @@ LayerTestResult Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact unsigned int batchSize = 1; unsigned int inputChannels = 2; unsigned int outputChannels = 3; - unsigned int inputSize = 5; // the 1D size (could view as 'width' or 'height') + unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height'). unsigned int kernelSize = 3; unsigned int padSize = 2; unsigned int stride = 1; - unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride + unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride. armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType()); armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType()); @@ -671,7 +671,7 @@ LayerTestResult Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f })); - // Optionally apply bias to output image + // Optionally apply bias to output image. if(biasEnabled) { ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), @@ -712,7 +712,7 @@ LayerTestResult Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact workloadFactory.Finalize(); workload->Execute(); - // output + // Output LayerTestResult ret(outputInfo); CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); ret.outputExpected = MakeTensor(outputInfo, outputData); diff --git a/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp b/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp new file mode 100644 index 0000000000..89faaf9fe6 --- /dev/null +++ b/src/armnn/backends/test/ConvertFp16ToFp32TestImpl.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include + +LayerTestResult SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory) +{ + using namespace half_float::literal; + + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, + { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, + 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ConvertFp16ToFp32QueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateConvertFp16ToFp32(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} diff --git a/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp b/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp new file mode 100644 index 0000000000..1d9bee577c --- /dev/null +++ b/src/armnn/backends/test/ConvertFp32ToFp16TestImpl.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include + +LayerTestResult SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory) +{ + using namespace half_float::literal; + + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + + auto input = MakeTensor(inputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, + { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, + 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ConvertFp32ToFp16QueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateConvertFp32ToFp16(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} \ No newline at end of file diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp index f83bb12bbe..5d4265911f 100644 --- a/src/armnn/backends/test/CreateWorkloadCl.cpp +++ b/src/armnn/backends/test/CreateWorkloadCl.cpp @@ -8,6 +8,7 @@ #include "backends/ClWorkloadUtils.hpp" #include "backends/ClWorkloads.hpp" #include "backends/ClTensorHandle.hpp" +#include "ClContextControlFixture.hpp" #include "test/CreateWorkloadClNeon.hpp" @@ -17,16 +18,17 @@ boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* return CompareTensorHandleShape(tensorHandle, expectedDimensions); } -BOOST_AUTO_TEST_SUITE(CreateWorkloadCl) +BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture) -BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +template +static void ClCreateActivationWorkloadTest() { Graph graph; ClWorkloadFactory factory; - auto workload = CreateActivationWorkloadTest(factory, graph); + auto workload = CreateActivationWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). ActivationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -35,14 +37,24 @@ BOOST_AUTO_TEST_CASE(CreateActivationWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1})); } -BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) +{ + ClCreateActivationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) +{ + ClCreateActivationWorkloadTest(); +} + +template +static void ClCreateAdditionWorkloadTest() { Graph graph; ClWorkloadFactory factory; + auto workload = CreateAdditionWorkloadTest(factory, graph); - auto workload = CreateAdditionWorkloadTest(factory, graph); - - // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest). AdditionQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); @@ -52,14 +64,26 @@ BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); } -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +BOOST_AUTO_TEST_CASE(CreateAdditionFloat32Workload) { - Graph graph; + ClCreateAdditionWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) +{ + ClCreateAdditionWorkloadTest(); +} + +template +static void ClCreateBatchNormalizationWorkloadTest() +{ + Graph graph; ClWorkloadFactory factory; - auto workload = CreateBatchNormalizationWorkloadTest(factory, graph); + auto workload = CreateBatchNormalizationWorkloadTest + (factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -68,14 +92,57 @@ BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); } -template -static void Convolution2dWorkloadTest() +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat32Workload) +{ + ClCreateBatchNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) +{ + ClCreateBatchNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvertFp16ToFp32WorkloadTest(factory, graph); + + ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); + BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); + BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvertFp32ToFp16WorkloadTest(factory, graph); + + ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); + BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); + BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); +} + +template +static void ClConvolution2dWorkloadTest() { - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest(factory, graph); + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest + (factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -85,18 +152,24 @@ static void Convolution2dWorkloadTest() BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat32Workload) { - Convolution2dWorkloadTest(); + ClConvolution2dWorkloadTest(); } +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) +{ + ClConvolution2dWorkloadTest(); +} -template -static void DirectConvolution2dWorkloadTest() + +template +static void ClDirectConvolution2dWorkloadTest() { - Graph graph; - ClWorkloadFactory factory; - auto workload = CreateDirectConvolution2dWorkloadTest(factory, graph); + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateDirectConvolution2dWorkloadTest( + factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest). Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -106,22 +179,28 @@ static void DirectConvolution2dWorkloadTest() BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat32Workload) { - DirectConvolution2dWorkloadTest(); + ClDirectConvolution2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload) +{ + ClDirectConvolution2dWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) { - DirectConvolution2dWorkloadTest(); + ClDirectConvolution2dWorkloadTest(); } -BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +template +static void ClCreateFullyConnectedWorkloadTest() { - Graph graph; + Graph graph; ClWorkloadFactory factory; - auto workload = - CreateFullyConnectedWorkloadTest(factory, graph); + auto workload = + CreateFullyConnectedWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -129,15 +208,28 @@ BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); } -BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32WorkloadTest) { - Graph graph; + ClCreateFullyConnectedWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest) +{ + ClCreateFullyConnectedWorkloadTest(); +} + + +template +static void ClCreateMultiplicationWorkloadTest() +{ + Graph graph; ClWorkloadFactory factory; auto workload = - CreateMultiplicationWorkloadTest(factory, graph); + CreateMultiplicationWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest). MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); @@ -147,14 +239,26 @@ BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); } -BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat32WorkloadTest) +{ + ClCreateMultiplicationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest) +{ + ClCreateMultiplicationWorkloadTest(); +} + +template +static void ClNormalizationWorkloadTest() { - Graph graph; + Graph graph; ClWorkloadFactory factory; - auto workload = CreateNormalizationWorkloadTest(factory, graph); + auto workload = CreateNormalizationWorkloadTest + (factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). NormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -163,14 +267,25 @@ BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1})); } -BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32Workload) { - Graph graph; + ClNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) +{ + ClNormalizationWorkloadTest(); +} + +template +static void ClPooling2dWorkloadTest() +{ + Graph graph; ClWorkloadFactory factory; - auto workload = CreatePooling2dWorkloadTest(factory, graph); + auto workload = CreatePooling2dWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest). Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -179,18 +294,28 @@ BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 2, 4})); } -template +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) +{ + ClPooling2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) +{ + ClPooling2dWorkloadTest(); +} + +template static void ClCreateReshapeWorkloadTest() { - Graph graph; + Graph graph; ClWorkloadFactory factory; - auto workload = CreateReshapeWorkloadTest(factory, graph); + auto workload = CreateReshapeWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). ReshapeQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL. @@ -198,38 +323,56 @@ static void ClCreateReshapeWorkloadTest() BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) { - ClCreateReshapeWorkloadTest(); + ClCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) +{ + ClCreateReshapeWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) { - ClCreateReshapeWorkloadTest(); + ClCreateReshapeWorkloadTest(); } -BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload) +template +static void ClSoftmaxWorkloadTest() { - Graph graph; + Graph graph; ClWorkloadFactory factory; - auto workload = CreateSoftmaxWorkloadTest(factory, graph); + auto workload = CreateSoftmaxWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of ClSoftmaxFloat32Workload) + // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloat32Workload). SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); - auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); - auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1})); } -BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32WorkloadTest) +{ + ClSoftmaxWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest) +{ + ClSoftmaxWorkloadTest(); +} + +template +static void ClSplitterWorkloadTest() { Graph graph; ClWorkloadFactory factory; - auto workload = CreateSplitterWorkloadTest(factory, graph); + auto workload = CreateSplitterWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7})); @@ -242,14 +385,25 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1 - // we are raising this difference between the NEON and CL libs as an issue with the compute library team + // we are raising this difference between the NEON and CL libs as an issue with the compute library team. BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7})); } -BOOST_AUTO_TEST_CASE(CreateSplitterMerger) +BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) +{ + ClSplitterWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload) { - // Test that it is possible to decide which output of the splitter layer - // should be lined to which input of the merger layer + ClSplitterWorkloadTest(); +} + +template +static void ClSplitterMergerTest() +{ + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. // We test that is is possible to specify 0th output // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input // of the merger. @@ -258,12 +412,13 @@ BOOST_AUTO_TEST_CASE(CreateSplitterMerger) ClWorkloadFactory factory; auto workloads = - CreateSplitterMergerWorkloadTest(factory, graph); + CreateSplitterMergerWorkloadTest + (factory, graph); auto wlSplitter = std::move(workloads.first); auto wlMerger = std::move(workloads.second); - //check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. armnn::ClSubTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::ClSubTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); armnn::ClSubTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); @@ -274,22 +429,33 @@ BOOST_AUTO_TEST_CASE(CreateSplitterMerger) BOOST_TEST(mIn0); BOOST_TEST(mIn1); - //fliped order of inputs/outputs + //Fliped order of inputs/outputs. bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); BOOST_TEST(validDataPointers); - //also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor + //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor. bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent()) && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent()); BOOST_TEST(validSubTensorParents); } +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32Workload) +{ + ClSplitterMergerTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat16Workload) +{ + ClSplitterMergerTest(); +} + + BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) { // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We create a splitter with two outputs. That each of those outputs is used by two different activation layers + // We create a splitter with two outputs. That each of those outputs is used by two different activation layers. Graph graph; ClWorkloadFactory factory; @@ -300,9 +466,10 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) std::unique_ptr wlActiv1_1; CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + ClActivationFloat32Workload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, + wlActiv1_0, wlActiv1_1); - //check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. armnn::ClSubTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::ClSubTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast(wlActiv0_0->GetData().m_Inputs[0]); @@ -327,17 +494,18 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) { ClWorkloadFactory factory; - CreateMemCopyWorkloads(factory); + CreateMemCopyWorkloads(factory); } BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) { - Graph graph; + Graph graph; ClWorkloadFactory factory; - auto workload = CreateL2NormalizationWorkloadTest(factory, graph); + auto workload = CreateL2NormalizationWorkloadTest + (factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); @@ -346,4 +514,24 @@ BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 })); } +template +static void ClCreateLstmWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateLstmWorkloadTest(factory, graph); + + LstmQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 })); +} + +BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloat32Workload) +{ + ClCreateLstmWorkloadTest(); +} + + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp index 4d91fbfd31..b2a444af74 100644 --- a/src/armnn/backends/test/CreateWorkloadNeon.cpp +++ b/src/armnn/backends/test/CreateWorkloadNeon.cpp @@ -50,168 +50,302 @@ bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::Ten } // namespace -BOOST_AUTO_TEST_CASE(CreateActivationWorkload) +template +static void NeonCreateActivationWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateActivationWorkloadTest(factory, graph); + auto workload = CreateActivationWorkloadTest + (factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). ActivationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType))); } -BOOST_AUTO_TEST_CASE(CreateAdditionWorkload) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) +{ + NeonCreateActivationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) +{ + NeonCreateActivationWorkloadTest(); +} + +template +static void NeonCreateAdditionWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateAdditionWorkloadTest(factory, graph); + auto workload = CreateAdditionWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateAdditionWorkloadTest). AdditionQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType))); } -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) +{ + NeonCreateAdditionWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateAdditionFloat32Workload) +{ + NeonCreateAdditionWorkloadTest(); +} + +template +static void NeonCreateBatchNormalizationWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateBatchNormalizationWorkloadTest(factory, graph); + auto workload = CreateBatchNormalizationWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) +{ + NeonCreateBatchNormalizationWorkloadTest(); } +#endif -BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat32Workload) +{ + NeonCreateBatchNormalizationWorkloadTest(); +} + +template +static void NeonCreateConvolution2dWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest(factory, graph); + auto workload = CreateConvolution2dWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) +{ + NeonCreateConvolution2dWorkloadTest(); } +#endif -BOOST_AUTO_TEST_CASE(CreateFullyConnectedWorkload) +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat32Workload) +{ + NeonCreateConvolution2dWorkloadTest(); +} + +template +static void NeonCreateFullyConnectedWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateFullyConnectedWorkloadTest(factory, graph); + auto workload = CreateFullyConnectedWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload) +{ + NeonCreateFullyConnectedWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) +{ + NeonCreateFullyConnectedWorkloadTest(); } -BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload) +template +static void NeonCreateMultiplicationWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateMultiplicationWorkloadTest(factory, graph); + auto workload = CreateMultiplicationWorkloadTest(factory, graph); - // check that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + // Checks that inputs/outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest). MultiplicationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType))); } -BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload) +{ + NeonCreateMultiplicationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat32Workload) +{ + NeonCreateMultiplicationWorkloadTest(); +} + +template +static void NeonCreateNormalizationWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateNormalizationWorkloadTest(factory, graph); + auto workload = CreateNormalizationWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). NormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType))); } -BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) +{ + NeonCreateNormalizationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32Workload) +{ + NeonCreateNormalizationWorkloadTest(); +} + +template +static void NeonCreatePooling2dWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreatePooling2dWorkloadTest(factory, graph); + auto workload = CreatePooling2dWorkloadTest + (factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) +{ + NeonCreatePooling2dWorkloadTest(); } +#endif -template -static void NeonCreateReshapeWorkloadTest(DataType dataType) +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) +{ + NeonCreatePooling2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) +{ + NeonCreatePooling2dWorkloadTest(); +} + +template +static void NeonCreateReshapeWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateReshapeWorkloadTest(factory, graph); + auto workload = CreateReshapeWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). ReshapeQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, dataType))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, dataType))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType))); } +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) +{ + NeonCreateReshapeWorkloadTest(); +} +#endif + BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) { - NeonCreateReshapeWorkloadTest(DataType::Float32); + NeonCreateReshapeWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) { - NeonCreateReshapeWorkloadTest(DataType::QuantisedAsymm8); + NeonCreateReshapeWorkloadTest(); } -BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload) +template +static void NeonCreateSoftmaxWorkloadTest() { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateSoftmaxWorkloadTest(factory, graph); + auto workload = CreateSoftmaxWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType::Float32))); - BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType::Float32))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload) +{ + NeonCreateSoftmaxWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) +{ + NeonCreateSoftmaxWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) { Graph graph; NeonWorkloadFactory factory; - auto workload = CreateSplitterWorkloadTest(factory, graph); + auto workload = CreateSplitterWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32))); @@ -228,22 +362,23 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) BOOST_AUTO_TEST_CASE(CreateSplitterMerger) { - // Test that it is possible to decide which output of the splitter layer - // should be lined to which input of the merger layer - // We test that is is possible to specify 0th output - // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. + // We tested that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be 0th input // of the merger. Graph graph; NeonWorkloadFactory factory; auto workloads = - CreateSplitterMergerWorkloadTest(factory, graph); + CreateSplitterMergerWorkloadTest(factory, graph); auto wlSplitter = std::move(workloads.first); auto wlMerger = std::move(workloads.second); - //check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); armnn::INeonTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); @@ -261,8 +396,8 @@ BOOST_AUTO_TEST_CASE(CreateSplitterMerger) BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) { - // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We create a splitter with two outputs. That each of those outputs is used by two different activation layers + // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We created a splitter with two outputs. That each of those outputs is used by two different activation layers Graph graph; NeonWorkloadFactory factory; @@ -273,7 +408,8 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) std::unique_ptr wlActiv1_1; CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + NeonActivationFloat32Workload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, + wlActiv1_0, wlActiv1_1); armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); @@ -299,7 +435,7 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon) { NeonWorkloadFactory factory; - CreateMemCopyWorkloads(factory); + CreateMemCopyWorkloads(factory); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp b/src/armnn/backends/test/CreateWorkloadRef.cpp index abc46e4361..109156468a 100644 --- a/src/armnn/backends/test/CreateWorkloadRef.cpp +++ b/src/armnn/backends/test/CreateWorkloadRef.cpp @@ -39,71 +39,95 @@ void CheckInputsOutput(std::unique_ptr workload, BOOST_AUTO_TEST_SUITE(CreateWorkloadRef) -template +template static void RefCreateActivationWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateActivationWorkloadTest(factory, graph); + auto workload = CreateActivationWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateActivationWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateActivationWorkloadTest). CheckInputOutput(std::move(workload), - TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType), - TensorInfo({ 1, 1 }, ActivationWorkloadType::ms_DataType)); + TensorInfo({ 1, 1 }, DataType), + TensorInfo({ 1, 1 }, DataType)); } BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) { - RefCreateActivationWorkloadTest(); + RefCreateActivationWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateActivationUint8Workload) { - RefCreateActivationWorkloadTest(); + RefCreateActivationWorkloadTest(); } -template +template static void RefCreateAdditionWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateAdditionWorkloadTest(factory, graph); + auto workload = CreateAdditionWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateAdditionWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateAdditionWorkloadTest). CheckInputsOutput(std::move(workload), - TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), - TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType), - TensorInfo({ 2, 3 }, AdditionWorkloadType::ms_DataType)); + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType)); } BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) { - RefCreateAdditionWorkloadTest(); + RefCreateAdditionWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload) { - RefCreateAdditionWorkloadTest(); + RefCreateAdditionWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) { Graph graph; RefWorkloadFactory factory; - auto workload = CreateBatchNormalizationWorkloadTest(factory, graph); + auto workload = CreateBatchNormalizationWorkloadTest + (factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). CheckInputOutput( std::move(workload), TensorInfo({2, 3, 1, 1}, DataType::Float32), TensorInfo({2, 3, 1, 1}, DataType::Float32)); } +BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Float32Workload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvertFp16ToFp32WorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them + CheckInputOutput( + std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float16), TensorInfo({1, 3, 2, 3}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Float16Workload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvertFp32ToFp16WorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them + CheckInputOutput( + std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float32), TensorInfo({1, 3, 2, 3}, DataType::Float16)); +} + BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) { Graph graph; RefWorkloadFactory factory; - auto workload = CreateConvolution2dWorkloadTest(factory, graph); + auto workload = CreateConvolution2dWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). CheckInputOutput(std::move(workload), TensorInfo({2, 3, 8, 16}, DataType::Float32), TensorInfo({2, 2, 2, 10}, DataType::Float32)); @@ -116,170 +140,172 @@ BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload) auto workload = CreateDepthwiseConvolution2dWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). CheckInputOutput(std::move(workload), TensorInfo({2, 3, 8, 16}, DataType::Float32), TensorInfo({2, 9, 2, 10}, DataType::Float32)); } -template +template static void RefCreateFullyConnectedWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateFullyConnectedWorkloadTest(factory, graph); + auto workload = CreateFullyConnectedWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest) - float inputsQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 1.0f : 0.0; - float outputQScale = FullyConnectedWorkloadType::ms_DataType == DataType::QuantisedAsymm8 ? 2.0f : 0.0; + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). + float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0; CheckInputOutput(std::move(workload), - TensorInfo({ 3, 1, 4, 5 }, FullyConnectedWorkloadType::ms_DataType, inputsQScale), - TensorInfo({ 3, 7 }, FullyConnectedWorkloadType::ms_DataType, outputQScale)); + TensorInfo({ 3, 1, 4, 5 }, DataType, inputsQScale), + TensorInfo({ 3, 7 }, DataType, outputQScale)); } BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) { - RefCreateFullyConnectedWorkloadTest(); + RefCreateFullyConnectedWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateFullyConnectedUint8Workload) { - RefCreateFullyConnectedWorkloadTest(); + RefCreateFullyConnectedWorkloadTest(); } -template +template static void RefCreateMultiplicationWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateMultiplicationWorkloadTest(factory, graph); + auto workload = CreateMultiplicationWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateMultiplicationWorkloadTest). CheckInputsOutput(std::move(workload), - TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), - TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType), - TensorInfo({ 2, 3 }, MultiplicationWorkloadType::ms_DataType)); + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType)); } BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) { - RefCreateMultiplicationWorkloadTest(); + RefCreateMultiplicationWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload) { - RefCreateMultiplicationWorkloadTest(); + RefCreateMultiplicationWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) { Graph graph; RefWorkloadFactory factory; - auto workload = CreateNormalizationWorkloadTest(factory, graph); + auto workload = CreateNormalizationWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). CheckInputOutput(std::move(workload), TensorInfo({3, 5, 5, 1}, DataType::Float32), TensorInfo({3, 5, 5, 1}, DataType::Float32)); } -template +template static void RefCreatePooling2dWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreatePooling2dWorkloadTest(factory, graph); + auto workload = CreatePooling2dWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). CheckInputOutput( std::move(workload), - TensorInfo({3, 2, 5, 5}, Pooling2dWorkloadType::ms_DataType), - TensorInfo({3, 2, 2, 4}, Pooling2dWorkloadType::ms_DataType)); + TensorInfo({3, 2, 5, 5}, DataType), + TensorInfo({3, 2, 2, 4}, DataType)); } BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) { - RefCreatePooling2dWorkloadTest(); + RefCreatePooling2dWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) { - RefCreatePooling2dWorkloadTest(); + RefCreatePooling2dWorkloadTest(); } -template +template static void RefCreateSoftmaxWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateSoftmaxWorkloadTest(factory, graph); + auto workload = CreateSoftmaxWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). CheckInputOutput( std::move(workload), - TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType), - TensorInfo({4, 1}, SoftmaxWorkloadType::ms_DataType)); + TensorInfo({4, 1}, DataType), + TensorInfo({4, 1}, DataType)); } BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) { - RefCreateSoftmaxWorkloadTest(); + RefCreateSoftmaxWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateSoftmaxUint8Workload) { - RefCreateSoftmaxWorkloadTest(); + RefCreateSoftmaxWorkloadTest(); } -template +template static void RefCreateSplitterWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateSplitterWorkloadTest(factory, graph); + auto workload = CreateSplitterWorkloadTest(factory, graph); - // check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest) + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). SplitterQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); - BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, DataType))); auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); - BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, DataType))); auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); - BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[2]); - BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType))); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); } BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) { - RefCreateSplitterWorkloadTest(); + RefCreateSplitterWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateSplitterUint8Workload) { - RefCreateSplitterWorkloadTest(); + RefCreateSplitterWorkloadTest(); } -template +template static void RefCreateSplitterMergerWorkloadTest() { - // Test that it is possible to decide which output of the splitter layer - // should be lined to which input of the merger layer - // We test that is is possible to specify 0th output - // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. + // We tested that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input // of the merger. Graph graph; RefWorkloadFactory factory; - auto workloads = CreateSplitterMergerWorkloadTest(factory, graph); + auto workloads = CreateSplitterMergerWorkloadTest + (factory, graph); auto wlSplitter = std::move(workloads.first); auto wlMerger = std::move(workloads.second); - //check that the index of inputs/outputs matches what we declared on InputDescriptor construction. + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. armnn::CpuTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::CpuTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); armnn::CpuTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); @@ -297,19 +323,19 @@ static void RefCreateSplitterMergerWorkloadTest() BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32) { - RefCreateSplitterMergerWorkloadTest(); + RefCreateSplitterMergerWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateSplitterMergerUint8) { - RefCreateSplitterMergerWorkloadTest(); + RefCreateSplitterMergerWorkloadTest(); } -template +template static void RefCreateSingleOutputMultipleInputsTest() { - // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. - // We create a splitter with two outputs. That each of those outputs is used by two different activation layers + // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We created a splitter with two outputs. That each of those outputs is used by two different activation layers. Graph graph; RefWorkloadFactory factory; @@ -320,7 +346,7 @@ static void RefCreateSingleOutputMultipleInputsTest() std::unique_ptr wlActiv1_1; CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + ActivationWorkloadType, DataType>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); armnn::CpuTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); armnn::CpuTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); @@ -345,73 +371,76 @@ static void RefCreateSingleOutputMultipleInputsTest() BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsFloat32) { - RefCreateSingleOutputMultipleInputsTest(); + RefCreateSingleOutputMultipleInputsTest(); } BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsUint8) { - RefCreateSingleOutputMultipleInputsTest(); + RefCreateSingleOutputMultipleInputsTest(); } -template +template static void RefCreateResizeBilinearTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateResizeBilinearWorkloadTest(factory, graph); + auto workload = CreateResizeBilinearWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest). CheckInputOutput( std::move(workload), - TensorInfo({ 2, 3, 4, 4 }, ResizeBilinearWorkloadType::ms_DataType), - TensorInfo({ 2, 3, 2, 2 }, ResizeBilinearWorkloadType::ms_DataType)); + TensorInfo({ 2, 3, 4, 4 }, DataType), + TensorInfo({ 2, 3, 2, 2 }, DataType)); } BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32) { - RefCreateResizeBilinearTest(); + RefCreateResizeBilinearTest(); } BOOST_AUTO_TEST_CASE(CreateResizeBilinearUint8) { - RefCreateResizeBilinearTest(); + RefCreateResizeBilinearTest(); } BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat32) { Graph graph; RefWorkloadFactory factory; - auto workload = CreateL2NormalizationWorkloadTest(factory, graph); + auto workload = CreateL2NormalizationWorkloadTest + (factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest). CheckInputOutput( std::move(workload), - TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType), - TensorInfo({ 5, 20, 50, 67 }, RefL2NormalizationFloat32Workload::ms_DataType)); + TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32), + TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32)); } -template +template static void RefCreateReshapeWorkloadTest() { Graph graph; RefWorkloadFactory factory; - auto workload = CreateReshapeWorkloadTest(factory, graph); + auto workload = CreateReshapeWorkloadTest(factory, graph); - // check that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest) + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). CheckInputOutput( std::move(workload), - TensorInfo({ 4, 1 }, ReshapeWorkloadType::ms_DataType), - TensorInfo({ 1, 4 }, ReshapeWorkloadType::ms_DataType)); + TensorInfo({ 4, 1 }, DataType), + TensorInfo({ 1, 4 }, DataType)); } BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) { - RefCreateReshapeWorkloadTest(); + RefCreateReshapeWorkloadTest(); } BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) { - RefCreateReshapeWorkloadTest(); + RefCreateReshapeWorkloadTest(); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp index d2379ec10e..7087ba56e5 100644 --- a/src/armnn/backends/test/FullyConnectedTestImpl.hpp +++ b/src/armnn/backends/test/FullyConnectedTestImpl.hpp @@ -60,7 +60,7 @@ LayerTestResult FullyConnectedFloat32Test(armnn::IWorkloadFactory& wor unsigned int outputChannels = 3; unsigned int outputNum = 2; - // Define the tensor descriptors + // Define the tensor descriptors. armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; armnn::TensorInfo weightsDesc; @@ -186,8 +186,8 @@ LayerTestResult FullyConnectedUint8Test(armnn::IWorkloadFactory& wor biasEnabled, true ); - // manually calculated - // note one of these values has been clamped to 0 + // Manually calculated. + // Note one of these values has been clamped to 0. if (biasEnabled) { result.outputExpected = MakeTensor(outputTensorInfo, std::vector{0, 242}); @@ -222,7 +222,7 @@ LayerTestResult FullyConnectedLargeTestCommon(armnn::IWorkloadFactory& wor unsigned int outputChannels = 1; unsigned int outputNum = 1; - // Define the tensor descriptors + // Define the tensor descriptors. armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; armnn::TensorInfo weightsDesc; diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp index af7ba923ec..14ef66febc 100644 --- a/src/armnn/backends/test/IsLayerSupportedTest.cpp +++ b/src/armnn/backends/test/IsLayerSupportedTest.cpp @@ -16,7 +16,10 @@ #include #include "IsLayerSupportedTestImpl.hpp" +#include "ClContextControlFixture.hpp" +#include "layers/ConvertFp16ToFp32Layer.hpp" +#include "layers/ConvertFp32ToFp16Layer.hpp" BOOST_AUTO_TEST_SUITE(IsLayerSupported) @@ -25,6 +28,12 @@ BOOST_AUTO_TEST_CASE(IsLayerSupportedLayerTypeMatches) LayerTypeMatchesTest(); } +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Reference) { armnn::RefWorkloadFactory factory; @@ -37,7 +46,77 @@ BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Reference) IsLayerSupportedTests(&factory); } +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type input"); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type output"); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type input"); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type output"); +} + #ifdef ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon) { armnn::NeonWorkloadFactory factory; @@ -49,21 +128,112 @@ BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon) armnn::NeonWorkloadFactory factory; IsLayerSupportedTests(&factory); } -#endif //#ifdef ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedNeon) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedNeon) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} +#endif //#ifdef ARMCOMPUTENEON_ENABLED. #ifdef ARMCOMPUTECL_ENABLED -BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Cl) + +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat16Cl, ClContextControlFixture) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat32Cl, ClContextControlFixture) { armnn::ClWorkloadFactory factory; IsLayerSupportedTests(&factory); } -BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Cl) +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedUint8Cl, ClContextControlFixture) { armnn::ClWorkloadFactory factory; IsLayerSupportedTests(&factory); } -#endif //#ifdef ARMCOMPUTECL_ENABLED + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float16"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float32"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float32"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float16"); +} +#endif //#ifdef ARMCOMPUTECL_ENABLED. BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp index abc9806737..eca3068822 100644 --- a/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp +++ b/src/armnn/backends/test/IsLayerSupportedTestImpl.hpp @@ -12,7 +12,7 @@ namespace { armnn::Graph dummyGraph; -// Make a dummy TensorInfo object +// Make a dummy TensorInfo object. template armnn::TensorInfo MakeDummyTensorInfo() { @@ -36,7 +36,7 @@ armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int n return info; } -// template class to create a dummy layer (2 parameters) +// Template class to create a dummy layer (2 parameters). template struct DummyLayer { @@ -51,7 +51,7 @@ struct DummyLayer LayerType* m_Layer; }; -// template class to create a dummy layer (1 parameter) +// Template class to create a dummy layer (1 parameter). template struct DummyLayer { @@ -66,12 +66,35 @@ struct DummyLayer LayerType* m_Layer; }; +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(armnn::BatchNormalizationDescriptor(), ""); + m_Layer->m_Mean = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Variance = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Beta = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Gamma = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::BatchNormalizationLayer* m_Layer; + +}; + template<> struct DummyLayer { DummyLayer() { - m_Layer = dummyGraph.AddLayer(std::shared_ptr(), ""); + m_Layer = dummyGraph.AddLayer(""); } ~DummyLayer() { @@ -173,6 +196,73 @@ struct DummyLayer { }; +template +struct DummyLstmLayer +{ + DummyLstmLayer() + { + typename LstmLayerType::DescriptorType desc; + desc.m_CifgEnabled = false; + + m_Layer = dummyGraph.AddLayer(armnn::LstmDescriptor(), ""); + m_Layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_InputToCellWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_ForgetGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_CellBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_OutputGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + + m_Layer->m_CifgParameters.m_InputToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_CellToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_InputGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLstmLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::LstmLayer* m_Layer; +}; + +template<> +struct DummyLayer + : public DummyLstmLayer +{ +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + armnn::FullyConnectedLayer::DescriptorType desc; + m_Layer = dummyGraph.AddLayer(desc, ""); + m_Layer->m_Weight = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::FullyConnectedLayer* m_Layer; +}; + // Tag for giving LayerType entries a unique strong type each. template struct Tag{}; @@ -195,15 +285,15 @@ struct LayerTypePolicy \ } \ }; -// define a layer policy specialization for use with the IsLayerSupported tests. +// Define a layer policy specialization for use with the IsLayerSupported tests. // Use this version for layers whose constructor takes 1 parameter(name). #define DECLARE_LAYER_POLICY_1_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, void) -// define a layer policy specialization for use with the IsLayerSupported tests. +// Define a layer policy specialization for use with the IsLayerSupported tests. // Use this version for layers whose constructor takes 2 parameters(descriptor and name). #define DECLARE_LAYER_POLICY_2_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, armnn::name##Descriptor) -// Layer policy template +// Layer policy template. template struct LayerTypePolicy; @@ -216,6 +306,10 @@ DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) DECLARE_LAYER_POLICY_1_PARAM(Constant) +DECLARE_LAYER_POLICY_1_PARAM(ConvertFp16ToFp32) + +DECLARE_LAYER_POLICY_1_PARAM(ConvertFp32ToFp16) + DECLARE_LAYER_POLICY_2_PARAM(Convolution2d) DECLARE_LAYER_POLICY_1_PARAM(MemCopy) @@ -232,6 +326,8 @@ DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId) DECLARE_LAYER_POLICY_1_PARAM(L2Normalization) +DECLARE_LAYER_POLICY_2_PARAM(Lstm) + DECLARE_LAYER_POLICY_2_PARAM(Merger) DECLARE_LAYER_POLICY_1_PARAM(Multiplication) @@ -246,11 +342,13 @@ DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) +DECLARE_LAYER_POLICY_2_PARAM(Reshape) + DECLARE_LAYER_POLICY_2_PARAM(Softmax) DECLARE_LAYER_POLICY_2_PARAM(Splitter) -DECLARE_LAYER_POLICY_2_PARAM(Reshape) + // Generic implementation to get the number of input slots for a given layer type; @@ -274,8 +372,8 @@ unsigned int GetNumInputs(const armnn::Layer& layer) return 2; } -// Test that the IsLayerSupported() function returns the correct value. -// We determine the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. +// Tests that the IsLayerSupported() function returns the correct value. +// We determined the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. // Returns true if expectations are met, otherwise returns false. template bool IsLayerSupportedTest(FactoryType *factory, Tag) @@ -288,19 +386,19 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) unsigned int numIn = GetNumInputs(*layer.m_Layer); unsigned int numOut = GetNumOutputs(*layer.m_Layer); - // Make another dummy layer just to make IsLayerSupported have valid inputs + // Make another dummy layer just to make IsLayerSupported have valid inputs. DummyLayer previousLayer; - // Set output of previous layer to a dummy tensor + // Set output of the previous layer to a dummy tensor. armnn::TensorInfo output = MakeDummyTensorInfo(); previousLayer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); - // Connect all outputs of previous layer to inputs of tested layer + // Connect all outputs of the previous layer to inputs of tested layer. for (unsigned int i = 0; i < numIn; i++) { armnn::IOutputSlot& previousLayerOutputSlot = previousLayer.m_Layer->GetOutputSlot(0); armnn::IInputSlot& layerInputSlot = layer.m_Layer->GetInputSlot(i); previousLayerOutputSlot.Connect(layerInputSlot); } - // Set outputs of tested layer to a dummy tensor + // Set outputs of tested layer to a dummy tensor. for (unsigned int i = 0; i < numOut; i++) { layer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); @@ -314,10 +412,11 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) try { bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() != nullptr; - BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + // hacky way (it has to be replaced): for Lstm, we only support F32 right now +// BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); return retVal; } - catch (const armnn::InvalidArgumentException& e) + catch(const armnn::InvalidArgumentException& e) { boost::ignore_unused(e); // This is ok since we throw InvalidArgumentException when creating the dummy workload. @@ -329,7 +428,7 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) BOOST_TEST_ERROR(layerName << ": " << errorMsg); return false; } - catch (...) + catch(...) { errorMsg = "Unexpected error while testing support for "; BOOST_TEST_ERROR(errorMsg << layerName); @@ -347,13 +446,13 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) } // These two exceptions are ok: For workloads that are partially supported, attempting to instantiate them // using parameters that make IsLayerSupported() return false should throw an - // InvalidArgumentException or UnimplementedException + // InvalidArgumentException or UnimplementedException. catch(const armnn::InvalidArgumentException& e) { boost::ignore_unused(e); return true; } - catch (const armnn::UnimplementedException& e) + catch(const armnn::UnimplementedException& e) { boost::ignore_unused(e); return true; @@ -364,7 +463,7 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) BOOST_TEST_ERROR(layerName << ": " << errorMsg); return false; } - catch (...) + catch(...) { errorMsg = "Unexpected error while testing support for "; BOOST_TEST_ERROR(errorMsg << layerName); @@ -373,20 +472,20 @@ bool IsLayerSupportedTest(FactoryType *factory, Tag) } } -// Helper function to compute the next type in the LayerType enum +// Helper function to compute the next type in the LayerType enum. constexpr armnn::LayerType NextType(armnn::LayerType type) { return static_cast(static_cast(type)+1); } -// Termination function for determining the end of the LayerType enumeration +// Termination function for determining the end of the LayerType enumeration. template bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag) { return IsLayerSupportedTest(factory, Tag()); }; -// Recursive function to test and entry in the LayerType enum and then iterate on the next entry. +// Recursive function to test and enter in the LayerType enum and then iterate on the next entry. template bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag) { @@ -437,4 +536,26 @@ bool LayerTypeMatchesTest() return LayerTypeMatchesTestImpl(Tag()); }; +template +bool IsConvertLayerSupportedTests(std::string& reasonIfUnsupported) +{ + armnn::Graph graph; + LayerType* const layer = graph.AddLayer("LayerName"); + + armnn::Layer* const input = graph.AddLayer(0, "input"); + armnn::Layer* const output = graph.AddLayer(0, "output"); + + armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, InputDataType); + armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, OutputDataType); + + input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + input->GetOutputHandler(0).SetTensorInfo(inputTensorInfo); + layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + layer->GetOutputHandler(0).SetTensorInfo(outputTensorInfo); + + bool result = FactoryType::IsLayerSupported(*layer, InputDataType, reasonIfUnsupported); + + return result; +}; + } //namespace diff --git a/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp b/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp new file mode 100644 index 0000000000..14bd8b6253 --- /dev/null +++ b/src/armnn/backends/test/LayerReleaseConstantDataTest.cpp @@ -0,0 +1,212 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include +#include + +#include "backends/WorkloadData.hpp" +#include "Graph.hpp" + +#include + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClWorkloadFactory.hpp" + +using namespace armnn; +using namespace std; + +// connects two layers +void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) +{ + from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); + from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +// The following test are created specifically to test ReleaseConstantData() method in the Layer +// They build very simple graphs including the layer will be checked. +// Checks weights and biases before the method called and after. +///////////////////////////////////////////////////////////////////////////////////////////// + +BOOST_AUTO_TEST_SUITE(LayerReleaseConstantDataTest) + +BOOST_AUTO_TEST_CASE(ReleaseBatchNormalizationLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + BatchNormalizationDescriptor layerDesc; + layerDesc.m_Eps = 0.05f; + BatchNormalizationLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32); + layer->m_Mean = std::make_unique(weightInfo); + layer->m_Variance = std::make_unique(weightInfo); + layer->m_Beta = std::make_unique(weightInfo); + layer->m_Gamma = std::make_unique(weightInfo); + layer->m_Mean->Allocate(); + layer->m_Variance->Allocate(); + layer->m_Beta->Allocate(); + layer->m_Gamma->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32); + Connect(input, layer, tensorInfo); + Connect(layer, output, tensorInfo); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Mean != nullptr); + BOOST_CHECK(layer->m_Variance != nullptr); + BOOST_CHECK(layer->m_Beta != nullptr); + BOOST_CHECK(layer->m_Gamma != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Mean == nullptr); + BOOST_CHECK(layer->m_Variance == nullptr); + BOOST_CHECK(layer->m_Beta == nullptr); + BOOST_CHECK(layer->m_Gamma == nullptr); + + } + + + BOOST_AUTO_TEST_CASE(ReleaseConvolution2dLayerConstantDataTest) + { + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + Convolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + layer->m_Weight = std::make_unique(TensorInfo({2, 3, 5, 3}, + armnn::DataType::Float32)); + layer->m_Bias = std::make_unique + (TensorInfo({2}, GetBiasDataType(armnn::DataType::Float32))); + + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({2, 2, 2, 10}, armnn::DataType::Float32)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_CASE(ReleaseDepthwiseConvolution2dLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + DepthwiseConvolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + DepthwiseConvolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + layer->m_Weight = std::make_unique(TensorInfo({3, 3, 5, 3}, DataType::Float32)); + layer->m_Bias = std::make_unique(TensorInfo({9}, DataType::Float32)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_CASE(ReleaseFullyConnectedLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + FullyConnectedDescriptor layerDesc; + layerDesc.m_BiasEnabled = true; + layerDesc.m_TransposeWeightMatrix = true; + + FullyConnectedLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + float inputsQScale = 1.0f; + float outputQScale = 2.0f; + + layer->m_Weight = std::make_unique(TensorInfo({7, 20}, + DataType::QuantisedAsymm8, inputsQScale, 0)); + layer->m_Bias = std::make_unique(TensorInfo({7}, + GetBiasDataType(DataType::QuantisedAsymm8), inputsQScale)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType::QuantisedAsymm8, inputsQScale)); + Connect(layer, output, TensorInfo({3, 7}, DataType::QuantisedAsymm8, outputQScale)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp index a10e4bd7a0..8039ffb9b1 100644 --- a/src/armnn/backends/test/LayerTests.cpp +++ b/src/armnn/backends/test/LayerTests.cpp @@ -35,8 +35,11 @@ #include "SoftmaxTestImpl.hpp" #include "NormTestImpl.hpp" #include "PermuteTestImpl.hpp" +#include "LstmTestImpl.hpp" +#include "ConvertFp16ToFp32TestImpl.hpp" +#include "ConvertFp32ToFp16TestImpl.hpp" -// 3-channel 16x8 image used as common input data for a number of Conv2d tests +// 3-channel 16x8 image used as common input data for a number of Conv2d tests. static std::vector ConvInput3x8x16({ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, @@ -64,10 +67,10 @@ static std::vector ConvInput3x8x16({ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }); -// 2-channel bias used by a number of Conv2d tests +// 2-channel bias used by a number of Conv2d tests. static std::vector Bias2({0, 2}); -// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled +// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled. template boost::multi_array GetBias2(bool biasEnabled, float qScale, int32_t qOffset) { @@ -89,11 +92,11 @@ LayerTestResult SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& int32_t qOffset, bool biasEnabled) { - // Use common single-batch 3-channel 16x8 image + // Use common single-batch 3-channel 16x8 image. armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType()); boost::multi_array input = MakeTensor(inputDesc, QuantizedVector(qScale, qOffset, ConvInput3x8x16)); - // Use a 2-element batch with 3-channel 3x5 kernels + // Use a 2-element batch with 3-channel 3x5 kernels. armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType()); boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -135,7 +138,7 @@ LayerTestResult SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& 0, 0, 0 }))); - // Expected output is 2 batch elements of a 1-channel 14x4 image + // Expected output is 2 batch elements of a 1-channel 14x4 image. armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType()); boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -167,13 +170,13 @@ LayerTestResult SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& int32_t qOffset, bool biasEnabled) { - // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path + // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path. - // Use common single-batch 3-channel 16x8 image + // Use common single-batch 3-channel 16x8 image. armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType()); boost::multi_array input = MakeTensor(inputDesc, QuantizedVector(qScale, qOffset, ConvInput3x8x16)); - // Use a 2-element batch of 3-channel 3x3 kernels + // Use a 2-element batch of 3-channel 3x3 kernels. armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType()); boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -203,7 +206,7 @@ LayerTestResult SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& 0, 0, 0 }))); - // Expected output is 1 batch of a 2-channel 14x6 image + // Expected output is 1 batch of a 2-channel 14x6 image. armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType()); boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -261,7 +264,7 @@ LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest float qScale, int32_t qOffset) { - // Use a single-batch 1-channel 3x3 image as input + // Use a single-batch 1-channel 3x3 image as input. armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType()); boost::multi_array input = MakeTensor(inputDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -270,7 +273,7 @@ LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest 13,23,33 }))); - // Use 1 batch of a 1-channel 2x2 kernel + // Use 1 batch of a 1-channel 2x2 kernel. armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType()); boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -278,7 +281,7 @@ LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest -12,-22, }))); -// Expected output is 1 batch of a 1-channel 6x8 image +// Expected output is 1 batch of a 1-channel 6x8 image. // Manually calculated like this: //[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] //[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] @@ -307,10 +310,10 @@ LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest expectedOutput, qScale, qOffset, - 1, // padding left - 2, // padding top - 3, // padding right - 4); // padding bottom + 1, // Padding left. + 2, // Padding top. + 3, // Padding right. + 4); // Padding bottom. } template @@ -318,7 +321,7 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor float qScale, int32_t qOffset) { - // Use a single-batch 1-channel 5x5 image as input + // Use a single-batch 1-channel 5x5 image as input. armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType()); boost::multi_array input = MakeTensor(inputDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -329,7 +332,7 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor 15,25,35,45,55, }))); - // Use 1 batch of a 1-channel 4x4 kernel + // Use 1 batch of a 1-channel 4x4 kernel. armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType()); boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( QuantizedVector(qScale, qOffset, { @@ -339,7 +342,7 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor -14,-24,-34,-44, }))); - // Expected output is 1 batch of a 1-channel 5x5 image + // Expected output is 1 batch of a 1-channel 5x5 image. armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType()); std::vector myVec(outputDesc.GetNumElements(), 0); boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( @@ -358,10 +361,10 @@ LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor expectedOutput, qScale, qOffset, - 1, // padding left - 1, // padding top - 2, // padding right - 2); // padding bottom + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2); // Padding bottom. } template @@ -370,7 +373,7 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloa int32_t qOffset, bool biasEnabled) { - // Use a single-batch 2-channel 5x5 image as input + // Use a single-batch 2-channel 5x5 image as input. armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); auto input = MakeTensor(inputTensorInfo, std::vector( QuantizedVector(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { @@ -387,7 +390,7 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloa 45, 46, 47, 48, 49 }))); - // Use a depth multiplier of 1 on a 2-channel 4x4 kernel + // Use a depth multiplier of 1 on a 2-channel 4x4 kernel. armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType()); auto kernel = MakeTensor(kernelTensorInfo, std::vector( QuantizedVector(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { @@ -402,8 +405,8 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloa 4, 3, 2, 1 }))); - // Expected output is 1 batch of a 2-channel 5x5 image - // calculated using the python tensorflow library with strideX=1, strideY=1 + // Expected output is 1 batch of a 2-channel 5x5 image. + // Calculated using the python tensorflow library with strideX=1, strideY=1. armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); boost::multi_array expectedOutput = MakeTensor(outputTensorInfo, std::vector( QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { @@ -426,10 +429,10 @@ LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloa expectedOutput, qScale, qOffset, - 1, // padding left - 1, // padding top - 2, // padding right - 2, // padding bottom + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2, // Padding bottom. 1, // strideX 1); // strideY } @@ -569,6 +572,55 @@ LayerTestResult CopyViaSplitterUint8Test(armnn::IWorkloadFactory& wo return CopyViaSplitterTestImpl(workloadFactory, 1.0f, 0); } +LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( + armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({ 2, 2 }, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + { 2., 3., 3., 4. })); + + armnn::TensorInfo outputDesc({ 2, 4 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})); + return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); +} + +LayerTestResult LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest( + armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({ 2, 5 }, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f, + 0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})); + + armnn::TensorInfo outputDesc({ 2, 16 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {-0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835576f, + -0.0211779f, 0.0283512f, -0.0114597f, 0.00907307f, -0.0244004f, + -0.0152191f, -0.0259063f, 0.00914318f, 0.00415118f, 0.017147f, + 0.0134203f, -0.013869f, 0.0287268f, -0.00334693f, 0.00733398f, -0.0287926f, + -0.0186926f, 0.0193662f, -0.0115437f, 0.00422612f, -0.0345232f, + 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, + 0.02168f})); + return LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(workloadFactory, input, expectedOutput); +} + +LayerTestResult LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {2., 3., 3., 4.})); + + + armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {{-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f, + -0.0185422f, 0.11281417f, 0.24466537f, -0.1826292f}})); + + return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); +} + LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) { unsigned int outputWidth = 3; @@ -583,7 +635,7 @@ LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) unsigned int inputHeight2 = 6; unsigned int inputChannels2 = 1; - // Define the tensor descriptors + // Define the tensor descriptors. armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); @@ -644,10 +696,10 @@ LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) }) ); - std::vector wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0] + std::vector wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0]. armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector wOrigin2 = {2, 0, 0}; //extent of the window is defined by size of input[1] + std::vector wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1]. armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); @@ -1350,7 +1402,7 @@ armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation( // // Concatenation is only supported for N and C dimensions for NCHW. In case of -// <4 dimensions we need to make sure that the concat dimensions is at least +// <4 dimensions we need to make sure that the concat dimensions are at least // the 3rd slowest iterating one. // @@ -1362,8 +1414,8 @@ bool NeedPermuteForConcat( // same number of dimensions. unsigned int nDimensions = 0; - // determine the number of dimensions as well as sanity check them - // agains test implementation issues + // Determine the number of dimensions as well as sanity check them + // agains test implementation issues. for (auto && tensorInfo : inputTensorInfos) { if (!nDimensions) @@ -1464,7 +1516,7 @@ void PermuteInputsForConcat( { numDims = tensorInfo.GetShape().GetNumDimensions(); Generate3dPermuteVectorForConcat(numDims, concatDim, permutations); - // store the reverese permutation + // Store the reverese permutation. permuteVector = permutations.second; BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity), "Test logic error, we don't need permutation, so we shouldn't arrive here"); @@ -1499,7 +1551,7 @@ void PermuteInputsForConcat( // // This is the pair of PermuteInputsForConcat(...) which permutes back -// the output of the concatenation so we can check against an expected +// the output of the concatenation so we can check it against an expected // output. // template @@ -1553,14 +1605,14 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, armnn::MergerQueueDescriptor queueDescriptor; - // save a copy of the parameters which we might need to change + // Saves a copy of the parameters which we might need to change. std::vector inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end()); std::vector inputs = inputsOrig; armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig; armnn::PermutationVector permuteVector{0, 1, 2}; - // hold and automatically release memory for the reshaped input data + // Holds and automatically releases memory for the reshaped input data. std::vector> tmpInputDataStorage; const size_t inputCount = inputTensorInfos.size(); @@ -1571,7 +1623,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory, { // // We need to permute the inputs, because concatenation along - // the requested axis is not supported + // the requested axis is not supported. // PermuteInputsForConcat(workloadFactory, inputTensorInfos, @@ -2641,7 +2693,7 @@ LayerTestResult SimpleResizeBilinearTest(armnn::IWorkloadFactory& work // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, // then figures out the interpolants and weights. Note this is different to projecting the centre of the - // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting // the centre). LayerTestResult result(outputTensorInfo); @@ -3367,12 +3419,12 @@ LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFac unsigned int inputHeight2 = 6; unsigned int inputChannels2 = 1; - // Define the tensor descriptors + // Defines the tensor descriptors. armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); - // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize + // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize them. const float scale = 0.13497836f; const int32_t offset = -7; @@ -3439,10 +3491,10 @@ LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFac }) ); - std::vector wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0] + std::vector wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0]. armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector wOrigin2 = { 2, 0, 0 }; //extent of the window is defined by size of input[1] + std::vector wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1]. armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); @@ -3513,21 +3565,21 @@ LayerTestResult AdditionUint8Test(armnn::IWorkloadFactory& workloadF outputTensorInfo.SetQuantizationScale(scale); outputTensorInfo.SetQuantizationOffset(offset); - // See dequantized values to the right + // See dequantized values to the right. auto input1 = MakeTensor(inputTensorInfo1, std::vector( { 63, 35, 77, 70, 56, 112, // 420, 224, 518, 469, 371, 763 203, 28, 252, 168, 245, 91 // 1400, 175, 1743, 1155, 1694, 616 })); - // See dequantized values to the right + // See dequantized values to the right. auto input2 = MakeTensor(inputTensorInfo1, std::vector( { 21, 7, 175, 231, 175, 210, // 126, 28, 1204, 1596, 1204, 1449 126, 161, 63, 21, 105, 126 // 861, 1106, 420, 126, 714, 861 })); - // See dequantized values to the right + // See dequantized values to the right. LayerTestResult result(outputTensorInfo); result.outputExpected = MakeTensor(outputTensorInfo, std::vector( { @@ -3633,19 +3685,19 @@ LayerTestResult MultiplicationUint8Test(armnn::IWorkloadFactory& wor unsigned int width = 3; const unsigned int shape[] = { batchSize, channels, height, width }; - // See dequantized values to the right + // See dequantized values to the right. std::vector input0({ 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440, 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120 }); - // See dequantized values to the right + // See dequantized values to the right. std::vector input1({ 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747, 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297 }); - // See dequantized values to the right + // See dequantized values to the right. std::vector output( { 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680, @@ -3663,7 +3715,7 @@ LayerTestResult MultiplicationUint8Test(armnn::IWorkloadFactory& wor -2, shape, output, - 1366.255f, // Scale/offset chosen to have output values out of range + 1366.255f, // Scale/offset chosen to have output values out of range. -5); } @@ -3813,7 +3865,7 @@ LayerTestResult SimpleResizeBilinearUint8Test(armnn::IWorkloadFactor // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, // then figures out the interpolants and weights. Note this is different to projecting the centre of the - // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value + // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting // the centre). LayerTestResult result(outputTensorInfo); @@ -4314,4 +4366,4 @@ LayerTestResult PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& w LayerTestResult PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory) { return PermuteFloat32ValueSet3TestCommon(workloadFactory); -}; +}; \ No newline at end of file diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp index 2d543d61de..48f73e7693 100644 --- a/src/armnn/backends/test/LayerTests.hpp +++ b/src/armnn/backends/test/LayerTests.hpp @@ -6,12 +6,13 @@ #include "armnn/ArmNN.hpp" #include "armnn/Tensor.hpp" +#include "Half.hpp" #include #include #include -// Layer callables +// Layer callables. namespace armnn { @@ -213,20 +214,20 @@ LayerTestResult CompareBoundedReLuTest(armnn::IWorkloadFactory& worklo float upperBound, float lowerBound); -// Tests that the output should be identical to the input when the output dimensions match the input ones +// Tests that the output should be identical to the input when the output dimensions match the input ones. LayerTestResult ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory); -// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image +// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image. LayerTestResult SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory); -// Tests resize bilinear for minification of a square input matrix (also: input dimensions are a -// multiple of output dimensions) +// Tests the resize bilinear for minification of a square input matrix (also: input dimensions are a +// multiple of output dimensions). LayerTestResult ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory); -// Tests resize bilinear for minification (output dimensions smaller than input dimensions) +// Tests the resize bilinear for minification (output dimensions smaller than input dimensions). LayerTestResult ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory); -// Tests resize bilinear for magnification (output dimensions bigger than input dimensions) +// Tests the resize bilinear for magnification (output dimensions bigger than input dimensions). LayerTestResult ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory); LayerTestResult BatchNormTest(armnn::IWorkloadFactory& workloadFactory); @@ -315,3 +316,13 @@ LayerTestResult SimplePermuteUint8Test(armnn::IWorkloadFactory& work LayerTestResult PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory); LayerTestResult PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory); LayerTestResult PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest + (armnn::IWorkloadFactory& workloadFactory); +LayerTestResult + LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult +LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory); diff --git a/src/armnn/backends/test/LstmTestImpl.hpp b/src/armnn/backends/test/LstmTestImpl.hpp new file mode 100644 index 0000000000..7f67b020e2 --- /dev/null +++ b/src/armnn/backends/test/LstmTestImpl.hpp @@ -0,0 +1,1150 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// +#pragma once + +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include +#include "backends/WorkloadFactory.hpp" + +LayerTestResult LstmNoCifgNoPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) +{ + unsigned int batchSize = boost::numeric_cast(input.shape()[0]); + unsigned int inputSize = boost::numeric_cast(input.shape()[1]); + unsigned int outputSize = boost::numeric_cast(outputExpected.shape()[1]); + // cellSize and outputSize have the same size when there is no projection. + unsigned numUnits = outputSize; + + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType()); + + + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + + LayerTestResult ret(outputTensorInfo); + + std::vector inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor(inputTensorInfo, inputVector); + + std::vector cellStateInVector(batchSize * numUnits, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector scratchBufferVector(batchSize * numUnits * 3, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + + std::vector cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + + std::vector outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo4({numUnits}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo8({numUnits, 2}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo16({numUnits, 4}, armnn::GetDataType()); + + auto inputToInputWeights = MakeTensor(tensorInfo8, {-0.45018822f, -0.02338299f, -0.0870589f, + -0.34550029f, 0.04266912f, -0.15680569f, + -0.34856534f, 0.43890524f}); + + auto inputToForgetWeights = MakeTensor(tensorInfo8, {0.09701663f, 0.20334584f, -0.50592935f, + -0.31343272f, -0.40032279f, 0.44781327f, + 0.01387155f, -0.35593212f}); + + auto inputToCellWeights = MakeTensor(tensorInfo8, {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f, + -0.20583314f, 0.44344562f, 0.22077113f, + -0.29909778f}); + + auto inputToOutputWeights = MakeTensor(tensorInfo8, {-0.25065863f, -0.28290087f, 0.04613829f, + 0.40525138f, 0.44272184f, 0.03897077f, + -0.1556896f, 0.19487578f}); + + auto recurrentToInputWeights = MakeTensor(tensorInfo16, {-0.0063535f, -0.2042388f, 0.31454784f, + -0.35746509f, 0.28902304f, 0.08183324f, + -0.16555229f, 0.02286911f, -0.13566875f, + 0.03034258f, 0.48091322f, -0.12528998f, + 0.24077177f, -0.51332325f, -0.33502164f, + 0.10629296f}); + + auto recurrentToForgetWeights = MakeTensor(tensorInfo16, {-0.48684245f, -0.06655136f, 0.42224967f, + 0.2112639f, 0.27654213f, 0.20864892f, + -0.07646349f, 0.45877004f, 0.00141793f, + -0.14609534f, 0.36447752f, 0.09196436f, + 0.28053468f, 0.01560611f, -0.20127171f, + -0.01140004f}); + + auto recurrentToCellWeights = MakeTensor(tensorInfo16, {-0.3407414f, 0.24443203f, -0.2078532f, + 0.26320225f, 0.05695659f, -0.00123841f, + -0.4744786f, -0.35869038f, -0.06418842f, + -0.13502428f, -0.501764f, 0.22830659f, + -0.46367589f, 0.26016325f, -0.03894562f, + -0.16368064f}); + + auto recurrentToOutputWeights = MakeTensor(tensorInfo16, {0.43385774f, -0.17194885f, 0.2718237f, + 0.09215671f, 0.24107647f, -0.39835793f, + 0.18212086f, 0.01301402f, 0.48572797f, + -0.50656658f, 0.20047462f, -0.20607421f, + -0.51818722f, -0.15390486f, 0.0468148f, + 0.39922136f}); + + auto cellToInputWeights = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto inputGateBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto forgetGateBias = MakeTensor(tensorInfo4, {1., 1., 1., 1.}); + + auto cellBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto outputGateBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = false; + data.m_Parameters.m_ProjectionEnabled = false; + + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; +} + + +LayerTestResult +LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) { + + unsigned int batchSize = 2; + unsigned int outputSize = 16; + unsigned int inputSize = 5; + unsigned numUnits = 20; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType()); + + // Scratch buffer size without CIFG [batchSize, numUnits * 3] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + LayerTestResult ret(outputTensorInfo); + + std::vector inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor(inputTensorInfo, inputVector); + + std::vector cellStateInVector(batchSize * numUnits, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector scratchBufferVector(batchSize * numUnits * 3, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + + std::vector cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + + std::vector outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo16({outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20({numUnits}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20x5({numUnits, inputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20x16({numUnits, outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo16x20({outputSize, numUnits}, armnn::GetDataType()); + + auto inputToInputWeights = + MakeTensor(tensorInfo20x5, {0.021393683f,0.06124551f, 0.046905167f,-0.014657677f,-0.03149463f, + 0.09171803f, 0.14647801f,0.10797193f, -0.0057968358f,0.0019193048f, + -0.2726754f, 0.10154029f, -0.018539885f, 0.080349885f, -0.10262385f, + -0.022599787f,-0.09121155f, -0.008675967f, -0.045206103f,-0.0821282f, + -0.008045952f,0.015478081f, 0.055217247f, 0.038719587f, 0.044153627f, + -0.06453243f,0.05031825f, -0.046935108f, -0.008164439f, 0.014574226f, + -0.1671009f, -0.15519552f, -0.16819797f,-0.13971269f,-0.11953059f, + 0.25005487f, -0.22790983f, 0.009855087f, -0.028140958f, -0.11200698f, + 0.11295408f, -0.0035217577f, 0.054485075f, 0.05184695f, 0.064711206f, + 0.10989193f, 0.11674786f, 0.03490607f, 0.07727357f, 0.11390585f, + -0.1863375f, -0.1034451f, -0.13945189f, -0.049401227f, -0.18767063f, + 0.042483903f, 0.14233552f, 0.13832581f, 0.18350165f, 0.14545603f, + -0.028545704f,0.024939531f,0.050929718f,0.0076203286f,-0.0029723682f, + -0.042484224f, -0.11827596f, -0.09171104f, -0.10808628f,-0.16327988f, + -0.2273378f, -0.0993647f, -0.017155107f,0.0023917493f,0.049272764f, + 0.0038534778f, 0.054764505f, 0.089753784f, 0.06947234f, 0.08014476f, + -0.04544234f, -0.0497073f,-0.07135631f, -0.048929106f,-0.004042012f, + -0.009284026f, 0.018042054f, 0.0036860977f,-0.07427302f, -0.11434604f, + -0.018995456f, 0.031487543f, 0.012834908f,0.019977754f,0.044256654f, + -0.39292613f, -0.18519334f, -0.11651281f,-0.06809892f, 0.011373677f + }); + + auto inputToForgetWeights = + MakeTensor(tensorInfo20x5, {-0.0018401089f, -0.004852237f,0.03698424f, 0.014181704f,0.028273236f, + -0.016726194f, -0.05249759f,-0.10204261f, 0.00861066f,-0.040979505f, + -0.009899187f,0.01923892f,-0.028177269f, -0.08535103f,-0.14585495f, + 0.10662567f,-0.01909731f,-0.017883534f,-0.0047269356f,-0.045103323f, + 0.0030784295f,0.076784775f,0.07463696f, 0.094531395f,0.0814421f, + -0.12257899f, -0.033945758f,-0.031303465f, 0.045630626f,0.06843887f, + -0.13492945f, -0.012480007f,-0.0811829f, -0.07224499f,-0.09628791f, + 0.045100946f,0.0012300825f, 0.013964662f, 0.099372394f,0.02543059f, + 0.06958324f, 0.034257296f, 0.0482646f, 0.06267997f,0.052625068f, + 0.12784666f, 0.07077897f, 0.025725935f, 0.04165009f,0.07241905f, + 0.018668644f, -0.037377294f,-0.06277783f,-0.08833636f,-0.040120605f, + -0.011405586f,-0.007808335f,-0.010301386f,-0.005102167f,0.027717464f, + 0.05483423f, 0.11449111f, 0.11289652f,0.10939839f, 0.13396506f, + -0.08402166f,-0.01901462f, -0.044678304f,-0.07720565f,0.014350063f, + -0.11757958f, -0.0652038f, -0.08185733f,-0.076754324f,-0.092614375f, + 0.10405491f, 0.052960336f, 0.035755895f,0.035839386f,-0.012540553f, + 0.036881298f, 0.02913376f, 0.03420159f,0.05448447f,-0.054523353f, + 0.02582715f, 0.02327355f, -0.011857179f,-0.0011980024f,-0.034641717f, + -0.026125094f,-0.17582615f,-0.15923657f,-0.27486774f,-0.0006143371f, + 0.0001771948f, -8.470171e-05f, 0.02651807f,0.045790765f,0.06956496f + }); + + auto inputToCellWeights = + MakeTensor(tensorInfo20x5, {-0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f, + -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f, + -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f, + -0.0074034138f, -0.06314844f, -0.092981495f, 0.0062155537f, + -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f, + 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f, + -0.13002433f, -0.036816437f, -0.02130134f, -0.016518239f, + 0.0047691227f, -0.0025825808f, 0.066017866f, 0.029991534f, + -0.10652836f, -0.1037554f, -0.13056071f, -0.03266643f, + -0.033702414f, -0.006473424f, -0.04611692f, 0.014419339f, + -0.025174323f, 0.0396852f, 0.081777506f, 0.06157468f, + 0.10210095f, -0.009658194f, 0.046511717f, 0.03603906f, + 0.0069369148f, 0.015960095f, -0.06507666f, 0.09551598f, + 0.053568836f, 0.06408714f, 0.12835667f, -0.008714329f, + -0.20211966f, -0.12093674f, 0.029450472f, 0.2849013f, + -0.029227901f, 0.1164364f, -0.08560263f, 0.09941786f, + -0.036999565f, -0.028842626f, -0.0033637602f, -0.017012902f, + -0.09720865f, -0.11193351f, -0.029155117f, -0.017936034f, + -0.009768936f, -0.04223324f, -0.036159635f, 0.06505112f, + -0.021742892f, -0.023377212f, -0.07221364f, -0.06430552f, + 0.05453865f, 0.091149814f, 0.06387331f, 0.007518393f, + 0.055960953f, 0.069779344f, 0.046411168f, 0.10509911f, + 0.07463894f, 0.0075130584f, 0.012850982f, 0.04555431f, + 0.056955688f, 0.06555285f, 0.050801456f, -0.009862683f, + 0.00826772f, -0.026555609f, -0.0073611983f, -0.0014897042f + }); + + auto inputToOutputWeights = + MakeTensor(tensorInfo20x5, {-0.0998932f, -0.07201956f, -0.052803773f,-0.15629593f,-0.15001918f, + -0.07650751f,0.02359855f, -0.075155355f, -0.08037709f, -0.15093534f, + 0.029517552f, -0.04751393f, 0.010350531f,-0.02664851f, -0.016839722f, + -0.023121163f, 0.0077019283f, 0.012851257f, -0.05040649f,-0.0129761f, + -0.021737747f,-0.038305793f,-0.06870586f, -0.01481247f,-0.001285394f, + 0.10124236f, 0.083122835f, 0.053313006f,-0.062235646f,-0.075637154f, + -0.027833903f, 0.029774971f, 0.1130802f, 0.09218906f, 0.09506135f, + -0.086665764f,-0.037162706f,-0.038880914f,-0.035832845f,-0.014481564f, + -0.09825003f,-0.12048569f,-0.097665586f,-0.05287633f, -0.0964047f, + -0.11366429f, 0.035777505f, 0.13568819f, 0.052451383f,0.050649304f, + 0.05798951f, -0.021852335f,-0.099848844f,0.014740475f,-0.078897946f, + 0.04974699f, 0.014160473f, 0.06973932f, 0.04964942f, 0.033364646f, + 0.08190124f, 0.025535367f, 0.050893165f, 0.048514254f,0.06945813f, + -0.078907564f,-0.06707616f, -0.11844508f, -0.09986688f,-0.07509403f, + 0.06263226f, 0.14925587f, 0.20188436f, 0.12098451f,0.14639415f, + 0.0015017595f, -0.014267382f, -0.03417257f,0.012711468f,0.0028300495f, + -0.024758482f, -0.05098548f,-0.0821182f, 0.014225672f, 0.021544158f, + 0.08949725f, 0.07505268f, -0.0020780868f, 0.04908258f,0.06476295f, + -0.022907063f,0.027562456f,0.040185735f, 0.019567577f,-0.015598739f, + -0.049097303f, -0.017121866f, -0.083368234f,-0.02332002f,-0.0840956f + }); + + auto inputGateBias = + MakeTensor(tensorInfo20, {0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f, 0.053110216f, + -0.06928846f, -0.13942584f, -0.11816189f, 0.19483899f, 0.03652339f, + -0.10250295f, 0.036714908f, -0.18426876f, 0.036065217f, 0.21810818f, + 0.02383196f, -0.043370757f, 0.08690144f, -0.04444982f, 0.00030581196f + }); + + auto forgetGateBias = + MakeTensor(tensorInfo20, {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f, + 0.11098921f, 0.15378423f, 0.09263801f, 0.09790885f, + 0.09508917f, 0.061199076f, 0.07665568f, -0.015443159f, + -0.03499149f, 0.046190713f, 0.08895977f, 0.10899629f, + 0.40694186f, 0.06030037f, 0.012413437f, -0.06108739f + }); + + auto cellBias = + MakeTensor(tensorInfo20, {-0.024379363f, 0.0055531194f, 0.23377132f, 0.033463873f, + -0.1483596f, -0.10639995f, -0.091433935f, 0.058573797f, + -0.06809782f, -0.07889636f, -0.043246906f, -0.09829136f, + -0.4279842f, 0.034901652f, 0.18797937f, 0.0075234566f, + 0.016178843f, 0.1749513f, 0.13975595f, 0.92058027f + }); + + auto outputGateBias = + MakeTensor(tensorInfo20, {0.046159424f, -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f, + 0.35373217f, -0.018957434f, 0.008907322f, -0.0762701f, 0.12018895f, + 0.04216877f, 0.0022856654f, 0.040952638f, 0.3147856f, 0.08225149f, + -0.057416286f, -0.14995944f, -0.008040261f, 0.13208859f, 0.029760877f + }); + + auto recurrentToInputWeights = + MakeTensor(tensorInfo20x16, {-0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, + -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, + -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, + -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, + 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, + 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, + -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, + 0.14283475f, -0.07390571f, -0.06402044f, 0.062524505f, + -0.093129106f, 0.04860203f, -0.08364217f, -0.08119002f, + 0.009352075f, 0.22920375f, 0.0016303885f, 0.11583097f, + -0.13732095f, 0.012405723f, -0.07551853f, 0.06343048f, + 0.12162708f, -0.031923793f, -0.014335606f, 0.01790974f, + -0.10650317f, -0.0724401f, 0.08554849f, -0.05727212f, + 0.06556731f, -0.042729504f, -0.043227166f, 0.011683251f, + -0.013082158f, -0.029302018f, -0.010899579f, -0.062036745f, + -0.022509435f, -0.00964907f, -0.01567329f, 0.04260106f, + -0.07787477f, -0.11576462f, 0.017356863f, 0.048673786f, + -0.017577527f, -0.05527947f, -0.082487635f, -0.040137455f, + -0.10820036f, -0.04666372f, 0.022746278f, -0.07851417f, + 0.01068115f, 0.032956902f, 0.022433773f, 0.0026891115f, + 0.08944216f, -0.0685835f, 0.010513544f, 0.07228705f, + 0.02032331f, -0.059686817f, -0.0005566496f, -0.086984694f, + 0.040414046f, -0.1380399f, 0.094208956f, -0.05722982f, + 0.012092817f, -0.04989123f, -0.086576f, -0.003399834f, + -0.04696032f, -0.045747425f, 0.10091314f, 0.048676282f, + -0.029037097f, 0.031399418f, -0.0040285117f, 0.047237843f, + 0.09504992f, 0.041799378f, -0.049185462f, -0.031518843f, + -0.10516937f, 0.026374253f, 0.10058866f, -0.0033195973f, + -0.041975245f, 0.0073591834f, 0.0033782164f, -0.004325073f, + -0.10167381f, 0.042500053f, -0.01447153f, 0.06464186f, + -0.017142897f, 0.03312627f, 0.009205989f, 0.024138335f, + -0.011337001f, 0.035530265f, -0.010912711f, 0.0706555f, + -0.005894094f, 0.051841937f, -0.1401738f, -0.02351249f, + 0.0365468f, 0.07590991f, 0.08838724f, 0.021681072f, + -0.10086113f, 0.019608743f, -0.06195883f, 0.077335775f, + 0.023646897f, -0.095322326f, 0.02233014f, 0.09756986f, + -0.048691444f, -0.009579111f, 0.07595467f, 0.11480546f, + -0.09801813f, 0.019894179f, 0.08502348f, 0.004032281f, + 0.037211012f, 0.068537936f, -0.048005626f, -0.091520436f, + -0.028379958f, -0.01556313f, 0.06554592f, -0.045599163f, + -0.01672207f, -0.020169014f, -0.011877351f, -0.20212261f, + 0.010889619f, 0.0047078193f, 0.038385306f, 0.08540671f, + -0.017140968f, -0.0035865551f, 0.016678626f, 0.005633034f, + 0.015963363f, 0.00871737f, 0.060130805f, 0.028611384f, + 0.10109069f, -0.015060172f, -0.07894427f, 0.06401885f, + 0.011584063f, -0.024466386f, 0.0047652307f, -0.09041358f, + 0.030737216f, -0.0046374933f, 0.14215417f, -0.11823516f, + 0.019899689f, 0.006106124f, -0.027092824f, 0.0786356f, + 0.05052217f, -0.058925f, -0.011402121f, -0.024987547f, + -0.0013661642f, -0.06832946f, -0.015667673f, -0.1083353f, + -0.00096863037f, -0.06988685f, -0.053350925f, -0.027275559f, + -0.033664223f, -0.07978348f, -0.025200296f, -0.017207067f, + -0.058403496f, -0.055697463f, 0.005798788f, 0.12965427f, + -0.062582195f, 0.0013350133f, -0.10482091f, 0.0379771f, + 0.072521195f, -0.0029455067f, -0.13797039f, -0.03628521f, + 0.013806405f, -0.017858358f, -0.01008298f, -0.07700066f, + -0.017081132f, 0.019358726f, 0.0027079724f, 0.004635139f, + 0.062634714f, -0.02338735f, -0.039547626f, -0.02050681f, + 0.03385117f, -0.083611414f, 0.002862572f, -0.09421313f, + 0.058618143f, -0.08598433f, 0.00972939f, 0.023867095f, + -0.053934585f, -0.023203006f, 0.07452513f, -0.048767887f, + -0.07314807f, -0.056307215f, -0.10433547f, -0.06440842f, + 0.04328182f, 0.04389765f, -0.020006588f, -0.09076438f, + -0.11652589f, -0.021705797f, 0.03345259f, -0.010329105f, + -0.025767034f, 0.013057034f, -0.07316461f, -0.10145612f, + 0.06358255f, 0.18531723f, 0.07759293f, 0.12006465f, + 0.1305557f, 0.058638252f, -0.03393652f, 0.09622831f, + -0.16253184f, -2.4580743e-06f, 0.079869635f, -0.070196845f, + -0.005644518f, 0.06857898f, -0.12598175f, -0.035084512f, + 0.03156317f, -0.12794146f, -0.031963028f, 0.04692781f, + 0.030070418f, 0.0071660685f, -0.095516115f, -0.004643372f, + 0.040170413f, -0.062104587f, -0.0037324072f, 0.0554317f, + 0.08184801f, -0.019164372f, 0.06791302f, 0.034257166f, + -0.10307039f, 0.021943003f, 0.046745934f, 0.0790918f, + -0.0265588f, -0.007824208f, 0.042546265f, -0.00977924f, + -0.0002440307f, -0.017384544f, -0.017990116f, 0.12252321f, + -0.014512694f, -0.08251313f, 0.08861942f, 0.13589665f, + 0.026351685f, 0.012641483f, 0.07466548f, 0.044301085f, + -0.045414884f, -0.051112458f, 0.03444247f, -0.08502782f, + -0.04106223f, -0.028126027f, 0.028473156f, 0.10467447f + }); + + auto recurrentToForgetWeights = + MakeTensor(tensorInfo20x16, {-0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, + 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, + -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, + 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, + 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, + -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, + -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, + 0.061878487f, -0.04729229f, 0.034919553f, -0.07585433f, + -0.04421272f, -0.044019096f, 0.085488975f, 0.04058006f, + -0.06890133f, -0.030951202f, -0.024628663f, -0.07672815f, + 0.034293607f, 0.08556707f, -0.05293577f, -0.033561368f, + -0.04899627f, 0.0241671f, 0.015736353f, -0.095442444f, + -0.029564252f, 0.016493602f, -0.035026584f, 0.022337519f, + -0.026871363f, 0.004780428f, 0.0077918363f, -0.03601621f, + 0.016435321f, -0.03263031f, -0.09543275f, -0.047392778f, + 0.013454138f, 0.028934088f, 0.01685226f, -0.086110644f, + -0.046250615f, -0.01847454f, 0.047608484f, 0.07339695f, + 0.034546845f, -0.04881143f, 0.009128804f, -0.08802852f, + 0.03761666f, 0.008096139f, -0.014454086f, 0.014361001f, + -0.023502491f, -0.0011840804f, -0.07607001f, 0.001856849f, + -0.06509276f, -0.006021153f, -0.08570962f, -0.1451793f, + 0.060212336f, 0.055259194f, 0.06974018f, 0.049454916f, + -0.027794661f, -0.08077226f, -0.016179763f, 0.1169753f, + 0.17213494f, -0.0056326236f, -0.053934924f, -0.0124349f, + -0.11520337f, 0.05409887f, 0.088759385f, 0.0019655675f, + 0.0042065294f, 0.03881498f, 0.019844765f, 0.041858196f, + -0.05695512f, 0.047233116f, 0.038937137f, -0.06542224f, + 0.014429736f, -0.09719407f, 0.13908425f, -0.05379757f, + 0.012321099f, 0.082840554f, -0.029899208f, 0.044217527f, + 0.059855383f, 0.07711018f, -0.045319796f, 0.0948846f, + -0.011724666f, -0.0033288454f, -0.033542685f, -0.04764985f, + -0.13873616f, 0.040668588f, 0.034832682f, -0.015319203f, + -0.018715994f, 0.046002675f, 0.0599172f, -0.043107376f, + 0.0294216f, -0.002314414f, -0.022424703f, 0.0030315618f, + 0.0014641669f, 0.0029166266f, -0.11878115f, 0.013738511f, + 0.12375372f, -0.0006038222f, 0.029104086f, 0.087442465f, + 0.052958444f, 0.07558703f, 0.04817258f, 0.044462286f, + -0.015213451f, -0.08783778f, -0.0561384f, -0.003008196f, + 0.047060397f, -0.002058388f, 0.03429439f, -0.018839769f, + 0.024734668f, 0.024614193f, -0.042046934f, 0.09597743f, + -0.0043254104f, 0.04320769f, 0.0064070094f, -0.0019131786f, + -0.02558259f, -0.022822596f, -0.023273505f, -0.02464396f, + -0.10991725f, -0.006240552f, 0.0074488563f, 0.024044557f, + 0.04383914f, -0.046476185f, 0.028658995f, 0.060410924f, + 0.050786525f, 0.009452605f, -0.0073054377f, -0.024810238f, + 0.0052906186f, 0.0066939713f, -0.0020913032f, 0.014515517f, + 0.015898481f, 0.021362653f, -0.030262267f, 0.016587038f, + -0.011442813f, 0.041154444f, -0.007631438f, -0.03423484f, + -0.010977775f, 0.036152758f, 0.0066366293f, 0.11915515f, + 0.02318443f, -0.041350313f, 0.021485701f, -0.10906167f, + -0.028218046f, -0.00954771f, 0.020531068f, -0.11995105f, + -0.03672871f, 0.024019798f, 0.014255957f, -0.05221243f, + -0.00661567f, -0.04630967f, 0.033188973f, 0.10107534f, + -0.014027541f, 0.030796422f, -0.10270911f, -0.035999842f, + 0.15443139f, 0.07684145f, 0.036571592f, -0.035900835f, + -0.0034699554f, 0.06209149f, 0.015920248f, -0.031122351f, + -0.03858649f, 0.01849943f, 0.13872518f, 0.01503974f, + 0.069941424f, -0.06948533f, -0.0088794185f, 0.061282158f, + -0.047401894f, 0.03100163f, -0.041533746f, -0.10430945f, + 0.044574402f, -0.01425562f, -0.024290353f, 0.034563623f, + 0.05866852f, 0.023947537f, -0.09445152f, 0.035450947f, + 0.02247216f, -0.0042998926f, 0.061146557f, -0.10250651f, + 0.020881841f, -0.06747029f, 0.10062043f, -0.0023941975f, + 0.03532124f, -0.016341697f, 0.09685456f, -0.016764693f, + 0.051808182f, 0.05875331f, -0.04536488f, 0.001626336f, + -0.028892258f, -0.01048663f, -0.009793449f, -0.017093895f, + 0.010987891f, 0.02357273f, -0.00010856845f, 0.0099760275f, + -0.001845119f, -0.03551521f, 0.0018358806f, 0.05763657f, + -0.01769146f, 0.040995963f, 0.02235177f, -0.060430344f, + 0.11475477f, -0.023854522f, 0.10071741f, 0.0686208f, + -0.014250481f, 0.034261297f, 0.047418304f, 0.08562733f, + -0.030519066f, 0.0060542435f, 0.014653856f, -0.038836084f, + 0.04096551f, 0.032249358f, -0.08355519f, -0.026823482f, + 0.056386515f, -0.010401743f, -0.028396193f, 0.08507674f, + 0.014410365f, 0.020995233f, 0.17040324f, 0.11511526f, + 0.02459721f, 0.0066619175f, 0.025853224f, -0.023133837f, + -0.081302024f, 0.017264642f, -0.009585969f, 0.09491168f, + -0.051313367f, 0.054532815f, -0.014298593f, 0.10657464f, + 0.007076659f, 0.10964551f, 0.0409152f, 0.008275321f, + -0.07283536f, 0.07937492f, 0.04192024f, -0.1075027f + }); + + auto recurrentToCellWeights = + MakeTensor(tensorInfo20x16, {-0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f, + 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, + 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, + -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, + 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, + 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, + -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, + -0.019443132f, -0.030755889f, -0.0040000007f, 0.04465846f, + -0.021585021f, 0.0031670958f, 0.0053199246f, -0.056117613f, + -0.10893326f, 0.076739706f, -0.08509834f, -0.027997585f, + 0.037871376f, 0.01449768f, -0.09002357f, -0.06111149f, + -0.046195522f, 0.0422062f, -0.005683705f, -0.1253618f, + -0.012925729f, -0.04890792f, 0.06985068f, 0.037654128f, + 0.03398274f, -0.004781977f, 0.007032333f, -0.031787455f, + 0.010868644f, -0.031489216f, 0.09525667f, 0.013939797f, + 0.0058680447f, 0.0167067f, 0.02668468f, -0.04797466f, + -0.048885044f, -0.12722108f, 0.035304096f, 0.06554885f, + 0.00972396f, -0.039238118f, -0.05159735f, -0.11329045f, + 0.1613692f, -0.03750952f, 0.06529313f, -0.071974665f, + -0.11769596f, 0.015524369f, -0.0013754242f, -0.12446318f, + 0.02786344f, -0.014179351f, 0.005264273f, 0.14376344f, + 0.015983658f, 0.03406988f, -0.06939408f, 0.040699873f, + 0.02111075f, 0.09669095f, 0.041345075f, -0.08316494f, + -0.07684199f, -0.045768797f, 0.032298047f, -0.041805092f, + 0.0119405f, 0.0061010392f, 0.12652606f, 0.0064572375f, + -0.024950314f, 0.11574242f, 0.04508852f, -0.04335324f, + 0.06760663f, -0.027437469f, 0.07216407f, 0.06977076f, + -0.05438599f, 0.034033038f, -0.028602652f, 0.05346137f, + 0.043184172f, -0.037189785f, 0.10420091f, 0.00882477f, + -0.054019816f, -0.074273005f, -0.030617684f, -0.0028467078f, + 0.024302477f, -0.0038869337f, 0.005332455f, 0.0013399826f, + 0.04361412f, -0.007001822f, 0.09631092f, -0.06702025f, + -0.042049985f, -0.035070654f, -0.04103342f, -0.10273396f, + 0.0544271f, 0.037184782f, -0.13150354f, -0.0058036847f, + -0.008264958f, 0.042035464f, 0.05891794f, 0.029673764f, + 0.0063542654f, 0.044788733f, 0.054816857f, 0.062257513f, + -0.00093483756f, 0.048938446f, -0.004952862f, -0.007730018f, + -0.04043371f, -0.017094059f, 0.07229206f, -0.023670016f, + -0.052195564f, -0.025616996f, -0.01520939f, 0.045104615f, + -0.007376126f, 0.003533447f, 0.006570588f, 0.056037236f, + 0.12436656f, 0.051817212f, 0.028532185f, -0.08686856f, + 0.11868599f, 0.07663395f, -0.07323171f, 0.03463402f, + -0.050708205f, -0.04458982f, -0.11590894f, 0.021273347f, + 0.1251325f, -0.15313013f, -0.12224372f, 0.17228661f, + 0.023029093f, 0.086124025f, 0.006445803f, -0.03496501f, + 0.028332196f, 0.04449512f, -0.042436164f, -0.026587414f, + -0.006041347f, -0.09292539f, -0.05678812f, 0.03897832f, + 0.09465633f, 0.008115513f, -0.02171956f, 0.08304309f, + 0.071401566f, 0.019622514f, 0.032163795f, -0.004167056f, + 0.02295182f, 0.030739572f, 0.056506045f, 0.004612461f, + 0.06524936f, 0.059999723f, 0.046395954f, -0.0045512207f, + -0.1335546f, -0.030136576f, 0.11584653f, -0.014678886f, + 0.0020118146f, -0.09688814f, -0.0790206f, 0.039770417f, + -0.0329582f, 0.07922767f, 0.029322514f, 0.026405897f, + 0.04207835f, -0.07073373f, 0.063781224f, 0.0859677f, + -0.10925287f, -0.07011058f, 0.048005477f, 0.03438226f, + -0.09606514f, -0.006669445f, -0.043381985f, 0.04240257f, + -0.06955775f, -0.06769346f, 0.043903265f, -0.026784198f, + -0.017840602f, 0.024307009f, -0.040079936f, -0.019946516f, + 0.045318738f, -0.12233574f, 0.026170589f, 0.0074471775f, + 0.15978073f, 0.10185836f, 0.10298046f, -0.015476589f, + -0.039390966f, -0.072174534f, 0.0739445f, -0.1211869f, + -0.0347889f, -0.07943156f, 0.014809798f, -0.12412325f, + -0.0030663363f, 0.039695457f, 0.0647603f, -0.08291318f, + -0.018529687f, -0.004423833f, 0.0037507233f, 0.084633216f, + -0.01514876f, -0.056505352f, -0.012800942f, -0.06994386f, + 0.012962922f, -0.031234352f, 0.07029052f, 0.016418684f, + 0.03618972f, 0.055686004f, -0.08663945f, -0.017404709f, + -0.054761406f, 0.029065743f, 0.052404847f, 0.020238016f, + 0.0048197987f, -0.0214882f, 0.07078733f, 0.013016777f, + 0.06262858f, 0.009184685f, 0.020785125f, -0.043904778f, + -0.0270329f, -0.03299152f, -0.060088247f, -0.015162964f, + -0.001828936f, 0.12642565f, -0.056757294f, 0.013586685f, + 0.09232601f, -0.035886683f, 0.06000002f, 0.05229691f, + -0.052580316f, -0.082029596f, -0.010794592f, 0.012947712f, + -0.036429964f, -0.085508935f, -0.13127148f, -0.017744139f, + 0.031502828f, 0.036232427f, -0.031581745f, 0.023051167f, + -0.05325106f, -0.03421577f, 0.028793324f, -0.034633752f, + -0.009881397f, -0.043551125f, -0.018609839f, 0.0019097115f, + -0.008799762f, 0.056595087f, 0.0022273948f, 0.055752404f + }); + + auto recurrentToOutputWeights = + MakeTensor(tensorInfo20x16, {0.025825322f, -0.05813119f, 0.09495884f,-0.045984812f, -0.01255415f, + -0.0026479573f,-0.08196161f,-0.054914974f,-0.0046604523f, + -0.029587349f, -0.044576716f, -0.07480124f, -0.082868785f, + 0.023254942f, 0.027502948f, -0.0039728214f, -0.08683098f, + -0.08116779f, -0.014675607f, -0.037924774f, -0.023314456f, + -0.007401714f, -0.09255757f, 0.029460307f, -0.08829125f, + -0.005139627f, -0.08989442f, -0.0555066f, 0.13596267f, + -0.025062224f, -0.048351806f, -0.03850004f, 0.07266485f, + -0.022414139f, 0.05940088f, 0.075114764f, 0.09597592f, + -0.010211725f, -0.0049794707f, -0.011523867f, -0.025980417f, + 0.072999895f, 0.11091378f, -0.081685916f, 0.014416728f, + 0.043229222f, 0.034178585f, -0.07530371f, 0.035837382f, + -0.085607f, -0.007721233f, -0.03287832f, -0.043848954f, + -0.06404588f, -0.06632928f, -0.073643476f, 0.008214239f, + -0.045984086f, 0.039764922f, 0.03474462f, 0.060612556f, + -0.080590084f, 0.049127717f, 0.04151091f, -0.030063879f, + 0.008801774f, -0.023021035f, -0.019558564f, 0.05158114f, + -0.010947698f, -0.011825728f, 0.0075720972f, 0.0699727f, + -0.0039981045f, 0.069350146f, 0.08799282f, 0.016156472f, + 0.035502106f, 0.11695009f, 0.006217345f, 0.13392477f, + -0.037875112f, 0.025745004f, 0.08940699f, -0.00924166f, + 0.0046702605f, -0.036598757f, -0.08811812f, 0.10522024f, + -0.032441203f, 0.008176899f, -0.04454919f, 0.07058152f, + 0.0067963637f, 0.039206743f, 0.03259838f, 0.03725492f, + -0.09515802f, 0.013326398f, -0.052055415f, -0.025676316f, + 0.03198509f, -0.015951829f, -0.058556724f, 0.036879618f, + 0.043357447f, 0.028362012f, -0.05908629f, 0.0059240665f, + -0.04995891f, -0.019187413f,0.0276265f, -0.01628143f, 0.0025863599f, + 0.08800015f, 0.035250366f, -0.022165963f, -0.07328642f, + -0.009415526f, -0.07455109f, 0.11690406f, 0.0363299f, + 0.07411125f, 0.042103454f, -0.009660886f, 0.019076364f, + 0.018299393f, -0.046004917f, 0.08891175f,0.0431396f, -0.026327137f, + -0.051502608f, 0.08979574f, -0.051670972f, 0.04940282f, + -0.07491107f, -0.021240504f, 0.022596184f, -0.034280192f, + 0.060163025f, -0.058211457f, -0.051837247f, -0.01349775f, + -0.04639988f, -0.035936575f, -0.011681591f, 0.064818054f, + 0.0073146066f, -0.021745546f, -0.043124277f, -0.06471268f, + -0.07053354f, -0.029321948f, -0.05330136f, 0.016933719f, + -0.053782392f, 0.13747959f, -0.1361751f, -0.11569455f, + 0.0033329215f, 0.05693899f, -0.053219706f, 0.063698f, + 0.07977434f, -0.07924483f, 0.06936997f, 0.0034815092f, + -0.007305279f, -0.037325785f, -0.07251102f, -0.033633437f, + -0.08677009f, 0.091591336f, -0.14165086f, 0.021752775f, + 0.019683983f, 0.0011612234f, -0.058154266f, 0.049996935f, + 0.0288841f, -0.0024567875f, -0.14345716f, 0.010955264f,-0.10234828f, + 0.1183656f, -0.0010731248f, -0.023590032f,-0.072285876f,-0.0724771f, + -0.026382286f, -0.0014920527f, 0.042667855f, 0.0018776858f, + 0.02986552f, 0.009814309f, 0.0733756f, 0.12289186f, + 0.018043943f, -0.0458958f, 0.049412545f, 0.033632483f, + 0.05495232f, 0.036686596f, -0.013781798f, -0.010036754f, + 0.02576849f, -0.08307328f, 0.010112348f, 0.042521734f, + -0.05869831f, -0.071689695f, 0.03876447f, -0.13275425f, -0.0352966f, + -0.023077697f, 0.10285965f, 0.084736146f, 0.15568255f, + -0.00040734606f, 0.027835453f, -0.10292561f, -0.032401145f, + 0.10053256f, -0.026142767f, -0.08271222f, -0.0030240538f, + -0.016368777f, 0.1070414f, 0.042672627f, 0.013456989f, + -0.0437609f, -0.022309763f, 0.11576483f, 0.04108048f, + 0.061026827f, -0.0190714f, -0.0869359f, 0.037901703f, 0.0610107f, + 0.07202949f, 0.01675338f, 0.086139716f, -0.08795751f, + -0.014898893f, -0.023771819f, -0.01965048f, 0.007955471f, + -0.043740474f, 0.03346837f, -0.10549954f, 0.090567775f, + 0.042013682f, -0.03176985f, 0.12569028f, -0.02421228f, + -0.029526481f, 0.023851605f, 0.031539805f, 0.05292009f, + -0.02344001f, -0.07811758f, -0.08834428f, 0.10094801f, + 0.16594367f, -0.06861939f, -0.021256343f, -0.041093912f, + -0.06669611f, 0.035498552f, 0.021757556f, -0.09302526f, + -0.015403468f, -0.06614931f, -0.051798206f, -0.013874718f, + 0.03630673f, 0.010412845f, -0.08077351f, 0.046185967f, + 0.0035662893f, 0.03541868f, -0.094149634f, -0.034814864f, + 0.003128424f, -0.020674974f, -0.03944324f, -0.008110165f, + -0.11113267f, 0.08484226f, 0.043586485f, 0.040582247f, + 0.0968012f, -0.065249965f, -0.028036479f, 0.0050708856f, + 0.0017462453f, 0.0326779f, 0.041296225f, 0.09164146f, + -0.047743853f, -0.015952192f, -0.034451712f, 0.084197424f, + -0.05347844f, -0.11768019f, 0.085926116f, -0.08251791f, + -0.045081906f, 0.0948852f, 0.068401024f, 0.024856757f, + 0.06978981f, -0.057309967f, -0.012775832f, -0.0032452994f, + 0.01977615f, -0.041040014f, -0.024264973f,0.063464895f, 0.05431621f + }); + + auto cellToInputWeights = + MakeTensor(tensorInfo20, {0.040369894f, 0.030746894f, 0.24704495f, 0.018586371f, -0.037586458f, + -0.15312155f, -0.11812848f, -0.11465643f, 0.20259799f, 0.11418174f, + -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f,-0.052169047f, + 0.21198851f, -0.38871562f, -0.09061183f, -0.09683246f, -0.21929175f + }); + + + auto cellToForgetWeights = + MakeTensor(tensorInfo20, {-0.01998659f,-0.15568835f,-0.24248174f, -0.012770197f, 0.041331276f, + -0.072311886f, -0.052123554f,-0.0066330447f,-0.043891653f,0.036225766f, + -0.047248036f, 0.021479502f,0.033189066f, 0.11952997f, -0.020432774f, + 0.64658105f, -0.06650122f, -0.03467612f, 0.095340036f, 0.23647355f + }); + + auto cellToOutputWeights = + MakeTensor(tensorInfo20, {0.08286371f, -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f, + -0.5495371f, -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f, + -0.11940523f, 0.007358328f, 0.1890978f, 0.4833202f, -0.34441817f, + 0.36312827f, -0.26375428f, 0.1457655f, -0.19724406f, 0.15548733f + }); + + auto projectionWeights = + MakeTensor(tensorInfo16x20, + {-0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, + 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f, + -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, + -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, + 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, + 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f, + 0.08682067f, 0.17240396f, 0.014975425f, 0.056431185f, 0.031037588f, + 0.16702051f, 0.0077946745f, 0.15140012f, 0.29405436f, 0.120285f, + -0.188994f, -0.027265169f, 0.043389652f, -0.022061434f, 0.014777949f, + -0.20203483f, 0.094781205f, 0.19100232f, 0.13987629f, -0.036132768f, + -0.06426278f, -0.05108664f, 0.13221376f, 0.009441198f, -0.16715929f, + 0.15859416f, -0.040437475f, 0.050779544f, -0.022187516f, 0.012166504f, + 0.027685808f, -0.07675938f, -0.0055694645f, -0.09444123f, 0.0046453946f, + 0.050794356f, 0.10770313f, -0.20790008f, -0.07149004f, -0.11425117f, + 0.008225835f, -0.035802525f, 0.14374903f, 0.15262283f, 0.048710253f, + 0.1847461f, -0.007487823f, 0.11000021f, -0.09542012f, 0.22619456f, + -0.029149994f, 0.08527916f, 0.009043713f, 0.0042746216f, 0.016261552f, + 0.022461696f, 0.12689082f, -0.043589946f, -0.12035478f, -0.08361797f, + -0.050666027f, -0.1248618f, -0.1275799f, -0.071875185f, 0.07377272f, + 0.09944291f, -0.18897448f, -0.1593054f, -0.06526116f, -0.040107165f, + -0.004618631f, -0.067624845f, -0.007576253f, 0.10727444f, 0.041546922f, + -0.20424393f, 0.06907816f, 0.050412357f, 0.00724631f, 0.039827548f, + 0.12449835f, 0.10747581f, 0.13708383f, 0.09134148f, -0.12617786f, + -0.06428341f, 0.09956831f, 0.1208086f, -0.14676677f, -0.0727722f, + 0.1126304f, 0.010139365f, 0.015571211f, -0.038128063f, 0.022913318f, + -0.042050496f, 0.16842307f, -0.060597885f, 0.10531834f, -0.06411776f, + -0.07451711f, -0.03410368f, -0.13393489f, 0.06534304f, 0.003620307f, + 0.04490757f, 0.05970546f, 0.05197996f, 0.02839995f, 0.10434969f, + -0.013699693f, -0.028353551f, -0.07260381f, 0.047201227f, -0.024575593f, + -0.036445823f, 0.07155557f, 0.009672501f, -0.02328883f, 0.009533515f, + -0.03606021f, -0.07421458f, -0.028082801f, -0.2678904f, -0.13221288f, + 0.18419984f, -0.13012612f, -0.014588381f, -0.035059117f, -0.04824723f, + 0.07830115f, -0.056184657f, 0.03277091f, 0.025466874f, 0.14494097f, + -0.12522776f, -0.098633975f, -0.10766018f, -0.08317623f, 0.08594209f, + 0.07749552f, 0.039474737f, 0.1776665f, -0.07409566f, -0.0477268f, + 0.29323658f, 0.10801441f, 0.1154011f, 0.013952499f, 0.10739139f, + 0.10708251f, -0.051456142f, 0.0074137426f, -0.10430189f, 0.10034707f, + 0.045594677f, 0.0635285f, -0.0715442f, -0.089667566f, -0.10811871f, + 0.00026344223f, 0.08298446f, -0.009525053f, 0.006585689f, -0.24567553f, + -0.09450807f, 0.09648481f, 0.026996298f, -0.06419476f, -0.04752702f, + -0.11063944f, -0.23441927f, -0.17608605f, -0.052156363f, 0.067035615f, + 0.19271925f, -0.0032889997f, -0.043264326f, 0.09663576f, -0.057112187f, + -0.10100678f, 0.0628376f, 0.04447668f, 0.017961001f, -0.10094388f, + -0.10190601f, 0.18335468f, 0.10494553f, -0.052095775f, -0.0026118709f, + 0.10539724f, -0.04383912f, -0.042349473f, 0.08438151f, -0.1947263f, + 0.02251204f, 0.11216432f, -0.10307853f, 0.17351969f, -0.039091777f, + 0.08066188f, -0.00561982f, 0.12633002f, 0.11335965f, -0.0088127935f, + -0.019777594f, 0.06864014f, -0.059751723f, 0.016233567f, -0.06894641f, + -0.28651384f, -0.004228674f, 0.019708522f, -0.16305895f, -0.07468996f, + -0.0855457f, 0.099339016f, -0.07580735f, -0.13775392f, 0.08434318f, + 0.08330512f, -0.12131499f, 0.031935584f, 0.09180414f, -0.08876437f, + -0.08049874f, 0.008753825f, 0.03498998f, 0.030215185f, 0.03907079f, + 0.089751154f, 0.029194152f, -0.03337423f, -0.019092513f, 0.04331237f, + 0.04299654f, -0.036394123f, -0.12915532f, 0.09793732f, 0.07512415f, + -0.11319543f, -0.032502122f, 0.15661901f, 0.07671967f, -0.005491124f, + -0.19379048f, -0.218606f, 0.21448623f, 0.017840758f, 0.1416943f, + -0.07051762f, 0.19488361f, 0.02664691f, -0.18104725f, -0.09334311f, + 0.15026465f, -0.15493552f, -0.057762887f, -0.11604192f, -0.262013f, + -0.01391798f, 0.012185008f, 0.11156489f, -0.07483202f, 0.06693364f, + -0.26151478f, 0.046425626f, 0.036540434f, -0.16435726f, 0.17338543f, + -0.21401681f, -0.11385144f, -0.08283257f, -0.069031075f, 0.030635102f, + 0.010969227f, 0.11109743f, 0.010919218f, 0.027526086f, 0.13519906f, + 0.01891392f, -0.046839405f, -0.040167913f, 0.017953383f, -0.09700955f, + 0.0061885654f, -0.07000971f, 0.026893595f, -0.038844477f, 0.14543656f + }); + + std::vector projectionBiasVector(outputSize, 0.f); + auto projectionBias = MakeTensor(tensorInfo16, projectionBiasVector); + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo16x20); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo16); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; + + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; + +} + + +LayerTestResult LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) +{ + bool cifgEnabled = true; + bool peepholeEnabled = true; + bool projectionEnabled = false; + // These are not the input and the output of Lstm yet + unsigned int batchSize = boost::numeric_cast(input.shape()[0]); + unsigned int inputSize = boost::numeric_cast(input.shape()[1]); + + unsigned int outputSize = boost::numeric_cast(outputExpected.shape()[1]); + + const unsigned int cellSize = outputSize; + + // Decide the shape of all input tensors + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize, cellSize}, armnn::GetDataType()); + + unsigned int scratchBufferSize = cifgEnabled ? cellSize * 4 : cellSize * 3; + armnn::TensorInfo scratchBufferTensorInfo({batchSize, scratchBufferSize}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, cellSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + // List of inputs + std::vector inputData; + inputData.assign(input.data(), input.data() + batchSize*inputSize); + auto inputTensor = MakeTensor(inputTensorInfo, inputData); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector cellStateInVector(batchSize * cellSize, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + + // Prepare all the weights in the descriptor for LSTM + armnn::LstmQueueDescriptor data; + armnn::TensorInfo tensorInfoInput({cellSize, inputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfoOutput({cellSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfoNumUnits({cellSize}, armnn::GetDataType()); + + auto inputToCellWeights = MakeTensor(tensorInfoInput, + {-0.49770179f, -0.27711356f, -0.09624726f, 0.05100781f, + 0.04717243f, 0.48944736f, -0.38535351f, + -0.17212132f}); + auto inputToForgetWeights = MakeTensor(tensorInfoInput, + {-0.55291498f, -0.42866567f, 0.13056988f, + -0.3633365f, -0.22755712f, 0.28253698f, 0.24407166f, + 0.33826375f}); + auto inputToOutputWeights = MakeTensor(tensorInfoInput, + {0.10725588f, -0.02335852f, -0.55932593f, + -0.09426838f, -0.44257352f, 0.54939759f, + 0.01533556f, 0.42751634f}); + auto cellBias = MakeTensor(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); + auto forgetGateBias = MakeTensor(tensorInfoNumUnits, {1.f, 1.f, 1.f, 1.f}); + auto outputGateBias = MakeTensor(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); + + auto recurrentToCellWeights = MakeTensor(tensorInfoOutput, + {0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, 0.42957711f, + 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f, 0.20675004f, + 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f, 0.44901288f, + 0.21193194f}); + auto recurrentToForgetWeights = MakeTensor(tensorInfoOutput, + {-0.13832897f, -0.0515101f, -0.2359007f, -0.16661474f, -0.14340827f, + 0.36986142f, 0.23414481f, 0.55899f, 0.10798943f, -0.41174671f, 0.17751795f, + -0.34484994f, -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f}); + + auto recurrentToOutputWeights = MakeTensor(tensorInfoOutput, + {0.41613156f, 0.42610586f, -0.16495961f, -0.5663873f, 0.30579174f, -0.05115908f, + -0.33941799f, 0.23364776f, 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f, + 0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f}); + + auto cellToForgetWeights = MakeTensor(tensorInfoNumUnits, + {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}); + auto cellToOutputWeights = MakeTensor(tensorInfoNumUnits, + {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}); + + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfoInput); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfoInput); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfoInput); + + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfoNumUnits); + + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfoOutput); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfoOutput); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfoOutput); + + + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfoNumUnits); + + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + + + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + + data.m_CellBias = &cellBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + + // other parameters for the descriptor + data.m_Parameters.m_CifgEnabled = cifgEnabled; + data.m_Parameters.m_ProjectionEnabled = projectionEnabled; + data.m_Parameters.m_PeepholeEnabled = peepholeEnabled; + + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_ClippingThresProj = 0.0; + data.m_Parameters.m_ClippingThresCell = 0.0; + + + // List of outputs + std::vector scratchBufferVector(batchSize * scratchBufferSize, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + LayerTestResult ret0(scratchBufferTensorInfo); + + // Output state for a certain time step + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + LayerTestResult ret1(outputStateOutTensorInfo); + + // Cell state for a certain time step + std::vector cellStateOutVector(batchSize * cellSize, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + LayerTestResult ret2(cellStateOutTensorInfo); + + // Output for a certain time step + std::vector outputVector(batchSize * outputSize, 0.f); + auto outputTensor = MakeTensor(outputTensorInfo, outputVector); + std::vector outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + batchSize*outputSize); + LayerTestResult ret3(outputTensorInfo); + ret3.outputExpected = MakeTensor(outputTensorInfo, outputData); + + // Prepare the inputs and outputs for the workload + std::unique_ptr inputHandle = + workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + + std::unique_ptr scratchBufferHandle = + workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = + workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchBufferHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + + + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchBufferHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + CopyDataToITensorHandle(scratchBufferHandle.get(), &scratchBufferTensor[0][0]); + CopyDataToITensorHandle(outputStateOutHandle.get(), &outputStateOutTensor[0][0]); + CopyDataToITensorHandle(cellStateOutHandle.get(), &cellStateOutTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret0.output[0][0], scratchBufferHandle.get()); + CopyDataFromITensorHandle(&ret1.output[0][0], outputStateOutHandle.get()); + CopyDataFromITensorHandle(&ret2.output[0][0], cellStateOutHandle.get()); + CopyDataFromITensorHandle(&ret3.output[0][0], outputHandle.get()); + + return ret3; +} diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp index 32331789e9..24a951c395 100644 --- a/src/armnn/backends/test/MemCopyTests.cpp +++ b/src/armnn/backends/test/MemCopyTests.cpp @@ -19,6 +19,10 @@ #include "TensorCopyUtils.hpp" #include "WorkloadTestUtils.hpp" +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED +#include "../ArmComputeTensorUtils.hpp" +#endif + BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, @@ -81,6 +85,26 @@ void MemCopyTest(bool withSubtensors) MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); } +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(AclTypeConversions) +{ + arm_compute::Strides strides(1,2,3,4); + armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); + BOOST_TEST(convertedStrides[0] == 4); + BOOST_TEST(convertedStrides[1] == 3); + BOOST_TEST(convertedStrides[2] == 2); + BOOST_TEST(convertedStrides[3] == 1); + + arm_compute::TensorShape shape(5,6,7,8); + armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); + BOOST_TEST(convertedshape[0] == 8); + BOOST_TEST(convertedshape[1] == 7); + BOOST_TEST(convertedshape[2] == 6); + BOOST_TEST(convertedshape[3] == 5); +} +#endif + #if ARMCOMPUTECL_ENABLED BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp index d9dc01592a..df8219ddbd 100644 --- a/src/armnn/backends/test/NormTestImpl.hpp +++ b/src/armnn/backends/test/NormTestImpl.hpp @@ -87,7 +87,7 @@ LayerTestResult SimpleNormalizationTestImpl(armnn::IWorkloadFactory& wo // When normalising within channels, the 3x3 kernel covers the entire 2x2 input at every index. // Therefore, all output values should equal the inputs, but divided by: // pow((kappa + (accumulatedScale * alpha)), beta) - // ...where accumulatedScale is the sum of every element squared + // ...where accumulatedScale is the sum of every element squared. float divisor[inputNum]; for(int i = 0; i < boost::numeric_cast(inputNum); i++) { @@ -139,7 +139,7 @@ LayerTestResult SimpleNormalizationTestImpl(armnn::IWorkloadFactory& wo } break; } - case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough + case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough. default: { throw armnn::UnimplementedException("Unsupported normalisation method type, " diff --git a/src/armnn/backends/test/Pooling2dTestImpl.hpp b/src/armnn/backends/test/Pooling2dTestImpl.hpp index ab9fd6d6fb..e6e0e6721a 100644 --- a/src/armnn/backends/test/Pooling2dTestImpl.hpp +++ b/src/armnn/backends/test/Pooling2dTestImpl.hpp @@ -155,21 +155,21 @@ LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4TestCommon(armnn::IWorkl 3.0f, 5.0f, 4.0f, 0.0f, 1.0f, 5.0f, 9.0f, 7.0f, }); - // Construct input data + // Constructs input data. std::vector inputData; auto negator = [](float f) { return -f; }; - // First image (two channels where the second channel is the negative of the first one) + // First image (two channels where the second channel is the negative of the first one). inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); - // Second image (same as first image) + // Second image (same as first image). inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, inputData)); - // these were calculated manually + // These were calculated manually. auto shape(GetTensorShapeAsArray<4>(outputTensorInfo)); boost::multi_array outputExpected(shape); if (forceNoPadding) @@ -527,13 +527,13 @@ LayerTestResult AsymmetricNonSquarePooling2dTestCommon(armnn::IWorkloadFac descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; - // Construct input data + // Construct input data. auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { 1.0f, 3.0f, 4.0f, })); - // these were calculated manually + // These were calculated manually. auto outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { 0.0f, 3.0f, 0.0f, 3.0f, @@ -686,7 +686,7 @@ LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkl 438.0f, 564.0f, 573.0f, 402.0f }; - // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here + // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here. std::vector expectedOutputDataWithPadding = { 0.0f, 510.0f, 780.0f, 654.0f, 0.0f, 0.0f, 438.0f, 618.0f, 402.0f, 0.0f diff --git a/src/armnn/backends/test/QuantizeHelper.hpp b/src/armnn/backends/test/QuantizeHelper.hpp index bfaf9342f0..0a6ceb761d 100644 --- a/src/armnn/backends/test/QuantizeHelper.hpp +++ b/src/armnn/backends/test/QuantizeHelper.hpp @@ -61,7 +61,7 @@ struct IsFloatingPointIterator }; template ::value, int>::type=0 // Make sure valid fp iterator +typename std::enable_if::value, int>::type=0 // Makes sure fp iterator is valid. > std::vector QuantizedVector(float qScale, int32_t qOffset, FloatIt first, FloatIt last) { diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp index b60483a4d9..dedeb50e33 100644 --- a/src/armnn/backends/test/Reference.cpp +++ b/src/armnn/backends/test/Reference.cpp @@ -127,25 +127,8 @@ ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) // Splitter -BOOST_AUTO_TEST_CASE(SimpleSplitter) -{ - armnn::RefWorkloadFactory workloadFactory; - auto testResult = SplitterTest(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } -} - -BOOST_AUTO_TEST_CASE(SplitterUint8) -{ - armnn::RefWorkloadFactory workloadFactory; - auto testResult = SplitterUint8Test(workloadFactory); - for (unsigned int i = 0; i < testResult.size(); ++i) - { - BOOST_TEST(CompareTensors(testResult[i].output, testResult[i].outputExpected)); - } -} +ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) +ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) @@ -242,4 +225,9 @@ ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) +// Convert from Float16 to Float32 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) +// Convert from Float32 to Float16 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp index 4c3e0b73dd..9ed7f603a1 100644 --- a/src/armnn/backends/test/SoftmaxTestImpl.hpp +++ b/src/armnn/backends/test/SoftmaxTestImpl.hpp @@ -39,7 +39,7 @@ LayerTestResult SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFac LayerTestResult ret(outputTensorInfo); - // Each row is independently softmax'd + // Each row is independently softmax'd. auto input = MakeTensor(inputTensorInfo, std::vector( QuantizedVector(qScale, 0, { 0.f, 1.f, 0.f, 0.f, diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp index 70b798eafa..48c0730fa7 100644 --- a/src/armnn/backends/test/SplitterTestImpl.hpp +++ b/src/armnn/backends/test/SplitterTestImpl.hpp @@ -27,35 +27,35 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo // NOTE: Compute Library imposes a restriction that the x and y dimension (input height and width) // cannot be split. - // For the reasons for this see first comment on https://jira.arm.com/browse/IVGCVSW-1239 + // For the reasons for this, see first comment on https://jira.arm.com/browse/IVGCVSW-1239 // - // this test has therefore been recast to split the channels, then split the resulting subtensor + // This test has therefore been recast to split the channels, then split the resulting subtensor. - // to take channel 0 of original output - // and channel 0 and channel 1 of the split subtensor + // To take channel 0 of original output + // and channel 0 and channel 1 of the split subtensor. unsigned int outputWidth1 = inputWidth; unsigned int outputHeight1 = inputHeight; unsigned int outputChannels1 = 1; - // to take channel 1 and 2 of the original output + // To take channel 1 and 2 of the original output. unsigned int outputWidth2 = inputWidth; unsigned int outputHeight2 = inputHeight; unsigned int outputChannels2 = 2; - // Define the tensor descriptors + // Define the tensor descriptors. armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType()); - // outputs of the original split + // Outputs of the original split. armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType()); - // outputs of the subsequent subtensor split + // Outputs of the subsequent subtensor split. armnn::TensorInfo outputTensorInfo3({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); armnn::TensorInfo outputTensorInfo4({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); // Set quantization parameters if the requested type is a quantized type. - // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize + // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize. if(armnn::IsQuantizedType()) { inputTensorInfo.SetQuantizationScale(qScale); @@ -100,7 +100,7 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - // channel 0 of the original input + // Channel 0 of the original input. ret1.outputExpected = MakeTensor(outputTensorInfo1, std::vector( QuantizedVector(qScale, qOffset, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, @@ -112,7 +112,7 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - // channel 1 & 2 of the original input + // Channel 1 & 2 of the original input. ret2.outputExpected = MakeTensor(outputTensorInfo2, std::vector( QuantizedVector(qScale, qOffset, { 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, @@ -131,7 +131,7 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - // channel 0 of return 2 (i.e. channels 1 and 2 of the original input) + // Channel 0 of return 2 (i.e. channels 1 and 2 of the original input). ret3.outputExpected = MakeTensor(outputTensorInfo3, std::vector( QuantizedVector(qScale, qOffset, { 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, @@ -143,7 +143,7 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - // channel 1 of return 2 + // Channel 1 of return 2. ret4.outputExpected = MakeTensor(outputTensorInfo4, std::vector( QuantizedVector(qScale, qOffset, { 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, @@ -155,19 +155,19 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo }) )); - // NOTE: as a corollary of the no splitting of x and y restriction the x and y values of the view origins + // NOTE: as a corollary of the splitting of x and y restriction the x and y values of the view origins // have to be zero, the co-ordinates are as per the tensor info above channels, height/y, width/x - // note that under the hood the compute engine reverses these i.e. its coordinate system is x, y, channels - std::vector wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of output[0] + // note that under the hood the compute engine reverses these i.e. its coordinate system is x, y, channels. + std::vector wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of output[0]. armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); - std::vector wOrigin2 = {1, 0, 0}; //extent of the window is defined by size of output[1] + std::vector wOrigin2 = {1, 0, 0}; //Extent of the window is defined by size of output[1]. armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); - std::vector wOrigin3 = {0, 0, 0}; //extent of the window is defined by size of output[2] + std::vector wOrigin3 = {0, 0, 0}; //Extent of the window is defined by size of output[2]. armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); - std::vector wOrigin4 = {1, 0, 0}; //extent of the window is defined by size of output[3] + std::vector wOrigin4 = {1, 0, 0}; //Extent of the window is defined by size of output[3]. armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); bool subTensorsSupported = workloadFactory.SupportsSubTensors(); @@ -217,7 +217,7 @@ std::vector> SplitterTestCommon(armnn::IWorkloadFactory& wo CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); -// // Do the second split +// // Do the second split. armnn::SplitterQueueDescriptor data2; armnn::WorkloadInfo info2; AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get()); diff --git a/src/armnn/backends/test/TensorCopyUtils.cpp b/src/armnn/backends/test/TensorCopyUtils.cpp index e15c12a76f..82e80a52fe 100644 --- a/src/armnn/backends/test/TensorCopyUtils.cpp +++ b/src/armnn/backends/test/TensorCopyUtils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "TensorCopyUtils.hpp" @@ -47,12 +48,15 @@ void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem case arm_compute::DataType::QASYMM8: CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); break; + case arm_compute::DataType::F16: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; default: { throw armnn::UnimplementedException(); } } - handle->UnMap(); + handle->Unmap(); break; } #endif @@ -108,12 +112,15 @@ void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHand case arm_compute::DataType::QASYMM8: CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); break; + case arm_compute::DataType::F16: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; default: { throw armnn::UnimplementedException(); } } - const_cast(handle)->UnMap(); + const_cast(handle)->Unmap(); break; } #endif diff --git a/src/armnn/backends/test/WorkloadDataValidation.cpp b/src/armnn/backends/test/WorkloadDataValidation.cpp index c3a9d40116..bc3898b405 100644 --- a/src/armnn/backends/test/WorkloadDataValidation.cpp +++ b/src/armnn/backends/test/WorkloadDataValidation.cpp @@ -22,7 +22,7 @@ BOOST_AUTO_TEST_CASE(QueueDescriptor_Validate_WrongNumOfInputsOutputs) { InputQueueDescriptor invalidData; WorkloadInfo invalidInfo; - //invalid argument exception is expected, because no inputs and no outputs were defined + //Invalid argument exception is expected, because no inputs and no outputs were defined. BOOST_CHECK_THROW(RefWorkloadFactory().CreateInput(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -31,7 +31,7 @@ BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; - unsigned int inputShape[] = {2, 3, 4}; // <- invalid - input tensor has to be 4D + unsigned int inputShape[] = {2, 3, 4}; // <- Invalid - input tensor has to be 4D. unsigned int outputShape[] = {2, 3, 4, 5}; outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); @@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); - // invalid argument exception is expected, input tensor has to be 4D + // Invalid argument exception is expected, input tensor has to be 4D. BOOST_CHECK_THROW(RefPooling2dFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -55,7 +55,7 @@ BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) unsigned int inputNum = 2; unsigned int outputChannels = inputChannels; - unsigned int outputHeight = inputHeight + 1; //makes data invalid - Softmax expects height and width to be 1 + unsigned int outputHeight = inputHeight + 1; //Makes data invalid - Softmax expects height and width to be 1. unsigned int outputWidth = inputWidth; unsigned int outputNum = inputNum; @@ -74,7 +74,7 @@ BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - //invalid argument exception is expected, because height != 1 + //Invalid argument exception is expected, because height != 1. BOOST_CHECK_THROW(RefSoftmaxFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) unsigned int outputChannels = 3; unsigned int outputNum = 2; - // Define the tensor descriptors + // Define the tensor descriptors. armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; armnn::TensorInfo weightsDesc; @@ -120,8 +120,8 @@ BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) invalidData.m_Parameters.m_TransposeWeightMatrix = false; - //invalid argument exception is expected, because not all required fields have been provided - //in particular inputsData[0], outputsData[0] and weightsData can not be null + //Invalid argument exception is expected, because not all required fields have been provided. + //In particular inputsData[0], outputsData[0] and weightsData can not be null. BOOST_CHECK_THROW(RefFullyConnectedFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -135,8 +135,8 @@ BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight) constexpr unsigned int outputNum = inputNum; constexpr unsigned int outputChannels = inputChannels; - constexpr unsigned int outputHeight = inputHeight + 1; //makes data invalid - normalization requires - //input and output to have the same dimensions + constexpr unsigned int outputHeight = inputHeight + 1; //Makes data invalid - normalization requires. + //Input and output to have the same dimensions. constexpr unsigned int outputWidth = inputWidth; @@ -169,7 +169,7 @@ BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight) invalidData.m_Parameters.m_Beta = beta; invalidData.m_Parameters.m_K = kappa; - //invalid argument exception is expected, because input height != output height + //Invalid argument exception is expected, because input height != output height. BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -201,7 +201,7 @@ BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow) AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - // invalid since it has only 3 dimensions while the input tensor is 4d + // Invalid, since it has only 3 dimensions while the input tensor is 4d. std::vector wOrigin = {0, 0, 0}; armnn::SplitterQueueDescriptor::ViewOrigin window(wOrigin); invalidData.m_ViewOrigins.push_back(window); @@ -210,7 +210,7 @@ BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow) "match input."); BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); - // invalid since window extends past the boundary of input tensor + // Invalid, since window extends past the boundary of input tensor. std::vector wOrigin3 = {0, 0, 15, 0}; armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); invalidData.m_ViewOrigins[0] = window3; @@ -259,7 +259,7 @@ BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow) AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - // invalid since it has only 3 dimensions while the input tensor is 4d + // Invalid, since it has only 3 dimensions while the input tensor is 4d. std::vector wOrigin = {0, 0, 0}; armnn::MergerQueueDescriptor::ViewOrigin window(wOrigin); invalidData.m_ViewOrigins.push_back(window); @@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow) "match input."); BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); - // invalid since window extends past the boundary of output tensor + // Invalid, since window extends past the boundary of output tensor. std::vector wOrigin3 = {0, 0, 15, 0}; armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); invalidData.m_ViewOrigins[0] = window3; @@ -308,17 +308,17 @@ BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputNumbers) AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - // too few inputs + // Too few inputs. BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); - // correct + // Correct. BOOST_CHECK_NO_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo)); AddInputToWorkload(invalidData, invalidInfo, input3TensorInfo, nullptr); - // too many inputs + // Too many inputs. BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } @@ -331,7 +331,7 @@ BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes) unsigned int shape1[] = {1, 1, 2, 1}; unsigned int shape2[] = {1, 1, 3, 2}; - // Incompatible shapes even with broadcasting + // Incompatible shapes even with broadcasting. { input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); input2TensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32); @@ -347,7 +347,7 @@ BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes) BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } - // Output size not compatible with input sizes + // Output size not compatible with input sizes. { input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); input2TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); @@ -360,7 +360,7 @@ BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes) AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - // output differs + // Output differs. BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } } @@ -374,7 +374,7 @@ BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimension constexpr unsigned int input0Shape[] = { 2, 2, 4, 4 }; constexpr std::size_t dimensionCount = std::extent::value; - // Check dimension consistency for input tensors + // Checks dimension consistency for input tensors. for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) { unsigned int input1Shape[dimensionCount]; @@ -399,7 +399,7 @@ BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimension BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } - // Check dimension consistency for input and output tensors + // Checks dimension consistency for input and output tensors. for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) { unsigned int outputShape[dimensionCount]; @@ -430,7 +430,7 @@ BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; - // The input and output shapes should have the same number of elements, but these don't + // The input and output shapes should have the same number of elements, but these don't. unsigned int inputShape[] = { 1, 1, 2, 3 }; unsigned int outputShape[] = { 1, 1, 1, 2 }; @@ -443,8 +443,29 @@ BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); - // InvalidArgumentException is expected, because the number of elements don't match + // InvalidArgumentException is expected, because the number of elements don't match. BOOST_CHECK_THROW(RefReshapeFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); } + +BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2 }; + unsigned int outputShape[] = { 1 }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32); + + LstmQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException); +} + BOOST_AUTO_TEST_SUITE_END() -- cgit v1.2.1