author | Aron Virginas-Tar <Aron.Virginas-Tar@arm.com> | 2019-08-28 18:08:46 +0100
---|---|---
committer | mike.kelly <mike.kelly@arm.com> | 2019-08-30 10:58:54 +0000
commit | 00d306e4db5153a4f4d280de4d4cf3e03788fefb (patch) |
tree | 329c15f71c662e199a24dc0812bf95cb389ddbd8 /src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp |
parent | 08b518687d2bf2683a2c5f571d3e76d71d67d048 (diff) |
download | armnn-00d306e4db5153a4f4d280de4d4cf3e03788fefb.tar.gz |
IVGCVSW-3381 Break up LayerTests.hpp into more manageable files
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Icf39434f09fd340ad664cb3b97b8bee6d9da4838
Diffstat (limited to 'src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp')
-rw-r--r-- | src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp | 3145 |
1 files changed, 3145 insertions, 0 deletions
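The diff below adds the Convolution2d test implementations split out of the monolithic LayerTests.hpp. For orientation, here is a minimal sketch of how a backend test might drive one of the helpers defined in this file; the RefWorkloadFactory include path and the argument choices are illustrative assumptions, not part of this patch:

```cpp
#include <reference/RefWorkloadFactory.hpp>

// Hypothetical driver: runs the Float32 3x3 convolution test against the reference backend.
LayerTestResult<float, 4> RunSimpleConvolution2d3x3()
{
    armnn::RefWorkloadFactory workloadFactory;
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory,
        nullptr,                  // No shared memory manager.
        0.0f,                     // qScale (unused for Float32).
        0,                        // qOffset (unused for Float32).
        true,                     // biasEnabled.
        armnn::DataLayout::NCHW);
}
```

In the ArmNN test suite these helpers are normally registered through the per-backend ARMNN_AUTO_TEST_CASE macros rather than called directly.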
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp new file mode 100644 index 0000000000..01c1b1868e --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp @@ -0,0 +1,3145 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Conv2dTestImpl.hpp" + +#include <DataLayoutIndexed.hpp> +#include <Permute.hpp> +#include <TensorUtils.hpp> + +#include <armnn/ArmNN.hpp> + +#include <backendsCommon/CpuTensorHandle.hpp> + +#include <backendsCommon/test/QuantizeHelper.hpp> +#include <backendsCommon/test/TensorCopyUtils.hpp> +#include <backendsCommon/test/WorkloadTestUtils.hpp> + +#include <test/TensorHelpers.hpp> + +#include <boost/numeric/conversion/cast.hpp> + +#include <string> + +// +// Static data +// + +// 2-channel bias used by a number of Conv2d tests. +static std::vector<float> Bias2({0, 2}); + +static std::vector<float> Bias4({1, 2, 3, 4}); + +static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4}); + +// 3-channel 16x8 image used as common input data for a number of Conv2d tests. +static std::vector<float> ConvInput3x8x16({ + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}); + +// +// Helper templates +// + +// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled. 
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2)); + return bias; + } + else + { + return boost::multi_array<T, 1>(); + } +} + +// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled. +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias4)); + return bias; + } + else + { + return boost::multi_array<T, 1>(); + } +} + +// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled. +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType); + boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias8)); + return bias; + } + else + { + return boost::multi_array<T, 1>(); + } +} + +// Helper template that returns Bias2, Bias4 or Bias8 (selected by the number of output channels) or an empty vector depending on whether bias is enabled. +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout) +{ + const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout); + const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex(); + const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex]; + + switch (outputChannels) + { + case 2: + default: + { + return GetBias2<ArmnnType>(biasEnabled, qScale); + } + case 4: + { + return GetBias4<ArmnnType>(biasEnabled, qScale); + } + case 8: + { + return GetBias8<ArmnnType>(biasEnabled, qScale); + } + } +} + +// +// Implementation templates +// + +// Mapping from input type to bias type for fully connected layers. +// float => float, uint8_t => int32_t +template<typename T> +struct FullyConnectedBiasTypeForInputType; + +template<> +struct FullyConnectedBiasTypeForInputType<float> +{ + using Type = float; +}; + +template<> +struct FullyConnectedBiasTypeForInputType<uint8_t> +{ + using Type = int32_t; +}; + +// Modifies a std::vector in-place using a specified bias. +template<typename T, typename B> +void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset, + const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) +{ + BOOST_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()), + "Invalid type and parameter combination."); + BOOST_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()), + "Invalid type and parameter combination."); + + // Note we need to dequantize and re-quantize the image value and the bias.
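// For example (illustrative numbers only): with vScale = 0.25f and vOffset = 0, a stored value of 8
// dequantizes to 8 * 0.25f = 2.0f; adding a bias that dequantizes to 0.5f and re-quantizing gives
// round(2.5f / 0.25f) + 0 = 10.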
+ for (uint32_t i = 0; i < bias.size(); ++i) + { + float dBias = SelectiveDequantize(bias[i], bScale, bOffset); + for (uint32_t y = 0; y < h; ++y) + { + for (uint32_t x = 0; x < w; ++x) + { + uint32_t offset = (i * h + y) * w + x; + BOOST_ASSERT(offset < v.size()); + T& outRef = v[offset]; + float dOutput = SelectiveDequantize(outRef, vScale, vOffset); + outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset); + } + } + } +} + +// +// Convolution2d implementations +// + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>> +LayerTestResult<T, 4> SimpleConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array<T, 4>& originalInput, + const boost::multi_array<T, 4>& originalKernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& originalOutputExpected, + float qScale, + int32_t qOffset, + const armnn::DataLayout layout = armnn::DataLayout::NCHW, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1, + uint32_t dilationX = 1, + uint32_t dilationY = 1) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]); + unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]); + + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]); + unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]); + + bool biasEnabled = bias.size() > 0; + + // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). + BOOST_ASSERT(inputNum == 1); + BOOST_ASSERT(outputNum == 1); + + // If a bias is used, its size must equal the number of output channels. + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + + // Note these tensors will use two (identical) batches. + armnn::TensorInfo inputTensorInfo = + armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType); + armnn::TensorInfo outputTensorInfo = + armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType); + armnn::TensorInfo kernelDesc = + armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. 
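// For quantized types ArmNN expects the bias to be stored with scale = inputScale * weightScale and
// offset 0, so that it can be added directly to the accumulator of (input x weight) products; both
// scales are qScale in these tests, hence the qScale * qScale below.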
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + // Construct input data - two batches of the same input image. + std::vector<T> inputImage; + inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth); + std::vector<T> inputData; + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + + // at this point if we require it permute the input data + const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(inputData.size()); + armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T)); + inputData = tmp; + } + + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<T> outputImage; + outputImage.assign(originalOutputExpected.data(), + originalOutputExpected.data() + outputChannels*outputHeight*outputWidth); + + // Apply bias to output image if it is enabled. + if(biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + // Construct expected output data - two identical images. + std::vector<T> outputData; + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + + // at this point if we require it permute the expected output + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(outputData.size()); + armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T)); + outputData = tmp; + } + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + // Permute the kernel if necessary + boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel); + if (layout == armnn::DataLayout::NHWC) + { + armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T)); + } + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + if(biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. 
+ data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = layout; + data.m_Parameters.m_DilationX = dilationX; + data.m_Parameters.m_DilationY = dilationY; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>> +LayerTestResult<T, 4> SimpleConvolution2dNhwcTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& kernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& outputExpected, + const armnn::DataLayout dataLayout, + float qScale, + int32_t qOffset, + uint32_t padLeft = 1, + uint32_t padTop = 1, + uint32_t padRight = 1, + uint32_t padBottom = 1, + uint32_t strideX = 1, + uint32_t strideY = 1) +{ + unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]); + + unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + + unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + + bool biasEnabled = bias.size() > 0; + + // Creates the tensors. + armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType); + armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels}, + ArmnnType); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType); + + // Construct the input data. + std::vector<T> inputData; + inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels); + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + // Construct the output data, with bias applied, as appropriate. 
+ std::vector<T> outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels); + + LayerTestResult<T, 4> ret(outputTensorInfo); + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + armnn::Convolution2dQueueDescriptor data; + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = dataLayout; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T,4> Convolution1dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + using B = armnn::ResolveType<ArmnnBType>; + // Until we have a specialist 1D convolution layer, we can fake one using + // 2D convolution with the final dimension set to 1. + // I don't anticipate this being particularly slow, given that convolution is implemented + // as a matrix multiplication, at which point dimension doesn't matter. + + unsigned int batchSize = 1; + unsigned int inputChannels = 2; + unsigned int outputChannels = 3; + unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height'). + unsigned int kernelSize = 3; + unsigned int padSize = 2; + unsigned int stride = 1; + unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride. + + armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType); + armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType); + armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType); + armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType<T>()) + { + inputInfo.SetQuantizationScale(qScale); + inputInfo.SetQuantizationOffset(qOffset); + outputInfo.SetQuantizationScale(qScale); + outputInfo.SetQuantizationOffset(qOffset); + kernelInfo.SetQuantizationScale(qScale); + kernelInfo.SetQuantizationOffset(qOffset); + biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale()); + biasInfo.SetQuantizationOffset(0); + } + + std::vector<T> inputData( + QuantizedVector<T>(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), { + 5.0f, -2.0f, 2.5f, 0.0f, 1.0f, + -3.0f, 3.2f, 5.0f, 2.0f, 3.0f, + })); + + std::vector<T> kernelData( + QuantizedVector<T>(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f, + 0.0f, 2.0f, -1.5f, + + 0.0f, 0.0f, 0.0f, + 0.2f, 0.2f, 0.2f, + + 0.5f, 0.0f, 0.5f, + 0.0f, -1.0f, 0.0f + })); + + std::vector<B> biasData( + QuantizedVector<B>(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f + })); + + std::vector<T> outputData( + QuantizedVector<T>(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), { + 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f, + -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f, + 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f + })); + + // Optionally apply bias to output image. + if(biasEnabled) + { + ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), + biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), + 1, outputSize); + } + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo); + armnn::ScopedCpuTensorHandle biasTensor(biasInfo); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); + + AddInputToWorkload(data, info, inputInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = stride; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = padSize; + data.m_Parameters.m_PadBottom = padSize; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + + ExecuteWorkload(*workload, memoryManager); + + // Output + LayerTestResult<T,4> ret(outputInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + ret.outputExpected = MakeTensor<T, 4>(outputInfo, outputData); + return ret; +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + armnn::DataLayout dataLayout) 
+{ + // Use a single-batch 1-channel 4x3 image. + + armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, + { + 1, 5, 2, 3, + 8, 7, 3, 6, + 3, 3, 9, 1 + }); + + + // Use a single 1-channel 3x3 kernel. + armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, { + 4, 5, 6, + 0, 0, 0, + 3, 2, 1 + }); + + // Expected output is 1 batch of a 1-channel 4x3 image. + armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType); + + const std::vector<float> outputData = + { + 23, 41, 33, 21, + 44, 65, 76, 52, + 82, 85, 79, 42 + }; + + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData); + + return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>( + workloadFactory, + memoryManager, + input, + kernel, + boost::multi_array<T, 1>(), + expectedOutput, + dataLayout, + qScale, + qOffset); +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout& dataLayout) +{ + // Input is a single-batch, 1 channel, 5x5 image. + armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, + { + 1, 5, 2, 3, 5, + 8, 7, 3, 6, 3, + 3, 3, 9, 1, 9, + 4, 1, 8, 1, 3, + 6, 8, 1, 9, 2 + }); + + // Use a 3x3 kernel. + armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, + { + 4, 5, 6, + 0, 0, 0, + 3, 2, 1 + }); + + // Expected output is a single-batch, 1 channel, 3x3 image. + armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType); + + const std::vector<T> outputData = + { + 23, 33, 24, + 91, 99, 48, + 26, 50, 19 + }; + + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData); + + uint32_t padLeft = 1; + uint32_t padTop = 1; + uint32_t padRight = 1; + uint32_t padBottom = 1; + uint32_t strideX = 2; + uint32_t strideY = 2; + + return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>( + workloadFactory, + memoryManager, + input, + kernel, + boost::multi_array<T, 1>(), + expectedOutput, + dataLayout, + qScale, + qOffset, + padLeft, + padTop, + padRight, + padBottom, + strideX, + strideY); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout layout) +{ + // Use common single-batch 3-channel 16x8 image. + armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch with 3-channel 3x5 kernels.
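// (Convolution2d weights are laid out as [output channels, input channels, height, width], so the
// {2, 3, 5, 3} shape below describes two 3-channel filters, each 5 high and 3 wide.)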
+ armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x4 image (the test impl duplicates it into a second batch). + armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, + -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout layout) +{ + // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path. + + // Use common single-batch 3-channel 16x8 image. + armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch of 3-channel 3x3 kernels. + armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x6 image.
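// (Valid-only convolution with stride 1: outputHeight = 8 - 3 + 1 = 6 and outputWidth = 16 - 3 + 1 = 14,
// matching the {1, 2, 6, 14} output shape below.)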
+ armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::DataLayout layout, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 3x3 image as input. + armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31, + 12,22,32, + 13,23,33 + }))); + + // Use 1 batch of a 1-channel 2x2 kernel. + armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21, + -12,-22, + }))); + +// Expected output is 1 batch of a 1-channel 6x8 image. +// Manually calculated like this: +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] +//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..] +//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..] +//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..] 
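// Output shape check: with padding (left, top, right, bottom) = (1, 2, 3, 4), a 2x2 kernel and stride 1,
// outputHeight = 3 + 2 + 4 - 2 + 1 = 8 and outputWidth = 3 + 1 + 3 - 2 + 1 = 6.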
+ armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 0, 0, 0, 0, 0, 0, + -242, -594, -934, -372, 0, 0, + -495, -1190, -1850, -725, 0, 0, + -538, -1256, -1916, -748, 0, 0, + -273, -626, -946, -363, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(false, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + 1, // Padding left. + 2, // Padding top. + 3, // Padding right. + 4); // Padding bottom. +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::DataLayout layout, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 5x5 image as input. + armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType); + boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + 11,21,31,41,51, + 12,22,32,42,52, + 13,23,33,43,53, + 14,24,34,44,54, + 15,25,35,45,55, + }))); + + // Use 1 batch of a 1-channel 4x4 kernel. + armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType); + boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -11,-21,-31,-41, + -12,-22,-32,-42, + -13,-23,-33,-43, + -14,-24,-34,-44, + }))); + + // Expected output is 1 batch of a 1-channel 5x5 image. + armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>( + QuantizedVector<T>(qScale, qOffset, { + -7140, -10580, -13940, -9300, -5230, + -9590, -14120, -18520, -12290, -6860, + -9980, -14560, -18960, -12560, -7000, + -7518, -10904, -14144, -9318, -5152, + -5032, -7256, -9376, -6142, -3368, + }))); + + return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(false, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2); // Padding bottom.
+} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const std::vector<float>& inputNoQuantizedValues, + armnn::TensorInfo& inputTensorInfo, + const std::vector<float>& kernelNoQuantizedValues, + armnn::TensorInfo& kernelTensorInfo, + const std::vector<float>& outputExpectedNoQuantizedValues, + armnn::TensorInfo& outputTensorInfo, + uint32_t dilationX, + uint32_t dilationY, + armnn::DataLayout layout = armnn::DataLayout::NCHW, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1, + bool biasEnabled = false +) +{ + float qScale; + int32_t qOffset; + switch (ArmnnType) + { + case armnn::DataType::QuantisedAsymm8: + { + qScale = 0.1f; + qOffset = 128; + break; + } + case armnn::DataType::QuantisedSymm16: + { + qScale = 0.1f; + qOffset = 0; + break; + } + case armnn::DataType::Float32: + default: + { + qScale = 0.f; + qOffset = 0; + break; + } + } + + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + kernelTensorInfo.SetQuantizationScale(qScale); + kernelTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + auto input = MakeTensor<T, 4>(inputTensorInfo, + std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), + inputTensorInfo.GetQuantizationOffset(), + inputNoQuantizedValues))); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, + std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), + kernelTensorInfo.GetQuantizationOffset(), + kernelNoQuantizedValues))); + auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo, + std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + outputExpectedNoQuantizedValues))); + + return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + padLeft, + padTop, + padRight, + padBottom, + strideX, + strideY, + dilationX, + dilationY); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType); + std::vector<float> kernelNoQuantizedValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + // Since the dilation rate is 3 this will dilate the kernel to be like 7x7, + // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1 + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType); + 
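// (Dilated kernel extent: d * (K - 1) + 1 = 3 * (3 - 1) + 1 = 7, so each spatial dimension gives
// (10 - 7 + 0) / 1 + 1 = 4, hence the {1, 1, 4, 4} shape above.)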
std::vector<float> outputExpectedNoQuantizedValues = + { + 6., 5., 5., 5., + 6., 5., 5., 5., + 6., 5., 5., 5., + 3., 2., 2., 2. + }; + + return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 3, + 3, + layout, + 0, 0, 0, 0, // Default padding, spelled out so that biasEnabled binds to the biasEnabled parameter rather than padLeft. + 1, 1, // Default strides. + biasEnabled); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType); + std::vector<float> kernelNoQuantizedValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + // Since the dilation rate is 3 this will dilate the kernel to be like 7x7, + // therefore the output will be 4x4: (I−K+2P)/S +1 => (10-7 +0)/1 +1 + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 12., 10., 10., 10., + 12., 10., 10., 10., + 12., 10., 10., 10., + 6., 4., 4., 4.
+ }; + + return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 3, + 3, + layout, + 0, 0, 0, 0, // Default padding, spelled out so that biasEnabled binds to the biasEnabled parameter rather than padLeft. + 1, 1, // Default strides. + biasEnabled); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + }; + + armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType); + std::vector<float> kernelNoQuantizedValues = + { + 1, 2, + 3, 4 + }; + + // Since the dilation rate is 2 this will dilate the kernel to be like 3x3: d(K-1)+1 --> 2 x (2-1) + 1 = 3, + // therefore the output will be 4x4: trunc((I - K_dilated + P)/S) + 1 => trunc((10 - 3 + 2)/3) + 1 = 4, + // where dilation size d = 2; kernel size K = 2; dilated kernel size K_dilated = 3; input size I = 10; + // total padding P = 1 + 1 = 2; stride S = 3. + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 4, 7, 7, 3, + 6, 10, 10, 4, + 6, 10, 10, 4, + 2, 3, 3, 1 + }; + uint32_t padLeft = 1; + uint32_t padTop = 1; + uint32_t padRight = 1; + uint32_t padBottom = 1; + + return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 2, + 2, + layout, + padLeft, + padTop, + padRight, + padBottom, + 3, + 3, + biasEnabled + ); +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T,4> CompareConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = 2; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; + unsigned int biasShape[] = {outputChannels}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType); + outputTensorInfo =
armnn::TensorInfo(4, outputShape, ArmnnType); + kernelDesc = armnn::TensorInfo(4, kernelShape, ArmnnType); + biasDesc = armnn::TensorInfo(1, biasShape, ArmnnType); + + LayerTestResult<T,4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234); + auto bias = MakeRandomTensor<T, 1>(biasDesc, 1028); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::Convolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + workloadRef->PostAllocationConfigure(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +// +// DepthwiseConvolution2d implementations +// + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>> +LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array<T, 4>& input, + const boost::multi_array<T, 4>& kernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& outputExpected, + float qScale, + int32_t qOffset, + const armnn::DataLayout layout, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1) +{ + unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]); + unsigned int 
inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]); + unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]); + unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]); + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]); + unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]); + + // If a bias is used, its size must equal the number of output channels. + bool biasEnabled = bias.size() > 0; + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + // Creates the tensors. + armnn::TensorInfo inputTensorInfo = + armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType); + armnn::TensorInfo outputTensorInfo = + armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType); + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + // Construct the input data. + std::vector<T> inputData; + inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth); + + // At this point if we require it permute the input data + const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(inputData.size()); + armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T)); + inputData = tmp; + } + + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + // Construct the output data, with bias applied, as appropriate. 
+ std::vector<T> outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + if (biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + // At this point if we require it permute the expected output + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(outputData.size()); + armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T)); + outputData = tmp; + } + + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + if (biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + armnn::DepthwiseConvolution2dQueueDescriptor data; + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs. + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = layout; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout layout) +{ + using B = armnn::ResolveType<ArmnnBType>; + + unsigned int inputHeight = 3; + unsigned int inputWidth = 3; + unsigned int inputChannels = 2; + unsigned int inputNum = 1; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int kernelChannels = inputChannels; + unsigned int kernelDepthMultiplier = 1; + + unsigned int outputHeight = 1; + unsigned int outputWidth = 1; + unsigned int outputChannels = kernelChannels; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo = + armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType); + armnn::TensorInfo outputTensorInfo = + armnnUtils::GetTensorInfo(outputNum, outputChannels, 
outputHeight, outputWidth, layout, ArmnnType); + armnn::TensorInfo kernelDesc({kernelDepthMultiplier, kernelChannels, kernelHeight, kernelWidth}, + ArmnnType); + armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + std::vector<T> inputData = std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + })); + // at this point if we require it permute the input data + const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(inputData.size()); + armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T)); + inputData = tmp; + } + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + std::vector<T> kernelData = std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + })); + auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData); + + // Manually calculated. + std::vector<T> outputImage( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + {0.f, 0.f}) + ); + + // Optionally apply bias to output image. + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(outputImage.size()); + armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T)); + outputImage = tmp; + } + + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
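// With a 3x3 input, a 3x3 kernel, no padding and stride 1 (set below), each channel reduces to a
// single element: outputHeight = outputWidth = 3 - 3 + 1 = 1, matching the {0.f, 0.f} expected
// output above.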
+ data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 0; + data.m_Parameters.m_PadBottom = 0; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = layout; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout layout) +{ + using B = armnn::ResolveType<ArmnnBType>; + + unsigned int depthMultiplier = 2; + + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 2; + unsigned int inputBatchSize = 1; + + unsigned int kernelHeight = 5; + unsigned int kernelWidth = 3; + + unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2; + unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2; + unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo( + inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType); + armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo( + outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType); + armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, + ArmnnType); + armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. 
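+ // Note: the bias scale below follows the usual quantization convention of inputScale * weightScale (here qScale * qScale) with a zero offset, so the quantized bias adds directly into the int32 accumulator.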
+ if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + // NOTE: originalInputData is in NCHW format + std::vector<T> originalInputData = std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + })); + std::vector<T> inputData = originalInputData; + // If required, permute the input data into the requested layout + const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; + if (layout == armnn::DataLayout::NHWC) + { + armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, + originalInputData.data(), inputData.data(), sizeof(T)); + } + auto input = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<B> biasV(QuantizedVector<B>(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2, 1, -1})); + auto bias = MakeTensor<B, 1>(biasDesc, biasV); + + std::vector<T> kernelData = std::vector<T>( + QuantizedVector<T>(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + 0, 0, 0, + 0, -1, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 0, 0, 0, + 0, 0, 0, + 0, 1, 0, + 0, 0, 0, + 0, 0, 0 + + })); + auto kernel = MakeTensor<T, 4>(kernelDesc, kernelData); + + // Manually calculated.
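+ // Output shape per batch is 4 channels (2 input channels * depth multiplier 2) of 6x7: height = 8 - 5 + 1 + 2 (pad) = 6, width = (16 - 3 + 1) / 2 (stride) = 7.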
+ std::vector<T> originalOutputImage = std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, + 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f + })); + + // Optionally apply bias to output image. + if(biasEnabled) + { + ApplyBias(originalOutputImage, + outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + biasV, + biasDesc.GetQuantizationScale(), + biasDesc.GetQuantizationOffset(), + outputWidth, + outputHeight); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + std::vector<T> outputImage = originalOutputImage; + if (layout == armnn::DataLayout::NHWC) + { + armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, + originalOutputImage.data(), outputImage.data(), sizeof(T)); + } + + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
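+ // StrideX of 2 and 1-pixel top/bottom padding produce the 6x7 output computed above.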
+ data.m_Parameters.m_StrideX = 2; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 1; + data.m_Parameters.m_PadBottom = 1; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = layout; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>> +LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const boost::multi_array<T, 4>& originalInput, + const boost::multi_array<T, 4>& originalKernel, + const boost::multi_array<B, 1>& bias, + const boost::multi_array<T, 4>& originalOutputExpected, + float qScale, + int32_t qOffset, + const armnn::DataLayout layout = armnn::DataLayout::NCHW, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1, + uint32_t dilationX = 1, + uint32_t dilationY = 1) +{ + unsigned int inputHeight = boost::numeric_cast<unsigned int>(originalInput.shape()[2]); + unsigned int inputWidth = boost::numeric_cast<unsigned int>(originalInput.shape()[3]); + unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]); + unsigned int inputNum = boost::numeric_cast<unsigned int>(originalInput.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]); + unsigned int outputNum = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]); + + unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]); + unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]); + unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]); + + bool biasEnabled = bias.size() > 0; + + // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). + BOOST_ASSERT(inputNum == 1); + BOOST_ASSERT(outputNum == 1); + + // If a bias is used, its size must equal the number of output channels. + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + // Note these tensors will use two (identical) batches. + armnn::TensorInfo inputTensorInfo = + armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType); + armnn::TensorInfo outputTensorInfo = + armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType); + + // The kernel must always be in NCHW layout, regardless of the layout of the input and output, for depthwise convolution.
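+ // In other words the kernel shape is always [ depthMultiplier, inputChannels, kernelHeight, kernelWidth ].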
+ armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, ArmnnType); + + armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + LayerTestResult<T, 4> ret(outputTensorInfo); + + // Construct input data + std::vector<T> input; + input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth); + std::vector<T> inputData; + inputData.insert(inputData.end(), input.begin(), input.end()); + inputData.insert(inputData.end(), input.begin(), input.end()); + + // If required, permute the input data into the requested layout + const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(inputData.size()); + armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T)); + inputData = tmp; + } + + auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData); + + std::vector<T> output; + output.assign(originalOutputExpected.data(), + originalOutputExpected.data() + outputChannels*outputHeight*outputWidth); + + // Apply bias to output data if it is enabled. + if(biasEnabled) + { + std::vector<T> biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + // Construct expected output data + std::vector<T> outputData; + outputData.insert(outputData.end(), output.begin(), output.end()); + outputData.insert(outputData.end(), output.begin(), output.end()); + + // If required, permute the expected output into the requested layout + if (layout == armnn::DataLayout::NHWC) + { + std::vector<T> tmp(outputData.size()); + armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T)); + outputData = tmp; + } + ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel); + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + if(biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
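+ // Unlike the fixed-configuration overloads above, this one also forwards the dilation parameters, which is what the 3x3 dilation tests below rely on.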
+ data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_DataLayout = layout; + data.m_Parameters.m_DilationX = dilationX; + data.m_Parameters.m_DilationY = dilationY; + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled, + const armnn::DataLayout layout) +{ + // Use a single-batch 2-channel 5x5 image as input. + armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType); + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), + { + 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49 + }))); + + // Use a depth multiplier of 1 on a 2-channel 4x4 kernel. + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( + QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), + { + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + }))); + + // Expected output is 1 batch of a 2-channel 5x5 image. + // Calculated using the Python TensorFlow library with strideX=1, strideY=1. + armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + { + 1062, 1580, 1850, 1530, 1117, + 2140, 3108, 3500, 2842, 2042, + 3580, 5068, 5460, 4342, 3062, + 3618, 5072, 5390, 4248, 2971, + 3074, 4282, 4510, 3533, 2457, + + 1550, 2284, 2362, 1955, 1428, + 2910, 4206, 4342, 3528, 2536, + 3390, 4886, 5022, 4068, 2916, + 3566, 5056, 5182, 4133, 2922, + 3100, 4352, 4452, 3517, 2465 + }))); + + return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2, // Padding bottom.
+ 1, // strideX + 1); // strideY +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + auto layout = armnn::DataLayout::NHWC; + + armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType); + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), + { + 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49 + }))); + + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( + QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), + { + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + }))); + + armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + { + 1062, 1580, 1850, 1530, 1117, + 2140, 3108, 3500, 2842, 2042, + 3580, 5068, 5460, 4342, 3062, + 3618, 5072, 5390, 4248, 2971, + 3074, 4282, 4510, 3533, 2457, + + 1550, 2284, 2362, 1955, 1428, + 2910, 4206, 4342, 3528, 2536, + 3390, 4886, 5022, 4068, 2916, + 3566, 5056, 5182, 4133, 2922, + 3100, 4352, 4452, 3517, 2465 + }))); + + return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2, // Padding bottom. 
+ 1, // strideX + 1); // strideY +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, + typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + auto layout = armnn::DataLayout::NHWC; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType); + auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>( + QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>( + QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }))); + + uint32_t padLeft = 0; + uint32_t padTop = 0; + uint32_t padRight = 0; + uint32_t padBottom = 0; + uint32_t strideX = 1; + uint32_t strideY = 1; + uint32_t dilationX = 3; + uint32_t dilationY = 3; + + // Since the dilation rate is 3, the effective kernel covers a 7x7 area, reducing the 9x9 input to a 3x3 output of all 5s. + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType); + boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>( + QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + { + 5, 5, 5, + 5, 5, 5, + 5, 5, 5 + }))); + + return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias2<ArmnnBType>(biasEnabled, qScale * qScale), + expectedOutput, + qScale, + qOffset, + layout, + padLeft, + padTop, + padRight, + padBottom, + strideX, + strideY, + dilationX, + dilationY); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const std::vector<float>& inputNoQuantizedValues, + armnn::TensorInfo& inputTensorInfo, + const std::vector<float>& kernelNoQuantizedValues, + armnn::TensorInfo& kernelTensorInfo, + const std::vector<float>& outputExpectedNoQuantizedValues, + armnn::TensorInfo& outputTensorInfo, + uint32_t dilationX, + uint32_t dilationY, + armnn::DataLayout layout = armnn::DataLayout::NCHW, + bool biasEnabled = false) +{ + float qScale; + int32_t qOffset; + switch (ArmnnType) + { + case armnn::DataType::QuantisedAsymm8: + { + qScale = 0.1f; + qOffset = 128; + break; + } + case armnn::DataType::QuantisedSymm16: + { + qScale = 0.1f; + qOffset = 0; + break; + } + case armnn::DataType::Float32: + default: + { + qScale = 0.f; + qOffset = 0; + break; + } + } + + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + kernelTensorInfo.SetQuantizationScale(qScale); + kernelTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + auto input = MakeTensor<T, 
4>(inputTensorInfo, + std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), + inputTensorInfo.GetQuantizationOffset(), + inputNoQuantizedValues))); + auto kernel = MakeTensor<T, 4>(kernelTensorInfo, + std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), + kernelTensorInfo.GetQuantizationOffset(), + kernelNoQuantizedValues))); + auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo, + std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + outputExpectedNoQuantizedValues))); + + uint32_t padLeft = 0; + uint32_t padTop = 0; + uint32_t padRight = 0; + uint32_t padBottom = 0; + uint32_t strideX = 1; + uint32_t strideY = 1; + + return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + input, + kernel, + GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout), + expectedOutput, + qScale, + qOffset, + layout, + padLeft, + padTop, + padRight, + padBottom, + strideX, + strideY, + dilationX, + dilationY); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType); + std::vector<float> kernelNoQuantizedValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + // Since the dilation rate is 3, the effective kernel size is 7x7, + // so the output is 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4 + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 6., 5., 5., 5., + 6., 5., 5., 5., + 6., 5., 5., 5., + 3., 2., 2., 2.
+ }; + + return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 3, + 3, + layout, + biasEnabled); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + + armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType); + std::vector<float> kernelNoQuantizedValues = + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + + 1, 2, 3, + 4, 5, 6, + 7, 8, 9 + }; + + // Since the dilation rate is 3, the effective kernel size is 7x7, + // so the output is 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4 + armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 6., 5., 5., 5., + 6., 5., 5., 5., + 6., 5., 5., 5., + 3., 2., 2., 2., + + 6., 5., 5., 5., + 6., 5., 5., 5., + 6., 5., 5., 5., + 3., 2., 2., 2.
+ }; + + return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 3, + 3, + layout, + biasEnabled); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 10.0, 10.0, 10.0, + 10.0, 10.0, 10.0, + 10.0, 10.0, 10.0, + + 21.0, 22.0, 23.0, + 24.0, 25.0, 26.0, + 27.0, 28.0, 29.0 + }; + + armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType); + + std::vector<float> kernelNoQuantizedValues = + { + 0.25f, 0.25f, + 0.25f, 0.25f, + + 0.25f, 0.25f, + 0.25f, 0.25f, + + 0.0f , 0.0f, + 0.0f , 0.1f, + + 0.0f , 0.0f, + 0.0f , 0.1f, + + 0.2f , 0.0f, + 0.0f , 0.0f, + + 0.2f , 0.0f, + 0.0f , 0.0f, + + 0.0f , 0.3f, + 0.0f , 0.0f, + + 0.0f , 0.3f, + 0.0f , 0.0f + }; + + armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 10.f, 10.f, + 10.f, 10.f, + + 1.f, 1.f, + 1.f, 1.f, + + 2.f, 2.f, + 2.f, 2.f, + + 3.f, 3.f, + 3.f, 3.f, + + 23.f, 24.f, + 26.f, 27.f, + + 2.5f, 2.6000001f, + 2.8f, 2.9f, + + 4.2000003f, 4.4f, + 4.8f, 5.f, + + 6.6000004f, 6.9f, + 7.5000005f, 7.8f + }; + + + return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 1, + 1, + layout, + biasEnabled); +} + +template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T> +LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType); + std::vector<float> inputNoQuantizedValues = + { + 10.0, 10.0, 10.0, + 10.0, 10.0, 10.0, + 10.0, 10.0, 10.0, + + 21.0, 22.0, 23.0, + 24.0, 25.0, 26.0, + 27.0, 28.0, 29.0 + }; + + armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType); + + std::vector<float> kernelNoQuantizedValues = + { + 0.25f, 0.25f, + 0.25f, 0.25f, + + 0.2f , 0.0f, + 0.0f , 0.0f, + + 0.0f , 0.0f, + 0.0f , 0.1f, + + 0.0f , 0.3f, + 0.0f , 0.0f + + }; + + armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType); + std::vector<float> outputExpectedNoQuantizedValues = + { + 10.f, 10.f, + 10.f, 10.f, + + 1.f, 1.f, + 1.f, 1.f, + + 4.2000003f, 4.4f, + 4.8f, 5.f, + + 6.6000004f, 6.9f, + 7.5000005f, 7.8f + }; + + + return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>( + workloadFactory, + memoryManager, + inputNoQuantizedValues, + inputTensorInfo, + kernelNoQuantizedValues, + kernelTensorInfo, + outputExpectedNoQuantizedValues, + outputTensorInfo, + 1, + 1, + layout, + biasEnabled); +} + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + 
armnn::IWorkloadFactory& refWorkloadFactory, + const armnnUtils::DataLayoutIndexed& layout) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int channelMultiplier = 1; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = inputChannels * channelMultiplier; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + std::vector<unsigned int> inputShape; + std::vector<unsigned int> outputShape; + std::vector<unsigned int> kernelShape{ channelMultiplier, inputChannels, kernelHeight, kernelWidth }; + std::vector<unsigned int> biasShape{ outputChannels }; + switch (layout.GetDataLayout()) + { + case armnn::DataLayout::NCHW: + inputShape = { inputNum, inputChannels, inputHeight, inputWidth }; + outputShape = { outputNum, outputChannels, outputHeight, outputWidth }; + break; + case armnn::DataLayout::NHWC: + inputShape = { inputNum, inputHeight, inputWidth, inputChannels }; + outputShape = { outputNum, outputHeight, outputWidth, outputChannels }; + break; + default: + throw armnn::InvalidArgumentException("unknown data layout [" + + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]"); + } + + float inputsQScale = armnn::IsQuantizedType<T>() ? 1.0f : 0; + float outputQScale = armnn::IsQuantizedType<T>() ? 2.0f : 0; + int32_t qOffset = 0; + + inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset); + outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset); + kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset); + biasDesc = armnn::TensorInfo( + 1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset); + + LayerTestResult<T, 4> ret(outputTensorInfo); + + auto input = MakeRandomTensor<T, 4>(inputTensorInfo, 124908, 0.0f, 255.0f); + auto kernel = MakeRandomTensor<T, 4>(kernelDesc, 891234, 0.0f, 255.0f); + auto bias = MakeRandomTensor<typename FullyConnectedBiasTypeForInputType<T>::Type, 1>( + biasDesc, 1028, 0.0f, 255.0f); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; +
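// Bias is always enabled for this comparison test; the same random input is fed to both the tested and the reference workload so their outputs can be compared. +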
data.m_Parameters.m_BiasEnabled = true; + data.m_Parameters.m_DataLayout = layout.GetDataLayout(); + + std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + ExecuteWorkload(*workload, memoryManager); + + workloadRef->PostAllocationConfigure(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +// +// Explicit template specializations +// + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4> +Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4> +Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4> +Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4> +Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout); + +template 
LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4> +Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4> +Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4> +DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4> +DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4> +DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4> +DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + armnn::IWorkloadFactory&, + const armnn::IBackendInternal::IMemoryManagerSharedPtr&, + bool, + armnn::DataLayout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout); + +template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4> +DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>( + armnn::IWorkloadFactory &workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, + bool biasEnabled, + const armnn::DataLayout layout); + +// +// Implementation functions +// + +LayerTestResult<float, 4> SimpleConvolution2d3x5Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, 
armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x3Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled) +{ + return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>( + workloadFactory, + memoryManager, + 0.f, + 0, + biasEnabled, + armnn::DataLayout::NHWC); +} + +LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>( + workloadFactory, + memoryManager, + 0.f, + 0, + biasEnabled, + layout); +} + +LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::DataLayout layout) +{ + return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, layout, 0.0f, 0); +} + +LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::DataLayout layout) +{ + return 
Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon + <armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, layout, 0.0f, 0); +} + +LayerTestResult<float, 4> Convolution1dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled) +{ + return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.0f, 0, biasEnabled); +} + +LayerTestResult<uint8_t, 4> Convolution1dUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled) +{ + return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.1f, 128, biasEnabled); +} + +LayerTestResult<float,4> CompareConvolution2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareConvolution2dTestImpl<armnn::DataType::Float32>( + workloadFactory, memoryManager, refWorkloadFactory); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled) +{ + return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.0f, 0, biasEnabled); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32); + auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f }); + + std::vector<float> kernelData; + std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f }; + for (unsigned int i = 0; i < 64; ++i) + { + kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end()); + } + armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32); + auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData); + + std::vector<float> expectedOutputData(64, 0.f); + armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32); + auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData); + + return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, + memoryManager, + input, + kernel, + boost::multi_array<float, 1>(), + expectedOutput, + 0.f, + 0, + 
armnn::DataLayout::NCHW); +} + +LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout); +} + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>( + workloadFactory, + memoryManager, + 0.f, + 0, + false); +} + +LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + bool biasEnabled, + const armnn::DataLayout layout) +{ + return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>( + workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout); +} + +LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::IWorkloadFactory& refWorkloadFactory, + const armnn::DataLayout layout) +{ + return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>( + workloadFactory, memoryManager, refWorkloadFactory, layout); +} + +LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + armnn::IWorkloadFactory& refWorkloadFactory, + const armnn::DataLayout layout) +{ + return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>( + workloadFactory, memoryManager, refWorkloadFactory, layout); +}
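+ +// Example usage (illustrative sketch only, not part of the original file): backend test suites +// typically register these helpers with the ARMNN_AUTO_TEST_CASE macro, e.g. +// ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NCHW) +// which runs the helper against that suite's workload factory and compares ret.output to ret.outputExpected.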