//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "Conv2dTestImpl.hpp"

#include <QuantizeHelper.hpp>
#include <armnnUtils/TensorUtils.hpp>

#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <backendsCommon/TensorHandle.hpp>

#include <armnnUtils/DataLayoutIndexed.hpp>
#include <armnnUtils/Permute.hpp>

#include <backendsCommon/test/DataLayoutUtils.hpp>
#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <string>

//
// Static data
//

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

static std::vector<float> Bias4({1, 2, 3, 4});

static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

// 3-channel 8x16 (HxW) image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,

    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

using namespace armnnUtils;

//
// Helper templates
//

// Helper template that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias2, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias4, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}

// Helper template that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        return QuantizedVector<T>(Bias8, qScale, 0);
    }
    else
    {
        return std::vector<T>();
    }
}
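// For reference, QuantizedVector maps each float into the target type using the
// usual affine quantization scheme, and the tests dequantize with its inverse.
// A minimal illustrative sketch of that round trip follows; ExampleQuantize and
// ExampleDequantize are hypothetical names for illustration only, not the
// armnnUtils implementation:
inline int32_t ExampleQuantize(float value, float scale, int32_t offset)
{
    // Round to nearest, then shift by the zero-point offset: q = round(v / scale) + offset.
    const float rounded = value / scale + (value >= 0.0f ? 0.5f : -0.5f);
    return static_cast<int32_t>(rounded) + offset;
}

inline float ExampleDequantize(int32_t quantized, float scale, int32_t offset)
{
    // Invert the affine mapping: v = scale * (q - offset).
    return scale * static_cast<float>(quantized - offset);
}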
// Helper template that returns a bias vector sized to the number of output channels
// (2, 4 or 8), or an empty vector when bias is disabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
std::vector<T> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}

//
// Implementation templates
//

// Mapping from input type to bias type for fully connected layers.
// float => float, uint8_t => int32_t
template<typename T>
struct FullyConnectedBiasTypeForInputType;

template<>
struct FullyConnectedBiasTypeForInputType<float>
{
    using Type = float;
};

template<>
struct FullyConnectedBiasTypeForInputType<uint8_t>
{
    using Type = int32_t;
};

// Modifies a std::vector in-place using a specified bias.
template<typename T, typename B>
void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
               const std::vector<B>& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h)
{
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<T>() && vScale != 0.0f) || (!armnn::IsQuantizedType<T>()),
                     "Invalid type and parameter combination.");
    ARMNN_ASSERT_MSG((armnn::IsQuantizedType<B>() && bScale != 0.0f) || (!armnn::IsQuantizedType<B>()),
                     "Invalid type and parameter combination.");

    // Note we need to dequantize and re-quantize the image value and the bias.
    for (uint32_t i = 0; i < bias.size(); ++i)
    {
        float dBias = SelectiveDequantize(bias[i], bScale, bOffset);
        for (uint32_t y = 0; y < h; ++y)
        {
            for (uint32_t x = 0; x < w; ++x)
            {
                uint32_t offset = (i * h + y) * w + x;
                ARMNN_ASSERT(offset < v.size());
                T& outRef = v[offset];
                float dOutput = SelectiveDequantize(outRef, vScale, vOffset);
                outRef = SelectiveQuantize<T>(dOutput + dBias, vScale, vOffset);
            }
        }
    }
}
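// Note on bias quantization used throughout the tests below: a convolution
// accumulates input * weight products, so a quantized bias must sit on the
// combined scale
//     biasScale = inputScale * weightScale
// with a zero offset. This is why the implementations set
// biasDesc.SetQuantizationScale(qScale * qScale) and why callers pass
// qScale * qScale to the GetBias* helpers. Worked example, assuming
// inputScale = weightScale = 0.5f: biasScale = 0.25f, so a real bias of 2.0f
// is stored as round(2.0f / 0.25f) = 8.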
//
// Convolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& originalInput,
    const std::vector<T>& originalKernel,
    const std::vector<B>& bias,
    const std::vector<T>& originalOutputExpected,
    const armnn::TensorShape& originalInputShape,
    const armnn::TensorShape& originalKernelShape,
    const armnn::TensorShape& originalOutputExpectedShape,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    uint32_t dilationX = 1,
    uint32_t dilationY = 1)
{
    armnn::IgnoreUnused(memoryManager);
    unsigned int inputHeight   = armnn::numeric_cast<unsigned int>(originalInputShape[2]);
    unsigned int inputWidth    = armnn::numeric_cast<unsigned int>(originalInputShape[3]);
    unsigned int inputChannels = armnn::numeric_cast<unsigned int>(originalInputShape[1]);
    unsigned int inputNum      = armnn::numeric_cast<unsigned int>(originalInputShape[0]);

    unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[2]);
    unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[3]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[1]);
    unsigned int outputNum      = armnn::numeric_cast<unsigned int>(originalOutputExpectedShape[0]);

    unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(originalKernelShape[2]);
    unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(originalKernelShape[3]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(originalKernelShape[1]);
    unsigned int kernelDepthMul = armnn::numeric_cast<unsigned int>(originalKernelShape[0]);

    bool biasEnabled = bias.size() > 0;

    // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches).
    ARMNN_ASSERT(inputNum == 1);
    ARMNN_ASSERT(outputNum == 1);

    // If a bias is used, its size must equal the number of output channels.
    ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Note these tensors will use two (identical) batches.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc =
        armnnUtils::GetTensorInfo(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct input data - two batches of the same input image.
    std::vector<T> inputImage;
    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
    inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());

    // At this point if we require it permute the input data.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    std::vector<T> outputImage;
    outputImage.assign(originalOutputExpected.data(),
                       originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to output image if it is enabled.
    if(biasEnabled)
    {
        std::vector<B> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    // Data will be copied from outputHandle.
    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Construct expected output data - two identical images.
    std::vector<T> expectedOutput;
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());
    expectedOutput.insert(expectedOutput.end(), outputImage.begin(), outputImage.end());

    // At this point if we require it permute the expected output.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(expectedOutput.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, expectedOutput.data(), tmp.data(), sizeof(T));
        expectedOutput = tmp;
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedTensorHandle biasTensor(biasDesc);

    // Permute the kernel if necessary.
    std::vector<T> kernel = originalKernel;
    if (layout == armnn::DataLayout::NHWC)
    {
        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
    }
    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());

    if(biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;
    data.m_Parameters.m_DilationX = dilationX;
    data.m_Parameters.m_DilationY = dilationY;

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
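// The PermutationVector { 0, 3, 1, 2 } used above (and in the tests below) reads
// as "source dimension i moves to destination dimension mapping[i]": N stays at 0,
// C (dim 1) moves to 3, H (dim 2) to 1 and W (dim 3) to 2, i.e. NCHW -> NHWC.
// A worked example for a 1x2x3x4 tensor:
//
//     NCHW shape { 1, 2, 3, 4 }  --Permute({0,3,1,2})-->  NHWC shape { 1, 3, 4, 2 }
//     element (n, c, h, w) lands at (n, h, w, c)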
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>,
         armnn::DataType OutType = ArmnnType, typename O = armnn::ResolveType<OutType>>
LayerTestResult<O, 4> SimpleConvolution2dNhwcTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& input,
    const std::vector<T>& kernel,
    const std::vector<B>& bias,
    const std::vector<O>& outputExpected,
    const armnn::TensorShape& inputShape,
    const armnn::TensorShape& kernelShape,
    const armnn::TensorShape& outputExpectedShape,
    const armnn::DataLayout dataLayout,
    float qScale,
    int32_t qOffset,
    uint32_t padLeft = 1,
    uint32_t padTop = 1,
    uint32_t padRight = 1,
    uint32_t padBottom = 1,
    uint32_t strideX = 1,
    uint32_t strideY = 1)
{
    armnn::IgnoreUnused(qScale, qOffset);
    unsigned int inputNum      = armnn::numeric_cast<unsigned int>(inputShape[0]);
    unsigned int inputChannels = armnn::numeric_cast<unsigned int>(inputShape[3]);
    unsigned int inputHeight   = armnn::numeric_cast<unsigned int>(inputShape[1]);
    unsigned int inputWidth    = armnn::numeric_cast<unsigned int>(inputShape[2]);

    unsigned int kernelChanMul  = armnn::numeric_cast<unsigned int>(kernelShape[0]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);
    unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(kernelShape[1]);
    unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(kernelShape[2]);

    unsigned int outputNum      = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);
    unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
    unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);

    bool biasEnabled = bias.size() > 0;

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, ArmnnType);
    armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels}, OutType);
    armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);

    // Construct the output data, with bias applied, as appropriate.
    std::vector<O> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);

    std::vector<O> actualOutput(outputTensorInfo.GetNumElements());

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());

    armnn::ScopedTensorHandle biasTensor(biasDesc);

    armnn::Convolution2dQueueDescriptor data;

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = dataLayout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<O, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution1dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    using B = armnn::ResolveType<ArmnnBType>;
    // Until we have a specialist 1D convolution layer, we can fake one using
    // 2D convolution with the final dimension set to 1.
    // I don't anticipate this being particularly slow, given that convolution is implemented
    // as a matrix multiplication, at which point dimension doesn't matter.

    unsigned int batchSize      = 1;
    unsigned int inputChannels  = 2;
    unsigned int outputChannels = 3;
    unsigned int inputSize      = 5; // The 1D size (could view as 'width' or 'height').
    unsigned int kernelSize     = 3;
    unsigned int padSize        = 2;
    unsigned int stride         = 1;
    unsigned int outputSize     = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride.

    armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, ArmnnType);
    armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, ArmnnType);
    armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, ArmnnType);
    armnn::TensorInfo biasInfo({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputInfo.SetQuantizationScale(qScale);
        inputInfo.SetQuantizationOffset(qOffset);
        outputInfo.SetQuantizationScale(qScale);
        outputInfo.SetQuantizationOffset(qOffset);
        kernelInfo.SetQuantizationScale(qScale);
        kernelInfo.SetQuantizationOffset(qOffset);
        biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale());
        biasInfo.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = QuantizedVector<T>(
        {
            5.0f, -2.0f, 2.5f, 0.0f, 1.0f,
            -3.0f, 3.2f, 5.0f, 2.0f, 3.0f,
        },
        inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset());

    std::vector<T> kernelData = QuantizedVector<T>(
        {
            1.0f, 0.0f, 0.0f,
            0.0f, 2.0f, -1.5f,

            0.0f, 0.0f, 0.0f,
            0.2f, 0.2f, 0.2f,

            0.5f, 0.0f, 0.5f,
            0.0f, -1.0f, 0.0f
        },
        kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset());

    std::vector<B> biasData =
        QuantizedVector<B>({ 1.0f, 0.0f, 0.0f }, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset());

    std::vector<T> outputData = QuantizedVector<T>(
        {
            4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f,
            -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f,
            2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f
        },
        outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset());

    std::vector<T> actualOutput(outputInfo.GetNumElements());

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(),
                  biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(),
                  1, outputSize);
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedTensorHandle weightsTensor(kernelInfo);
    armnn::ScopedTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = stride;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = padSize;
    data.m_Parameters.m_PadBottom = padSize;
    data.m_Parameters.m_BiasEnabled = biasEnabled;

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputInfo.GetShape());
}
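// Worked example of the shape arithmetic used by Convolution1dTestImpl above:
// treating the 1D signal as an (inputSize x 1) image, the usual 2D formula
//     outputSize = (inputSize + 2 * padSize - kernelSize) / stride + 1
// gives (5 + 2*2 - 3) / 1 + 1 = 7, matching the hard-coded outputSize.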
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);
    // Use a single-batch 1-channel 3x4 image as input.
    armnn::TensorInfo inputDesc({ 1, 3, 4, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3,
        8, 7, 3, 6,
        3, 3, 9, 1
    };

    // Use 1 batch of a 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is 1 batch of a 1-channel 3x4 image.
    armnn::TensorInfo outputDesc({ 1, 3, 4, 1 }, ArmnnType);
    const std::vector<T> outputData =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset);
}
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, ArmnnType);
    std::vector<T> input =
    {
        1, 5, 2, 3, 5,
        8, 7, 3, 6, 3,
        3, 3, 9, 1, 9,
        4, 1, 8, 1, 3,
        6, 8, 1, 9, 2
    };

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, ArmnnType);
    std::vector<T> outputData =
    {
        23, 33, 24,
        91, 99, 48,
        26, 50, 19
    };

    uint32_t padLeft   = 1;
    uint32_t padTop    = 1;
    uint32_t padRight  = 1;
    uint32_t padBottom = 1;
    uint32_t strideX   = 2;
    uint32_t strideY   = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        std::vector<T>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}
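// Worked example for the strided test above: with symmetric padding of 1 and a
// stride of 2,
//     outputSize = (inputSize + padLow + padHigh - kernelSize) / stride + 1
//                = (5 + 1 + 1 - 3) / 2 + 1 = 3
// which is why the 5x5 input produces a 3x3 output.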
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 8x16 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 5, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>(
        {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 4x14 image.
    armnn::TensorInfo outputDesc({ 1, 2, 4, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>(
        {
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 8x16 image.
    armnn::TensorInfo inputDesc({ 1, 3, 8, 16 }, ArmnnType);
    std::vector<unsigned int> inputShape = { 1, 3, 8, 16 };
    std::vector<T> input = QuantizedVector<T>(ConvInput3x8x16, qScale, qOffset);

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({ 2, 3, 3, 3 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>(
        {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 2-channel 6x14 image.
    armnn::TensorInfo outputDesc({ 1, 2, 6, 14 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>(
        {
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,
            -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f, -14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout);
}
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 3, 3 }, ArmnnType);
    std::vector<T> input = QuantizedVector<T>(
        {
            11, 21, 31,
            12, 22, 32,
            13, 23, 33
        },
        qScale, qOffset);

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 2, 2 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>(
        {
            -11, -21,
            -12, -22,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 8x6 (HxW) image.
    // Manually calculated like this:
    // [-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
    // [-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
    // [-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
    // [-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
    // [-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
    // [-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
    // [..... ..... ..... .....   ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
    armnn::TensorInfo outputDesc({ 1, 1, 8, 6 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>(
        {
               0,     0,     0,    0,    0,    0,
            -242,  -594,  -934, -372,    0,    0,
            -495, -1190, -1850, -725,    0,    0,
            -538, -1256, -1916, -748,    0,    0,
            -273,  -626,  -946, -363,    0,    0,
               0,     0,     0,    0,    0,    0,
               0,     0,     0,    0,    0,    0,
               0,     0,     0,    0,    0,    0
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        2,  // Padding top.
        3,  // Padding right.
        4); // Padding bottom.
}
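// Worked example for the asymmetric-padding test above:
//     outputHeight = inputHeight + padTop + padBottom - kernelHeight + 1 = 3 + 2 + 4 - 2 + 1 = 8
//     outputWidth  = inputWidth + padLeft + padRight - kernelWidth + 1  = 3 + 1 + 3 - 2 + 1 = 6
// hence the 8x6 expected output, with non-zero values confined to the region
// where the kernel overlaps the original image.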
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> input = QuantizedVector<T>(
        {
            11, 21, 31, 41, 51,
            12, 22, 32, 42, 52,
            13, 23, 33, 43, 53,
            14, 24, 34, 44, 54,
            15, 25, 35, 45, 55,
        },
        qScale, qOffset);

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<T> kernel = QuantizedVector<T>(
        {
            -11, -21, -31, -41,
            -12, -22, -32, -42,
            -13, -23, -33, -43,
            -14, -24, -34, -44,
        },
        qScale, qOffset);

    // Expected output is 1 batch of a 1-channel 5x5 image.
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    std::vector<T> expectedOutput = QuantizedVector<T>(
        {
            -7140, -10580, -13940,  -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144,  -9318, -5152,
            -5032,  -7256,  -9376,  -6142, -3368,
        },
        qScale, qOffset);

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1,
    bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QAsymmU8:
        case armnn::DataType::QAsymmS8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QSymmS16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = QuantizedVector<T>(inputNoQuantizedValues,
                                    inputTensorInfo.GetQuantizationScale(),
                                    inputTensorInfo.GetQuantizationOffset());
    auto kernel = QuantizedVector<T>(kernelNoQuantizedValues,
                                     kernelTensorInfo.GetQuantizationScale(),
                                     kernelTensorInfo.GetQuantizationOffset());
    auto expectedOutput = QuantizedVector<T>(outputExpectedNoQuantizedValues,
                                             outputTensorInfo.GetQuantizationScale(),
                                             outputTensorInfo.GetQuantizationOffset());

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        inputTensorInfo.GetShape(),
        kernelTensorInfo.GetShape(),
        outputTensorInfo.GetShape(),
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, the kernel covers an effective 7x7 window,
    // therefore the output will be 4x4: (I - dilatedK + 2P)/S + 1 => (10 - 7 + 0)/1 + 1.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 2, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, the kernel covers an effective 7x7 window,
    // therefore the output will be 4x4: (I - dilatedK + 2P)/S + 1 => (10 - 7 + 0)/1 + 1.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        6., 4., 4., 4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        biasEnabled);
}
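// Worked example of the dilation arithmetic used by the two tests above:
//     effectiveKernel = dilation * (kernelSize - 1) + 1 = 3 * (3 - 1) + 1 = 7
//     outputSize      = (inputSize - effectiveKernel + 2 * pad) / stride + 1
//                     = (10 - 7 + 0) / 1 + 1 = 4
// so a dilated 3x3 kernel behaves like a sparse 7x7 kernel, and the 10x10 input
// yields a 4x4 output.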
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({ 1, 1, 10, 10 }, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2 }, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // Since the dilation rate is 2, the kernel covers an effective 3x3 window:
    // d(K-1)+1 --> 2 * (2-1) + 1 = 3. With 1 pixel of padding on each side and a
    // stride of 3 the output is 4x4:
    //     (I - dilatedK + padLow + padHigh) / S + 1 = (10 - 3 + 1 + 1) / 3 + 1 = 4
    // where dilation d = 2, kernel size K = 2, input size I = 10 and stride S = 3.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4, 7, 7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2, 3, 3, 1
    };

    uint32_t padLeft   = 1;
    uint32_t padTop    = 1;
    uint32_t padRight  = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        2,
        2,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        3,
        3,
        biasEnabled);
}
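// The comparison test below validates a backend against the reference
// implementation: the same Convolution2d descriptor is executed through both the
// workload factory under test and a reference workload factory, and the two
// outputs are returned for elementwise comparison. Seeded random tensors make
// the run reproducible without hand-computed expected data.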
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> CompareConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory)
{
    unsigned int inputHeight   = 8;
    unsigned int inputWidth    = 16;
    unsigned int inputChannels = 3;
    unsigned int inputNum      = 5;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth  = 3;

    unsigned int strideX = 2;
    unsigned int strideY = 3;
    unsigned int padX    = 1;
    unsigned int padY    = 1;

    unsigned int outputNum      = inputNum;
    unsigned int outputChannels = 2;
    unsigned int outputHeight   = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY;
    unsigned int outputWidth    = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX;

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo kernelDesc;
    armnn::TensorInfo biasDesc;

    unsigned int inputShape[]  = {inputNum, inputChannels, inputHeight, inputWidth};
    unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth};
    unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth};
    unsigned int biasShape[]   = {outputChannels};

    inputTensorInfo  = armnn::TensorInfo(4, inputShape, ArmnnType);
    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
    kernelDesc       = armnn::TensorInfo(4, kernelShape, ArmnnType);
    biasDesc         = armnn::TensorInfo(1, biasShape, ArmnnType);

    auto input  = MakeRandomTensor<T>(inputTensorInfo, 124908);
    auto kernel = MakeRandomTensor<T>(kernelDesc, 891234);
    auto bias   = MakeRandomTensor<T>(biasDesc, 1028);

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());
    std::vector<T> expectedOutput(outputTensorInfo.GetNumElements());

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::Convolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor;
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padX;
    data.m_Parameters.m_PadRight = padX;
    data.m_Parameters.m_PadTop = padY;
    data.m_Parameters.m_PadBottom = padY;
    data.m_Parameters.m_BiasEnabled = true;

    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo);

    armnn::Convolution2dQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef =
        refWorkloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, refData, refInfo);

    outputHandleRef->Allocate();
    inputHandleRef->Allocate();
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), input.data());
    CopyDataToITensorHandle(inputHandleRef.get(), input.data());

    ExecuteWorkload(*workload, memoryManager);

    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
    CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 expectedOutput,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    // BFloat16 input and weight, Float32 output.
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, armnn::DataType::BFloat16);

    std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
        {
            10.0367984f,   // 10.0625
            2.0380895f,    // 2.03125
            15.0420157f,   // 15.0625
            22.0675631f,   // 22.125
            8.0938920f,    // 8.125
            5.0476106f,    // 5.0625
            80.1035490f,   // 80
            100.1260370f,  // 100
            55.0461647f,   // 55
            120.0883828f,  // 120
            9.1159540f,    // 9.125
            90.0498519f,   // 90
            200.0104630f,  // 200
            30.0154114f,   // 30
            75.00137681f,  // 75
            30.0344238f,   // 30
            25.0356445f,   // 25
            130.0495605f,  // 130
            60.0683594f,   // 60
            35.0991211f,   // 35
            8.0461426f,    // 8.0625
            12.0996094f,   // 12.125
            98.1269530f,   // 98
            125.0393066f,  // 125
            5.103516f      // 5.0937
        },
        1.0f, 0);

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, armnn::DataType::BFloat16);
    std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
        {
            -0.126184f,  // -0.125977
            -0.150468f,  // -0.150391
            -0.101412f,  // -0.101562
            -0.0586369f, // -0.0585938
            -0.0865864f, // -0.0864258
            -0.0435089f, // -0.043457
            0.0347555f,  // 0.034668
            0.0323111f,  // 0.0322266
            0.0385381f   // 0.0385742
        },
        1.0f, 0);

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, armnn::DataType::Float32);

    // Expected output (with results if calculated as FP32 in the comments).
    const std::vector<float> outputData =
    {
        2.296875f,  //  2.29240716
        5.75f,      //  5.75851926
        3.78125f,   //  3.79855026
        -11.625f,   // -11.65498118
        -47.25f,    // -47.27316893
        -30.0f,     // -30.04771684
        -8.25f,     //  -8.28126168
        -43.5f,     // -43.46531337
        -20.625f    // -20.63477281
    };

    uint32_t padLeft   = 1;
    uint32_t padTop    = 1;
    uint32_t padRight  = 1;
    uint32_t padBottom = 1;
    uint32_t strideX   = 2;
    uint32_t strideY   = 2;

    return SimpleConvolution2dNhwcTestImpl<armnn::DataType::BFloat16, armnn::DataType::Float32,
                                           armnn::BFloat16, float, armnn::DataType::Float32, float>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputValues,
        kernelValues,
        std::vector<float>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        1.0f,
        0,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}
LayerTestResult<float, 4> Convolution2d3x3Stride2x2BFloat16SmallValueTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout& dataLayout)
{
    // BFloat16 input and weight, Float32 output.
    armnn::IgnoreUnused(biasEnabled);

    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({ 1, 5, 5, 1 }, armnn::DataType::BFloat16);

    std::vector<armnn::BFloat16> inputValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
        {
            0.0367984f,   // 0.0368652
            0.0380895f,   // 0.0380859
            0.0420157f,   // 0.0419922
            0.0675631f,   // 0.0673828
            0.0938920f,   // 0.09375
            0.0476106f,   // 0.0476074
            0.1035490f,   // 0.103516
            0.1260370f,   // 0.125977
            0.0461647f,   // 0.0461426
            0.0883828f,   // 0.0883789
            0.1159540f,   // 0.115723
            0.0498519f,   // 0.0498047
            0.0104630f,   // 0.010437
            0.0154114f,   // 0.0154419
            0.00137681f,  // 0.00137329
            0.0344238f,   // 0.0344616
            0.0356445f,   // 0.0355693
            0.0495605f,   // 0.0495018
            0.0683594f,   // 0.0683308
            0.0991211f,   // 0.0988837
            0.0461426f,   // 0.0461838
            0.0996094f,   // 0.0997546
            0.1269530f,   // 0.127099
            0.0393066f,   // 0.0392791
            0.103516f     // 0.103641
        },
        1.0f, 0);

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({ 1, 3, 3, 1 }, armnn::DataType::BFloat16);
    std::vector<armnn::BFloat16> kernelValues = armnnUtils::QuantizedVector<armnn::BFloat16>(
        {
            -0.126184f,  // -0.125977
            -0.150468f,  // -0.150391
            -0.101412f,  // -0.101562
            -0.0586369f, // -0.0585938
            -0.0865864f, // -0.0864258
            -0.0435089f, // -0.043457
            0.0347555f,  // 0.034668
            0.0323111f,  // 0.0322266
            0.0385381f   // 0.0385742
        },
        1.0f, 0);

    // Expected output is a single-batch, 1 channel, 3x3 image.
    armnn::TensorInfo outputDesc({ 1, 3, 3, 1 }, armnn::DataType::Float32);

    // Expected output (with results if calculated as FP32 in the comments).
    const std::vector<float> outputData =
    {
        0.000686645508f,  //  0.000685
        0.000640869141f,  //  0.000639
        -0.00759887695f,  // -0.007631
        -0.02734375f,     // -0.027388
        -0.0356445312f,   // -0.035737
        -0.0145874023f,   // -0.014568
        -0.0170898438f,   // -0.017124
        -0.0373535156f,   // -0.037431
        -0.0346679688f    // -0.034808
    };

    uint32_t padLeft   = 1;
    uint32_t padTop    = 1;
    uint32_t padRight  = 1;
    uint32_t padBottom = 1;
    uint32_t strideX   = 2;
    uint32_t strideY   = 2;

    return SimpleConvolution2dNhwcTestImpl<armnn::DataType::BFloat16, armnn::DataType::Float32,
                                           armnn::BFloat16, float, armnn::DataType::Float32, float>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputValues,
        kernelValues,
        std::vector<float>(),
        outputData,
        inputDesc.GetShape(),
        kernelDesc.GetShape(),
        outputDesc.GetShape(),
        dataLayout,
        1.0f,
        0,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}
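// The bracketed comments in the two BFloat16 tests above show each literal after
// conversion to BFloat16, which keeps only the sign, exponent and top 7 mantissa
// bits of an IEEE-754 float. A minimal illustrative sketch of that conversion
// (hypothetical helper, kept as a comment; the real armnn conversion may also
// handle ties-to-even and NaN specially):
//
//     float RoundTripThroughBFloat16(float value)
//     {
//         uint32_t bits;
//         std::memcpy(&bits, &value, sizeof(bits));   // needs <cstring>
//         bits += 0x8000u;                            // round to nearest
//         bits &= 0xFFFF0000u;                        // keep the top 16 bits
//         std::memcpy(&value, &bits, sizeof(value));
//         return value;
//     }
//
// For example 10.0367984f, whose nearest representable BFloat16 neighbours are
// 10.0 and 10.0625, round-trips to 10.0625 as the comments above show.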
//
// DepthwiseConvolution2d implementations
//

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>, typename B = armnn::ResolveType<ArmnnBType>>
LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const std::vector<T>& input,
    const std::vector<T>& kernel,
    const std::vector<B>& bias,
    const std::vector<T>& outputExpected,
    const armnn::TensorShape& inputShape,
    const armnn::TensorShape& kernelShape,
    const armnn::TensorShape& outputExpectedShape,
    float qScale,
    int32_t qOffset,
    const armnn::DataLayout layout,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX = 1,
    uint32_t strideY = 1)
{
    unsigned int inputNum      = armnn::numeric_cast<unsigned int>(inputShape[0]);
    unsigned int inputChannels = armnn::numeric_cast<unsigned int>(inputShape[1]);
    unsigned int inputHeight   = armnn::numeric_cast<unsigned int>(inputShape[2]);
    unsigned int inputWidth    = armnn::numeric_cast<unsigned int>(inputShape[3]);

    unsigned int kernelHeight   = armnn::numeric_cast<unsigned int>(kernelShape[1]);
    unsigned int kernelWidth    = armnn::numeric_cast<unsigned int>(kernelShape[2]);
    unsigned int kernelChannels = armnn::numeric_cast<unsigned int>(kernelShape[3]);

    unsigned int outputNum      = armnn::numeric_cast<unsigned int>(outputExpectedShape[0]);
    unsigned int outputChannels = armnn::numeric_cast<unsigned int>(outputExpectedShape[1]);
    unsigned int outputHeight   = armnn::numeric_cast<unsigned int>(outputExpectedShape[2]);
    unsigned int outputWidth    = armnn::numeric_cast<unsigned int>(outputExpectedShape[3]);

    // If a bias is used, its size must equal the number of output channels.
    bool biasEnabled = bias.size() > 0;
    ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels);

    // Creates the tensors.
    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct the input data.
    std::vector<T> inputData;
    inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);

    // At this point if we require it permute the input data.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    // Construct the output data, with bias applied, as appropriate.
    std::vector<T> outputData;
    outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
    if (biasEnabled)
    {
        std::vector<B> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // At this point if we require it permute the expected output.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
        outputData = tmp;
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data());

    armnn::ScopedTensorHandle biasTensor(biasDesc);
    if (biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
    }

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
    data.m_Parameters.m_StrideX = strideX;
    data.m_Parameters.m_StrideY = strideY;
    data.m_Parameters.m_PadLeft = padLeft;
    data.m_Parameters.m_PadRight = padRight;
    data.m_Parameters.m_PadTop = padTop;
    data.m_Parameters.m_PadBottom = padBottom;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;

    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 outputData,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    using B = armnn::ResolveType<ArmnnBType>;

    unsigned int inputHeight   = 3;
    unsigned int inputWidth    = 3;
    unsigned int inputChannels = 2;
    unsigned int inputNum      = 1;

    unsigned int kernelHeight = 3;
    unsigned int kernelWidth  = 3;

    unsigned int outputHeight   = 1;
    unsigned int outputWidth    = 1;
    unsigned int outputChannels = inputChannels;
    unsigned int outputNum      = inputNum;

    armnn::TensorInfo inputTensorInfo =
        armnnUtils::GetTensorInfo(inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo =
        armnnUtils::GetTensorInfo(outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({ outputChannels }, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
    if(armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale*qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    std::vector<T> inputData = std::vector<T>(
        QuantizedVector<T>({
            1.f, 2.f, 1.f,
            2.f, 1.f, 2.f,
            1.f, 2.f, 1.f,

            1.f, 2.f, 1.f,
            2.f, 1.f, 2.f,
            1.f, 2.f, 1.f,
        },
        inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset()));

    // At this point if we require it permute the input data.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    std::vector<B> biasV(QuantizedVector<B>({ 0, 2 },
                                            biasDesc.GetQuantizationScale(),
                                            biasDesc.GetQuantizationOffset()));

    std::vector<T> kernelData = std::vector<T>(
        QuantizedVector<T>({
            1.f, 0.f, 1.f,
            0.f, 0.f, 0.f,
            -1.f, 0.f, -1.f,

            1.f, 0.f, 1.f,
            0.f, 0.f, 0.f,
            -1.f, 0.f, -1.f,
        },
        kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset()));

    // Manually calculated.
    std::vector<T> outputImage(
        QuantizedVector<T>({ 0.f, 0.f },
                           outputTensorInfo.GetQuantizationScale(),
                           outputTensorInfo.GetQuantizationOffset()));

    // Optionally apply bias to output image.
    if(biasEnabled)
    {
        ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputImage.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
        outputImage = tmp;
    }

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data());

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled.
    data.m_Parameters.m_StrideX = 1;
    data.m_Parameters.m_StrideY = 1;
    data.m_Parameters.m_PadLeft = 0;
    data.m_Parameters.m_PadRight = 0;
    data.m_Parameters.m_PadTop = 0;
    data.m_Parameters.m_PadBottom = 0;
    data.m_Parameters.m_BiasEnabled = biasEnabled;
    data.m_Parameters.m_DataLayout = layout;

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<T, 4>(actualOutput,
                                 outputImage,
                                 outputHandle->GetShape(),
                                 outputTensorInfo.GetShape());
}
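// Depthwise kernels in these tests use the [1, H, W, Cout] layout, where
// Cout = inputChannels * depthMultiplier: each input channel is convolved with
// depthMultiplier separate kernels rather than being summed across channels as
// in a regular convolution. In the test below, 2 input channels with a depth
// multiplier of 2 therefore produce 4 output channels.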
template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    using B = armnn::ResolveType<ArmnnBType>;

    unsigned int depthMultiplier = 2;

    unsigned int inputHeight    = 8;
    unsigned int inputWidth     = 16;
    unsigned int inputChannels  = 2;
    unsigned int inputBatchSize = 1;

    unsigned int kernelHeight = 5;
    unsigned int kernelWidth  = 3;

    unsigned int outputHeight    = inputHeight - kernelHeight + 1 + 2;
    unsigned int outputWidth     = (inputWidth - kernelWidth + 1)/2;
    unsigned int outputChannels  = inputChannels * depthMultiplier;
    unsigned int outputBatchSize = inputBatchSize;

    armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(
        inputBatchSize, inputChannels, inputHeight, inputWidth, layout, ArmnnType);
    armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(
        outputBatchSize, outputChannels, outputHeight, outputWidth, layout, ArmnnType);
    armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, outputChannels}, ArmnnType);
    armnn::TensorInfo biasDesc({outputChannels}, ArmnnBType);

    // Set quantization parameters if the requested type is a quantized type.
if(armnn::IsQuantizedType()) { inputTensorInfo.SetQuantizationScale(qScale); inputTensorInfo.SetQuantizationOffset(qOffset); outputTensorInfo.SetQuantizationScale(qScale); outputTensorInfo.SetQuantizationOffset(qOffset); kernelDesc.SetQuantizationScale(qScale); kernelDesc.SetQuantizationOffset(qOffset); biasDesc.SetQuantizationScale(qScale*qScale); biasDesc.SetQuantizationOffset(0); } // NOTE: originalInputData is in NCHW format std::vector originalInputData = std::vector( QuantizedVector({ 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }, inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset())); std::vector inputData = originalInputData; // at this point if we require it permute the input data const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 }; if (layout == armnn::DataLayout::NHWC) { armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, originalInputData.data(), inputData.data(), sizeof(T)); } std::vector biasV = QuantizedVector({ 0, 2, 1, -1 }, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset()); std::vector kernelData = std::vector( QuantizedVector({ 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset())); // Manually calculated. 
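    // The expected values below were computed by hand in NCHW; like the input, they are
    // permuted afterwards when the test runs with the NHWC layout.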
std::vector originalOutputImage = std::vector( QuantizedVector({ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5, 5, 5, 5, 5, 5, 5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 3, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0 }, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset())); // Optionally apply bias to output image. if(biasEnabled) { ApplyBias(originalOutputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), outputWidth, outputHeight); } std::vector outputImage = originalOutputImage; if (layout == armnn::DataLayout::NHWC) { armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, originalOutputImage.data(), outputImage.data(), sizeof(T)); } std::vector actualOutput(outputTensorInfo.GetNumElements()); std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); armnn::DepthwiseConvolution2dQueueDescriptor data; armnn::WorkloadInfo info; armnn::ScopedTensorHandle weightsTensor(kernelDesc); armnn::ScopedTensorHandle biasTensor(biasDesc); AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, biasV.data()); AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
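    // Sanity check: the stride/padding set below reproduce the output dimensions computed
    // earlier, i.e. with padTop = padBottom = 1 and strideY = 1,
    // outputHeight = (8 + 1 + 1 - 5)/1 + 1 = 6, and with no horizontal padding and
    // strideX = 2, outputWidth = (16 - 3)/2 + 1 = 7.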
data.m_Parameters.m_StrideX = 2; data.m_Parameters.m_StrideY = 1; data.m_Parameters.m_PadLeft = 0; data.m_Parameters.m_PadRight = 0; data.m_Parameters.m_PadTop = 1; data.m_Parameters.m_PadBottom = 1; data.m_Parameters.m_BiasEnabled = biasEnabled; data.m_Parameters.m_DataLayout = layout; std::unique_ptr workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info); inputHandle->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), inputData.data()); ExecuteWorkload(*workload, memoryManager); CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); return LayerTestResult(actualOutput, outputImage, outputHandle->GetShape(), outputTensorInfo.GetShape()); } template, typename B = armnn::ResolveType> LayerTestResult DepthwiseConvolution2dTestImpl( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, const std::vector& originalInput, const std::vector& originalKernel, const std::vector& bias, const std::vector& originalOutputExpected, const armnn::TensorShape& originalInputShape, const armnn::TensorShape& originalKernelShape, const armnn::TensorShape& originalOutputExpectedShape, float qScale, int32_t qOffset, const armnn::DataLayout layout = armnn::DataLayout::NCHW, uint32_t padLeft = 0, uint32_t padTop = 0, uint32_t padRight = 0, uint32_t padBottom = 0, uint32_t strideX = 1, uint32_t strideY = 1, uint32_t dilationX = 1, uint32_t dilationY = 1) { unsigned int inputHeight = armnn::numeric_cast(originalInputShape[2]); unsigned int inputWidth = armnn::numeric_cast(originalInputShape[3]); unsigned int inputChannels = armnn::numeric_cast(originalInputShape[1]); unsigned int inputNum = armnn::numeric_cast(originalInputShape[0]); unsigned int outputHeight = armnn::numeric_cast(originalOutputExpectedShape[2]); unsigned int outputWidth = armnn::numeric_cast(originalOutputExpectedShape[3]); unsigned int outputChannels = armnn::numeric_cast(originalOutputExpectedShape[1]); unsigned int outputNum = armnn::numeric_cast(originalOutputExpectedShape[0]); unsigned int kernelHeight = armnn::numeric_cast(originalKernelShape[1]); unsigned int kernelWidth = armnn::numeric_cast(originalKernelShape[2]); unsigned int kernelChannels = armnn::numeric_cast(originalKernelShape[3]); bool biasEnabled = bias.size() > 0; // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). ARMNN_ASSERT(inputNum == 1); ARMNN_ASSERT(outputNum == 1); // If a bias is used, its size must equal the number of output channels. ARMNN_ASSERT(!biasEnabled || bias.size() == outputChannels); // Note these tensors will use two (identical) batches. armnn::TensorInfo inputTensorInfo = armnnUtils::GetTensorInfo(2*inputNum, inputChannels, inputHeight, inputWidth, layout, ArmnnType); armnn::TensorInfo outputTensorInfo = armnnUtils::GetTensorInfo(2*outputNum, outputChannels, outputHeight, outputWidth, layout, ArmnnType); // Kernel must be NCHW layout always, independently of the layout of the input and output for depthwise convolution. armnn::TensorInfo kernelDesc({1, kernelHeight, kernelWidth, kernelChannels}, ArmnnType); armnn::TensorInfo biasDesc({static_cast(bias.size())}, ArmnnBType); // Set quantization parameters if the requested type is a quantized type. 
if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo.SetQuantizationScale(qScale);
        inputTensorInfo.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
        kernelDesc.SetQuantizationScale(qScale);
        kernelDesc.SetQuantizationOffset(qOffset);
        biasDesc.SetQuantizationScale(qScale * qScale);
        biasDesc.SetQuantizationOffset(0);
    }

    // Construct the input data: the single reference batch, duplicated.
    std::vector<T> input;
    input.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
    std::vector<T> inputData;
    inputData.insert(inputData.end(), input.begin(), input.end());
    inputData.insert(inputData.end(), input.begin(), input.end());

    // Permute the input data into NHWC here if that layout was requested.
    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(inputData.size());
        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
        inputData = tmp;
    }

    std::vector<T> output;
    output.assign(originalOutputExpected.data(),
                  originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);

    // Apply bias to the output data if it is enabled.
    if (biasEnabled)
    {
        std::vector<B> biasV;
        biasV.assign(bias.data(), bias.data() + outputChannels);
        ApplyBias(output, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
                  biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
                  outputWidth, outputHeight);
    }

    std::vector<T> actualOutput(outputTensorInfo.GetNumElements());

    // Construct the expected output data: the single reference batch, duplicated.
    std::vector<T> outputData;
    outputData.insert(outputData.end(), output.begin(), output.end());
    outputData.insert(outputData.end(), output.begin(), output.end());

    // Permute the expected output into NHWC here if that layout was requested.
    if (layout == armnn::DataLayout::NHWC)
    {
        std::vector<T> tmp(outputData.size());
        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
        outputData = tmp;
    }

    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::DepthwiseConvolution2dQueueDescriptor data;
    armnn::WorkloadInfo info;

    armnn::ScopedTensorHandle weightsTensor(kernelDesc);
    armnn::ScopedTensorHandle biasTensor(biasDesc);

    AllocateAndCopyDataToITensorHandle(&weightsTensor, originalKernel.data());

    if (biasEnabled)
    {
        AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data());
    }

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Weight = &weightsTensor;
    data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
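    // Note on the dilation parameters forwarded below: a dilation of d stretches a kernel
    // of extent k to an effective extent of (k - 1) * d + 1 without adding any weights.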
data.m_Parameters.m_StrideX = strideX; data.m_Parameters.m_StrideY = strideY; data.m_Parameters.m_PadLeft = padLeft; data.m_Parameters.m_PadRight = padRight; data.m_Parameters.m_PadTop = padTop; data.m_Parameters.m_PadBottom = padBottom; data.m_Parameters.m_BiasEnabled = biasEnabled; data.m_Parameters.m_DataLayout = layout; data.m_Parameters.m_DilationX = dilationX; data.m_Parameters.m_DilationY = dilationY; std::unique_ptr workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info); inputHandle->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), inputData.data()); ExecuteWorkload(*workload, memoryManager); CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); return LayerTestResult(actualOutput, outputData, outputHandle->GetShape(), outputTensorInfo.GetShape()); } template> LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, float qScale, int32_t qOffset, bool biasEnabled, const armnn::DataLayout layout) { // Use a single-batch 2-channel 5x5 image as input. armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType); auto input = QuantizedVector( { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 }, inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset()); // Use a depth multiplier of 1 on a 2-channel 4x4 kernel. // Weights layout for depthwise: [1,H,W,I*M] armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType); auto kernel = QuantizedVector({ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 }, kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset()); // Expected output is 1 batch of a 2-channel 5x5 image. // Calculated using the python tensorflow library with strideX=1, strideY=1. armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType); auto expectedOutput = QuantizedVector( { 396, 664, 820, 756, 602, 1016, 1608, 1880, 1652, 1268, 1976, 2968, 3240, 2732, 2028, 2628, 3808, 4060, 3312, 2390, 2596, 3700, 3900, 3130, 2226, 2817, 4186, 4330, 3609, 2651, 5414, 7864, 8120, 6626, 4780, 6314, 9144, 9400, 7646, 5500, 6759, 9610, 9850, 7875, 5579, 5935, 8348, 8540, 6757, 4742 }, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset()); return DepthwiseConvolution2dAsymmetricTestImpl( workloadFactory, memoryManager, tensorHandleFactory, input, kernel, GetBias2(biasEnabled, qScale * qScale), expectedOutput, inputTensorInfo.GetShape(), kernelTensorInfo.GetShape(), outputTensorInfo.GetShape(), qScale, qOffset, layout, 1, // Padding left. 1, // Padding top. 2, // Padding right. 2, // Padding bottom. 
1, // strideX 1); // strideY } template> LayerTestResult DepthwiseConvolution2dNhwcTestCommon( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, float qScale, int32_t qOffset, bool biasEnabled) { auto layout = armnn::DataLayout::NHWC; armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType); auto input = QuantizedVector( { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 }, inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset()); armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2 }, ArmnnType); auto kernel = QuantizedVector({ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 }, kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset()); armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType); auto expectedOutput = QuantizedVector( { 396,664,820,756,602, 1016,1608,1880,1652,1268, 1976,2968,3240,2732,2028, 2628,3808,4060,3312,2390, 2596,3700,3900,3130,2226, 2817,4186,4330,3609,2651, 5414,7864,8120,6626,4780, 6314,9144,9400,7646,5500, 6759,9610,9850,7875,5579, 5935,8348,8540,6757,4742 }, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset()); return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, input, kernel, GetBias2(biasEnabled, qScale * qScale), expectedOutput, inputTensorInfo.GetShape(), kernelTensorInfo.GetShape(), outputTensorInfo.GetShape(), qScale, qOffset, layout, 1, // Padding left. 1, // Padding top. 2, // Padding right. 2, // Padding bottom. 1, // strideX 1); // strideY } template> LayerTestResult SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, float qScale, int32_t qOffset, bool biasEnabled) { auto layout = armnn::DataLayout::NHWC; armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, ArmnnType); auto input = QuantizedVector( { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset()); armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType); auto kernel = QuantizedVector({ 1, 2, 3, 4, 5, 6, 7, 8, 9 }, kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset()); uint32_t padLeft = 0; uint32_t padTop = 0; uint32_t padRight = 0; uint32_t padBottom = 0; uint32_t strideX = 1; uint32_t strideY = 1; uint32_t dilationX = 3; uint32_t dilationY = 3; // Since the dilation rate is 3 this will reduce the size of the output from 9x9 to 3x3 of all 5s. 
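// Worked through: the effective kernel extent is (3 - 1) * 3 + 1 = 7, so the output is
// (9 - 7)/1 + 1 = 3 per spatial dimension, and for every output position only the centre
// tap (weight 5) lands on the 3x3 block of ones, hence the uniform 5s.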
armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, ArmnnType); auto expectedOutput = QuantizedVector( { 5, 5, 5, 5, 5, 5, 5, 5, 5 }, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset()); return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, input, kernel, GetBias2(biasEnabled, qScale * qScale), expectedOutput, inputTensorInfo.GetShape(), kernelTensorInfo.GetShape(), outputTensorInfo.GetShape(), qScale, qOffset, layout, padLeft, padTop, padRight, padBottom, strideX, strideY, dilationX, dilationY); } template> LayerTestResult DepthwiseConvolution2d3x3DilationTestCommon( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, const std::vector& inputNoQuantizedValues, armnn::TensorInfo& inputTensorInfo, const std::vector& kernelNoQuantizedValues, armnn::TensorInfo& kernelTensorInfo, const std::vector& outputExpectedNoQuantizedValues, armnn::TensorInfo& outputTensorInfo, uint32_t dilationX, uint32_t dilationY, armnn::DataLayout layout = armnn::DataLayout::NCHW, bool biasEnabled = false) { float qScale; int32_t qOffset; switch (ArmnnType) { case armnn::DataType::QAsymmS8: case armnn::DataType::QAsymmU8: { qScale = 0.1f; qOffset = 128; break; } case armnn::DataType::QSymmS16: { qScale = 0.1f; qOffset = 0; break; } case armnn::DataType::Float32: default: { qScale = 0.f; qOffset = 0; break; } } inputTensorInfo.SetQuantizationScale(qScale); inputTensorInfo.SetQuantizationOffset(qOffset); kernelTensorInfo.SetQuantizationScale(qScale); kernelTensorInfo.SetQuantizationOffset(qOffset); outputTensorInfo.SetQuantizationScale(qScale); outputTensorInfo.SetQuantizationOffset(qOffset); auto input = QuantizedVector(inputNoQuantizedValues, inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset()); auto kernel = QuantizedVector(kernelNoQuantizedValues, kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset()); auto expectedOutput = QuantizedVector(outputExpectedNoQuantizedValues, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset()); uint32_t padLeft = 0; uint32_t padTop = 0; uint32_t padRight = 0; uint32_t padBottom = 0; uint32_t strideX = 1; uint32_t strideY = 1; return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, input, kernel, GetBias(biasEnabled, qScale * qScale, outputTensorInfo, layout), expectedOutput, inputTensorInfo.GetShape(), kernelTensorInfo.GetShape(), outputTensorInfo.GetShape(), qScale, qOffset, layout, padLeft, padTop, padRight, padBottom, strideX, strideY, dilationX, dilationY); } template LayerTestResult DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType); std::vector inputNoQuantizedValues = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 1}, ArmnnType); std::vector kernelNoQuantizedValues = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; // Since the 
dilation rate is 3 this will dilate the kernel to an effective 7x7,
    // therefore the output will be 4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, T>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        biasEnabled);
}

template<armnn::DataType ArmnnType, typename T>
LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 3, 3, 2}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3 this will dilate the kernel to an effective 7x7,
    // therefore the output will be 2x4x4: (I - K + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4
    armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        2, 9, 9, 9,
        2, 9, 9, 9,
        2, 9, 9, 9,
        5, 3, 3, 3,

        3, 1, 1, 1,
        3, 1, 1, 1,
        3, 1, 1, 1,
        6, 4, 4, 4
    };

    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, T>(
        workloadFactory,
        memoryManager,
        tensorHandleFactory,
        inputNoQuantizedValues,
        inputTensorInfo,
        kernelNoQuantizedValues,
        kernelTensorInfo,
        outputExpectedNoQuantizedValues,
        outputTensorInfo,
        3,
        3,
        layout,
        biasEnabled);
}

template<armnn::DataType ArmnnType, typename T>
LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        10.0, 10.0, 10.0,
        10.0, 10.0, 10.0,
        10.0, 10.0, 10.0,

        21.0, 22.0, 23.0,
        24.0, 25.0, 26.0,
        27.0, 28.0, 29.0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 8}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f, 0.25f,
        0.0f,  0.0f,  0.0f,  0.1f,  0.0f,  0.0f,  0.0f,  0.1f,
        0.2f,  0.0f,  0.0f,  0.0f,  0.2f,  0.0f,  0.0f,  0.0f,
        0.0f,  0.3f,  0.0f,  0.0f,  0.0f,  0.3f,  0.0f,  0.0f
    };

    armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4.5f,   4.5f,  4.5f,  4.5f,
        5.5f,   5.5f,  5.5f,  5.5f,
        2.5f,   2.5f,  2.5f,  2.5f,
        3.5f,   3.5f,  3.5f,  3.5f,
        10.05f, 10.5f, 11.4f, 11.85f,
        12.75f, 13.3f, 14.4f, 14.95f,
        5.25f,  5.5f,  6.0f,  6.25f,
        7.45f,  7.8f,  8.5f,  8.85f
    };

    return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, T>(
        workloadFactory,
memoryManager, tensorHandleFactory, inputNoQuantizedValues, inputTensorInfo, kernelNoQuantizedValues, kernelTensorInfo, outputExpectedNoQuantizedValues, outputTensorInfo, 1, 1, layout, biasEnabled); } template LayerTestResult DepthwiseConvolution2dMult2Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType); std::vector inputNoQuantizedValues = { 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0 }; armnn::TensorInfo kernelTensorInfo({ 1, 2, 2, 4}, ArmnnType); std::vector kernelNoQuantizedValues = { 0.25f, 0.25f, 0.25f, 0.25f, 0.2f , 0.0f, 0.0f , 0.0f, 0.0f , 0.0f, 0.0f , 0.1f, 0.0f , 0.3f, 0.0f , 0.0f }; armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType); std::vector outputExpectedNoQuantizedValues = { 4.5f, 4.5f, 4.5f, 4.5f, 5.5f, 5.5f, 5.5f, 5.5f, 5.25f, 5.5f, 6.0f, 6.25f, 7.65f, 8.0f, 8.7f, 9.05f }; return DepthwiseConvolution2d3x3DilationTestCommon( workloadFactory, memoryManager, tensorHandleFactory, inputNoQuantizedValues, inputTensorInfo, kernelNoQuantizedValues, kernelTensorInfo, outputExpectedNoQuantizedValues, outputTensorInfo, 1, 1, layout, biasEnabled); } template> LayerTestResult CompareDepthwiseConvolution2dTestImpl( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, armnn::IWorkloadFactory& refWorkloadFactory, const armnn::ITensorHandleFactory& tensorHandleFactory, const armnn::ITensorHandleFactory& refTensorHandleFactory, const armnnUtils::DataLayoutIndexed& layout) { unsigned int inputHeight = 8; unsigned int inputWidth = 16; unsigned int inputChannels = 3; unsigned int inputNum = 5; unsigned int kernelHeight = 3; unsigned int kernelWidth = 3; unsigned int channelMultiplier = 1; unsigned int strideX = 2; unsigned int strideY = 3; unsigned int padX = 1; unsigned int padY = 1; unsigned int outputNum = inputNum; unsigned int outputChannels = inputChannels * channelMultiplier; unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; armnn::TensorInfo inputTensorInfo; armnn::TensorInfo outputTensorInfo; armnn::TensorInfo kernelDesc; armnn::TensorInfo biasDesc; std::vector inputShape; std::vector outputShape; std::vector kernelShape{ 1, kernelHeight, kernelWidth, outputChannels }; std::vector biasShape{ outputChannels }; switch (layout.GetDataLayout()) { case armnn::DataLayout::NCHW: inputShape = { inputNum, inputChannels, inputHeight, inputWidth }; outputShape = { outputNum, outputChannels, outputHeight, outputWidth }; break; case armnn::DataLayout ::NHWC: inputShape = { inputNum, inputHeight, inputWidth, inputChannels }; outputShape = { outputNum, outputHeight, outputWidth, outputChannels }; break; default: throw armnn::InvalidArgumentException("unknown data layout [" + std::to_string(static_cast(layout.GetDataLayout())) + "]"); } float inputsQScale = armnn::IsQuantizedType() ? 1.0f : 0; float outputQScale = armnn::IsQuantizedType() ? 
2.0f : 0; int32_t qOffset = 0; inputTensorInfo = armnn::TensorInfo(4, inputShape.data(), ArmnnType, inputsQScale, qOffset); outputTensorInfo = armnn::TensorInfo(4, outputShape.data(), ArmnnType, outputQScale, qOffset); kernelDesc = armnn::TensorInfo(4, kernelShape.data(), ArmnnType, inputsQScale, qOffset); biasDesc = armnn::TensorInfo(1, biasShape.data(), armnn::GetBiasDataType(ArmnnType), inputsQScale, qOffset); auto input = MakeRandomTensor(inputTensorInfo, 124908, 0.0f, 255.0f); auto kernel = MakeRandomTensor(kernelDesc, 891234, 0.0f, 255.0f); auto bias = MakeRandomTensor::Type>(biasDesc, 1028, 0.0f, 255.0f); std::vector actualOutput(outputTensorInfo.GetNumElements()); std::vector expectedOutput(outputTensorInfo.GetNumElements()); std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); armnn::DepthwiseConvolution2dQueueDescriptor data; armnn::WorkloadInfo info; armnn::ScopedTensorHandle weightsTensor(kernelDesc); armnn::ScopedTensorHandle biasTensor(biasDesc); AllocateAndCopyDataToITensorHandle(&weightsTensor, kernel.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data()); AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); data.m_Weight = &weightsTensor; data.m_Bias = &biasTensor; data.m_Parameters.m_StrideX = strideX; data.m_Parameters.m_StrideY = strideY; data.m_Parameters.m_PadLeft = padX; data.m_Parameters.m_PadRight = padX; data.m_Parameters.m_PadTop = padY; data.m_Parameters.m_PadBottom = padY; data.m_Parameters.m_BiasEnabled = true; data.m_Parameters.m_DataLayout = layout.GetDataLayout(); std::unique_ptr outputHandleRef = refTensorHandleFactory.CreateTensorHandle(outputTensorInfo); std::unique_ptr inputHandleRef = refTensorHandleFactory.CreateTensorHandle(inputTensorInfo); armnn::DepthwiseConvolution2dQueueDescriptor refData = data; armnn::WorkloadInfo refInfo = info; SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); std::unique_ptr workload = workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, data, info); std::unique_ptr workloadRef = refWorkloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, refData, refInfo); outputHandleRef->Allocate(); inputHandleRef->Allocate(); inputHandle->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), input.data()); CopyDataToITensorHandle(inputHandleRef.get(), input.data()); ExecuteWorkload(*workload, memoryManager); workloadRef->PostAllocationConfigure(); workloadRef->Execute(); CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); CopyDataFromITensorHandle(expectedOutput.data(), outputHandleRef.get()); return LayerTestResult(actualOutput, expectedOutput, outputHandle->GetShape(), outputTensorInfo.GetShape()); } // // Explicit template specializations // template LayerTestResult, 4> Convolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d3x3Dilation3x3Test( 
armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template 
LayerTestResult, 4> DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test( armnn::IWorkloadFactory&, const armnn::IBackendInternal::IMemoryManagerSharedPtr&, const armnn::ITensorHandleFactory&, bool, armnn::DataLayout); template LayerTestResult, 4> DepthwiseConvolution2dMult4Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> DepthwiseConvolution2dMult4Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> DepthwiseConvolution2dMult2Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); template LayerTestResult, 4> DepthwiseConvolution2dMult2Test( armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout); // // Implementation functions // LayerTestResult SimpleConvolution2d3x5Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x5TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x5Uint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { 
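    // The QAsymmU8 variant reuses the float reference data with a representative
    // quantization of scale 0.5 and offset 50.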
return SimpleConvolution2d3x5TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x3Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x3TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x3NhwcTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled) { return SimpleConvolution2d3x3NhwcTestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, armnn::DataLayout::NHWC); } LayerTestResult SimpleConvolution2d3x3Stride2x2Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x3Stride2x2TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x3Uint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x3TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x5QSymm16Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x5TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult SimpleConvolution2d3x3QSymm16Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return SimpleConvolution2d3x3TestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult Convolution2dAsymmetricPaddingTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, armnn::DataLayout layout) { return SimpleConvolution2dAsymmetricPaddingTestCommon( workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0); } LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, armnn::DataLayout layout) { return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon ( workloadFactory, memoryManager, tensorHandleFactory, layout, 0.0f, 0); } LayerTestResult Convolution1dTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled) { return 
Convolution1dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled); } LayerTestResult Convolution1dUint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled) { return Convolution1dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.1f, 128, biasEnabled); } LayerTestResult Convolution2dPerAxisQuantTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, const armnn::DataLayout layout) { using namespace armnn; const DataType inputType = DataType::QAsymmU8; const DataType kernelType = DataType::QSymmS8; const DataType biasType = DataType::Signed32; TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128); TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128); const std::vector quantScales{ 0.5f, 0.75f, 1.0f }; constexpr unsigned int quantDimension = 0; TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension); const std::vector biasQuantScales{ 0.25f, 0.375f, 0.5f }; TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension); std::vector inputData = { 138, 108, 138, 108, 138, 108 }; std::vector kernelData = { 1, 2, 1, 2, 1, 2 }; std::vector biasData = { 4, 4, 4 }; std::vector expectedOutputData = { 121, 118, 115, 121, 118, 115, 121, 118, 115 }; if (layout == DataLayout::NCHW) { PermuteTensorNhwcToNchw(inputInfo, inputData); PermuteTensorNhwcToNchw(kernelInfo, kernelData); PermuteTensorNhwcToNchw(outputInfo, expectedOutputData); } std::vector actualOutput(outputInfo.GetNumElements()); Convolution2dDescriptor descriptor; descriptor.m_StrideX = 1; descriptor.m_StrideY = 1; descriptor.m_PadLeft = 0; descriptor.m_PadRight = 0; descriptor.m_PadTop = 0; descriptor.m_PadBottom = 0; descriptor.m_BiasEnabled = true; descriptor.m_DataLayout = layout; std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo); WorkloadInfo workloadInfo; ScopedTensorHandle weightTensor(kernelInfo); ScopedTensorHandle biasTensor(biasInfo); AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data()); AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); Convolution2dQueueDescriptor queueDescriptor; queueDescriptor.m_Parameters = descriptor; queueDescriptor.m_Weight = &weightTensor; queueDescriptor.m_Bias = &biasTensor; AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get()); AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get()); std::unique_ptr workload= workloadFactory.CreateWorkload(armnn::LayerType::Convolution2d, queueDescriptor, workloadInfo); inputHandle->Allocate(); outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), inputData.data()); ExecuteWorkload(*workload, memoryManager); CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); return LayerTestResult(actualOutput, expectedOutputData, outputHandle->GetShape(), outputInfo.GetShape()); } LayerTestResult CompareConvolution2dTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, armnn::IWorkloadFactory& refWorkloadFactory, const armnn::ITensorHandleFactory& tensorHandleFactory, const armnn::ITensorHandleFactory& refTensorHandleFactory) { return 
CompareConvolution2dTestImpl( workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory); } LayerTestResult DepthwiseConvolution2dTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout); } LayerTestResult DepthwiseConvolution2dDepthNhwcTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled) { return DepthwiseConvolution2dNhwcTestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled); } LayerTestResult DepthwiseConvolution2dDepthMul1Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dDepthMul1TestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout); } LayerTestResult DepthwiseConvolution2dDepthMul64Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32); std::vector input = { 1.f, 2.f, 3.f, 4.f }; std::vector kernelData; std::vector singleDepthKernel{ 1.f, -1.f, -1.f, 1.f }; for (unsigned int i = 0; i < 64; ++i) { kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end()); } armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32); // permute from [O,1,H,W] --> [1,H,W,O] armnn::PermutationVector permutationVector {3,0,1,2}; kernelTensorInfo = armnnUtils::Permuted(kernelTensorInfo, permutationVector); std::vector kernelPermuted(kernelTensorInfo.GetNumElements()); armnnUtils::Permute(kernelTensorInfo.GetShape(), permutationVector, kernelData.data(), kernelPermuted.data(), GetDataTypeSize(kernelTensorInfo.GetDataType())); std::vector expectedOutputData(64, 0.f); armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32); return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, input, kernelPermuted, std::vector(), expectedOutputData, inputTensorInfo.GetShape(), kernelTensorInfo.GetShape(), outputTensorInfo.GetShape(), 0.f, 0, armnn::DataLayout::NCHW); } LayerTestResult DepthwiseConvolution2dAsymmetricTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dAsymmetricTestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.0f, 0, biasEnabled, layout); } LayerTestResult DepthwiseConvolution2dUint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult 
DepthwiseConvolution2dDepthMul1Uint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dDepthMul1TestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory) { return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon( workloadFactory, memoryManager, tensorHandleFactory, 0.f, 0, false); } LayerTestResult DepthwiseConvolution2dInt16Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dTestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult DepthwiseConvolution2dDepthMul1Int16Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout) { return DepthwiseConvolution2dDepthMul1TestImpl( workloadFactory, memoryManager, tensorHandleFactory, 0.5f, 50, biasEnabled, layout); } LayerTestResult DepthwiseConvolution2dPerAxisQuantTest( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, const armnn::DataLayout layout) { using namespace armnn; const DataType inputType = DataType::QAsymmU8; const DataType kernelType = DataType::QSymmS8; const DataType biasType = DataType::Signed32; TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C const std::vector quantScales{ 1.0f, 0.5f, 1.0f, 0.5f }; const unsigned int quantDimension = 3; TensorInfo kernelInfo({ 1, 2, 2, 4 }, kernelType, quantScales, quantDimension); // [1, H, W, I*M] const std::vector biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f }; constexpr unsigned int biasQuantDimension = 0; TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension); std::vector inputData = { 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130 }; std::vector kernelData = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; std::vector biasData = { 4, 4, 4, 4 }; std::vector expectedOutputData = { 132, 130, 134, 131, 132, 130, 134, 131, 132, 130, 134, 131, 132, 130, 134, 131 }; if (layout == DataLayout::NCHW) { PermuteTensorNhwcToNchw(inputInfo, inputData); PermuteTensorNhwcToNchw(outputInfo, expectedOutputData); } std::vector actualOutput(outputInfo.GetNumElements()); DepthwiseConvolution2dDescriptor descriptor; descriptor.m_StrideX = 1; descriptor.m_StrideY = 1; descriptor.m_PadLeft = 0; descriptor.m_PadRight = 0; descriptor.m_PadTop = 0; descriptor.m_PadBottom = 0; descriptor.m_DilationX = 1; descriptor.m_DilationY = 1; descriptor.m_BiasEnabled = true; descriptor.m_DataLayout = layout; std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo); std::unique_ptr outputHandle = 
tensorHandleFactory.CreateTensorHandle(outputInfo);

    WorkloadInfo workloadInfo;
    ScopedTensorHandle weightTensor(kernelInfo);
    ScopedTensorHandle biasTensor(biasInfo);

    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());

    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = descriptor;
    queueDescriptor.m_Weight = &weightTensor;
    queueDescriptor.m_Bias = &biasTensor;

    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload =
        workloadFactory.CreateWorkload(armnn::LayerType::DepthwiseConvolution2d, queueDescriptor, workloadInfo);
    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    ExecuteWorkload(*workload, memoryManager);

    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());

    return LayerTestResult<uint8_t, 4>(actualOutput,
                                       expectedOutputData,
                                       outputHandle->GetShape(),
                                       outputInfo.GetShape());
}

LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
        workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
}

LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory,
    const armnn::ITensorHandleFactory& tensorHandleFactory,
    const armnn::ITensorHandleFactory& refTensorHandleFactory,
    const armnn::DataLayout layout)
{
    return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QAsymmU8>(
        workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, layout);
}
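// Illustrative usage (a sketch, not part of this implementation file): backend test
// suites typically bind the functions above to test cases with the ARMNN_AUTO_TEST_CASE
// family of macros, along the lines of:
//
//     ARMNN_AUTO_TEST_CASE_WITH_THF(DepthwiseConvolution2dUint8,
//                                   DepthwiseConvolution2dUint8Test,
//                                   true,                      // biasEnabled
//                                   armnn::DataLayout::NCHW)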