From d8df0260ced49a2796ff70e96284cf00eb316bcc Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Mon, 11 Nov 2019 12:28:15 +0000
Subject: IVGCVSW-4079 Add support of per-axis quantization to
 DepthwiseConvolution2d

!android-nn-driver:2260

Signed-off-by: Teresa Charlin
Change-Id: Iad93c1940568ffa65ed314c8871ea66caf4f9e4a
---
 src/backends/backendsCommon/WorkloadData.cpp  |  12 +-
 .../test/layerTests/Conv2dTestImpl.cpp        | 108 +++++++++++++
 .../test/layerTests/Conv2dTestImpl.hpp        |   5 +
 src/backends/reference/RefLayerSupport.cpp    |  25 ++-
 src/backends/reference/test/RefLayerTests.cpp |   6 +-
 src/backends/reference/workloads/ConvImpl.hpp | 177 ---------------------
 6 files changed, 147 insertions(+), 186 deletions(-)

diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 6222ba4800..0a2b27afbf 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1210,18 +1210,24 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
                                         numWeightInputChannels % numWeightChannelMultiplier));
     }
 
-    ValidateTensorDataTypesMatch(inputTensorInfo, weightTensorInfo, descriptorName, "input", "weight");
+    ValidateWeightDataType(inputTensorInfo, weightTensorInfo, descriptorName);
+
+    Optional<TensorInfo> optionalBiasTensorInfo;
     if (m_Parameters.m_BiasEnabled)
     {
         ValidatePointer(m_Bias, descriptorName, "bias");
 
-        const TensorInfo& biasTensorInfo = m_Bias->GetTensorInfo();
-        ValidateTensorNumDimensions(biasTensorInfo, descriptorName, 1, "bias");
+        optionalBiasTensorInfo = MakeOptional<TensorInfo>(m_Bias->GetTensorInfo());
+        const TensorInfo& biasTensorInfo = optionalBiasTensorInfo.value();
 
         ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName);
         ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias");
     }
 
+    ValidatePerAxisQuantization(inputTensorInfo,
+                                outputTensorInfo,
+                                weightTensorInfo,
+                                optionalBiasTensorInfo,
+                                descriptorName);
+
     std::vector<DataType> supportedTypes =
     {
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
index 5fac09f5b3..22e7e29db7 100644
--- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.cpp
@@ -3260,6 +3260,114 @@ LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
         workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
 }
 
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout)
+{
+    using namespace armnn;
+
+    const DataType inputType  = DataType::QuantisedAsymm8;
+    const DataType kernelType = DataType::QuantizedSymm8PerAxis;
+    const DataType biasType   = DataType::Signed32;
+
+    TensorInfo inputInfo ({ 1, 3, 3, 2 }, inputType, 0.5f, 128); // N H W C
+    TensorInfo outputInfo({ 1, 2, 2, 4 }, inputType, 1.0f, 128); // N H W C
+
+    const std::vector<float> quantScales{ 1.0f, 0.5f, 1.0f, 0.5f };
+    const unsigned int quantDimension = 0;
+    TensorInfo kernelInfo({ 2, 2, 2, 2 }, kernelType, quantScales, quantDimension); // M I H W
+
+    const std::vector<float> biasQuantScales{ 0.5f, 0.25f, 0.5f, 0.25f };
+    constexpr unsigned int biasQuantDimension = 0;
+    TensorInfo biasInfo({ 4 }, biasType, biasQuantScales, biasQuantDimension);
+
+    std::vector<uint8_t> inputData =
+    {
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130,
+        129, 130
+    };
+
+    std::vector<int8_t> kernelData =
+    {
+        1, 1, 1, 1,
+        1, 1, 1, 1,
+        1, 1, 1, 1,
+        1, 1, 1, 1
+    };
+
+    std::vector<int32_t> biasData =
+    {
+        4, 4, 4, 4
+    };
+
+    std::vector<uint8_t> expectedOutputData =
+    {
+        132, 130, 134, 131,
+        132, 130, 134, 131,
+        132, 130, 134, 131,
+        132, 130, 134, 131
+    };
+
+    if (layout == DataLayout::NCHW)
+    {
+        PermuteTensorNhwcToNchw(inputInfo, inputData);
+        PermuteTensorNhwcToNchw(outputInfo, expectedOutputData);
+    }
+
+    DepthwiseConvolution2dDescriptor descriptor;
+    descriptor.m_StrideX     = 1;
+    descriptor.m_StrideY     = 1;
+    descriptor.m_PadLeft     = 0;
+    descriptor.m_PadRight    = 0;
+    descriptor.m_PadTop      = 0;
+    descriptor.m_PadBottom   = 0;
+    descriptor.m_DilationX   = 1;
+    descriptor.m_DilationY   = 1;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = layout;
+
+    std::unique_ptr<ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
+    WorkloadInfo workloadInfo;
+    ScopedCpuTensorHandle weightTensor(kernelInfo);
+    ScopedCpuTensorHandle biasTensor(biasInfo);
+
+    AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+    AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+    DepthwiseConvolution2dQueueDescriptor queueDescriptor;
+    queueDescriptor.m_Parameters = descriptor;
+    queueDescriptor.m_Weight     = &weightTensor;
+    queueDescriptor.m_Bias       = &biasTensor;
+
+    AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+    std::unique_ptr<IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(queueDescriptor, workloadInfo);
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputData.data());
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    LayerTestResult<uint8_t, 4> ret(outputInfo);
+
+    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());
+    ret.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
+
+    return ret;
+}
+
 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
diff --git a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
index 3aac975b3b..69bfa97281 100644
--- a/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/Conv2dTestImpl.hpp
@@ -210,6 +210,11 @@ LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
     bool biasEnabled,
     const armnn::DataLayout layout);
 
+LayerTestResult<uint8_t, 4> DepthwiseConvolution2dPerAxisQuantTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::DataLayout layout);
+
 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index e98af7097b..3507162de8 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -565,14 +565,29 @@ bool RefLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
     supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
                                   "Reference DepthwiseConvolution2d: output is not a supported type.");
 
-    supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
-                                  "Reference DepthwiseConvolution2d: weights is not a supported type.");
-
     supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
                                   "Reference DepthwiseConvolution2d: input and output types mismatched.");
 
-    supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
-                                  "Reference DepthwiseConvolution2d: input and weights types mismatched.");
+    const DataType inputType = input.GetDataType();
+    if (inputType == DataType::QuantisedAsymm8)
+    {
+        std::array<DataType, 2> supportedWeightTypes =
+        {
+            DataType::QuantisedAsymm8,
+            DataType::QuantizedSymm8PerAxis
+        };
+
+        supported &= CheckSupportRule(TypeAnyOf(weights, supportedWeightTypes), reasonIfUnsupported,
+                                      "Reference DepthwiseConvolution2d: weights type not supported for quantized input.");
+    }
+    else
+    {
+        supported &= CheckSupportRule(TypeAnyOf(weights, supportedTypes), reasonIfUnsupported,
+                                      "Reference DepthwiseConvolution2d: weights is not a supported type.");
+
+        supported &= CheckSupportRule(TypesAreEqual(input, weights), reasonIfUnsupported,
+                                      "Reference DepthwiseConvolution2d: input and weights types mismatched.");
+    }
 
     if (biases.has_value())
     {
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index cd5c9273f7..c5986e0b12 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -159,7 +159,6 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dUint8,
                      DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dQSymm16, DepthwiseConvolution2dInt16Test, true, DataLayout::NCHW)
 
-// NHWC Depthwise Convolution
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dNhwc, DepthwiseConvolution2dTest, true, DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8Nhwc, DepthwiseConvolution2dUint8Test, true, DataLayout::NHWC)
@@ -263,6 +262,11 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetricNhwc,
 
 ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul64, DepthwiseConvolution2dDepthMul64Test);
 
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNchw, DepthwiseConvolution2dPerAxisQuantTest,
+                     DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dPerAxisQuantTestNhwc, DepthwiseConvolution2dPerAxisQuantTest,
+                     DataLayout::NHWC);
+
 // Pooling
 //MaxPooling
 ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false)
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 3551ba8f90..7dba760d87 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -50,183 +50,6 @@ private:
     int32_t m_RightShift;
 };
 
-/// An implementation shared by normal and depthwise convolution.
-template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
-static void ConvImpl(ConvData data,
-                     const InputType* inputData,
-                     float inputScale,
-                     int32_t inputOffset,
-                     const InputType* filterData,
-                     float filterScale,
-                     int32_t filterOffset,
-                     const BiasType* biasData,
-                     float outputScale,
-                     int32_t outputOffset,
-                     const TensorInfo& filterInfo,
-                     bool depthwise = false)
-{
-    if (data.m_Parameters.m_BiasEnabled && !biasData)
-    {
-        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
-    }
-
-    const TensorInfo& inputInfo  = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
-    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
-                                            GetOutputTensorData<InputType>(0, data),
-                                            data.m_Parameters.m_DataLayout);
-
-    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);
-
-    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
-
-    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
-    unsigned int inputChannels   = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
-    unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
-
-    unsigned int batchSize    = outputInfo.GetShape()[0];
-    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
-    unsigned int outputWidth  = outputInfo.GetShape()[widthIndex];
-    unsigned int inputHeight  = inputInfo.GetShape()[heightIndex];
-    unsigned int inputWidth   = inputInfo.GetShape()[widthIndex];
-
-    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
-    unsigned int filterWidth  = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
-
-    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
-    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
-    unsigned int xStride     = data.m_Parameters.m_StrideX;
-    unsigned int yStride     = data.m_Parameters.m_StrideY;
-    unsigned int xDilation   = data.m_Parameters.m_DilationX;
-    unsigned int yDilation   = data.m_Parameters.m_DilationY;
-
-    // The world's least efficient convolution.
-    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
-    {
-        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
-        {
-            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
-            {
-                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
-                {
-                    // This loop goes over each output element.
-                    AccumulatorType sum = AccumulatorType();
-
-                    // For depthwise, each output channel corresponds to exactly one input channel.
-                    // For normal, must loop over each input channel.
-                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
-                    {
-                        unsigned int depthwiseMultiplierIdx = 0;
-                        if (depthwise)
-                        {
-                            cInput                 = cOutput / depthMultiplier;
-                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
-                        }
-
-                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
-                        {
-                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
-                            {
-                                // This loop goes over each input element for each output element.
-
-                                unsigned int filterIndex = 0;
-
-                                // Since dimensionality of kernel depends on depthwiseness, so does index.
-                                if (depthwise)
-                                {
-                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
-                                                  cInput * filterWidth * filterHeight +
-                                                  yFilter * filterWidth +
-                                                  xFilter;
-                                }
-                                else
-                                {
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
-                                                      yFilter * filterWidth * inputChannels +
-                                                      xFilter * inputChannels +
-                                                      cInput;
-                                    }
-                                    else
-                                    {
-                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
-                                                      cInput * filterWidth * filterHeight +
-                                                      yFilter * filterWidth +
-                                                      xFilter;
-                                    }
-                                }
-
-                                AccumulatorType filterValue = filterData[filterIndex] -
-                                    boost::numeric_cast<AccumulatorType>(filterOffset);
-
-                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
-                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;
-
-                                AccumulatorType inputValue;
-
-                                // Check if we're in the padding.
-                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
-                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
-                                {
-                                    inputValue = AccumulatorType();
-                                }
-                                else
-                                {
-                                    unsigned int inputIndex;
-
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
-                                                     (yInput - paddingTop) * inputWidth * inputChannels +
-                                                     (xInput - paddingLeft) * inputChannels +
-                                                     cInput;
-                                    }
-                                    else
-                                    {
-                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
-                                                     inputWidth * inputHeight * cInput +
-                                                     inputWidth * (yInput - paddingTop) +
-                                                     xInput - paddingLeft;
-                                    }
-
-                                    inputValue = inputData[inputIndex] -
-                                        boost::numeric_cast<AccumulatorType>(inputOffset);
-                                }
-
-                                sum += filterValue * inputValue;
-                            }
-                        }
-                    }
-
-                    if (data.m_Parameters.m_BiasEnabled)
-                    {
-                        sum += biasData[cOutput];
-                    }
-
-                    if (outputScale != 0.0f)
-                    {
-                        float multiplier = (inputScale * filterScale) / outputScale;
-                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
-                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
-                        //  sum = std::round(multiplier * sum + outputOffset);
-                        sum = boost::numeric_cast<AccumulatorType>(
-                            QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
-                            + boost::numeric_cast<AccumulatorType>(outputOffset);
-                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
-                    }
-
-                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
-                }
-            }
-        }
-    }
-}
-
 void Convolve(const TensorShape& rInputShape,
               Decoder<float>& rInputDecoder,
               const TensorShape& rOutputShape,
-- 
cgit v1.2.1
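
The expected values in DepthwiseConvolution2dPerAxisQuantTest can be reproduced by hand from the quantization parameters in the patch. The sketch below is illustrative only and deliberately avoids the ArmNN API; the channel mapping cInput = cOutput / depthMultiplier is taken from the ConvImpl code removed above, and everything else (scales, offsets, raw weight/bias values) comes from the test itself. It dequantizes per output channel, sums the four 2x2 kernel taps, adds the bias, and requantizes:

// Standalone sanity check (not ArmNN code) for the per-axis test's expected outputs.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    const float inputScale   = 0.5f; // input:  QAsymm8, scale 0.5, offset 128
    const float outputScale  = 1.0f; // output: QAsymm8, scale 1.0, offset 128
    const int   outputOffset = 128;

    // Per-output-channel weight scales (QSymm8PerAxis). The test's bias scales are
    // inputScale * weightScales[c] == { 0.5, 0.25, 0.5, 0.25 }, as Signed32 bias requires.
    const std::vector<float> weightScales{ 1.0f, 0.5f, 1.0f, 0.5f };

    // Input channel 0 dequantizes to (129 - 128) * 0.5 = 0.5, channel 1 to (130 - 128) * 0.5 = 1.0.
    const float dequantInput[2] = { 0.5f, 1.0f };

    // 2x2 kernel, stride 1, no padding: each output element sums 4 taps of one input channel.
    for (int c = 0; c < 4; ++c)
    {
        const float weight = 1.0f * weightScales[c];              // quantized weight value is 1
        const float bias   = 4.0f * inputScale * weightScales[c]; // quantized bias value is 4
        const float acc    = 4.0f * dequantInput[c / 2] * weight + bias;

        const int quantized = std::min(255, std::max(0,
            static_cast<int>(std::round(acc / outputScale)) + outputOffset));
        std::cout << "channel " << c << ": " << quantized << "\n"; // prints 132, 130, 134, 131
    }
    return 0;
}

The four printed values match one output pixel of expectedOutputData ({ 132, 130, 134, 131 }), confirming that each output channel is scaled by its own weight scale rather than a single per-tensor scale.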