diff options
author | Matteo Martincigh <matteo.martincigh@arm.com> | 2018-12-18 09:26:39 +0000 |
---|---|---|
committer | Matteo Martincigh <matteo.martincigh@arm.com> | 2019-01-04 17:28:07 +0000 |
commit | 747ef82c88f9afe14a8b80b6b3b34118353e97f2 (patch) | |
tree | a29ac33b84fb96a41103a0a97327189495374cc9 /src/backends/reference | |
parent | 760892724d131c7da4b9baad05cddd49276ad6bb (diff) | |
download | armnn-747ef82c88f9afe14a8b80b6b3b34118353e97f2.tar.gz |
MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work
* Unified ArmNN's weight format to [ M, I, H, W ] (depth multiplier, input channels, filter height, filter width) for the depthwise convolution
* Added conversion utilities to permute/reshape the weights as appropriate
when using CL and Neon backends
* Updated the reference implementation of the convolution
* Updated the relevant unit tests accordingly
!android-nn-driver:459
Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
Diffstat (limited to 'src/backends/reference')
5 files changed, 49 insertions, 72 deletions
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp index 704bc368d2..5c07f57ec0 100644 --- a/src/backends/reference/workloads/ConvImpl.hpp +++ b/src/backends/reference/workloads/ConvImpl.hpp @@ -57,7 +57,6 @@ static void ConvImpl(ConvData data, float filterScale, int32_t filterOffset, const BiasType* biasData, - InputType* outputData, float outputScale, int32_t outputOffset, const TensorInfo& filterInfo, @@ -68,10 +67,10 @@ static void ConvImpl(ConvData data, throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); } - const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); - const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); - TensorBufferArrayView<InputType> output(outputInfo0.GetShape(), + TensorBufferArrayView<InputType> output(outputInfo.GetShape(), GetOutputTensorData<InputType>(0, data), data.m_Parameters.m_DataLayout); @@ -81,18 +80,18 @@ static void ConvImpl(ConvData data, const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex(); const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex(); - unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; - unsigned int channelsInput = filterInfo.GetShape()[channelsIndex]; - unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; + unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1; + unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex]; + unsigned int outputChannels = depthwise ? 
inputChannels * depthMultiplier : filterInfo.GetShape()[0]; - unsigned int batchSize = outputInfo0.GetShape()[0]; - unsigned int heightOutput = outputInfo0.GetShape()[heightIndex]; - unsigned int widthOutput = outputInfo0.GetShape()[widthIndex]; - unsigned int heightInput = inputInfo0.GetShape()[heightIndex]; - unsigned int widthInput = inputInfo0.GetShape()[widthIndex]; + unsigned int batchSize = outputInfo.GetShape()[0]; + unsigned int outputHeight = outputInfo.GetShape()[heightIndex]; + unsigned int outputWidth = outputInfo.GetShape()[widthIndex]; + unsigned int inputHeight = inputInfo.GetShape()[heightIndex]; + unsigned int inputWidth = inputInfo.GetShape()[widthIndex]; - unsigned int heightFilter = filterInfo.GetShape()[heightIndex]; - unsigned int widthFilter = filterInfo.GetShape()[widthIndex]; + unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex]; + unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex]; unsigned int paddingTop = data.m_Parameters.m_PadTop; unsigned int paddingLeft = data.m_Parameters.m_PadLeft; @@ -102,68 +101,56 @@ static void ConvImpl(ConvData data, // The world's least efficient convolution. for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) { - for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) + for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++) { - for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) + for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++) { - for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) + for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++) { // This loop goes over each output element. AccumulatorType sum = AccumulatorType(); // For depthwise, each output channel corresponds to exactly one input channel. // For normal, must loop over each input channel. - for (unsigned int cInput = 0; cInput < (depthwise ? 
1 : channelsInput); cInput++) + for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++) { unsigned int depthwiseMultiplierIdx = 0; if (depthwise) { - cInput = cOutput / depthMult; - depthwiseMultiplierIdx = cOutput % depthMult; + cInput = cOutput / depthMultiplier; + depthwiseMultiplierIdx = cOutput % depthMultiplier; } - for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) + for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++) { - for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) + for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++) { // This loop goes over each input element for each output element. - unsigned int filterIndex; + unsigned int filterIndex = 0; // Since dimensionality of kernel depends on depthwiseness, so does index. if (depthwise) { - if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) - { - filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter - * channelsInput + - yFilter * widthFilter * channelsInput + - xFilter * channelsInput + - cInput; - } - else - { - filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter - * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + - xFilter; - } + filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels + + cInput * filterWidth * filterHeight + + yFilter * filterWidth + + xFilter; } else { if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) { - filterIndex = cOutput * heightFilter * widthFilter * channelsInput + - yFilter * widthFilter * channelsInput + - xFilter * channelsInput + + filterIndex = cOutput * filterHeight * filterWidth * inputChannels + + yFilter * filterWidth * inputChannels + + xFilter * inputChannels + cInput; } else { - filterIndex = cOutput * widthFilter * heightFilter * channelsInput + - cInput * widthFilter * heightFilter + - yFilter * widthFilter + + filterIndex = cOutput * filterWidth * filterHeight * inputChannels + + 
cInput * filterWidth * filterHeight + + yFilter * filterWidth + xFilter; } } @@ -177,8 +164,8 @@ static void ConvImpl(ConvData data, AccumulatorType inputValue; // Check if we're in the padding. - if (yInput < paddingTop || yInput >= heightInput + paddingTop || - xInput < paddingLeft || xInput >= widthInput + paddingLeft ) + if (yInput < paddingTop || yInput >= inputHeight + paddingTop || + xInput < paddingLeft || xInput >= inputWidth + paddingLeft ) { inputValue = AccumulatorType(); } @@ -188,17 +175,17 @@ static void ConvImpl(ConvData data, if (data.m_Parameters.m_DataLayout == DataLayout::NHWC) { - inputIndex = batchIdx * heightInput * widthInput * channelsInput + - (yInput - paddingTop) * widthInput * channelsInput + - (xInput - paddingLeft) * channelsInput + + inputIndex = batchIdx * inputHeight * inputWidth * inputChannels + + (yInput - paddingTop) * inputWidth * inputChannels + + (xInput - paddingLeft) * inputChannels + cInput; } else { - inputIndex = batchIdx * widthInput * heightInput * channelsInput + - widthInput * heightInput * cInput + - widthInput * (yInput - paddingTop) + + inputIndex = batchIdx * inputWidth * inputHeight * inputChannels + + inputWidth * inputHeight * cInput + + inputWidth * (yInput - paddingTop) + xInput - paddingLeft; } diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp index 20905646d7..7b298df967 100644 --- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp @@ -23,15 +23,13 @@ void RefConvolution2dFloat32Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); - float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); - const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = 
m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<float>() : nullptr; + const float* filterData = m_Weight->template GetConstTensor<float>(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr; const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl<armnn::Convolution2dQueueDescriptor, float, float, float>( - m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); + m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp index 881e9bf6b0..af2c7ad0d6 100644 --- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp +++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp @@ -27,10 +27,7 @@ void RefConvolution2dUint8Workload::Execute() const const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Bias->template GetConstTensor<int32_t>() : nullptr; const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); @@ -39,7 +36,7 @@ void RefConvolution2dUint8Workload::Execute() const inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); + outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp index e89013b9bd..756e958753 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -23,15 +23,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); - float* outputData = GetOutputTensorDataFloat(0, m_Data); const float* inputData = GetInputTensorDataFloat(0, m_Data); const float* weightData = m_Weight->template GetConstTensor<float>(); - const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<float>() : nullptr; + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
m_Bias->template GetConstTensor<float>() : nullptr; const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); ConvImpl<armnn::DepthwiseConvolution2dQueueDescriptor, float, float, float> - (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, 0.0f, 0, filterInfo, true); } } //namespace armnn diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp index e8e501d6ae..629b729ea6 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -28,10 +28,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>(); const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); - const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? - m_Bias->template GetConstTensor<int32_t>() : - nullptr; - uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr; const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); @@ -40,7 +37,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), biasData, - outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); + outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); } } //namespace armnn |