From 747ef82c88f9afe14a8b80b6b3b34118353e97f2 Mon Sep 17 00:00:00 2001
From: Matteo Martincigh
Date: Tue, 18 Dec 2018 09:26:39 +0000
Subject: MLCE-77 Depthwise Convolution with depth multiplier > 1 doesn't work

 * Unified ArmNN's weight format to [ M, I, H, W ] for the depthwise
   convolution
 * Added conversion utilities to permute/reshape the weights as
   appropriate when using CL and Neon backends
 * Updated the reference implementation of the convolution
 * Updated the relevant unit tests accordingly

!android-nn-driver:459

Change-Id: I07d0818efa9d1ca1e5dad82983aac1fe78eadb18
---
 src/backends/reference/workloads/ConvImpl.hpp      | 93 ++++++++++------------
 .../workloads/RefConvolution2dFloat32Workload.cpp  |  8 +-
 .../workloads/RefConvolution2dUint8Workload.cpp    |  7 +-
 .../RefDepthwiseConvolution2dFloat32Workload.cpp   |  6 +-
 .../RefDepthwiseConvolution2dUint8Workload.cpp     |  7 +-
 5 files changed, 49 insertions(+), 72 deletions(-)

(limited to 'src/backends/reference/workloads')
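Note on the new weight layout before the file-by-file changes: ArmNN now
stores depthwise weights as [ M, I, H, W ] (depth multiplier, input
channels, kernel height, kernel width). As a rough illustration of the
kind of permutation the new conversion utilities perform, the sketch
below rearranges a depthwise weight tensor from the Android NN / TFLite
layout [ 1, H, W, I*M ] (packed channel index = i*M + m) into
[ M, I, H, W ]. The source layout and the function name are assumptions
made for this sketch, not the actual utilities added by this patch.

#include <cstddef>
#include <vector>

// Hypothetical helper: permute [ 1, H, W, I*M ] -> [ M, I, H, W ].
std::vector<float> PermuteDepthwiseWeightsToMIHW(const std::vector<float>& src,
                                                 std::size_t H, std::size_t W,
                                                 std::size_t I, std::size_t M)
{
    std::vector<float> dst(M * I * H * W);
    for (std::size_t m = 0; m < M; ++m)
    {
        for (std::size_t i = 0; i < I; ++i)
        {
            for (std::size_t h = 0; h < H; ++h)
            {
                for (std::size_t w = 0; w < W; ++w)
                {
                    // Source is channel-minor: channel = i*M + m (assumed convention).
                    const std::size_t srcIdx = (h * W + w) * (I * M) + (i * M + m);
                    // Destination is plain row-major [ M, I, H, W ].
                    const std::size_t dstIdx = ((m * I + i) * H + h) * W + w;
                    dst[dstIdx] = src[srcIdx];
                }
            }
        }
    }
    return dst;
}

Making the depth multiplier the outermost dimension is what lets the
reference implementation below index depthwise weights without branching
on NCHW/NHWC.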
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 704bc368d2..5c07f57ec0 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -57,7 +57,6 @@ static void ConvImpl(ConvData data,
                      float filterScale,
                      int32_t filterOffset,
                      const BiasType* biasData,
-                     InputType* outputData,
                      float outputScale,
                      int32_t outputOffset,
                      const TensorInfo& filterInfo,
@@ -68,10 +67,10 @@ static void ConvImpl(ConvData data,
         throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
     }
 
-    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
 
-    TensorBufferArrayView<InputType> output(outputInfo0.GetShape(),
+    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
                                             GetOutputTensorData<InputType>(0, data),
                                             data.m_Parameters.m_DataLayout);
 
@@ -81,18 +80,18 @@ static void ConvImpl(ConvData data,
     const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
     const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
 
-    unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
-    unsigned int channelsInput = filterInfo.GetShape()[channelsIndex];
-    unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
+    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
+    unsigned int inputChannels = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
+    unsigned int outputChannels = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
 
-    unsigned int batchSize = outputInfo0.GetShape()[0];
-    unsigned int heightOutput = outputInfo0.GetShape()[heightIndex];
-    unsigned int widthOutput = outputInfo0.GetShape()[widthIndex];
-    unsigned int heightInput = inputInfo0.GetShape()[heightIndex];
-    unsigned int widthInput = inputInfo0.GetShape()[widthIndex];
+    unsigned int batchSize = outputInfo.GetShape()[0];
+    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
+    unsigned int outputWidth = outputInfo.GetShape()[widthIndex];
+    unsigned int inputHeight = inputInfo.GetShape()[heightIndex];
+    unsigned int inputWidth = inputInfo.GetShape()[widthIndex];
 
-    unsigned int heightFilter = filterInfo.GetShape()[heightIndex];
-    unsigned int widthFilter = filterInfo.GetShape()[widthIndex];
+    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
+    unsigned int filterWidth = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
 
     unsigned int paddingTop = data.m_Parameters.m_PadTop;
     unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
@@ -102,68 +101,56 @@ static void ConvImpl(ConvData data,
     // The world's least efficient convolution.
     for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
     {
-        for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
         {
-            for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
             {
-                for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
                 {
                     // This loop goes over each output element.
                     AccumulatorType sum = AccumulatorType();
 
                     // For depthwise, each output channel corresponds to exactly one input channel.
                     // For normal, must loop over each input channel.
-                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
                     {
                         unsigned int depthwiseMultiplierIdx = 0;
                         if (depthwise)
                         {
-                            cInput = cOutput / depthMult;
-                            depthwiseMultiplierIdx = cOutput % depthMult;
+                            cInput = cOutput / depthMultiplier;
+                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
                         }
 
-                        for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
                         {
-                            for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
                             {
                                 // This loop goes over each input element for each output element.
 
-                                unsigned int filterIndex;
+                                unsigned int filterIndex = 0;
 
                                 // Since dimensionality of kernel depends on depthwiseness, so does index.
                                 if (depthwise)
                                 {
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        filterIndex = depthwiseMultiplierIdx * heightFilter * widthFilter
-                                                      * channelsInput +
-                                                      yFilter * widthFilter * channelsInput +
-                                                      xFilter * channelsInput +
-                                                      cInput;
-                                    }
-                                    else
-                                    {
-                                        filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter
-                                                      * channelsInput +
-                                                      cInput * widthFilter * heightFilter +
-                                                      yFilter * widthFilter +
-                                                      xFilter;
-                                    }
+                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
+                                                  cInput * filterWidth * filterHeight +
+                                                  yFilter * filterWidth +
+                                                  xFilter;
                                 }
                                 else
                                 {
                                     if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                     {
-                                        filterIndex = cOutput * heightFilter * widthFilter * channelsInput +
-                                                      yFilter * widthFilter * channelsInput +
-                                                      xFilter * channelsInput +
+                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
+                                                      yFilter * filterWidth * inputChannels +
+                                                      xFilter * inputChannels +
                                                       cInput;
                                     }
                                     else
                                     {
-                                        filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
-                                                      cInput * widthFilter * heightFilter +
-                                                      yFilter * widthFilter +
+                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
+                                                      cInput * filterWidth * filterHeight +
+                                                      yFilter * filterWidth +
                                                       xFilter;
                                     }
                                 }
@@ -177,8 +164,8 @@ static void ConvImpl(ConvData data,
                                 AccumulatorType inputValue;
 
                                 // Check if we're in the padding.
-                                if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
-                                    xInput < paddingLeft || xInput >= widthInput + paddingLeft )
+                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
+                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft )
                                 {
                                     inputValue = AccumulatorType();
                                 }
@@ -188,17 +175,17 @@ static void ConvImpl(ConvData data,
 
                                     if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
                                     {
-                                        inputIndex = batchIdx * heightInput * widthInput * channelsInput +
-                                                     (yInput - paddingTop) * widthInput * channelsInput +
-                                                     (xInput - paddingLeft) * channelsInput +
+                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
+                                                     (yInput - paddingTop) * inputWidth * inputChannels +
+                                                     (xInput - paddingLeft) * inputChannels +
                                                      cInput;
                                     }
                                     else
                                     {
-                                        inputIndex = batchIdx * widthInput * heightInput * channelsInput +
-                                                     widthInput * heightInput * cInput +
-                                                     widthInput * (yInput - paddingTop) +
+                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
+                                                     inputWidth * inputHeight * cInput +
+                                                     inputWidth * (yInput - paddingTop) +
                                                      xInput - paddingLeft;
                                     }
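The key simplification above is that depthwise weights now have a single,
layout-independent index. Extracted as a standalone sketch for clarity
(the function name is ours; the arithmetic is exactly what the updated
ConvImpl computes):

#include <cassert>

// Index into a depthwise weight tensor stored as [ M, I, H, W ].
unsigned int DepthwiseFilterIndex(unsigned int cOutput,         // output channel in [0, I*M)
                                  unsigned int depthMultiplier, // M
                                  unsigned int inputChannels,   // I
                                  unsigned int filterHeight,    // H
                                  unsigned int filterWidth,     // W
                                  unsigned int yFilter,
                                  unsigned int xFilter)
{
    assert(cOutput < inputChannels * depthMultiplier);

    // Each output channel corresponds to exactly one (input channel, multiplier) pair.
    const unsigned int cInput        = cOutput / depthMultiplier;
    const unsigned int multiplierIdx = cOutput % depthMultiplier;

    // Plain row-major index into [ M, I, H, W ]; no NHWC/NCHW branch is needed.
    return multiplierIdx * filterWidth * filterHeight * inputChannels +
           cInput * filterWidth * filterHeight +
           yFilter * filterWidth +
           xFilter;
}

For example, with inputChannels = 2 and depthMultiplier = 2 there are four
output channels: channels 0 and 1 both read input channel 0 (multiplier
indices 0 and 1), and channels 2 and 3 both read input channel 1. This
decomposition is what makes depth multiplier > 1 work in the reference
backend.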
diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
index 20905646d7..7b298df967 100644
--- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefConvolution2dFloat32Workload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute");
 
-    float* outputData = GetOutputTensorDataFloat(0, m_Data);
     const float* inputData = GetInputTensorDataFloat(0, m_Data);
-    const float* weightData = m_Weight->template GetConstTensor<float>();
-    const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
-        m_Bias->template GetConstTensor<float>() : nullptr;
+    const float* filterData = m_Weight->template GetConstTensor<float>();
+    const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
     const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
 
     ConvImpl<Convolution2dQueueDescriptor, float, float, float>(
-        m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo);
+        m_Data, inputData, 0.0f, 0, filterData, 0.0f, 0, biasData, 0.0f, 0, filterInfo);
 }
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
index 881e9bf6b0..af2c7ad0d6 100644
--- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.cpp
@@ -27,10 +27,7 @@ void RefConvolution2dUint8Workload::Execute() const
     const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
     const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
     const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
-    const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
-        m_Bias->template GetConstTensor<int32_t>() :
-        nullptr;
-    uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+    const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
     const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
     const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
 
@@ -39,7 +36,7 @@ void RefConvolution2dUint8Workload::Execute() const
         inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
         weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
         biasData,
-        outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
+        outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo);
 }
 
 } //namespace armnn
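In the uint8 workload above (and the two depthwise workloads below) the
output pointer has likewise been dropped: ConvImpl now writes results
itself through TensorBufferArrayView and only needs the output's
quantization parameters. For readers less familiar with the scale/offset
arguments being threaded through, the sketch below shows the standard
affine requantization such a uint8 path performs. It is a generic
formulation for illustration, with a hypothetical function name, not a
copy of ConvImpl's internals.

#include <algorithm>
#include <cmath>
#include <cstdint>

// accumulator = sum of (input - inputOffset) * (weight - weightOffset), plus bias.
uint8_t RequantizeAccumulator(int32_t accumulator,
                              float inputScale, float weightScale,
                              float outputScale, int32_t outputOffset)
{
    // The accumulator represents a real value of accumulator * inputScale * weightScale;
    // re-express it on the output's quantization grid and clamp to the uint8 range.
    const float rescaled = static_cast<float>(accumulator) * (inputScale * weightScale / outputScale);
    const int32_t quantized = static_cast<int32_t>(std::round(rescaled)) + outputOffset;
    return static_cast<uint8_t>(std::clamp<int32_t>(quantized, 0, 255));
}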
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
index e89013b9bd..756e958753 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.cpp
@@ -23,15 +23,13 @@ void RefDepthwiseConvolution2dFloat32Workload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute");
 
-    float* outputData = GetOutputTensorDataFloat(0, m_Data);
     const float* inputData = GetInputTensorDataFloat(0, m_Data);
     const float* weightData = m_Weight->template GetConstTensor<float>();
-    const float* biasData = m_Data.m_Parameters.m_BiasEnabled ?
-        m_Bias->template GetConstTensor<float>() : nullptr;
+    const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<float>() : nullptr;
     const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
 
     ConvImpl<DepthwiseConvolution2dQueueDescriptor, float, float, float>
-        (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true);
+        (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, 0.0f, 0, filterInfo, true);
 }
 
 } //namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
index e8e501d6ae..629b729ea6 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.cpp
@@ -28,10 +28,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
     const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
     const uint8_t* weightsData = m_Weight->template GetConstTensor<uint8_t>();
     const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get());
-    const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ?
-        m_Bias->template GetConstTensor<int32_t>() :
-        nullptr;
-    uint8_t* outputData = GetOutputTensorDataU8(0, m_Data);
+    const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->template GetConstTensor<int32_t>() : nullptr;
     const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
     const TensorInfo& filterInfo = m_Weight->GetTensorInfo();
 
@@ -40,7 +37,7 @@ void RefDepthwiseConvolution2dUint8Workload::Execute() const
         inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(),
         weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(),
         biasData,
-        outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
+        outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true);
 }
 
 } //namespace armnn
-- 
cgit v1.2.1