From d8df0260ced49a2796ff70e96284cf00eb316bcc Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Mon, 11 Nov 2019 12:28:15 +0000
Subject: IVGCVSW-4079 Add support of per-axis quantization to
 DepthwiseConvolution2d

!android-nn-driver:2260

Signed-off-by: Teresa Charlin
Change-Id: Iad93c1940568ffa65ed314c8871ea66caf4f9e4a
---
 src/backends/reference/workloads/ConvImpl.hpp | 177 --------------------------
 1 file changed, 177 deletions(-)

(limited to 'src/backends/reference/workloads')

diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 3551ba8f90..7dba760d87 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -50,183 +50,6 @@ private:
     int32_t m_RightShift;
 };
 
-/// An implementation shared by normal and depthwise convolution.
-template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
-static void ConvImpl(ConvData data,
-                     const InputType* inputData,
-                     float inputScale,
-                     int32_t inputOffset,
-                     const InputType* filterData,
-                     float filterScale,
-                     int32_t filterOffset,
-                     const BiasType* biasData,
-                     float outputScale,
-                     int32_t outputOffset,
-                     const TensorInfo& filterInfo,
-                     bool depthwise = false)
-{
-    if (data.m_Parameters.m_BiasEnabled && !biasData)
-    {
-        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
-    }
-
-    const TensorInfo& inputInfo  = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
-    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
-                                            GetOutputTensorData<InputType>(0, data),
-                                            data.m_Parameters.m_DataLayout);
-
-    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);
-
-    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
-
-    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
-    unsigned int inputChannels   = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
-    unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
-
-    unsigned int batchSize    = outputInfo.GetShape()[0];
-    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
-    unsigned int outputWidth  = outputInfo.GetShape()[widthIndex];
-    unsigned int inputHeight  = inputInfo.GetShape()[heightIndex];
-    unsigned int inputWidth   = inputInfo.GetShape()[widthIndex];
-
-    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
-    unsigned int filterWidth  = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
-
-    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
-    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
-    unsigned int xStride     = data.m_Parameters.m_StrideX;
-    unsigned int yStride     = data.m_Parameters.m_StrideY;
-    unsigned int xDilation   = data.m_Parameters.m_DilationX;
-    unsigned int yDilation   = data.m_Parameters.m_DilationY;
-
-    // The world's least efficient convolution.
-    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
-    {
-        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
-        {
-            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
-            {
-                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
-                {
-                    // This loop goes over each output element.
-                    AccumulatorType sum = AccumulatorType();
-
-                    // For depthwise, each output channel corresponds to exactly one input channel.
-                    // For normal, must loop over each input channel.
-                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
-                    {
-                        unsigned int depthwiseMultiplierIdx = 0;
-                        if (depthwise)
-                        {
-                            cInput = cOutput / depthMultiplier;
-                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
-                        }
-
-                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
-                        {
-                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
-                            {
-                                // This loop goes over each input element for each output element.
-
-                                unsigned int filterIndex = 0;
-
-                                // Since dimensionality of kernel depends on depthwiseness, so does index.
-                                if (depthwise)
-                                {
-                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
-                                                  cInput * filterWidth * filterHeight +
-                                                  yFilter * filterWidth +
-                                                  xFilter;
-                                }
-                                else
-                                {
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
-                                                      yFilter * filterWidth * inputChannels +
-                                                      xFilter * inputChannels +
-                                                      cInput;
-                                    }
-                                    else
-                                    {
-                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
-                                                      cInput * filterWidth * filterHeight +
-                                                      yFilter * filterWidth +
-                                                      xFilter;
-                                    }
-                                }
-
-                                AccumulatorType filterValue = filterData[filterIndex] -
-                                    boost::numeric_cast<AccumulatorType>(filterOffset);
-
-                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
-                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;
-
-                                AccumulatorType inputValue;
-
-                                // Check if we're in the padding.
-                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
-                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
-                                {
-                                    inputValue = AccumulatorType();
-                                }
-                                else
-                                {
-                                    unsigned int inputIndex;
-
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
-                                                     (yInput - paddingTop) * inputWidth * inputChannels +
-                                                     (xInput - paddingLeft) * inputChannels +
-                                                     cInput;
-                                    }
-                                    else
-                                    {
-                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
-                                                     inputWidth * inputHeight * cInput +
-                                                     inputWidth * (yInput - paddingTop) +
-                                                     xInput - paddingLeft;
-                                    }
-
-                                    inputValue = inputData[inputIndex] -
-                                        boost::numeric_cast<AccumulatorType>(inputOffset);
-                                }
-
-                                sum += filterValue * inputValue;
-                            }
-                        }
-                    }
-
-                    if (data.m_Parameters.m_BiasEnabled)
-                    {
-                        sum += biasData[cOutput];
-                    }
-
-                    if (outputScale != 0.0f)
-                    {
-                        float multiplier = (inputScale * filterScale) / outputScale;
-                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
-                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
-                        //  sum = std::round(multiplier * sum + outputOffset);
-                        sum = boost::numeric_cast<AccumulatorType>(
-                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
-                            + boost::numeric_cast<AccumulatorType>(outputOffset);
-                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
-                    }
-
-                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
-                }
-            }
-        }
-    }
-}
-
 void Convolve(const TensorShape& rInputShape,
               Decoder<float>& rInputDecoder,
               const TensorShape& rOutputShape,
-- 
cgit v1.2.1
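
Reviewer note: the deleted per-tensor path rescales the int32 accumulator with
QuantizedMultiplierSmallerThanOne, i.e. roughly sum = round(multiplier * sum) + outputOffset
in integer-only arithmetic. The sketch below is an illustrative standalone
reimplementation of that idea, not the class's actual source; it assumes
0 < multiplier < 1 (which holds when inputScale * filterScale < outputScale)
and the uint8 output range the deleted code clamped to.

// Minimal sketch of fixed-point requantization, assuming 0 < multiplier < 1.
// Roughly: result = round(multiplier * sum) + outputOffset, clamped to [0, 255].
#include <algorithm>
#include <cmath>
#include <cstdint>

int32_t Requantize(int32_t sum, float multiplier, int32_t outputOffset)
{
    // Split the multiplier into a Q31 mantissa and a right shift:
    // multiplier = (q / 2^31) * 2^(-rightShift), mantissa in [0.5, 1).
    int exponent = 0;
    const float mantissa = std::frexp(multiplier, &exponent);
    int64_t q = std::llround(static_cast<double>(mantissa) * (1ll << 31));
    const int rightShift = -exponent;
    if (q == (1ll << 31))
    {
        q -= 1; // mantissa rounded up to 1.0; clamp just below it for this sketch
    }

    // Rounding high multiply: (sum * q) / 2^31, rounded to nearest.
    const int64_t product = static_cast<int64_t>(sum) * q + (1ll << 30);
    int32_t result = static_cast<int32_t>(product >> 31);

    // Rounding right shift by the remaining amount.
    if (rightShift > 0)
    {
        result = (result + (1 << (rightShift - 1))) >> rightShift;
    }

    return std::min(std::max(result + outputOffset, 0), 255);
}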
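
Why the whole function goes away: ConvImpl receives a single filterScale/filterOffset
pair, which can only describe per-tensor quantization. Per-axis quantization, the
subject of this change, attaches one scale per output channel, so the replacement
Convolve path reads elements through Decoder<float>/Encoder<float> instead of taking
scales in its signature. A hypothetical illustration of the difference (these names
are not ArmNN API):

// Per-axis weight quantization carries one scale per channel along the
// quantization axis; a single 'filterScale' float cannot represent it.
#include <cstdint>
#include <vector>

struct PerAxisQuantInfo
{
    std::vector<float> scales; // one scale per output channel
    int32_t zeroPoint = 0;     // per-axis weights are commonly symmetric (zero point 0)
};

// Dequantize one weight, selecting the scale by the weight's output channel.
inline float DequantizeWeight(int8_t value, const PerAxisQuantInfo& info, unsigned int channel)
{
    return static_cast<float>(value - info.zeroPoint) * info.scales[channel];
}

Hiding the scale lookup behind a decoder lets the same Convolve loop serve float,
per-tensor, and per-axis quantized tensors without templating on the data type.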