From d8df0260ced49a2796ff70e96284cf00eb316bcc Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Mon, 11 Nov 2019 12:28:15 +0000
Subject: IVGCVSW-4079 Add support of per-axis quantization to
 DepthwiseConvolution2d

!android-nn-driver:2260

Signed-off-by: Teresa Charlin
Change-Id: Iad93c1940568ffa65ed314c8871ea66caf4f9e4a
---
 src/backends/reference/workloads/ConvImpl.hpp | 177 --------------------------
 1 file changed, 177 deletions(-)

(limited to 'src/backends/reference/workloads')

diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
index 3551ba8f90..7dba760d87 100644
--- a/src/backends/reference/workloads/ConvImpl.hpp
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -50,183 +50,6 @@ private:
     int32_t m_RightShift;
 };
 
-/// An implementation shared by normal and depthwise convolution.
-template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
-static void ConvImpl(ConvData data,
-                     const InputType* inputData,
-                     float inputScale,
-                     int32_t inputOffset,
-                     const InputType* filterData,
-                     float filterScale,
-                     int32_t filterOffset,
-                     const BiasType* biasData,
-                     float outputScale,
-                     int32_t outputOffset,
-                     const TensorInfo& filterInfo,
-                     bool depthwise = false)
-{
-    if (data.m_Parameters.m_BiasEnabled && !biasData)
-    {
-        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
-    }
-
-    const TensorInfo& inputInfo  = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
-    TensorBufferArrayView<InputType> output(outputInfo.GetShape(),
-                                            GetOutputTensorData<InputType>(0, data),
-                                            data.m_Parameters.m_DataLayout);
-
-    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(data.m_Parameters.m_DataLayout);
-
-    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
-    const unsigned int heightIndex   = dataLayoutIndexed.GetHeightIndex();
-    const unsigned int widthIndex    = dataLayoutIndexed.GetWidthIndex();
-
-    unsigned int depthMultiplier = depthwise ? filterInfo.GetShape()[0] : 1;
-    unsigned int inputChannels   = depthwise ? filterInfo.GetShape()[1] : filterInfo.GetShape()[channelsIndex];
-    unsigned int outputChannels  = depthwise ? inputChannels * depthMultiplier : filterInfo.GetShape()[0];
-
-    unsigned int batchSize    = outputInfo.GetShape()[0];
-    unsigned int outputHeight = outputInfo.GetShape()[heightIndex];
-    unsigned int outputWidth  = outputInfo.GetShape()[widthIndex];
-    unsigned int inputHeight  = inputInfo.GetShape()[heightIndex];
-    unsigned int inputWidth   = inputInfo.GetShape()[widthIndex];
-
-    unsigned int filterHeight = depthwise ? filterInfo.GetShape()[2] : filterInfo.GetShape()[heightIndex];
-    unsigned int filterWidth  = depthwise ? filterInfo.GetShape()[3] : filterInfo.GetShape()[widthIndex];
-
-    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
-    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
-    unsigned int xStride     = data.m_Parameters.m_StrideX;
-    unsigned int yStride     = data.m_Parameters.m_StrideY;
-    unsigned int xDilation   = data.m_Parameters.m_DilationX;
-    unsigned int yDilation   = data.m_Parameters.m_DilationY;
-
-    // The world's least efficient convolution.
-    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
-    {
-        for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
-        {
-            for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
-            {
-                for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
-                {
-                    // This loop goes over each output element.
-                    AccumulatorType sum = AccumulatorType();
-
-                    // For depthwise, each output channel corresponds to exactly one input channel.
-                    // For normal, must loop over each input channel.
-                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
-                    {
-                        unsigned int depthwiseMultiplierIdx = 0;
-                        if (depthwise)
-                        {
-                            cInput = cOutput / depthMultiplier;
-                            depthwiseMultiplierIdx = cOutput % depthMultiplier;
-                        }
-
-                        for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
-                        {
-                            for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
-                            {
-                                // This loop goes over each input element for each output element.
-
-                                unsigned int filterIndex = 0;
-
-                                // Since dimensionality of kernel depends on depthwiseness, so does index.
-                                if (depthwise)
-                                {
-                                    filterIndex = depthwiseMultiplierIdx * filterWidth * filterHeight * inputChannels +
-                                                  cInput * filterWidth * filterHeight +
-                                                  yFilter * filterWidth +
-                                                  xFilter;
-                                }
-                                else
-                                {
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
-                                                      yFilter * filterWidth * inputChannels +
-                                                      xFilter * inputChannels +
-                                                      cInput;
-                                    }
-                                    else
-                                    {
-                                        filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
-                                                      cInput * filterWidth * filterHeight +
-                                                      yFilter * filterWidth +
-                                                      xFilter;
-                                    }
-                                }
-
-                                AccumulatorType filterValue = filterData[filterIndex] -
-                                    boost::numeric_cast<AccumulatorType>(filterOffset);
-
-                                unsigned int yInput = yOutput * yStride + yFilter * yDilation;
-                                unsigned int xInput = xOutput * xStride + xFilter * xDilation;
-
-                                AccumulatorType inputValue;
-
-                                // Check if we're in the padding.
-                                if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
-                                    xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
-                                {
-                                    inputValue = AccumulatorType();
-                                }
-                                else
-                                {
-                                    unsigned int inputIndex;
-
-                                    if (data.m_Parameters.m_DataLayout == DataLayout::NHWC)
-                                    {
-                                        inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
-                                                     (yInput - paddingTop) * inputWidth * inputChannels +
-                                                     (xInput - paddingLeft) * inputChannels +
-                                                     cInput;
-                                    }
-                                    else
-                                    {
-                                        inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
-                                                     inputWidth * inputHeight * cInput +
-                                                     inputWidth * (yInput - paddingTop) +
-                                                     xInput - paddingLeft;
-                                    }
-
-                                    inputValue = inputData[inputIndex] -
-                                        boost::numeric_cast<AccumulatorType>(inputOffset);
-                                }
-
-                                sum += filterValue * inputValue;
-                            }
-                        }
-                    }
-
-                    if (data.m_Parameters.m_BiasEnabled)
-                    {
-                        sum += biasData[cOutput];
-                    }
-
-                    if (outputScale != 0.0f)
-                    {
-                        float multiplier = (inputScale * filterScale) / outputScale;
-                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
-                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
-                        //  sum = std::round(multiplier * sum + outputOffset);
-                        sum = boost::numeric_cast<AccumulatorType>(
-                                QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
-                            + boost::numeric_cast<AccumulatorType>(outputOffset);
-                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
-                    }
-
-                    output.Get(batchIdx, cOutput, yOutput, xOutput) = boost::numeric_cast<InputType>(sum);
-                }
-            }
-        }
-    }
-}
-
 void Convolve(const TensorShape& rInputShape,
               Decoder<float>& rInputDecoder,
               const TensorShape& rOutputShape,
-- 
cgit v1.2.1
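
Reviewer note: the deleted per-tensor path rescales the int32 accumulator with
QuantizedMultiplierSmallerThanOne, i.e. roughly sum = round(multiplier * sum) + outputOffset
in integer-only arithmetic. The sketch below is an illustrative standalone
reimplementation of that idea, not the class's actual source; it assumes
0 < multiplier < 1 (which holds when inputScale * filterScale < outputScale)
and the uint8 output range the deleted code clamped to.

// Minimal sketch of fixed-point requantization, assuming 0 < multiplier < 1.
// Roughly: result = round(multiplier * sum) + outputOffset, clamped to [0, 255].
#include <algorithm>
#include <cmath>
#include <cstdint>

int32_t Requantize(int32_t sum, float multiplier, int32_t outputOffset)
{
    // Split the multiplier into a Q31 mantissa and a right shift:
    // multiplier = (q / 2^31) * 2^(-rightShift), mantissa in [0.5, 1).
    int exponent = 0;
    const float mantissa = std::frexp(multiplier, &exponent);
    int64_t q = std::llround(static_cast<double>(mantissa) * (1ll << 31));
    const int rightShift = -exponent;
    if (q == (1ll << 31))
    {
        q -= 1; // mantissa rounded up to 1.0; clamp just below it for this sketch
    }

    // Rounding high multiply: (sum * q) / 2^31, rounded to nearest.
    const int64_t product = static_cast<int64_t>(sum) * q + (1ll << 30);
    int32_t result = static_cast<int32_t>(product >> 31);

    // Rounding right shift by the remaining amount.
    if (rightShift > 0)
    {
        result = (result + (1 << (rightShift - 1))) >> rightShift;
    }

    return std::min(std::max(result + outputOffset, 0), 255);
}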
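
Why the whole function goes away: ConvImpl receives a single filterScale/filterOffset
pair, which can only describe per-tensor quantization. Per-axis quantization, the
subject of this change, attaches one scale per output channel, so the replacement
Convolve path reads elements through Decoder<float>/Encoder<float> instead of taking
scales in its signature. A hypothetical illustration of the difference (these names
are not ArmNN API):

// Per-axis weight quantization carries one scale per channel along the
// quantization axis; a single 'filterScale' float cannot represent it.
#include <cstdint>
#include <vector>

struct PerAxisQuantInfo
{
    std::vector<float> scales; // one scale per output channel
    int32_t zeroPoint = 0;     // per-axis weights are commonly symmetric (zero point 0)
};

// Dequantize one weight, selecting the scale by the weight's output channel.
inline float DequantizeWeight(int8_t value, const PerAxisQuantInfo& info, unsigned int channel)
{
    return static_cast<float>(value - info.zeroPoint) * info.scales[channel];
}

Hiding the scale lookup behind a decoder lets the same Convolve loop serve float,
per-tensor, and per-axis quantized tensors without templating on the data type.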