Diffstat (limited to 'src/armnn/backends/RefWorkloads/ConvImpl.hpp')
-rw-r--r--  src/armnn/backends/RefWorkloads/ConvImpl.hpp  187
1 file changed, 0 insertions, 187 deletions
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
deleted file mode 100644
index 4c9ab2a644..0000000000
--- a/src/armnn/backends/RefWorkloads/ConvImpl.hpp
+++ /dev/null
@@ -1,187 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "RefWorkloadUtils.hpp"
-
-#include <armnn/Tensor.hpp>
-
-#include <boost/assert.hpp>
-#include <boost/numeric/conversion/cast.hpp>
-
-#include <cmath>
-#include <limits>
-
-namespace armnn
-{
-
-/// Performs multiplication of an integer by a multiplier which is less than one,
-/// using quantized integer arithmetic consistent with AndroidNN's CPU executor.
-struct QuantizedMultiplierSmallerThanOne
-{
-public:
- /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
- /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
- /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
- QuantizedMultiplierSmallerThanOne(float multiplier);
-
- /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
- int32_t operator*(int32_t rhs) const;
-
-private:
- /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
- static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
-
- /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
- static int32_t RoundingDivideByPOT(int32_t x, int exponent);
-
- int32_t m_Multiplier;
- int32_t m_RightShift;
-};
-
-/// An implementation shared by normal and depthwise convolution.
-template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
-static void ConvImpl(ConvData data,
- const InputType* inputData,
- float inputScale,
- int32_t inputOffset,
- const InputType* filterData,
- float filterScale,
- int32_t filterOffset,
- const BiasType* biasData,
- InputType* outputData,
- float outputScale,
- int32_t outputOffset,
- const TensorInfo& filterInfo,
- bool depthwise = false)
-{
- if (data.m_Parameters.m_BiasEnabled && !biasData)
- {
- throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
- }
-
- const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
- const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
-
- unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1;
- unsigned int channelsInput = filterInfo.GetShape()[1];
- unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
-
- unsigned int batchSize = outputInfo0.GetShape()[0];
- unsigned int heightOutput = outputInfo0.GetShape()[2];
- unsigned int widthOutput = outputInfo0.GetShape()[3];
- unsigned int heightInput = inputInfo0.GetShape()[2];
- unsigned int widthInput = inputInfo0.GetShape()[3];
-
- unsigned int heightFilter = filterInfo.GetShape()[2];
- unsigned int widthFilter = filterInfo.GetShape()[3];
-
- unsigned int paddingTop = data.m_Parameters.m_PadTop;
- unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
- unsigned int yStride = data.m_Parameters.m_StrideY;
- unsigned int xStride = data.m_Parameters.m_StrideX;
-
- // The world's least efficient convolution.
- for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
- {
- for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
- {
- for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
- {
- for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
- {
- // This loop goes over each output element.
- AccumulatorType sum = AccumulatorType();
-
- // For depthwise convolution, each output channel corresponds to exactly one input channel.
- // For normal convolution, we must loop over every input channel.
- for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
- {
- unsigned int depthwiseMultiplierIdx = 0;
- if (depthwise)
- {
- cInput = cOutput / depthMult;
- depthwiseMultiplierIdx = cOutput % depthMult;
- }
-
- for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
- {
- for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
- {
- // This loop goes over each input element for each output element.
-
- unsigned int filterIndex;
-
- // The filter tensor layout (and hence the index calculation) depends on whether the convolution is depthwise.
- if (depthwise)
- {
- filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
- xFilter;
- }
- else
- {
- filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
- cInput * widthFilter * heightFilter +
- yFilter * widthFilter +
- xFilter;
- }
- AccumulatorType filterValue = filterData[filterIndex] -
- boost::numeric_cast<AccumulatorType>(filterOffset);
-
- unsigned int yInput = yOutput * yStride + yFilter;
- unsigned int xInput = xOutput * xStride + xFilter;
-
- AccumulatorType inputValue;
-
- // Check if we're in the padding.
- if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
- xInput < paddingLeft || xInput >= widthInput + paddingLeft)
- {
- inputValue = AccumulatorType();
- }
- else
- {
- inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput +
- widthInput * heightInput * cInput +
- widthInput * (yInput - paddingTop) +
- xInput - paddingLeft] -
- boost::numeric_cast<AccumulatorType>(inputOffset);
- }
- sum += filterValue * inputValue;
- }
- }
- }
-
- if (data.m_Parameters.m_BiasEnabled)
- {
- sum += biasData[cOutput];
- }
-
- if (outputScale != 0.0f)
- {
- float multiplier = (inputScale * filterScale) / outputScale;
- // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
- // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
- // sum = std::round(multiplier * sum + outputOffset);
- sum = boost::numeric_cast<AccumulatorType>(
- QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
- + boost::numeric_cast<AccumulatorType>(outputOffset);
- sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
- }
-
- outputData[batchIdx * widthOutput * heightOutput * channelsOutput +
- widthOutput * heightOutput * cOutput +
- widthOutput * yOutput +
- xOutput] = boost::numeric_cast<InputType>(sum);
- }
- }
- }
- }
-}
-
-} //namespace armnn
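
Note: this header only declares the QuantizedMultiplierSmallerThanOne helpers; their definitions lived in the companion ConvImpl.cpp, which is not part of this diff. The sketch below illustrates how such helpers are typically implemented, following gemmlowp's reference arithmetic, which the doc comments above cite as the source. The free-function form and the function and parameter names are illustrative assumptions, not the verbatim ArmNN definitions.

// Minimal sketch, assuming free functions rather than the struct's private
// members; the arithmetic follows gemmlowp's reference implementation.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

// Decompose a real multiplier in [0, 1) into a Q31 fixed-point multiplier and
// a right shift, so that multiplier ~= quantizedMultiplier * 2^-31 * 2^-rightShift.
inline void QuantizeMultiplierSmallerThanOne(float multiplier,
                                             int32_t* quantizedMultiplier,
                                             int* rightShift)
{
    assert(multiplier >= 0.0f && multiplier < 1.0f);
    if (multiplier == 0.0f)
    {
        *quantizedMultiplier = 0;
        *rightShift = 0;
        return;
    }
    int exponent;
    const double q = std::frexp(multiplier, &exponent); // multiplier = q * 2^exponent, q in [0.5, 1)
    *rightShift = -exponent;
    int64_t qFixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    if (qFixed == (1ll << 31)) // q was rounded up to exactly 1.0; renormalize.
    {
        qFixed /= 2;
        --*rightShift;
    }
    assert(*rightShift >= 0);
    *quantizedMultiplier = static_cast<int32_t>(qFixed);
}

// Return the high 32 bits of 2*a*b, rounded to nearest, saturating the single
// overflow case (a == b == INT32_MIN).
inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
{
    const bool overflow = (a == b) && (a == std::numeric_limits<int32_t>::min());
    const int64_t ab64 = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t nudge = ab64 >= 0 ? (1 << 30) : (1 - (1 << 30)); // Round to nearest.
    const int32_t abX2High32 = static_cast<int32_t>((ab64 + nudge) / (1ll << 31));
    return overflow ? std::numeric_limits<int32_t>::max() : abX2High32;
}

// Divide by 2^exponent, rounding to nearest (ties away from zero).
inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
{
    assert(exponent >= 0 && exponent <= 31);
    const int32_t mask = static_cast<int32_t>((1ll << exponent) - 1);
    const int32_t remainder = x & mask;
    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
    return (x >> exponent) + (remainder > threshold ? 1 : 0);
}

// The header's operator* then composes the two steps:
//   RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(rhs, m_Multiplier), m_RightShift)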
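
Building on that sketch, here is a small usage example for the requantization step at the end of ConvImpl: apply multiplier = inputScale * filterScale / outputScale to the int32 accumulator, add outputOffset, and clamp to the uint8 range. All scale and offset values here are hypothetical. With multiplier = 0.25, frexp yields q = 0.5 and exponent = -1, so the fixed-point multiplier is 2^30 with a right shift of 1, and an accumulator of 100 requantizes to round(0.25 * 100) = 25.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    // Hypothetical quantization parameters, chosen so the multiplier is 0.25.
    const float inputScale = 0.5f, filterScale = 0.5f, outputScale = 1.0f;
    const int32_t outputOffset = 10;
    const float multiplier = (inputScale * filterScale) / outputScale;

    int32_t qMult = 0;
    int shift = 0;
    QuantizeMultiplierSmallerThanOne(multiplier, &qMult, &shift); // qMult = 2^30, shift = 1

    int32_t sum = 100; // An int32 accumulator produced by the inner loops.
    sum = RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(sum, qMult), shift)
          + outputOffset;
    sum = std::min<int32_t>(std::max<int32_t>(sum, 0), 255); // Clamp to the uint8 output range.
    std::cout << sum << std::endl; // Prints 35, i.e. round(0.25 * 100) + 10.
}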