path: root/src/backends/reference/workloads/ConvImpl.hpp
author     David Beck <david.beck@arm.com>            2018-09-24 13:18:27 +0100
committer  Matthew Bentham <matthew.bentham@arm.com>  2018-10-10 16:16:57 +0100
commit     b4540bef0b0327683fe8e63f727c1212800dc2a9 (patch)
tree       e1ea8bb6ee981640a1c469ceb556ed648ffde411 /src/backends/reference/workloads/ConvImpl.hpp
parent     2d9dd36fb6bc20b370701ab15463359b9db35f18 (diff)
download   armnn-b4540bef0b0327683fe8e63f727c1212800dc2a9.tar.gz
IVGCVSW-1898 : Ref backend folder structure
* Reference backend is renamed to backends/reference as per https://confluence.arm.com/display/MLENG/Pluggable+backends

Change-Id: I27a13c274eb60995dfb459e3c49c0e2f60bcd32c
Diffstat (limited to 'src/backends/reference/workloads/ConvImpl.hpp')
-rw-r--r--  src/backends/reference/workloads/ConvImpl.hpp  187
1 file changed, 187 insertions(+), 0 deletions(-)
diff --git a/src/backends/reference/workloads/ConvImpl.hpp b/src/backends/reference/workloads/ConvImpl.hpp
new file mode 100644
index 0000000000..4c9ab2a644
--- /dev/null
+++ b/src/backends/reference/workloads/ConvImpl.hpp
@@ -0,0 +1,187 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <boost/assert.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <cmath>
+#include <limits>
+
+namespace armnn
+{
+
+/// Performs multiplication of an integer with a multiplier which is less than one,
+/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor.
+struct QuantizedMultiplierSmallerThanOne
+{
+public:
+    /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier.
+    /// This stores the appropriate integer quantities (derived from the given multiplier) for later use.
+    /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne().
+    QuantizedMultiplierSmallerThanOne(float multiplier);
+
+    /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne().
+    int32_t operator*(int32_t rhs) const;
+
+private:
+    /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul().
+    static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b);
+
+    /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT().
+    static int32_t RoundingDivideByPOT(int32_t x, int exponent);
+
+    int32_t m_Multiplier;
+    int32_t m_RightShift;
+};
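+
+// A sketch of how such a multiplier is typically decomposed (the constructor is defined in the
+// corresponding .cpp, which is not part of this diff, so the exact details may differ). Assuming
+// the usual gemmlowp/AndroidNN scheme:
+//
+//     int exponent;
+//     const double q = std::frexp(multiplier, &exponent); // multiplier == q * 2^exponent, q in [0.5, 1)
+//     m_Multiplier = static_cast<int32_t>(std::round(q * (1ll << 31)));  // Q0.31 fixed-point mantissa
+//     m_RightShift = -exponent;                                          // exponent <= 0 for multipliers < 1
+//
+// operator*(x) is then roughly RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(x, m_Multiplier), m_RightShift).
+// For example, multiplier == 0.25f gives m_Multiplier == 1 << 30 and m_RightShift == 1, so 1000 * 0.25 -> 250.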
+
+/// An implementation shared by normal and depthwise convolution.
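+/// Note: based on how the surrounding reference workloads use this helper (an assumption, not
+/// enforced here), InputType/BiasType/AccumulatorType are expected to be uint8_t/int32_t/int32_t
+/// for the quantized path (hence the clamp to [0, 255] below) and float for all three in the
+/// float32 path; ConvData is the workload's queue descriptor type.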
+template<typename ConvData, typename InputType, typename BiasType, typename AccumulatorType>
+static void ConvImpl(ConvData data,
+                     const InputType* inputData,
+                     float inputScale,
+                     int32_t inputOffset,
+                     const InputType* filterData,
+                     float filterScale,
+                     int32_t filterOffset,
+                     const BiasType* biasData,
+                     InputType* outputData,
+                     float outputScale,
+                     int32_t outputOffset,
+                     const TensorInfo& filterInfo,
+                     bool depthwise = false)
+{
+    if (data.m_Parameters.m_BiasEnabled && !biasData)
+    {
+        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
+    }
+
+    const TensorInfo& inputInfo0  = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
+
+    unsigned int depthMult      = depthwise ? filterInfo.GetShape()[0] : 1;
+    unsigned int channelsInput  = filterInfo.GetShape()[1];
+    unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
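+    // Note: the indexing below assumes NCHW data tensors, with the filter laid out as
+    // [channelsOutput, channelsInput, heightFilter, widthFilter] for a regular convolution and as
+    // [depthMultiplier, channelsInput, heightFilter, widthFilter] for a depthwise convolution.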
+
+    unsigned int batchSize    = outputInfo0.GetShape()[0];
+    unsigned int heightOutput = outputInfo0.GetShape()[2];
+    unsigned int widthOutput  = outputInfo0.GetShape()[3];
+    unsigned int heightInput  = inputInfo0.GetShape()[2];
+    unsigned int widthInput   = inputInfo0.GetShape()[3];
+
+    unsigned int heightFilter = filterInfo.GetShape()[2];
+    unsigned int widthFilter  = filterInfo.GetShape()[3];
+
+    unsigned int paddingTop  = data.m_Parameters.m_PadTop;
+    unsigned int paddingLeft = data.m_Parameters.m_PadLeft;
+    unsigned int yStride     = data.m_Parameters.m_StrideY;
+    unsigned int xStride     = data.m_Parameters.m_StrideX;
+
+    // The world's least efficient convolution.
+    for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
+    {
+        for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++)
+        {
+            for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++)
+            {
+                for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++)
+                {
+                    // This loop goes over each output element.
+                    AccumulatorType sum = AccumulatorType();
+
+                    // For depthwise, each output channel corresponds to exactly one input channel.
+                    // For a regular convolution, loop over every input channel.
+                    for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++)
+                    {
+                        unsigned int depthwiseMultiplierIdx = 0;
+                        if (depthwise)
+                        {
+                            cInput                 = cOutput / depthMult;
+                            depthwiseMultiplierIdx = cOutput % depthMult;
+                        }
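+                            // Illustrative example (values not from this code): with channelsInput == 2
+                            // and depthMult == 3, channelsOutput == 6, and output channel 4 reads input
+                            // channel 4 / 3 == 1 with multiplier index 4 % 3 == 1.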
+
+                        for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++)
+                        {
+                            for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++)
+                            {
+                                // This loop goes over each input element for each output element.
+
+                                unsigned int filterIndex;
+
+                                // The filter tensor layout differs between depthwise and regular
+                                // convolution, so the index is computed differently.
+                                if (depthwise)
+                                {
+                                    filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput +
+                                                  cInput * widthFilter * heightFilter +
+                                                  yFilter * widthFilter +
+                                                  xFilter;
+                                }
+                                else
+                                {
+                                    filterIndex = cOutput * widthFilter * heightFilter * channelsInput +
+                                                  cInput * widthFilter * heightFilter +
+                                                  yFilter * widthFilter +
+                                                  xFilter;
+                                }
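+                                // Illustrative example (hypothetical sizes): for a regular convolution
+                                // with channelsInput == 2 and a 3x3 filter, (cOutput, cInput, yFilter,
+                                // xFilter) == (1, 1, 2, 0) gives filterIndex = 1*3*3*2 + 1*3*3 + 2*3 + 0 = 33.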
+                                AccumulatorType filterValue = filterData[filterIndex] -
+                                    boost::numeric_cast<AccumulatorType>(filterOffset);
+
+                                unsigned int yInput = yOutput * yStride + yFilter;
+                                unsigned int xInput = xOutput * xStride + xFilter;
+
+                                AccumulatorType inputValue;
+
+                                // Check if we're in the padding.
+                                if (yInput < paddingTop || yInput >= heightInput + paddingTop ||
+                                    xInput < paddingLeft || xInput >= widthInput + paddingLeft)
+                                {
+                                    inputValue = AccumulatorType();
+                                }
+                                else
+                                {
+                                    inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput +
+                                                           widthInput * heightInput * cInput +
+                                                           widthInput * (yInput - paddingTop) +
+                                                           xInput - paddingLeft] -
+                                                 boost::numeric_cast<AccumulatorType>(inputOffset);
+                                }
+                                sum += filterValue * inputValue;
+                            }
+                        }
+                    }
+
+                    if (data.m_Parameters.m_BiasEnabled)
+                    {
+                        sum += biasData[cOutput];
+                    }
+
+                    if (outputScale != 0.0f)
+                    {
+                        float multiplier = (inputScale * filterScale) / outputScale;
+                        // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent
+                        // with the AndroidNN CPU implementation. This should be (roughly) equivalent to:
+                        //  sum = std::round(multiplier * sum + outputOffset);
+                        sum = boost::numeric_cast<AccumulatorType>(
+                            QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast<int32_t>(sum))
+                            + boost::numeric_cast<AccumulatorType>(outputOffset);
+                        sum = std::min<AccumulatorType>(std::max<AccumulatorType>(sum, 0), 255);
+                    }
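+                    // Illustrative example (values not from this code): inputScale == 0.5f,
+                    // filterScale == 0.5f, outputScale == 1.0f and outputOffset == 10 give
+                    // multiplier == 0.25f, so an accumulated sum of 1000 becomes roughly
+                    // 250 + 10 == 260, which the clamp above reduces to 255.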
+
+                    outputData[batchIdx * widthOutput * heightOutput * channelsOutput +
+                               widthOutput * heightOutput * cOutput +
+                               widthOutput * yOutput +
+                               xOutput] = boost::numeric_cast<InputType>(sum);
+                }
+            }
+        }
+    }
+}
+
+} //namespace armnn